|
@@ -0,0 +1,30353 @@
|
|
|
+# HG changeset patch
|
|
|
+# User Ryan VanderMeulen <ryanvm@gmail.com>
|
|
|
+# Date 1715606280 0
|
|
|
+# Mon May 13 13:18:00 2024 +0000
|
|
|
+# Node ID 2ed1cc5bdd50db86ca07044fd055a5af91508e72
|
|
|
+# Parent e39ce4e4fc2791b48f34920241ee3aff3b1e1293
|
|
|
+Bug 1856630 - Update libjpeg-turbo to version 3.0.3. r=tnikkel
|
|
|
+
|
|
|
+Differential Revision: https://phabricator.services.mozilla.com/D209997
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/ChangeLog.md b/media/libjpeg/ChangeLog.md
|
|
|
+--- a/media/libjpeg/ChangeLog.md
|
|
|
++++ b/media/libjpeg/ChangeLog.md
|
|
|
+@@ -1,8 +1,287 @@
|
|
|
++3.0.3
|
|
|
++=====
|
|
|
++
|
|
|
++### Significant changes relative to 3.0.2:
|
|
|
++
|
|
|
++1. Fixed an issue in the build system, introduced in 3.0.2, that caused all
|
|
|
++libjpeg-turbo components to depend on the Visual C++ run-time DLL when built
|
|
|
++with Visual C++ and CMake 3.15 or later, regardless of the value of the
|
|
|
++`WITH_CRT_DLL` CMake variable.
|
|
|
++
|
|
|
++2. The x86-64 SIMD extensions now include support for Intel Control-flow
|
|
|
++Enforcement Technology (CET), which is enabled automatically if CET is enabled
|
|
|
++in the C compiler.
|
|
|
++
|
|
|
++3. Fixed a regression introduced by 3.0 beta2[6] that made it impossible for
|
|
|
++calling applications to supply custom Huffman tables when generating
|
|
|
++12-bit-per-component lossy JPEG images using the libjpeg API.
|
|
|
++
|
|
|
++4. Fixed a segfault that occurred when attempting to use the jpegtran `-drop`
|
|
|
++option with a specially-crafted malformed input image or drop image
|
|
|
++(specifically an image in which all of the scans contain fewer components than
|
|
|
++the number of components specified in the Start Of Frame segment.)
|
|
|
++
|
|
|
++
|
|
|
++3.0.2
|
|
|
++=====
|
|
|
++
|
|
|
++### Significant changes relative to 3.0.1:
|
|
|
++
|
|
|
++1. Fixed a signed integer overflow in the `tj3CompressFromYUV8()`,
|
|
|
++`tj3DecodeYUV8()`, `tj3DecompressToYUV8()`, and `tj3EncodeYUV8()` functions,
|
|
|
++detected by the Clang and GCC undefined behavior sanitizers, that could be
|
|
|
++triggered by setting the `align` parameter to an unreasonably large value.
|
|
|
++This issue did not pose a security threat, but removing the warning made it
|
|
|
++easier to detect actual security issues, should they arise in the future.
|
|
|
++
|
|
|
++2. Introduced a new parameter (`TJPARAM_MAXMEMORY` in the TurboJPEG C API and
|
|
|
++`TJ.PARAM_MAXMEMORY` in the TurboJPEG Java API) and a corresponding TJBench
|
|
|
++option (`-maxmemory`) for specifying the maximum amount of memory (in
|
|
|
++megabytes) that will be allocated for intermediate buffers, which are used with
|
|
|
++progressive JPEG compression and decompression, optimized baseline entropy
|
|
|
++coding, lossless JPEG compression, and lossless transformation. The new
|
|
|
++parameter and option serve the same purpose as the `max_memory_to_use` field in
|
|
|
++the `jpeg_memory_mgr` struct in the libjpeg API, the `JPEGMEM` environment
|
|
|
++variable, and the cjpeg/djpeg/jpegtran `-maxmemory` option.
|
|
|
++
|
|
|
++3. Introduced a new parameter (`TJPARAM_MAXPIXELS` in the TurboJPEG C API and
|
|
|
++`TJ.PARAM_MAXPIXELS` in the TurboJPEG Java API) and a corresponding TJBench
|
|
|
++option (`-maxpixels`) for specifying the maximum number of pixels that the
|
|
|
++decompression, lossless transformation, and packed-pixel image loading
|
|
|
++functions/methods will process.
|
|
|
++
|
|
|
++4. Fixed an error ("Unsupported color conversion request") that occurred when
|
|
|
++attempting to decompress a 3-component lossless JPEG image without an Adobe
|
|
|
++APP14 marker. The decompressor now assumes that a 3-component lossless JPEG
|
|
|
++image without an Adobe APP14 marker uses the RGB colorspace if its component
|
|
|
++IDs are 1, 2, and 3.
|
|
|
++
|
|
|
++
|
|
|
++3.0.1
|
|
|
++=====
|
|
|
++
|
|
|
++### Significant changes relative to 3.0.0:
|
|
|
++
|
|
|
++1. The x86-64 SIMD functions now use a standard stack frame, prologue, and
|
|
|
++epilogue so that debuggers and profilers can reliably capture backtraces from
|
|
|
++within the functions.
|
|
|
++
|
|
|
++2. Fixed two minor issues in the interblock smoothing algorithm that caused
|
|
|
++mathematical (but not necessarily perceptible) edge block errors when
|
|
|
++decompressing progressive JPEG images exactly two MCU blocks in width or that
|
|
|
++use vertical chrominance subsampling.
|
|
|
++
|
|
|
++3. Fixed a regression introduced by 3.0 beta2[6] that, in rare cases, caused
|
|
|
++the C Huffman encoder (which is not used by default on x86 and Arm CPUs) to
|
|
|
++generate incorrect results if the Neon SIMD extensions were explicitly disabled
|
|
|
++at build time (by setting the `WITH_SIMD` CMake variable to `0`) in an AArch64
|
|
|
++build of libjpeg-turbo.
|
|
|
++
|
|
|
++
|
|
|
++3.0.0
|
|
|
++=====
|
|
|
++
|
|
|
++### Significant changes relative to 3.0 beta2:
|
|
|
++
|
|
|
++1. The TurboJPEG API now supports 4:4:1 (transposed 4:1:1) chrominance
|
|
|
++subsampling, which allows losslessly transposed or rotated 4:1:1 JPEG images to
|
|
|
++be losslessly cropped, partially decompressed, or decompressed to planar YUV
|
|
|
++images.
|
|
|
++
|
|
|
++2. Fixed various segfaults and buffer overruns (CVE-2023-2804) that occurred
|
|
|
++when attempting to decompress various specially-crafted malformed
|
|
|
++12-bit-per-component and 16-bit-per-component lossless JPEG images using color
|
|
|
++quantization or merged chroma upsampling/color conversion. The underlying
|
|
|
++cause of these issues was that the color quantization and merged chroma
|
|
|
++upsampling/color conversion algorithms were not designed with lossless
|
|
|
++decompression in mind. Since libjpeg-turbo explicitly does not support color
|
|
|
++conversion when compressing or decompressing lossless JPEG images, merged
|
|
|
++chroma upsampling/color conversion never should have been enabled for such
|
|
|
++images. Color quantization is a legacy feature that serves little or no
|
|
|
++purpose with lossless JPEG images, so it is also now disabled when
|
|
|
++decompressing such images. (As a result, djpeg can no longer decompress a
|
|
|
++lossless JPEG image into a GIF image.)
|
|
|
++
|
|
|
++3. Fixed an oversight in 1.4 beta1[8] that caused various segfaults and buffer
|
|
|
++overruns when attempting to decompress various specially-crafted malformed
|
|
|
++12-bit-per-component JPEG images using djpeg with both color quantization and
|
|
|
++RGB565 color conversion enabled.
|
|
|
++
|
|
|
++4. Fixed an issue whereby `jpeg_crop_scanline()` sometimes miscalculated the
|
|
|
++downsampled width for components with 4x2 or 2x4 subsampling factors if
|
|
|
++decompression scaling was enabled. This caused the components to be upsampled
|
|
|
++incompletely, which caused the color converter to read from uninitialized
|
|
|
++memory. With 12-bit data precision, this caused a buffer overrun or underrun
|
|
|
++and subsequent segfault if the sample value read from uninitialized memory was
|
|
|
++outside of the valid sample range.
|
|
|
++
|
|
|
++5. Fixed a long-standing issue whereby the `tj3Transform()` function, when used
|
|
|
++with the `TJXOP_TRANSPOSE`, `TJXOP_TRANSVERSE`, `TJXOP_ROT90`, or
|
|
|
++`TJXOP_ROT270` transform operation and without automatic JPEG destination
|
|
|
++buffer (re)allocation or lossless cropping, computed the worst-case transformed
|
|
|
++JPEG image size based on the source image dimensions rather than the
|
|
|
++transformed image dimensions. If a calling program allocated the JPEG
|
|
|
++destination buffer based on the transformed image dimensions, as the API
|
|
|
++documentation instructs, and attempted to transform a specially-crafted 4:2:2,
|
|
|
++4:4:0, 4:1:1, or 4:4:1 JPEG source image containing a large amount of metadata,
|
|
|
++the issue caused `tj3Transform()` to overflow the JPEG destination buffer
|
|
|
++rather than fail gracefully. The issue could be worked around by setting
|
|
|
++`TJXOPT_COPYNONE`. Note that, irrespective of this issue, `tj3Transform()`
|
|
|
++cannot reliably transform JPEG source images that contain a large amount of
|
|
|
++metadata unless automatic JPEG destination buffer (re)allocation is used or
|
|
|
++`TJXOPT_COPYNONE` is set.
|
|
|
++
|
|
|
++6. Fixed a regression introduced by 3.0 beta2[6] that prevented the djpeg
|
|
|
++`-map` option from working when decompressing 12-bit-per-component lossy JPEG
|
|
|
++images.
|
|
|
++
|
|
|
++7. Fixed an issue that caused the C Huffman encoder (which is not used by
|
|
|
++default on x86 and Arm CPUs) to read from uninitialized memory when attempting
|
|
|
++to transform a specially-crafted malformed arithmetic-coded JPEG source image
|
|
|
++into a baseline Huffman-coded JPEG destination image.
|
|
|
++
|
|
|
++
|
|
|
++2.1.91 (3.0 beta2)
|
|
|
++==================
|
|
|
++
|
|
|
++### Significant changes relative to 2.1.5.1:
|
|
|
++
|
|
|
++1. Significantly sped up the computation of optimal Huffman tables. This
|
|
|
++speeds up the compression of tiny images by as much as 2x and provides a
|
|
|
++noticeable speedup for images as large as 256x256 when using optimal Huffman
|
|
|
++tables.
|
|
|
++
|
|
|
++2. All deprecated fields, constructors, and methods in the TurboJPEG Java API
|
|
|
++have been removed.
|
|
|
++
|
|
|
++3. Arithmetic entropy coding is now supported with 12-bit-per-component JPEG
|
|
|
++images.
|
|
|
++
|
|
|
++4. Overhauled the TurboJPEG API to address long-standing limitations and to
|
|
|
++make the API more extensible and intuitive:
|
|
|
++
|
|
|
++ - All C function names are now prefixed with `tj3`, and all version
|
|
|
++suffixes have been removed from the function names. Future API overhauls will
|
|
|
++increment the prefix to `tj4`, etc., thus retaining backward API/ABI
|
|
|
++compatibility without versioning each individual function.
|
|
|
++ - Stateless boolean flags have been replaced with stateful integer API
|
|
|
++parameters, the values of which persist between function calls. New
|
|
|
++functions/methods (`tj3Set()`/`TJCompressor.set()`/`TJDecompressor.set()` and
|
|
|
++`tj3Get()`/`TJCompressor.get()`/`TJDecompressor.get()`) can be used to set and
|
|
|
++query the value of a particular API parameter.
|
|
|
++ - The JPEG quality and subsampling are now implemented using API
|
|
|
++parameters rather than stateless function arguments (C) or dedicated set/get
|
|
|
++methods (Java.)
|
|
|
++ - `tj3DecompressHeader()` now stores all relevant information about the
|
|
|
++JPEG image, including the width, height, subsampling type, entropy coding
|
|
|
++algorithm, etc., in API parameters rather than returning that information
|
|
|
++through pointer arguments.
|
|
|
++ - `TJFLAG_LIMITSCANS`/`TJ.FLAG_LIMITSCANS` has been reimplemented as an
|
|
|
++API parameter (`TJPARAM_SCANLIMIT`/`TJ.PARAM_SCANLIMIT`) that allows the number
|
|
|
++of scans to be specified.
|
|
|
++ - Optimized baseline entropy coding (the computation of optimal Huffman
|
|
|
++tables, as opposed to using the default Huffman tables) can now be specified,
|
|
|
++using a new API parameter (`TJPARAM_OPTIMIZE`/`TJ.PARAM_OPTIMIZE`), a new
|
|
|
++transform option (`TJXOPT_OPTIMIZE`/`TJTransform.OPT_OPTIMIZE`), and a new
|
|
|
++TJBench option (`-optimize`.)
|
|
|
++ - Arithmetic entropy coding can now be specified or queried, using a new
|
|
|
++API parameter (`TJPARAM_ARITHMETIC`/`TJ.PARAM_ARITHMETIC`), a new transform
|
|
|
++option (`TJXOPT_ARITHMETIC`/`TJTransform.OPT_ARITHMETIC`), and a new TJBench
|
|
|
++option (`-arithmetic`.)
|
|
|
++ - The restart marker interval can now be specified, using new API
|
|
|
++parameters (`TJPARAM_RESTARTROWS`/`TJ.PARAM_RESTARTROWS` and
|
|
|
++`TJPARAM_RESTARTBLOCKS`/`TJ.PARAM_RESTARTBLOCKS`) and a new TJBench option
|
|
|
++(`-restart`.)
|
|
|
++ - Pixel density can now be specified or queried, using new API parameters
|
|
|
++(`TJPARAM_XDENSITY`/`TJ.PARAM_XDENSITY`,
|
|
|
++`TJPARAM_YDENSITY`/`TJ.PARAM_YDENSITY`, and
|
|
|
++`TJPARAM_DENSITYUNITS`/`TJ.PARAM_DENSITYUNITS`.)
|
|
|
++ - The accurate DCT/IDCT algorithms are now the default for both
|
|
|
++compression and decompression, since the "fast" algorithms are considered to be
|
|
|
++a legacy feature. (The "fast" algorithms do not pass the ISO compliance tests,
|
|
|
++and those algorithms are not any faster than the accurate algorithms on modern
|
|
|
++x86 CPUs.)
|
|
|
++ - All C initialization functions have been combined into a single function
|
|
|
++(`tj3Init()`) that accepts an integer argument specifying the subsystems to
|
|
|
++initialize.
|
|
|
++ - All C functions now use the `const` keyword for pointer arguments that
|
|
|
++point to unmodified buffers (and for both dimensions of pointer arguments that
|
|
|
++point to sets of unmodified buffers.)
|
|
|
++ - All C functions now use `size_t` rather than `unsigned long` to
|
|
|
++represent buffer sizes, for compatibility with `malloc()` and to avoid
|
|
|
++disparities in the size of `unsigned long` between LP64 (Un*x) and LLP64
|
|
|
++(Windows) operating systems.
|
|
|
++ - All C buffer size functions now return 0 if an error occurs, rather than
|
|
|
++trying to awkwardly return -1 in an unsigned data type (which could easily be
|
|
|
++misinterpreted as a very large value.)
|
|
|
++ - Decompression scaling is now enabled explicitly, using a new
|
|
|
++function/method (`tj3SetScalingFactor()`/`TJDecompressor.setScalingFactor()`),
|
|
|
++rather than implicitly using awkward "desired width"/"desired height"
|
|
|
++arguments.
|
|
|
++ - Partial image decompression has been implemented, using a new
|
|
|
++function/method (`tj3SetCroppingRegion()`/`TJDecompressor.setCroppingRegion()`)
|
|
|
++and a new TJBench option (`-crop`.)
|
|
|
++ - The JPEG colorspace can now be specified explicitly when compressing,
|
|
|
++using a new API parameter (`TJPARAM_COLORSPACE`/`TJ.PARAM_COLORSPACE`.) This
|
|
|
++allows JPEG images with the RGB and CMYK colorspaces to be created.
|
|
|
++ - TJBench no longer generates error/difference images, since identical
|
|
|
++functionality is already available in ImageMagick.
|
|
|
++ - JPEG images with unknown subsampling configurations can now be
|
|
|
++fully decompressed into packed-pixel images or losslessly transformed (with the
|
|
|
++exception of lossless cropping.) They cannot currently be partially
|
|
|
++decompressed or decompressed into planar YUV images.
|
|
|
++ - `tj3Destroy()` now silently accepts a NULL handle.
|
|
|
++ - `tj3Alloc()` and `tj3Free()` now return/accept void pointers, as
|
|
|
++`malloc()` and `free()` do.
|
|
|
++ - The C image I/O functions now accept a TurboJPEG instance handle, which
|
|
|
++is used to transmit/receive API parameter values and to receive error
|
|
|
++information.
|
|
|
++
|
|
|
++5. Added support for 8-bit-per-component, 12-bit-per-component, and
|
|
|
++16-bit-per-component lossless JPEG images. A new libjpeg API function
|
|
|
++(`jpeg_enable_lossless()`), TurboJPEG API parameters
|
|
|
++(`TJPARAM_LOSSLESS`/`TJ.PARAM_LOSSLESS`,
|
|
|
++`TJPARAM_LOSSLESSPSV`/`TJ.PARAM_LOSSLESSPSV`, and
|
|
|
++`TJPARAM_LOSSLESSPT`/`TJ.PARAM_LOSSLESSPT`), and a cjpeg/TJBench option
|
|
|
++(`-lossless`) can be used to create a lossless JPEG image. (Decompression of
|
|
|
++lossless JPEG images is handled automatically.) Refer to
|
|
|
++[libjpeg.txt](libjpeg.txt), [usage.txt](usage.txt), and the TurboJPEG API
|
|
|
++documentation for more details.
|
|
|
++
|
|
|
++6. Added support for 12-bit-per-component (lossy and lossless) and
|
|
|
++16-bit-per-component (lossless) JPEG images to the libjpeg and TurboJPEG APIs:
|
|
|
++
|
|
|
++ - The existing `data_precision` field in `jpeg_compress_struct` and
|
|
|
++`jpeg_decompress_struct` has been repurposed to enable the creation of
|
|
|
++12-bit-per-component and 16-bit-per-component JPEG images or to detect whether
|
|
|
++a 12-bit-per-component or 16-bit-per-component JPEG image is being
|
|
|
++decompressed.
|
|
|
++ - New 12-bit-per-component and 16-bit-per-component versions of
|
|
|
++`jpeg_write_scanlines()` and `jpeg_read_scanlines()`, as well as new
|
|
|
++12-bit-per-component versions of `jpeg_write_raw_data()`,
|
|
|
++`jpeg_skip_scanlines()`, `jpeg_crop_scanline()`, and `jpeg_read_raw_data()`,
|
|
|
++provide interfaces for compressing from/decompressing to 12-bit-per-component
|
|
|
++and 16-bit-per-component packed-pixel and planar YUV image buffers.
|
|
|
++ - New 12-bit-per-component and 16-bit-per-component compression,
|
|
|
++decompression, and image I/O functions/methods have been added to the TurboJPEG
|
|
|
++API, and a new API parameter (`TJPARAM_PRECISION`/`TJ.PARAM_PRECISION`) can be
|
|
|
++used to query the data precision of a JPEG image. (YUV functions are currently
|
|
|
++limited to 8-bit data precision but can be expanded to accommodate 12-bit data
|
|
|
++precision in the future, if such is deemed beneficial.)
|
|
|
++ - A new cjpeg and TJBench command-line argument (`-precision`) can be used
|
|
|
++to create a 12-bit-per-component or 16-bit-per-component JPEG image.
|
|
|
++(Decompression and transformation of 12-bit-per-component and
|
|
|
++16-bit-per-component JPEG images are handled automatically.)
|
|
|
++
|
|
|
++ Refer to [libjpeg.txt](libjpeg.txt), [usage.txt](usage.txt), and the
|
|
|
++TurboJPEG API documentation for more details.
|
|
|
++
|
|
|
++
|
|
|
+ 2.1.5.1
|
|
|
+ =======
|
|
|
+
|
|
|
+ ### Significant changes relative to 2.1.5:
|
|
|
+
|
|
|
+ 1. The SIMD dispatchers in libjpeg-turbo 2.1.4 and prior stored the list of
|
|
|
+ supported SIMD instruction sets in a global variable, which caused an innocuous
|
|
|
+ race condition whereby the variable could have been initialized multiple times
|
|
|
+@@ -194,19 +473,19 @@ segfault in the 64-bit SSE2 Huffman enco
|
|
|
+ transform a specially-crafted malformed JPEG image.
|
|
|
+
|
|
|
+
|
|
|
+ 2.1.0
|
|
|
+ =====
|
|
|
+
|
|
|
+ ### Significant changes relative to 2.1 beta1:
|
|
|
+
|
|
|
+-1. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to
|
|
|
+-decompress certain progressive JPEG images with one or more component planes of
|
|
|
+-width 8 or less caused a buffer overrun.
|
|
|
++1. Fixed a regression (CVE-2021-29390) introduced by 2.1 beta1[6(b)] whereby
|
|
|
++attempting to decompress certain progressive JPEG images with one or more
|
|
|
++component planes of width 8 or less caused a buffer overrun.
|
|
|
+
|
|
|
+ 2. Fixed a regression introduced by 2.1 beta1[6(b)] whereby attempting to
|
|
|
+ decompress a specially-crafted malformed progressive JPEG image caused the
|
|
|
+ block smoothing algorithm to read from uninitialized memory.
|
|
|
+
|
|
|
+ 3. Fixed an issue in the Arm Neon SIMD Huffman encoders that caused the
|
|
|
+ encoders to generate incorrect results when using the Clang compiler with
|
|
|
+ Visual Studio.
|
|
|
+@@ -1435,17 +1714,17 @@ libjpeg-turbo can now be built by passin
|
|
|
+ configure (Unix) or `-DWITH_12BIT=1` to cmake (Windows.) 12-bit JPEG support
|
|
|
+ is included only for convenience. Enabling this feature disables all of the
|
|
|
+ performance features in libjpeg-turbo, as well as arithmetic coding and the
|
|
|
+ TurboJPEG API. The resulting library still contains the other libjpeg-turbo
|
|
|
+ features (such as the colorspace extensions), but in general, it performs no
|
|
|
+ faster than libjpeg v6b.
|
|
|
+
|
|
|
+ 14. Added ARM 64-bit SIMD acceleration for the YCC-to-RGB color conversion
|
|
|
+-and IDCT algorithms (both are used during JPEG decompression.) For unknown
|
|
|
++and IDCT algorithms (both are used during JPEG decompression.) For unknown
|
|
|
+ reasons (probably related to clang), this code cannot currently be compiled for
|
|
|
+ iOS.
|
|
|
+
|
|
|
+ 15. Fixed an extremely rare bug (CVE-2014-9092) that could cause the Huffman
|
|
|
+ encoder's local buffer to overrun when a very high-frequency MCU is compressed
|
|
|
+ using quality 100 and no subsampling, and when the JPEG output buffer is being
|
|
|
+ dynamically resized by the destination manager. This issue was so rare that,
|
|
|
+ even with a test program specifically designed to make the bug occur (by
|
|
|
+@@ -1921,17 +2200,17 @@ 6. Include distribution package for Cygw
|
|
|
+
|
|
|
+ 7. No longer necessary to specify `--without-simd` on non-x86 architectures,
|
|
|
+ and unit tests now work on those architectures.
|
|
|
+
|
|
|
+
|
|
|
+ 0.0.93
|
|
|
+ ======
|
|
|
+
|
|
|
+-### Significant changes since 0.0.91:
|
|
|
++### Significant changes relative to 0.0.91:
|
|
|
+
|
|
|
+ 1. 2982659: Fixed x86-64 build on FreeBSD systems
|
|
|
+
|
|
|
+ 2. 2988188: Added support for Windows 64-bit systems
|
|
|
+
|
|
|
+
|
|
|
+ 0.0.91
|
|
|
+ ======
|
|
|
+diff --git a/media/libjpeg/LICENSE.md b/media/libjpeg/LICENSE.md
|
|
|
+--- a/media/libjpeg/LICENSE.md
|
|
|
++++ b/media/libjpeg/LICENSE.md
|
|
|
+@@ -1,49 +1,52 @@
|
|
|
+ libjpeg-turbo Licenses
|
|
|
+ ======================
|
|
|
+
|
|
|
+-libjpeg-turbo is covered by three compatible BSD-style open source licenses:
|
|
|
++libjpeg-turbo is covered by two compatible BSD-style open source licenses:
|
|
|
+
|
|
|
+ - The IJG (Independent JPEG Group) License, which is listed in
|
|
|
+ [README.ijg](README.ijg)
|
|
|
+
|
|
|
+- This license applies to the libjpeg API library and associated programs
|
|
|
+- (any code inherited from libjpeg, and any modifications to that code.)
|
|
|
++ This license applies to the libjpeg API library and associated programs,
|
|
|
++ including any code inherited from libjpeg and any modifications to that
|
|
|
++ code. Note that the libjpeg-turbo SIMD source code bears the
|
|
|
++ [zlib License](https://opensource.org/licenses/Zlib), but in the context of
|
|
|
++ the overall libjpeg API library, the terms of the zlib License are subsumed
|
|
|
++ by the terms of the IJG License.
|
|
|
+
|
|
|
+ - The Modified (3-clause) BSD License, which is listed below
|
|
|
+
|
|
|
+- This license covers the TurboJPEG API library and associated programs, as
|
|
|
+- well as the build system.
|
|
|
+-
|
|
|
+-- The [zlib License](https://opensource.org/licenses/Zlib)
|
|
|
+-
|
|
|
+- This license is a subset of the other two, and it covers the libjpeg-turbo
|
|
|
+- SIMD extensions.
|
|
|
++ This license applies to the TurboJPEG API library and associated programs, as
|
|
|
++ well as the build system. Note that the TurboJPEG API library wraps the
|
|
|
++ libjpeg API library, so in the context of the overall TurboJPEG API library,
|
|
|
++ both the terms of the IJG License and the terms of the Modified (3-clause)
|
|
|
++ BSD License apply.
|
|
|
+
|
|
|
+
|
|
|
+ Complying with the libjpeg-turbo Licenses
|
|
|
+ =========================================
|
|
|
+
|
|
|
+ This section provides a roll-up of the libjpeg-turbo licensing terms, to the
|
|
|
+-best of our understanding.
|
|
|
++best of our understanding. This is not a license in and of itself. It is
|
|
|
++intended solely for clarification.
|
|
|
+
|
|
|
+ 1. If you are distributing a modified version of the libjpeg-turbo source,
|
|
|
+ then:
|
|
|
+
|
|
|
+ 1. You cannot alter or remove any existing copyright or license notices
|
|
|
+ from the source.
|
|
|
+
|
|
|
+ **Origin**
|
|
|
+ - Clause 1 of the IJG License
|
|
|
+ - Clause 1 of the Modified BSD License
|
|
|
+ - Clauses 1 and 3 of the zlib License
|
|
|
+
|
|
|
+ 2. You must add your own copyright notice to the header of each source
|
|
|
+- file you modified, so others can tell that you modified that file (if
|
|
|
++ file you modified, so others can tell that you modified that file. (If
|
|
|
+ there is not an existing copyright header in that file, then you can
|
|
|
+ simply add a notice stating that you modified the file.)
|
|
|
+
|
|
|
+ **Origin**
|
|
|
+ - Clause 1 of the IJG License
|
|
|
+ - Clause 2 of the zlib License
|
|
|
+
|
|
|
+ 3. You must include the IJG README file, and you must not alter any of the
|
|
|
+@@ -114,18 +117,18 @@ LIABLE FOR ANY DIRECT, INDIRECT, INCIDEN
|
|
|
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
+ POSSIBILITY OF SUCH DAMAGE.
|
|
|
+
|
|
|
+
|
|
|
+-Why Three Licenses?
|
|
|
+-===================
|
|
|
++Why Two Licenses?
|
|
|
++=================
|
|
|
+
|
|
|
+ The zlib License could have been used instead of the Modified (3-clause) BSD
|
|
|
+ License, and since the IJG License effectively subsumes the distribution
|
|
|
+ conditions of the zlib License, this would have effectively placed
|
|
|
+ libjpeg-turbo binary distributions under the IJG License. However, the IJG
|
|
|
+ License specifically refers to the Independent JPEG Group and does not extend
|
|
|
+ attribution and endorsement protections to other entities. Thus, it was
|
|
|
+ desirable to choose a license that granted us the same protections for new code
|
|
|
+diff --git a/media/libjpeg/MOZCHANGES b/media/libjpeg/MOZCHANGES
|
|
|
+--- a/media/libjpeg/MOZCHANGES
|
|
|
++++ b/media/libjpeg/MOZCHANGES
|
|
|
+@@ -43,16 +43,20 @@ To upgrade to a new revision of libjpeg-
|
|
|
+ * Update jconfig.h and jconfigint.h as noted previously.
|
|
|
+
|
|
|
+ * Update moz.build to build any new files.
|
|
|
+
|
|
|
+ * Finally, tell hg that we've added or removed some files:
|
|
|
+
|
|
|
+ $ hg addremove
|
|
|
+
|
|
|
++== May 9, 2024 (libjpeg-turbo v3.0.3 7fa4b5b762c9a99b46b0b7838f5fd55071b92ea5 2024-05-08) ==
|
|
|
++
|
|
|
++* Updated to v3.0.3 release.
|
|
|
++
|
|
|
+ == February 8, 2023 (libjpeg-turbo v2.1.5.1 8ecba3647edb6dd940463fedf38ca33a8e2a73d1 2023-02-08) ==
|
|
|
+
|
|
|
+ * Updated to v2.1.5.1 release.
|
|
|
+
|
|
|
+ == November 10, 2022 (libjpeg-turbo v2.1.4 8162eddf041e0be26f5c671bb6528723c55fed9d 2022-08-12) ==
|
|
|
+
|
|
|
+ * Updated to v2.1.4 release.
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/README.ijg b/media/libjpeg/README.ijg
|
|
|
+--- a/media/libjpeg/README.ijg
|
|
|
++++ b/media/libjpeg/README.ijg
|
|
|
+@@ -38,17 +38,17 @@ Other documentation files in the distrib
|
|
|
+ User documentation:
|
|
|
+ usage.txt Usage instructions for cjpeg, djpeg, jpegtran,
|
|
|
+ rdjpgcom, and wrjpgcom.
|
|
|
+ *.1 Unix-style man pages for programs (same info as usage.txt).
|
|
|
+ wizard.txt Advanced usage instructions for JPEG wizards only.
|
|
|
+ change.log Version-to-version change highlights.
|
|
|
+ Programmer and internal documentation:
|
|
|
+ libjpeg.txt How to use the JPEG library in your own programs.
|
|
|
+- example.txt Sample code for calling the JPEG library.
|
|
|
++ example.c Sample code for calling the JPEG library.
|
|
|
+ structure.txt Overview of the JPEG library's internal structure.
|
|
|
+ coderules.txt Coding style rules --- please read if you contribute code.
|
|
|
+
|
|
|
+ Please read at least usage.txt. Some information can also be found in the JPEG
|
|
|
+ FAQ (Frequently Asked Questions) article. See ARCHIVE LOCATIONS below to find
|
|
|
+ out where to obtain the FAQ article.
|
|
|
+
|
|
|
+ If you want to understand how the JPEG code works, we suggest reading one or
|
|
|
+@@ -63,27 +63,27 @@ This package contains C software to impl
|
|
|
+ and transcoding. JPEG (pronounced "jay-peg") is a standardized compression
|
|
|
+ method for full-color and grayscale images. JPEG's strong suit is compressing
|
|
|
+ photographic images or other types of images that have smooth color and
|
|
|
+ brightness transitions between neighboring pixels. Images with sharp lines or
|
|
|
+ other abrupt features may not compress well with JPEG, and a higher JPEG
|
|
|
+ quality may have to be used to avoid visible compression artifacts with such
|
|
|
+ images.
|
|
|
+
|
|
|
+-JPEG is lossy, meaning that the output pixels are not necessarily identical to
|
|
|
+-the input pixels. However, on photographic content and other "smooth" images,
|
|
|
+-very good compression ratios can be obtained with no visible compression
|
|
|
+-artifacts, and extremely high compression ratios are possible if you are
|
|
|
+-willing to sacrifice image quality (by reducing the "quality" setting in the
|
|
|
+-compressor.)
|
|
|
++JPEG is normally lossy, meaning that the output pixels are not necessarily
|
|
|
++identical to the input pixels. However, on photographic content and other
|
|
|
++"smooth" images, very good compression ratios can be obtained with no visible
|
|
|
++compression artifacts, and extremely high compression ratios are possible if
|
|
|
++you are willing to sacrifice image quality (by reducing the "quality" setting
|
|
|
++in the compressor.)
|
|
|
+
|
|
|
+-This software implements JPEG baseline, extended-sequential, and progressive
|
|
|
+-compression processes. Provision is made for supporting all variants of these
|
|
|
+-processes, although some uncommon parameter settings aren't implemented yet.
|
|
|
+-We have made no provision for supporting the hierarchical or lossless
|
|
|
++This software implements JPEG baseline, extended-sequential, progressive, and
|
|
|
++lossless compression processes. Provision is made for supporting all variants
|
|
|
++of these processes, although some uncommon parameter settings aren't
|
|
|
++implemented yet. We have made no provision for supporting the hierarchical
|
|
|
+ processes defined in the standard.
|
|
|
+
|
|
|
+ We provide a set of library routines for reading and writing JPEG image files,
|
|
|
+ plus two sample applications "cjpeg" and "djpeg", which use the library to
|
|
|
+ perform conversion between JPEG and some other popular image file formats.
|
|
|
+ The library is intended to be reused in other applications.
|
|
|
+
|
|
|
+ In order to support file conversion and viewing software, we have included
|
|
|
+@@ -236,17 +236,17 @@ http://www.faqs.org/faqs/jpeg-faq.
|
|
|
+
|
|
|
+ FILE FORMAT COMPATIBILITY
|
|
|
+ =========================
|
|
|
+
|
|
|
+ This software implements ITU T.81 | ISO/IEC 10918 with some extensions from
|
|
|
+ ITU T.871 | ISO/IEC 10918-5 (JPEG File Interchange Format-- see REFERENCES).
|
|
|
+ Informally, the term "JPEG image" or "JPEG file" most often refers to JFIF or
|
|
|
+ a subset thereof, but there are other formats containing the name "JPEG" that
|
|
|
+-are incompatible with the DCT-based JPEG standard or with JFIF (for instance,
|
|
|
++are incompatible with the original JPEG standard or with JFIF (for instance,
|
|
|
+ JPEG 2000 and JPEG XR). This software therefore does not support these
|
|
|
+ formats. Indeed, one of the original reasons for developing this free software
|
|
|
+ was to help force convergence on a common, interoperable format standard for
|
|
|
+ JPEG files.
|
|
|
+
|
|
|
+ JFIF is a minimal or "low end" representation. TIFF/JPEG (TIFF revision 6.0 as
|
|
|
+ modified by TIFF Technical Note #2) can be used for "high end" applications
|
|
|
+ that need to record a lot of additional data about an image.
|
|
|
+diff --git a/media/libjpeg/README.md b/media/libjpeg/README.md
|
|
|
+--- a/media/libjpeg/README.md
|
|
|
++++ b/media/libjpeg/README.md
|
|
|
+@@ -16,17 +16,36 @@ colorspace extensions that allow it to c
|
|
|
+ big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java
|
|
|
+ interface.
|
|
|
+
|
|
|
+ libjpeg-turbo was originally based on libjpeg/SIMD, an MMX-accelerated
|
|
|
+ derivative of libjpeg v6b developed by Miyasaka Masaru. The TigerVNC and
|
|
|
+ VirtualGL projects made numerous enhancements to the codec in 2009, and in
|
|
|
+ early 2010, libjpeg-turbo spun off into an independent project, with the goal
|
|
|
+ of making high-speed JPEG compression/decompression technology available to a
|
|
|
+-broader range of users and developers.
|
|
|
++broader range of users and developers. libjpeg-turbo is an ISO/IEC and ITU-T
|
|
|
++reference implementation of the JPEG standard.
|
|
|
++
|
|
|
++More information about libjpeg-turbo can be found at
|
|
|
++<https://libjpeg-turbo.org>.
|
|
|
++
|
|
|
++
|
|
|
++Funding
|
|
|
++=======
|
|
|
++
|
|
|
++libjpeg-turbo is an independent open source project, but we rely on patronage
|
|
|
++and funded development in order to maintain that independence. The easiest way
|
|
|
++to ensure that libjpeg-turbo remains community-focused and free of any one
|
|
|
++organization's agenda is to
|
|
|
++[sponsor our project through GitHub](https://github.com/sponsors/libjpeg-turbo).
|
|
|
++All sponsorship money goes directly toward funding the labor necessary to
|
|
|
++maintain libjpeg-turbo, support the user community, and implement bug fixes and
|
|
|
++strategically important features.
|
|
|
++
|
|
|
++[![Sponsor libjpeg-turbo](https://img.shields.io/github/sponsors/libjpeg-turbo?label=Sponsor&logo=GitHub)](https://github.com/sponsors/libjpeg-turbo)
|
|
|
+
|
|
|
+
|
|
|
+ License
|
|
|
+ =======
|
|
|
+
|
|
|
+ libjpeg-turbo is covered by three compatible BSD-style open source licenses.
|
|
|
+ Refer to [LICENSE.md](LICENSE.md) for a roll-up of license terms.
|
|
|
+
|
|
|
+@@ -240,26 +259,16 @@ By default, libjpeg-turbo 1.3 and later
|
|
|
+ Previously, it was necessary to build libjpeg-turbo from source with libjpeg v8
|
|
|
+ API/ABI emulation in order to use the in-memory source/destination managers,
|
|
|
+ but several projects requested that those functions be included when emulating
|
|
|
+ the libjpeg v6b API/ABI as well. This allows the use of those functions by
|
|
|
+ programs that need them, without breaking ABI compatibility for programs that
|
|
|
+ don't, and it allows those functions to be provided in the "official"
|
|
|
+ libjpeg-turbo binaries.
|
|
|
+
|
|
|
+-Those who are concerned about maintaining strict conformance with the libjpeg
|
|
|
+-v6b or v7 API can pass an argument of `-DWITH_MEM_SRCDST=0` to `cmake` prior to
|
|
|
+-building libjpeg-turbo. This will restore the pre-1.3 behavior, in which
|
|
|
+-`jpeg_mem_src()` and `jpeg_mem_dest()` are only included when emulating the
|
|
|
+-libjpeg v8 API/ABI.
|
|
|
+-
|
|
|
+-On Un*x systems, including the in-memory source/destination managers changes
|
|
|
+-the dynamic library version from 62.2.0 to 62.3.0 if using libjpeg v6b API/ABI
|
|
|
+-emulation and from 7.2.0 to 7.3.0 if using libjpeg v7 API/ABI emulation.
|
|
|
+-
|
|
|
+ Note that, on most Un*x systems, the dynamic linker will not look for a
|
|
|
+ function in a library until that function is actually used. Thus, if a program
|
|
|
+ is built against libjpeg-turbo 1.3+ and uses `jpeg_mem_src()` or
|
|
|
+ `jpeg_mem_dest()`, that program will not fail if run against an older version
|
|
|
+ of libjpeg-turbo or against libjpeg v7- until the program actually tries to
|
|
|
+ call `jpeg_mem_src()` or `jpeg_mem_dest()`. Such is not the case on Windows.
|
|
|
+ If a program is built against the libjpeg-turbo 1.3+ DLL and uses
|
|
|
+ `jpeg_mem_src()` or `jpeg_mem_dest()`, then it must use the libjpeg-turbo 1.3+
|
|
|
+@@ -269,40 +278,45 @@ Both cjpeg and djpeg have been extended
|
|
|
+ source/destination manager functions. See their respective man pages for more
|
|
|
+ details.
|
|
|
+
|
|
|
+
|
|
|
+ Mathematical Compatibility
|
|
|
+ ==========================
|
|
|
+
|
|
|
+ For the most part, libjpeg-turbo should produce identical output to libjpeg
|
|
|
+-v6b. The one exception to this is when using the floating point DCT/IDCT, in
|
|
|
+-which case the outputs of libjpeg v6b and libjpeg-turbo can differ for the
|
|
|
+-following reasons:
|
|
|
++v6b. There are two exceptions:
|
|
|
++
|
|
|
++1. When decompressing a JPEG image that uses 4:4:0 chrominance subsampling, the
|
|
|
++outputs of libjpeg v6b and libjpeg-turbo can differ because libjpeg-turbo
|
|
|
++implements a "fancy" (smooth) 4:4:0 upsampling algorithm and libjpeg did not.
|
|
|
+
|
|
|
+-- The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever so
|
|
|
+- slightly more accurate than the implementation in libjpeg v6b, but not by
|
|
|
+- any amount perceptible to human vision (generally in the range of 0.01 to
|
|
|
+- 0.08 dB gain in PNSR.)
|
|
|
++2. When using the floating point DCT/IDCT, the outputs of libjpeg v6b and
|
|
|
++libjpeg-turbo can differ for the following reasons:
|
|
|
++
|
|
|
++ - The SSE/SSE2 floating point DCT implementation in libjpeg-turbo is ever
|
|
|
++ so slightly more accurate than the implementation in libjpeg v6b, but not
|
|
|
++ by any amount perceptible to human vision (generally in the range of 0.01
|
|
|
++ to 0.08 dB gain in PNSR.)
|
|
|
+
|
|
|
+-- When not using the SIMD extensions, libjpeg-turbo uses the more accurate
|
|
|
+- (and slightly faster) floating point IDCT algorithm introduced in libjpeg
|
|
|
+- v8a as opposed to the algorithm used in libjpeg v6b. It should be noted,
|
|
|
+- however, that this algorithm basically brings the accuracy of the floating
|
|
|
+- point IDCT in line with the accuracy of the accurate integer IDCT. The
|
|
|
+- floating point DCT/IDCT algorithms are mainly a legacy feature, and they do
|
|
|
+- not produce significantly more accuracy than the accurate integer algorithms
|
|
|
+- (to put numbers on this, the typical difference in PNSR between the two
|
|
|
+- algorithms is less than 0.10 dB, whereas changing the quality level by 1 in
|
|
|
+- the upper range of the quality scale is typically more like a 1.0 dB
|
|
|
+- difference.)
|
|
|
++ - When not using the SIMD extensions, libjpeg-turbo uses the more accurate
|
|
|
++ (and slightly faster) floating point IDCT algorithm introduced in libjpeg
|
|
|
++ v8a as opposed to the algorithm used in libjpeg v6b. It should be noted,
|
|
|
++ however, that this algorithm basically brings the accuracy of the
|
|
|
++ floating point IDCT in line with the accuracy of the accurate integer
|
|
|
++ IDCT. The floating point DCT/IDCT algorithms are mainly a legacy
|
|
|
++ feature, and they do not produce significantly more accuracy than the
|
|
|
++ accurate integer algorithms. (To put numbers on this, the typical
|
|
|
++ difference in PNSR between the two algorithms is less than 0.10 dB,
|
|
|
++ whereas changing the quality level by 1 in the upper range of the quality
|
|
|
++ scale is typically more like a 1.0 dB difference.)
|
|
|
+
|
|
|
+-- If the floating point algorithms in libjpeg-turbo are not implemented using
|
|
|
+- SIMD instructions on a particular platform, then the accuracy of the
|
|
|
+- floating point DCT/IDCT can depend on the compiler settings.
|
|
|
++ - If the floating point algorithms in libjpeg-turbo are not implemented
|
|
|
++ using SIMD instructions on a particular platform, then the accuracy of
|
|
|
++ the floating point DCT/IDCT can depend on the compiler settings.
|
|
|
+
|
|
|
+ While libjpeg-turbo does emulate the libjpeg v8 API/ABI, under the hood it is
|
|
|
+ still using the same algorithms as libjpeg v6b, so there are several specific
|
|
|
+ cases in which libjpeg-turbo cannot be expected to produce the same output as
|
|
|
+ libjpeg v8:
|
|
|
+
|
|
|
+ - When decompressing using scaling factors of 1/2 and 1/4, because libjpeg v8
|
|
|
+ implements those scaling algorithms differently than libjpeg v6b does, and
|
|
|
+diff --git a/media/libjpeg/jcapimin.c b/media/libjpeg/jcapimin.c
|
|
|
+--- a/media/libjpeg/jcapimin.c
|
|
|
++++ b/media/libjpeg/jcapimin.c
|
|
|
+@@ -18,16 +18,17 @@
|
|
|
+ * are in this file or in jcapistd.c. But also see jcparam.c for
|
|
|
+ * parameter-setup helper routines, jcomapi.c for routines shared by
|
|
|
+ * compression and decompression, and jctrans.c for the transcoding case.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jcmaster.h"
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialization of a JPEG compression object.
|
|
|
+ * The error manager must already be set up (in case memory manager fails).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+@@ -85,18 +86,28 @@ jpeg_CreateCompress(j_compress_ptr cinfo
|
|
|
+ cinfo->natural_order = jpeg_natural_order;
|
|
|
+ cinfo->lim_Se = DCTSIZE2 - 1;
|
|
|
+ #endif
|
|
|
+
|
|
|
+ cinfo->script_space = NULL;
|
|
|
+
|
|
|
+ cinfo->input_gamma = 1.0; /* in case application forgets */
|
|
|
+
|
|
|
++ cinfo->data_precision = BITS_IN_JSAMPLE;
|
|
|
++
|
|
|
+ /* OK, I'm ready */
|
|
|
+ cinfo->global_state = CSTATE_START;
|
|
|
++
|
|
|
++ /* The master struct is used to store extension parameters, so we allocate it
|
|
|
++ * here.
|
|
|
++ */
|
|
|
++ cinfo->master = (struct jpeg_comp_master *)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
|
|
|
++ sizeof(my_comp_master));
|
|
|
++ memset(cinfo->master, 0, sizeof(my_comp_master));
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Destruction of a JPEG compression object
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+@@ -178,18 +189,30 @@ jpeg_finish_compress(j_compress_ptr cinf
|
|
|
+ if (cinfo->progress != NULL) {
|
|
|
+ cinfo->progress->pass_counter = (long)iMCU_row;
|
|
|
+ cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows;
|
|
|
+ (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
|
|
|
+ }
|
|
|
+ /* We bypass the main controller and invoke coef controller directly;
|
|
|
+ * all work is being done from the coefficient buffer.
|
|
|
+ */
|
|
|
+- if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL))
|
|
|
+- ERREXIT(cinfo, JERR_CANT_SUSPEND);
|
|
|
++ if (cinfo->data_precision == 16) {
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ if (!(*cinfo->coef->compress_data_16) (cinfo, (J16SAMPIMAGE)NULL))
|
|
|
++ ERREXIT(cinfo, JERR_CANT_SUSPEND);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ } else if (cinfo->data_precision == 12) {
|
|
|
++ if (!(*cinfo->coef->compress_data_12) (cinfo, (J12SAMPIMAGE)NULL))
|
|
|
++ ERREXIT(cinfo, JERR_CANT_SUSPEND);
|
|
|
++ } else {
|
|
|
++ if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL))
|
|
|
++ ERREXIT(cinfo, JERR_CANT_SUSPEND);
|
|
|
++ }
|
|
|
+ }
|
|
|
+ (*cinfo->master->finish_pass) (cinfo);
|
|
|
+ }
|
|
|
+ /* Write EOI, do final cleanup */
|
|
|
+ (*cinfo->marker->write_file_trailer) (cinfo);
|
|
|
+ (*cinfo->dest->term_destination) (cinfo);
|
|
|
+ /* We can use jpeg_abort to release memory and reset global_state */
|
|
|
+ jpeg_abort((j_common_ptr)cinfo);
|
|
|
+diff --git a/media/libjpeg/jcapistd.c b/media/libjpeg/jcapistd.c
|
|
|
+--- a/media/libjpeg/jcapistd.c
|
|
|
++++ b/media/libjpeg/jcapistd.c
|
|
|
+@@ -1,30 +1,35 @@
|
|
|
+ /*
|
|
|
+ * jcapistd.c
|
|
|
+ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+- * This file is part of the Independent JPEG Group's software.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains application interface code for the compression half
|
|
|
+ * of the JPEG library. These are the "standard" API routines that are
|
|
|
+ * used in the normal full-compression case. They are not used by a
|
|
|
+ * transcoding-only application. Note that if an application links in
|
|
|
+ * jpeg_start_compress, it will end up linking in the entire compressor.
|
|
|
+ * We thus must separate this file from jcapimin.c to avoid linking the
|
|
|
+ * whole compression library into a transcoder.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE == 8
|
|
|
++
|
|
|
+ /*
|
|
|
+ * Compression initialization.
|
|
|
+ * Before calling this, all parameters and a data destination must be set up.
|
|
|
+ *
|
|
|
+ * We require a write_all_tables parameter as a failsafe check when writing
|
|
|
+ * multiple datastreams from the same compression object. Since prior runs
|
|
|
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
|
|
|
+ * would emit an abbreviated stream (no tables) by default. This may be what
|
|
|
+@@ -46,117 +51,137 @@ jpeg_start_compress(j_compress_ptr cinfo
|
|
|
+
|
|
|
+ /* (Re)initialize error mgr and destination modules */
|
|
|
+ (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
|
|
|
+ (*cinfo->dest->init_destination) (cinfo);
|
|
|
+ /* Perform master selection of active modules */
|
|
|
+ jinit_compress_master(cinfo);
|
|
|
+ /* Set up for the first pass */
|
|
|
+ (*cinfo->master->prepare_for_pass) (cinfo);
|
|
|
+- /* Ready for application to drive first pass through jpeg_write_scanlines
|
|
|
+- * or jpeg_write_raw_data.
|
|
|
++ /* Ready for application to drive first pass through _jpeg_write_scanlines
|
|
|
++ * or _jpeg_write_raw_data.
|
|
|
+ */
|
|
|
+ cinfo->next_scanline = 0;
|
|
|
+ cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
|
|
|
+ }
|
|
|
+
|
|
|
++#endif
|
|
|
++
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Write some scanlines of data to the JPEG compressor.
|
|
|
+ *
|
|
|
+ * The return value will be the number of lines actually written.
|
|
|
+ * This should be less than the supplied num_lines only in case that
|
|
|
+ * the data destination module has requested suspension of the compressor,
|
|
|
+ * or if more than image_height scanlines are passed in.
|
|
|
+ *
|
|
|
+- * Note: we warn about excess calls to jpeg_write_scanlines() since
|
|
|
++ * Note: we warn about excess calls to _jpeg_write_scanlines() since
|
|
|
+ * this likely signals an application programmer error. However,
|
|
|
+ * excess scanlines passed in the last valid call are *silently* ignored,
|
|
|
+ * so that the application need not adjust num_lines for end-of-image
|
|
|
+ * when using a multiple-scanline buffer.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(JDIMENSION)
|
|
|
+-jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
|
|
|
+- JDIMENSION num_lines)
|
|
|
++_jpeg_write_scanlines(j_compress_ptr cinfo, _JSAMPARRAY scanlines,
|
|
|
++ JDIMENSION num_lines)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
|
|
|
+ JDIMENSION row_ctr, rows_left;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ if (cinfo->global_state != CSTATE_SCANNING)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+ if (cinfo->next_scanline >= cinfo->image_height)
|
|
|
+ WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
|
|
|
+
|
|
|
+ /* Call progress monitor hook if present */
|
|
|
+ if (cinfo->progress != NULL) {
|
|
|
+ cinfo->progress->pass_counter = (long)cinfo->next_scanline;
|
|
|
+ cinfo->progress->pass_limit = (long)cinfo->image_height;
|
|
|
+ (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Give master control module another chance if this is first call to
|
|
|
+- * jpeg_write_scanlines. This lets output of the frame/scan headers be
|
|
|
++ * _jpeg_write_scanlines. This lets output of the frame/scan headers be
|
|
|
+ * delayed so that application can write COM, etc, markers between
|
|
|
+- * jpeg_start_compress and jpeg_write_scanlines.
|
|
|
++ * jpeg_start_compress and _jpeg_write_scanlines.
|
|
|
+ */
|
|
|
+ if (cinfo->master->call_pass_startup)
|
|
|
+ (*cinfo->master->pass_startup) (cinfo);
|
|
|
+
|
|
|
+ /* Ignore any extra scanlines at bottom of image. */
|
|
|
+ rows_left = cinfo->image_height - cinfo->next_scanline;
|
|
|
+ if (num_lines > rows_left)
|
|
|
+ num_lines = rows_left;
|
|
|
+
|
|
|
+ row_ctr = 0;
|
|
|
+- (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
|
|
|
++ (*cinfo->main->_process_data) (cinfo, scanlines, &row_ctr, num_lines);
|
|
|
+ cinfo->next_scanline += row_ctr;
|
|
|
+ return row_ctr;
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ return 0;
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
++
|
|
|
+ /*
|
|
|
+ * Alternate entry point to write raw data.
|
|
|
+ * Processes exactly one iMCU row per call, unless suspended.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(JDIMENSION)
|
|
|
+-jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
|
|
|
+- JDIMENSION num_lines)
|
|
|
++_jpeg_write_raw_data(j_compress_ptr cinfo, _JSAMPIMAGE data,
|
|
|
++ JDIMENSION num_lines)
|
|
|
+ {
|
|
|
+ JDIMENSION lines_per_iMCU_row;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ if (cinfo->global_state != CSTATE_RAW_OK)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+ if (cinfo->next_scanline >= cinfo->image_height) {
|
|
|
+ WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Call progress monitor hook if present */
|
|
|
+ if (cinfo->progress != NULL) {
|
|
|
+ cinfo->progress->pass_counter = (long)cinfo->next_scanline;
|
|
|
+ cinfo->progress->pass_limit = (long)cinfo->image_height;
|
|
|
+ (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Give master control module another chance if this is first call to
|
|
|
+- * jpeg_write_raw_data. This lets output of the frame/scan headers be
|
|
|
++ * _jpeg_write_raw_data. This lets output of the frame/scan headers be
|
|
|
+ * delayed so that application can write COM, etc, markers between
|
|
|
+- * jpeg_start_compress and jpeg_write_raw_data.
|
|
|
++ * jpeg_start_compress and _jpeg_write_raw_data.
|
|
|
+ */
|
|
|
+ if (cinfo->master->call_pass_startup)
|
|
|
+ (*cinfo->master->pass_startup) (cinfo);
|
|
|
+
|
|
|
+ /* Verify that at least one iMCU row has been passed. */
|
|
|
+ lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
|
|
|
+ if (num_lines < lines_per_iMCU_row)
|
|
|
+ ERREXIT(cinfo, JERR_BUFFER_SIZE);
|
|
|
+
|
|
|
+ /* Directly compress the row. */
|
|
|
+- if (!(*cinfo->coef->compress_data) (cinfo, data)) {
|
|
|
++ if (!(*cinfo->coef->_compress_data) (cinfo, data)) {
|
|
|
+ /* If compressor did not consume the whole row, suspend processing. */
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* OK, we processed one iMCU row. */
|
|
|
+ cinfo->next_scanline += lines_per_iMCU_row;
|
|
|
+ return lines_per_iMCU_row;
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 */
|
|
|
+diff --git a/media/libjpeg/jccoefct.c b/media/libjpeg/jccoefct.c
|
|
|
+--- a/media/libjpeg/jccoefct.c
|
|
|
++++ b/media/libjpeg/jccoefct.c
|
|
|
+@@ -1,26 +1,27 @@
|
|
|
+ /*
|
|
|
+ * jccoefct.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1997, Thomas G. Lane.
|
|
|
+- * It was modified by The libjpeg-turbo Project to include only code and
|
|
|
+- * information relevant to libjpeg-turbo.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the coefficient buffer controller for compression.
|
|
|
+- * This controller is the top level of the JPEG compressor proper.
|
|
|
++ * This controller is the top level of the lossy JPEG compressor proper.
|
|
|
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* We use a full-image coefficient buffer when doing Huffman optimization,
|
|
|
+ * and also for writing multiple-scan JPEG files. In all cases, the DCT
|
|
|
+ * step is run during the first pass, and subsequent passes need only read
|
|
|
+ * the buffered coefficients.
|
|
|
+ */
|
|
|
+ #ifdef ENTROPY_OPT_SUPPORTED
|
|
|
+@@ -53,21 +54,22 @@ typedef struct {
|
|
|
+ /* In multi-pass modes, we need a virtual block array for each component. */
|
|
|
+ jvirt_barray_ptr whole_image[MAX_COMPONENTS];
|
|
|
+ } my_coef_controller;
|
|
|
+
|
|
|
+ typedef my_coef_controller *my_coef_ptr;
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+-METHODDEF(boolean) compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf);
|
|
|
++METHODDEF(boolean) compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf);
|
|
|
+ #ifdef FULL_COEF_BUFFER_SUPPORTED
|
|
|
+ METHODDEF(boolean) compress_first_pass(j_compress_ptr cinfo,
|
|
|
+- JSAMPIMAGE input_buf);
|
|
|
+-METHODDEF(boolean) compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf);
|
|
|
++ _JSAMPIMAGE input_buf);
|
|
|
++METHODDEF(boolean) compress_output(j_compress_ptr cinfo,
|
|
|
++ _JSAMPIMAGE input_buf);
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ start_iMCU_row(j_compress_ptr cinfo)
|
|
|
+ /* Reset within-iMCU-row counters for a new row */
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+@@ -101,28 +103,28 @@ start_pass_coef(j_compress_ptr cinfo, J_
|
|
|
+
|
|
|
+ coef->iMCU_row_num = 0;
|
|
|
+ start_iMCU_row(cinfo);
|
|
|
+
|
|
|
+ switch (pass_mode) {
|
|
|
+ case JBUF_PASS_THRU:
|
|
|
+ if (coef->whole_image[0] != NULL)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+- coef->pub.compress_data = compress_data;
|
|
|
++ coef->pub._compress_data = compress_data;
|
|
|
+ break;
|
|
|
+ #ifdef FULL_COEF_BUFFER_SUPPORTED
|
|
|
+ case JBUF_SAVE_AND_PASS:
|
|
|
+ if (coef->whole_image[0] == NULL)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+- coef->pub.compress_data = compress_first_pass;
|
|
|
++ coef->pub._compress_data = compress_first_pass;
|
|
|
+ break;
|
|
|
+ case JBUF_CRANK_DEST:
|
|
|
+ if (coef->whole_image[0] == NULL)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+- coef->pub.compress_data = compress_output;
|
|
|
++ coef->pub._compress_data = compress_output;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ default:
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+@@ -133,17 +135,17 @@ start_pass_coef(j_compress_ptr cinfo, J_
|
|
|
+ * per call, ie, v_samp_factor block rows for each component in the image.
|
|
|
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
|
|
|
+ *
|
|
|
+ * NB: input_buf contains a plane for each component in image,
|
|
|
+ * which we index according to the component's SOF position.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(boolean)
|
|
|
+-compress_data(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
|
|
|
++compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ JDIMENSION MCU_col_num; /* index of current MCU within row */
|
|
|
+ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
|
|
|
+ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
+ int blkn, bi, ci, yindex, yoffset, blockcnt;
|
|
|
+ JDIMENSION ypos, xpos;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+@@ -167,20 +169,20 @@ compress_data(j_compress_ptr cinfo, JSAM
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+ blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width :
|
|
|
+ compptr->last_col_width;
|
|
|
+ xpos = MCU_col_num * compptr->MCU_sample_width;
|
|
|
+ ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
|
|
|
+ for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
|
|
|
+ if (coef->iMCU_row_num < last_iMCU_row ||
|
|
|
+ yoffset + yindex < compptr->last_row_height) {
|
|
|
+- (*cinfo->fdct->forward_DCT) (cinfo, compptr,
|
|
|
+- input_buf[compptr->component_index],
|
|
|
+- coef->MCU_buffer[blkn],
|
|
|
+- ypos, xpos, (JDIMENSION)blockcnt);
|
|
|
++ (*cinfo->fdct->_forward_DCT) (cinfo, compptr,
|
|
|
++ input_buf[compptr->component_index],
|
|
|
++ coef->MCU_buffer[blkn],
|
|
|
++ ypos, xpos, (JDIMENSION)blockcnt);
|
|
|
+ if (blockcnt < compptr->MCU_width) {
|
|
|
+ /* Create some dummy blocks at the right edge of the image. */
|
|
|
+ jzero_far((void *)coef->MCU_buffer[blkn + blockcnt],
|
|
|
+ (compptr->MCU_width - blockcnt) * sizeof(JBLOCK));
|
|
|
+ for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
|
|
|
+ coef->MCU_buffer[blkn + bi][0][0] =
|
|
|
+ coef->MCU_buffer[blkn + bi - 1][0][0];
|
|
|
+ }
|
|
|
+@@ -237,17 +239,17 @@ compress_data(j_compress_ptr cinfo, JSAM
|
|
|
+ * NB: input_buf contains a plane for each component in image. All
|
|
|
+ * components are DCT'd and loaded into the virtual arrays in this pass.
|
|
|
+ * However, it may be that only a subset of the components are emitted to
|
|
|
+ * the entropy encoder during this first pass; be careful about looking
|
|
|
+ * at the scan-dependent variables (MCU dimensions, etc).
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(boolean)
|
|
|
+-compress_first_pass(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
|
|
|
++compress_first_pass(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
+ JDIMENSION blocks_across, MCUs_across, MCUindex;
|
|
|
+ int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
|
|
|
+ JCOEF lastDC;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ JBLOCKARRAY buffer;
|
|
|
+@@ -274,20 +276,20 @@ compress_first_pass(j_compress_ptr cinfo
|
|
|
+ ndummy = (int)(blocks_across % h_samp_factor);
|
|
|
+ if (ndummy > 0)
|
|
|
+ ndummy = h_samp_factor - ndummy;
|
|
|
+ /* Perform DCT for all non-dummy blocks in this iMCU row. Each call
|
|
|
+ * on forward_DCT processes a complete horizontal row of DCT blocks.
|
|
|
+ */
|
|
|
+ for (block_row = 0; block_row < block_rows; block_row++) {
|
|
|
+ thisblockrow = buffer[block_row];
|
|
|
+- (*cinfo->fdct->forward_DCT) (cinfo, compptr,
|
|
|
+- input_buf[ci], thisblockrow,
|
|
|
+- (JDIMENSION)(block_row * DCTSIZE),
|
|
|
+- (JDIMENSION)0, blocks_across);
|
|
|
++ (*cinfo->fdct->_forward_DCT) (cinfo, compptr,
|
|
|
++ input_buf[ci], thisblockrow,
|
|
|
++ (JDIMENSION)(block_row * DCTSIZE),
|
|
|
++ (JDIMENSION)0, blocks_across);
|
|
|
+ if (ndummy > 0) {
|
|
|
+ /* Create dummy blocks at the right edge of the image. */
|
|
|
+ thisblockrow += blocks_across; /* => first dummy block */
|
|
|
+ jzero_far((void *)thisblockrow, ndummy * sizeof(JBLOCK));
|
|
|
+ lastDC = thisblockrow[-1][0];
|
|
|
+ for (bi = 0; bi < ndummy; bi++) {
|
|
|
+ thisblockrow[bi][0] = lastDC;
|
|
|
+ }
|
|
|
+@@ -333,17 +335,17 @@ compress_first_pass(j_compress_ptr cinfo
|
|
|
+ * per call, ie, v_samp_factor block rows for each component in the scan.
|
|
|
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
|
|
|
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
|
|
|
+ *
|
|
|
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(boolean)
|
|
|
+-compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
|
|
|
++compress_output(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ JDIMENSION MCU_col_num; /* index of current MCU within row */
|
|
|
+ int blkn, ci, xindex, yindex, yoffset;
|
|
|
+ JDIMENSION start_col;
|
|
|
+ JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
|
|
|
+ JBLOCKROW buffer_ptr;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+@@ -397,20 +399,23 @@ compress_output(j_compress_ptr cinfo, JS
|
|
|
+ #endif /* FULL_COEF_BUFFER_SUPPORTED */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize coefficient buffer controller.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_c_coef_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
++_jinit_c_coef_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ coef = (my_coef_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_coef_controller));
|
|
|
+ cinfo->coef = (struct jpeg_c_coef_controller *)coef;
|
|
|
+ coef->pub.start_pass = start_pass_coef;
|
|
|
+
|
|
|
+ /* Create the coefficient buffer. */
|
|
|
+ if (need_full_buffer) {
|
|
|
+diff --git a/media/libjpeg/jccolext.c b/media/libjpeg/jccolext.c
|
|
|
+--- a/media/libjpeg/jccolext.c
|
|
|
++++ b/media/libjpeg/jccolext.c
|
|
|
+@@ -24,112 +24,120 @@
|
|
|
+ *
|
|
|
+ * A starting row offset is provided only for the output buffer. The caller
|
|
|
+ * can easily adjust the passed input_buf value to accommodate any row
|
|
|
+ * offset required on that side.
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-rgb_ycc_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
++rgb_ycc_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int r, g, b;
|
|
|
+ register JLONG *ctab = cconvert->rgb_ycc_tab;
|
|
|
+- register JSAMPROW inptr;
|
|
|
+- register JSAMPROW outptr0, outptr1, outptr2;
|
|
|
++ register _JSAMPROW inptr;
|
|
|
++ register _JSAMPROW outptr0, outptr1, outptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->image_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = *input_buf++;
|
|
|
+ outptr0 = output_buf[0][output_row];
|
|
|
+ outptr1 = output_buf[1][output_row];
|
|
|
+ outptr2 = output_buf[2][output_row];
|
|
|
+ output_row++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+ r = RANGE_LIMIT(inptr[RGB_RED]);
|
|
|
+ g = RANGE_LIMIT(inptr[RGB_GREEN]);
|
|
|
+ b = RANGE_LIMIT(inptr[RGB_BLUE]);
|
|
|
+ inptr += RGB_PIXELSIZE;
|
|
|
+- /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
|
|
|
++ /* If the inputs are 0.._MAXJSAMPLE, the outputs of these equations
|
|
|
+ * must be too; we do not need an explicit range-limiting operation.
|
|
|
+ * Hence the value being shifted is never negative, and we don't
|
|
|
+ * need the general RIGHT_SHIFT macro.
|
|
|
+ */
|
|
|
+ /* Y */
|
|
|
+- outptr0[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
+- ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
++ outptr0[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
++ ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
+ /* Cb */
|
|
|
+- outptr1[col] = (JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
|
|
|
+- ctab[b + B_CB_OFF]) >> SCALEBITS);
|
|
|
++ outptr1[col] = (_JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
|
|
|
++ ctab[b + B_CB_OFF]) >> SCALEBITS);
|
|
|
+ /* Cr */
|
|
|
+- outptr2[col] = (JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
|
|
|
+- ctab[b + B_CR_OFF]) >> SCALEBITS);
|
|
|
++ outptr2[col] = (_JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
|
|
|
++ ctab[b + B_CR_OFF]) >> SCALEBITS);
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /**************** Cases other than RGB -> YCbCr **************/
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ * This version handles RGB->grayscale conversion, which is the same
|
|
|
+ * as the RGB->Y portion of RGB->YCbCr.
|
|
|
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-rgb_gray_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
++rgb_gray_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int r, g, b;
|
|
|
+ register JLONG *ctab = cconvert->rgb_ycc_tab;
|
|
|
+- register JSAMPROW inptr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->image_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = *input_buf++;
|
|
|
+ outptr = output_buf[0][output_row];
|
|
|
+ output_row++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+ r = RANGE_LIMIT(inptr[RGB_RED]);
|
|
|
+ g = RANGE_LIMIT(inptr[RGB_GREEN]);
|
|
|
+ b = RANGE_LIMIT(inptr[RGB_BLUE]);
|
|
|
+ inptr += RGB_PIXELSIZE;
|
|
|
+ /* Y */
|
|
|
+- outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
+- ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
++ outptr[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
++ ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ * This version handles extended RGB->plain RGB conversion
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-rgb_rgb_convert_internal(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
++rgb_rgb_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr;
|
|
|
+- register JSAMPROW outptr0, outptr1, outptr2;
|
|
|
++ register _JSAMPROW inptr;
|
|
|
++ register _JSAMPROW outptr0, outptr1, outptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->image_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = *input_buf++;
|
|
|
+ outptr0 = output_buf[0][output_row];
|
|
|
+ outptr1 = output_buf[1][output_row];
|
|
|
+ outptr2 = output_buf[2][output_row];
|
|
|
+diff --git a/media/libjpeg/jccolor.c b/media/libjpeg/jccolor.c
|
|
|
+--- a/media/libjpeg/jccolor.c
|
|
|
++++ b/media/libjpeg/jccolor.c
|
|
|
+@@ -12,81 +12,86 @@
|
|
|
+ *
|
|
|
+ * This file contains input colorspace conversion routines.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+ #include "jsimd.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_color_converter pub; /* public fields */
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ /* Private state for RGB->YCC conversion */
|
|
|
+ JLONG *rgb_ycc_tab; /* => table for RGB to YCbCr conversion */
|
|
|
++#endif
|
|
|
+ } my_color_converter;
|
|
|
+
|
|
|
+ typedef my_color_converter *my_cconvert_ptr;
|
|
|
+
|
|
|
+
|
|
|
+ /**************** RGB -> YCbCr conversion: most common case **************/
|
|
|
+
|
|
|
+ /*
|
|
|
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
|
|
|
+- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
|
|
|
++ * normalized to the range 0.._MAXJSAMPLE rather than -0.5 .. 0.5.
|
|
|
+ * The conversion equations to be implemented are therefore
|
|
|
+ * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
|
|
+- * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
|
|
+- * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
|
|
++ * Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + _CENTERJSAMPLE
|
|
|
++ * Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + _CENTERJSAMPLE
|
|
|
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
|
|
|
+- * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
|
|
|
+- * rather than CENTERJSAMPLE, for Cb and Cr. This gave equal positive and
|
|
|
++ * Note: older versions of the IJG code used a zero offset of _MAXJSAMPLE/2,
|
|
|
++ * rather than _CENTERJSAMPLE, for Cb and Cr. This gave equal positive and
|
|
|
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
|
|
|
+ * were not represented exactly. Now we sacrifice exact representation of
|
|
|
+ * maximum red and maximum blue in order to get exact grayscales.
|
|
|
+ *
|
|
|
+ * To avoid floating-point arithmetic, we represent the fractional constants
|
|
|
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
|
|
|
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
|
|
|
+ *
|
|
|
+ * For even more speed, we avoid doing any multiplications in the inner loop
|
|
|
+ * by precalculating the constants times R,G,B for all possible values.
|
|
|
+- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
|
|
|
++ * For 8-bit samples this is very reasonable (only 256 entries per table);
|
|
|
+ * for 12-bit samples it is still acceptable. It's not very reasonable for
|
|
|
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
|
|
|
+ * colorspace anyway.
|
|
|
+- * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
|
|
|
++ * The _CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
|
|
|
+ * in the tables to save adding them separately in the inner loop.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define SCALEBITS 16 /* speediest right-shift on some machines */
|
|
|
+-#define CBCR_OFFSET ((JLONG)CENTERJSAMPLE << SCALEBITS)
|
|
|
++#define CBCR_OFFSET ((JLONG)_CENTERJSAMPLE << SCALEBITS)
|
|
|
+ #define ONE_HALF ((JLONG)1 << (SCALEBITS - 1))
|
|
|
+ #define FIX(x) ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
|
|
|
+
|
|
|
+ /* We allocate one big table and divide it up into eight parts, instead of
|
|
|
+ * doing eight alloc_small requests. This lets us use a single table base
|
|
|
+ * address, which can be held in a register in the inner loops on many
|
|
|
+ * machines (more than can hold all eight addresses, anyway).
|
|
|
+ */
|
|
|
+
|
|
|
+ #define R_Y_OFF 0 /* offset to R => Y section */
|
|
|
+-#define G_Y_OFF (1 * (MAXJSAMPLE + 1)) /* offset to G => Y section */
|
|
|
+-#define B_Y_OFF (2 * (MAXJSAMPLE + 1)) /* etc. */
|
|
|
+-#define R_CB_OFF (3 * (MAXJSAMPLE + 1))
|
|
|
+-#define G_CB_OFF (4 * (MAXJSAMPLE + 1))
|
|
|
+-#define B_CB_OFF (5 * (MAXJSAMPLE + 1))
|
|
|
++#define G_Y_OFF (1 * (_MAXJSAMPLE + 1)) /* offset to G => Y section */
|
|
|
++#define B_Y_OFF (2 * (_MAXJSAMPLE + 1)) /* etc. */
|
|
|
++#define R_CB_OFF (3 * (_MAXJSAMPLE + 1))
|
|
|
++#define G_CB_OFF (4 * (_MAXJSAMPLE + 1))
|
|
|
++#define B_CB_OFF (5 * (_MAXJSAMPLE + 1))
|
|
|
+ #define R_CR_OFF B_CB_OFF /* B=>Cb, R=>Cr are the same */
|
|
|
+-#define G_CR_OFF (6 * (MAXJSAMPLE + 1))
|
|
|
+-#define B_CR_OFF (7 * (MAXJSAMPLE + 1))
|
|
|
+-#define TABLE_SIZE (8 * (MAXJSAMPLE + 1))
|
|
|
++#define G_CR_OFF (6 * (_MAXJSAMPLE + 1))
|
|
|
++#define B_CR_OFF (7 * (_MAXJSAMPLE + 1))
|
|
|
++#define TABLE_SIZE (8 * (_MAXJSAMPLE + 1))
|
|
|
+
|
|
|
+ /* 12-bit samples use a 16-bit data type, so it is possible to pass
|
|
|
+ * out-of-range sample values (< 0 or > 4095) to jpeg_write_scanlines().
|
|
|
+ * Thus, we mask the incoming 12-bit samples to guard against overrunning
|
|
|
+ * or underrunning the conversion tables.
|
|
|
+ */
|
|
|
+
|
|
|
+ #if BITS_IN_JSAMPLE == 12
|
|
|
+@@ -203,52 +208,56 @@ typedef my_color_converter *my_cconvert_
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize for RGB->YCC colorspace conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ rgb_ycc_start(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ JLONG *rgb_ycc_tab;
|
|
|
+ JLONG i;
|
|
|
+
|
|
|
+ /* Allocate and fill in the conversion tables. */
|
|
|
+ cconvert->rgb_ycc_tab = rgb_ycc_tab = (JLONG *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (TABLE_SIZE * sizeof(JLONG)));
|
|
|
+
|
|
|
+- for (i = 0; i <= MAXJSAMPLE; i++) {
|
|
|
++ for (i = 0; i <= _MAXJSAMPLE; i++) {
|
|
|
+ rgb_ycc_tab[i + R_Y_OFF] = FIX(0.29900) * i;
|
|
|
+ rgb_ycc_tab[i + G_Y_OFF] = FIX(0.58700) * i;
|
|
|
+ rgb_ycc_tab[i + B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
|
|
|
+ rgb_ycc_tab[i + R_CB_OFF] = (-FIX(0.16874)) * i;
|
|
|
+ rgb_ycc_tab[i + G_CB_OFF] = (-FIX(0.33126)) * i;
|
|
|
+ /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
|
|
|
+- * This ensures that the maximum output will round to MAXJSAMPLE
|
|
|
+- * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
|
|
|
++ * This ensures that the maximum output will round to _MAXJSAMPLE
|
|
|
++ * not _MAXJSAMPLE+1, and thus that we don't have to range-limit.
|
|
|
+ */
|
|
|
+ rgb_ycc_tab[i + B_CB_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF - 1;
|
|
|
+ /* B=>Cb and R=>Cr tables are the same
|
|
|
+ rgb_ycc_tab[i + R_CR_OFF] = FIX(0.50000) * i + CBCR_OFFSET + ONE_HALF - 1;
|
|
|
+ */
|
|
|
+ rgb_ycc_tab[i + G_CR_OFF] = (-FIX(0.41869)) * i;
|
|
|
+ rgb_ycc_tab[i + B_CR_OFF] = (-FIX(0.08131)) * i;
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
++rgb_ycc_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
+ {
|
|
|
+ switch (cinfo->in_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
|
|
|
+ num_rows);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -285,18 +294,18 @@ rgb_ycc_convert(j_compress_ptr cinfo, JS
|
|
|
+ /**************** Cases other than RGB -> YCbCr **************/
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
++rgb_gray_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
+ {
|
|
|
+ switch (cinfo->in_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
|
|
|
+ num_rows);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -330,18 +339,18 @@ rgb_gray_convert(j_compress_ptr cinfo, J
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Extended RGB to plain RGB conversion
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_rgb_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
++rgb_rgb_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
+ {
|
|
|
+ switch (cinfo->in_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
|
|
|
+ num_rows);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -379,72 +388,76 @@ rgb_rgb_convert(j_compress_ptr cinfo, JS
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ * This version handles Adobe-style CMYK->YCCK conversion,
|
|
|
+ * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same
|
|
|
+ * conversion as above, while passing K (black) unchanged.
|
|
|
+ * We assume rgb_ycc_start has been called.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-cmyk_ycck_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
++cmyk_ycck_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int r, g, b;
|
|
|
+ register JLONG *ctab = cconvert->rgb_ycc_tab;
|
|
|
+- register JSAMPROW inptr;
|
|
|
+- register JSAMPROW outptr0, outptr1, outptr2, outptr3;
|
|
|
++ register _JSAMPROW inptr;
|
|
|
++ register _JSAMPROW outptr0, outptr1, outptr2, outptr3;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->image_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = *input_buf++;
|
|
|
+ outptr0 = output_buf[0][output_row];
|
|
|
+ outptr1 = output_buf[1][output_row];
|
|
|
+ outptr2 = output_buf[2][output_row];
|
|
|
+ outptr3 = output_buf[3][output_row];
|
|
|
+ output_row++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+- r = MAXJSAMPLE - RANGE_LIMIT(inptr[0]);
|
|
|
+- g = MAXJSAMPLE - RANGE_LIMIT(inptr[1]);
|
|
|
+- b = MAXJSAMPLE - RANGE_LIMIT(inptr[2]);
|
|
|
++ r = _MAXJSAMPLE - RANGE_LIMIT(inptr[0]);
|
|
|
++ g = _MAXJSAMPLE - RANGE_LIMIT(inptr[1]);
|
|
|
++ b = _MAXJSAMPLE - RANGE_LIMIT(inptr[2]);
|
|
|
+ /* K passes through as-is */
|
|
|
+ outptr3[col] = inptr[3];
|
|
|
+ inptr += 4;
|
|
|
+- /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
|
|
|
++ /* If the inputs are 0.._MAXJSAMPLE, the outputs of these equations
|
|
|
+ * must be too; we do not need an explicit range-limiting operation.
|
|
|
+ * Hence the value being shifted is never negative, and we don't
|
|
|
+ * need the general RIGHT_SHIFT macro.
|
|
|
+ */
|
|
|
+ /* Y */
|
|
|
+- outptr0[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
+- ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
++ outptr0[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
++ ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
+ /* Cb */
|
|
|
+- outptr1[col] = (JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
|
|
|
+- ctab[b + B_CB_OFF]) >> SCALEBITS);
|
|
|
++ outptr1[col] = (_JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
|
|
|
++ ctab[b + B_CB_OFF]) >> SCALEBITS);
|
|
|
+ /* Cr */
|
|
|
+- outptr2[col] = (JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
|
|
|
+- ctab[b + B_CR_OFF]) >> SCALEBITS);
|
|
|
++ outptr2[col] = (_JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
|
|
|
++ ctab[b + B_CR_OFF]) >> SCALEBITS);
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ * This version handles grayscale output with no conversion.
|
|
|
+ * The source can be either plain grayscale or YCbCr (since Y == gray).
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-grayscale_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
++grayscale_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->image_width;
|
|
|
+ int instride = cinfo->input_components;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = *input_buf++;
|
|
|
+ outptr = output_buf[0][output_row];
|
|
|
+ output_row++;
|
|
|
+@@ -458,21 +471,21 @@ grayscale_convert(j_compress_ptr cinfo,
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the JPEG colorspace.
|
|
|
+ * This version handles multi-component colorspaces without conversion.
|
|
|
+ * We assume input_components == num_components.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
|
+- JDIMENSION output_row, int num_rows)
|
|
|
++null_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr;
|
|
|
+- register JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3;
|
|
|
++ register _JSAMPROW inptr;
|
|
|
++ register _JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3;
|
|
|
+ register JDIMENSION col;
|
|
|
+ register int ci;
|
|
|
+ int nc = cinfo->num_components;
|
|
|
+ JDIMENSION num_cols = cinfo->image_width;
|
|
|
+
|
|
|
+ if (nc == 3) {
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = *input_buf++;
|
|
|
+@@ -530,20 +543,23 @@ null_method(j_compress_ptr cinfo)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Module initialization routine for input colorspace conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_color_converter(j_compress_ptr cinfo)
|
|
|
++_jinit_color_converter(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cconvert_ptr cconvert;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ cconvert = (my_cconvert_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_color_converter));
|
|
|
+ cinfo->cconvert = (struct jpeg_color_converter *)cconvert;
|
|
|
+ /* set start_pass to null method until we find out differently */
|
|
|
+ cconvert->pub.start_pass = null_method;
|
|
|
+
|
|
|
+ /* Make sure input_components agrees with in_color_space */
|
|
|
+@@ -580,142 +596,137 @@ jinit_color_converter(j_compress_ptr cin
|
|
|
+ break;
|
|
|
+
|
|
|
+ default: /* JCS_UNKNOWN can be anything */
|
|
|
+ if (cinfo->input_components < 1)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+- /* Check num_components, set conversion method based on requested space */
|
|
|
++ /* Check num_components, set conversion method based on requested space.
|
|
|
++ * NOTE: We do not allow any lossy color conversion algorithms in lossless
|
|
|
++ * mode.
|
|
|
++ */
|
|
|
+ switch (cinfo->jpeg_color_space) {
|
|
|
+ case JCS_GRAYSCALE:
|
|
|
++ if (cinfo->master->lossless &&
|
|
|
++ cinfo->in_color_space != cinfo->jpeg_color_space)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ if (cinfo->num_components != 1)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
|
|
+ if (cinfo->in_color_space == JCS_GRAYSCALE)
|
|
|
+- cconvert->pub.color_convert = grayscale_convert;
|
|
|
+- else if (cinfo->in_color_space == JCS_RGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGBX ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGRX ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_XBGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_XRGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGBA ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGRA ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_ABGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_ARGB) {
|
|
|
++ cconvert->pub._color_convert = grayscale_convert;
|
|
|
++ else if (IsExtRGB(cinfo->in_color_space)) {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_rgb_gray())
|
|
|
+- cconvert->pub.color_convert = jsimd_rgb_gray_convert;
|
|
|
+- else {
|
|
|
++ cconvert->pub._color_convert = jsimd_rgb_gray_convert;
|
|
|
++ else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ cconvert->pub.start_pass = rgb_ycc_start;
|
|
|
+- cconvert->pub.color_convert = rgb_gray_convert;
|
|
|
++ cconvert->pub._color_convert = rgb_gray_convert;
|
|
|
+ }
|
|
|
+ } else if (cinfo->in_color_space == JCS_YCbCr)
|
|
|
+- cconvert->pub.color_convert = grayscale_convert;
|
|
|
++ cconvert->pub._color_convert = grayscale_convert;
|
|
|
+ else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_RGB:
|
|
|
++ if (cinfo->master->lossless && !IsExtRGB(cinfo->in_color_space))
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ if (cinfo->num_components != 3)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
|
|
+ if (rgb_red[cinfo->in_color_space] == 0 &&
|
|
|
+ rgb_green[cinfo->in_color_space] == 1 &&
|
|
|
+ rgb_blue[cinfo->in_color_space] == 2 &&
|
|
|
+ rgb_pixelsize[cinfo->in_color_space] == 3) {
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_c_can_null_convert())
|
|
|
+- cconvert->pub.color_convert = jsimd_c_null_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_c_null_convert;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
+- } else if (cinfo->in_color_space == JCS_RGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGBX ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGRX ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_XBGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_XRGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGBA ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGRA ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_ABGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_ARGB)
|
|
|
+- cconvert->pub.color_convert = rgb_rgb_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
++ } else if (IsExtRGB(cinfo->in_color_space))
|
|
|
++ cconvert->pub._color_convert = rgb_rgb_convert;
|
|
|
+ else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_YCbCr:
|
|
|
++ if (cinfo->master->lossless &&
|
|
|
++ cinfo->in_color_space != cinfo->jpeg_color_space)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ if (cinfo->num_components != 3)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
|
|
+- if (cinfo->in_color_space == JCS_RGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGBX ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGRX ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_XBGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_XRGB ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_RGBA ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_BGRA ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_ABGR ||
|
|
|
+- cinfo->in_color_space == JCS_EXT_ARGB) {
|
|
|
++ if (IsExtRGB(cinfo->in_color_space)) {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_rgb_ycc())
|
|
|
+- cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
|
|
|
+- else {
|
|
|
++ cconvert->pub._color_convert = jsimd_rgb_ycc_convert;
|
|
|
++ else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ cconvert->pub.start_pass = rgb_ycc_start;
|
|
|
+- cconvert->pub.color_convert = rgb_ycc_convert;
|
|
|
++ cconvert->pub._color_convert = rgb_ycc_convert;
|
|
|
+ }
|
|
|
+ } else if (cinfo->in_color_space == JCS_YCbCr) {
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_c_can_null_convert())
|
|
|
+- cconvert->pub.color_convert = jsimd_c_null_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_c_null_convert;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_CMYK:
|
|
|
++ if (cinfo->master->lossless &&
|
|
|
++ cinfo->in_color_space != cinfo->jpeg_color_space)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ if (cinfo->num_components != 4)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
|
|
+ if (cinfo->in_color_space == JCS_CMYK) {
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_c_can_null_convert())
|
|
|
+- cconvert->pub.color_convert = jsimd_c_null_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_c_null_convert;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_YCCK:
|
|
|
++ if (cinfo->master->lossless &&
|
|
|
++ cinfo->in_color_space != cinfo->jpeg_color_space)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ if (cinfo->num_components != 4)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
|
|
+ if (cinfo->in_color_space == JCS_CMYK) {
|
|
|
+ cconvert->pub.start_pass = rgb_ycc_start;
|
|
|
+- cconvert->pub.color_convert = cmyk_ycck_convert;
|
|
|
++ cconvert->pub._color_convert = cmyk_ycck_convert;
|
|
|
+ } else if (cinfo->in_color_space == JCS_YCCK) {
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_c_can_null_convert())
|
|
|
+- cconvert->pub.color_convert = jsimd_c_null_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_c_null_convert;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ default: /* allow null conversion of JCS_UNKNOWN */
|
|
|
+ if (cinfo->jpeg_color_space != cinfo->in_color_space ||
|
|
|
+ cinfo->num_components != cinfo->input_components)
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_c_can_null_convert())
|
|
|
+- cconvert->pub.color_convert = jsimd_c_null_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_c_null_convert;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jcdctmgr.c b/media/libjpeg/jcdctmgr.c
|
|
|
+--- a/media/libjpeg/jcdctmgr.c
|
|
|
++++ b/media/libjpeg/jcdctmgr.c
|
|
|
+@@ -1,17 +1,17 @@
|
|
|
+ /*
|
|
|
+ * jcdctmgr.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+- * Copyright (C) 2011, 2014-2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2011, 2014-2015, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the forward-DCT management logic.
|
|
|
+ * This code selects a particular DCT implementation to be used,
|
|
|
+ * and it performs related housekeeping chores including coefficient
|
|
|
+ * quantization.
|
|
|
+ */
|
|
|
+@@ -23,20 +23,20 @@
|
|
|
+ #include "jsimddct.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Private subobject for this module */
|
|
|
+
|
|
|
+ typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
|
|
|
+ typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
|
|
|
+
|
|
|
+-typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
|
|
|
++typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
|
|
|
+ JDIMENSION start_col,
|
|
|
+ DCTELEM *workspace);
|
|
|
+-typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
|
|
|
++typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
|
|
|
+ JDIMENSION start_col,
|
|
|
+ FAST_FLOAT *workspace);
|
|
|
+
|
|
|
+ typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
|
|
|
+ DCTELEM *workspace);
|
|
|
+ typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
|
|
|
+ FAST_FLOAT *divisors,
|
|
|
+ FAST_FLOAT *workspace);
|
|
|
+@@ -260,20 +260,24 @@ start_pass_fdctmgr(j_compress_ptr cinfo)
|
|
|
+ if (fdct->divisors[qtblno] == NULL) {
|
|
|
+ fdct->divisors[qtblno] = (DCTELEM *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (DCTSIZE2 * 4) * sizeof(DCTELEM));
|
|
|
+ }
|
|
|
+ dtbl = fdct->divisors[qtblno];
|
|
|
+ for (i = 0; i < DCTSIZE2; i++) {
|
|
|
+ #if BITS_IN_JSAMPLE == 8
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
|
|
|
+ fdct->quantize == jsimd_quantize)
|
|
|
+ fdct->quantize = quantize;
|
|
|
+ #else
|
|
|
++ compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
|
|
|
++#endif
|
|
|
++#else
|
|
|
+ dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_IFAST_SUPPORTED
|
|
|
+ case JDCT_IFAST:
|
|
|
+ {
|
|
|
+@@ -300,23 +304,30 @@ start_pass_fdctmgr(j_compress_ptr cinfo)
|
|
|
+ if (fdct->divisors[qtblno] == NULL) {
|
|
|
+ fdct->divisors[qtblno] = (DCTELEM *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (DCTSIZE2 * 4) * sizeof(DCTELEM));
|
|
|
+ }
|
|
|
+ dtbl = fdct->divisors[qtblno];
|
|
|
+ for (i = 0; i < DCTSIZE2; i++) {
|
|
|
+ #if BITS_IN_JSAMPLE == 8
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (!compute_reciprocal(
|
|
|
+ DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
|
|
|
+ (JLONG)aanscales[i]),
|
|
|
+ CONST_BITS - 3), &dtbl[i]) &&
|
|
|
+ fdct->quantize == jsimd_quantize)
|
|
|
+ fdct->quantize = quantize;
|
|
|
+ #else
|
|
|
++ compute_reciprocal(
|
|
|
++ DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
|
|
|
++ (JLONG)aanscales[i]),
|
|
|
++ CONST_BITS-3), &dtbl[i]);
|
|
|
++#endif
|
|
|
++#else
|
|
|
+ dtbl[i] = (DCTELEM)
|
|
|
+ DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
|
|
|
+ (JLONG)aanscales[i]),
|
|
|
+ CONST_BITS - 3);
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ }
|
|
|
+ break;
|
|
|
+@@ -365,40 +376,40 @@ start_pass_fdctmgr(j_compress_ptr cinfo)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Load data into workspace, applying unsigned->signed conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
|
|
|
++convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
|
|
|
+ {
|
|
|
+ register DCTELEM *workspaceptr;
|
|
|
+- register JSAMPROW elemptr;
|
|
|
++ register _JSAMPROW elemptr;
|
|
|
+ register int elemr;
|
|
|
+
|
|
|
+ workspaceptr = workspace;
|
|
|
+ for (elemr = 0; elemr < DCTSIZE; elemr++) {
|
|
|
+ elemptr = sample_data[elemr] + start_col;
|
|
|
+
|
|
|
+ #if DCTSIZE == 8 /* unroll the inner loop */
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
+ #else
|
|
|
+ {
|
|
|
+ register int elemc;
|
|
|
+ for (elemc = DCTSIZE; elemc > 0; elemc--)
|
|
|
+- *workspaceptr++ = (*elemptr++) - CENTERJSAMPLE;
|
|
|
++ *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Quantize/descale the coefficients, and store into coef_blocks[].
|
|
|
+@@ -483,17 +494,17 @@ quantize(JCOEFPTR coef_block, DCTELEM *d
|
|
|
+ *
|
|
|
+ * The input samples are taken from the sample_data[] array starting at
|
|
|
+ * position start_row/start_col, and moving to the right for any additional
|
|
|
+ * blocks. The quantized coefficients are returned in coef_blocks[].
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
|
++ _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
|
+ JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
|
|
|
+ /* This version is used for integer DCT implementations. */
|
|
|
+ {
|
|
|
+ /* This routine is heavily used, so it's worth coding it tightly. */
|
|
|
+ my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
|
|
|
+ DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
|
|
|
+ DCTELEM *workspace;
|
|
|
+ JDIMENSION bi;
|
|
|
+@@ -517,40 +528,40 @@ forward_DCT(j_compress_ptr cinfo, jpeg_c
|
|
|
+ (*do_quantize) (coef_blocks[bi], divisors, workspace);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ #ifdef DCT_FLOAT_SUPPORTED
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
|
|
|
++convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
|
|
|
+ FAST_FLOAT *workspace)
|
|
|
+ {
|
|
|
+ register FAST_FLOAT *workspaceptr;
|
|
|
+- register JSAMPROW elemptr;
|
|
|
++ register _JSAMPROW elemptr;
|
|
|
+ register int elemr;
|
|
|
+
|
|
|
+ workspaceptr = workspace;
|
|
|
+ for (elemr = 0; elemr < DCTSIZE; elemr++) {
|
|
|
+ elemptr = sample_data[elemr] + start_col;
|
|
|
+ #if DCTSIZE == 8 /* unroll the inner loop */
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
+ #else
|
|
|
+ {
|
|
|
+ register int elemc;
|
|
|
+ for (elemc = DCTSIZE; elemc > 0; elemc--)
|
|
|
+- *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - CENTERJSAMPLE);
|
|
|
++ *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
|
|
|
+@@ -572,17 +583,17 @@ quantize_float(JCOEFPTR coef_block, FAST
|
|
|
+ */
|
|
|
+ output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
|
++ _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
|
+ JDIMENSION start_row, JDIMENSION start_col,
|
|
|
+ JDIMENSION num_blocks)
|
|
|
+ /* This version is used for floating-point DCT implementations. */
|
|
|
+ {
|
|
|
+ /* This routine is heavily used, so it's worth coding it tightly. */
|
|
|
+ my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
|
|
|
+ FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
|
|
|
+ FAST_FLOAT *workspace;
|
|
|
+@@ -612,53 +623,62 @@ forward_DCT_float(j_compress_ptr cinfo,
|
|
|
+ #endif /* DCT_FLOAT_SUPPORTED */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize FDCT manager.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_forward_dct(j_compress_ptr cinfo)
|
|
|
++_jinit_forward_dct(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_fdct_ptr fdct;
|
|
|
+ int i;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ fdct = (my_fdct_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_fdct_controller));
|
|
|
+ cinfo->fdct = (struct jpeg_forward_dct *)fdct;
|
|
|
+ fdct->pub.start_pass = start_pass_fdctmgr;
|
|
|
+
|
|
|
+ /* First determine the DCT... */
|
|
|
+ switch (cinfo->dct_method) {
|
|
|
+ #ifdef DCT_ISLOW_SUPPORTED
|
|
|
+ case JDCT_ISLOW:
|
|
|
+- fdct->pub.forward_DCT = forward_DCT;
|
|
|
++ fdct->pub._forward_DCT = forward_DCT;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_fdct_islow())
|
|
|
+ fdct->dct = jsimd_fdct_islow;
|
|
|
+ else
|
|
|
+- fdct->dct = jpeg_fdct_islow;
|
|
|
++#endif
|
|
|
++ fdct->dct = _jpeg_fdct_islow;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_IFAST_SUPPORTED
|
|
|
+ case JDCT_IFAST:
|
|
|
+- fdct->pub.forward_DCT = forward_DCT;
|
|
|
++ fdct->pub._forward_DCT = forward_DCT;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_fdct_ifast())
|
|
|
+ fdct->dct = jsimd_fdct_ifast;
|
|
|
+ else
|
|
|
+- fdct->dct = jpeg_fdct_ifast;
|
|
|
++#endif
|
|
|
++ fdct->dct = _jpeg_fdct_ifast;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_FLOAT_SUPPORTED
|
|
|
+ case JDCT_FLOAT:
|
|
|
+- fdct->pub.forward_DCT = forward_DCT_float;
|
|
|
++ fdct->pub._forward_DCT = forward_DCT_float;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_fdct_float())
|
|
|
+ fdct->float_dct = jsimd_fdct_float;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ fdct->float_dct = jpeg_fdct_float;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ default:
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+@@ -666,35 +686,43 @@ jinit_forward_dct(j_compress_ptr cinfo)
|
|
|
+ switch (cinfo->dct_method) {
|
|
|
+ #ifdef DCT_ISLOW_SUPPORTED
|
|
|
+ case JDCT_ISLOW:
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_IFAST_SUPPORTED
|
|
|
+ case JDCT_IFAST:
|
|
|
+ #endif
|
|
|
+ #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_convsamp())
|
|
|
+ fdct->convsamp = jsimd_convsamp;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ fdct->convsamp = convsamp;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_quantize())
|
|
|
+ fdct->quantize = jsimd_quantize;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ fdct->quantize = quantize;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_FLOAT_SUPPORTED
|
|
|
+ case JDCT_FLOAT:
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_convsamp_float())
|
|
|
+ fdct->float_convsamp = jsimd_convsamp_float;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ fdct->float_convsamp = convsamp_float;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_quantize_float())
|
|
|
+ fdct->float_quantize = jsimd_quantize_float;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ fdct->float_quantize = quantize_float;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ default:
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jcdiffct.c b/media/libjpeg/jcdiffct.c
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jcdiffct.c
|
|
|
+@@ -0,0 +1,411 @@
|
|
|
++/*
|
|
|
++ * jcdiffct.c
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1994-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains the difference buffer controller for compression.
|
|
|
++ * This controller is the top level of the lossless JPEG compressor proper.
|
|
|
++ * The difference buffer lies between the prediction/differencing and entropy
|
|
|
++ * encoding steps.
|
|
|
++ */
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jinclude.h"
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jlossls.h" /* Private declarations for lossless codec */
|
|
|
++
|
|
|
++
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++/* We use a full-image sample buffer when doing Huffman optimization,
|
|
|
++ * and also for writing multiple-scan JPEG files. In all cases, the
|
|
|
++ * full-image buffer is filled during the first pass, and the scaling,
|
|
|
++ * prediction and differencing steps are run during subsequent passes.
|
|
|
++ */
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED
|
|
|
++#define FULL_SAMP_BUFFER_SUPPORTED
|
|
|
++#else
|
|
|
++#ifdef C_MULTISCAN_FILES_SUPPORTED
|
|
|
++#define FULL_SAMP_BUFFER_SUPPORTED
|
|
|
++#endif
|
|
|
++#endif
|
|
|
++
|
|
|
++
|
|
|
++/* Private buffer controller object */
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_c_coef_controller pub; /* public fields */
|
|
|
++
|
|
|
++ JDIMENSION iMCU_row_num; /* iMCU row # within image */
|
|
|
++ JDIMENSION mcu_ctr; /* counts MCUs processed in current row */
|
|
|
++ int MCU_vert_offset; /* counts MCU rows within iMCU row */
|
|
|
++ int MCU_rows_per_iMCU_row; /* number of such rows needed */
|
|
|
++
|
|
|
++ _JSAMPROW cur_row[MAX_COMPONENTS]; /* row of point-transformed samples */
|
|
|
++ _JSAMPROW prev_row[MAX_COMPONENTS]; /* previous row of Pt'd samples */
|
|
|
++ JDIFFARRAY diff_buf[MAX_COMPONENTS]; /* iMCU row of differences */
|
|
|
++
|
|
|
++ /* In multi-pass modes, we need a virtual sample array for each component. */
|
|
|
++ jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
|
|
|
++} my_diff_controller;
|
|
|
++
|
|
|
++typedef my_diff_controller *my_diff_ptr;
|
|
|
++
|
|
|
++
|
|
|
++/* Forward declarations */
|
|
|
++METHODDEF(boolean) compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf);
|
|
|
++#ifdef FULL_SAMP_BUFFER_SUPPORTED
|
|
|
++METHODDEF(boolean) compress_first_pass(j_compress_ptr cinfo,
|
|
|
++ _JSAMPIMAGE input_buf);
|
|
|
++METHODDEF(boolean) compress_output(j_compress_ptr cinfo,
|
|
|
++ _JSAMPIMAGE input_buf);
|
|
|
++#endif
|
|
|
++
|
|
|
++
|
|
|
++LOCAL(void)
|
|
|
++start_iMCU_row(j_compress_ptr cinfo)
|
|
|
++/* Reset within-iMCU-row counters for a new row */
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++
|
|
|
++ /* In an interleaved scan, an MCU row is the same as an iMCU row.
|
|
|
++ * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
|
|
|
++ * But at the bottom of the image, process only what's left.
|
|
|
++ */
|
|
|
++ if (cinfo->comps_in_scan > 1) {
|
|
|
++ diff->MCU_rows_per_iMCU_row = 1;
|
|
|
++ } else {
|
|
|
++ if (diff->iMCU_row_num < (cinfo->total_iMCU_rows-1))
|
|
|
++ diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
|
|
|
++ else
|
|
|
++ diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
|
|
|
++ }
|
|
|
++
|
|
|
++ diff->mcu_ctr = 0;
|
|
|
++ diff->MCU_vert_offset = 0;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for a processing pass.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_pass_diff(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++
|
|
|
++ /* Because it is hitching a ride on the jpeg_forward_dct struct,
|
|
|
++ * start_pass_lossless() will be called at the start of the initial pass.
|
|
|
++ * This ensures that it will be called at the start of the Huffman
|
|
|
++ * optimization and output passes as well.
|
|
|
++ */
|
|
|
++ if (pass_mode == JBUF_CRANK_DEST)
|
|
|
++ (*cinfo->fdct->start_pass) (cinfo);
|
|
|
++
|
|
|
++ diff->iMCU_row_num = 0;
|
|
|
++ start_iMCU_row(cinfo);
|
|
|
++
|
|
|
++ switch (pass_mode) {
|
|
|
++ case JBUF_PASS_THRU:
|
|
|
++ if (diff->whole_image[0] != NULL)
|
|
|
++ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
++ diff->pub._compress_data = compress_data;
|
|
|
++ break;
|
|
|
++#ifdef FULL_SAMP_BUFFER_SUPPORTED
|
|
|
++ case JBUF_SAVE_AND_PASS:
|
|
|
++ if (diff->whole_image[0] == NULL)
|
|
|
++ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
++ diff->pub._compress_data = compress_first_pass;
|
|
|
++ break;
|
|
|
++ case JBUF_CRANK_DEST:
|
|
|
++ if (diff->whole_image[0] == NULL)
|
|
|
++ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
++ diff->pub._compress_data = compress_output;
|
|
|
++ break;
|
|
|
++#endif
|
|
|
++ default:
|
|
|
++ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
++ break;
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++#define SWAP_ROWS(rowa, rowb) { \
|
|
|
++ _JSAMPROW temp = rowa; \
|
|
|
++ rowa = rowb; rowb = temp; \
|
|
|
++}
|
|
|
++
|
|
|
++/*
|
|
|
++ * Process some data in the single-pass case.
|
|
|
++ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
|
|
|
++ * per call, ie, v_samp_factor rows for each component in the image.
|
|
|
++ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
|
|
|
++ *
|
|
|
++ * NB: input_buf contains a plane for each component in image,
|
|
|
++ * which we index according to the component's SOF position.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(boolean)
|
|
|
++compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++ lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct;
|
|
|
++ JDIMENSION MCU_col_num; /* index of current MCU within row */
|
|
|
++ JDIMENSION MCU_count; /* number of MCUs encoded */
|
|
|
++ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
++ int ci, compi, yoffset, samp_row, samp_rows, samps_across;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ /* Loop to write as much as one whole iMCU row */
|
|
|
++ for (yoffset = diff->MCU_vert_offset; yoffset < diff->MCU_rows_per_iMCU_row;
|
|
|
++ yoffset++) {
|
|
|
++
|
|
|
++ MCU_col_num = diff->mcu_ctr;
|
|
|
++
|
|
|
++ /* Scale and predict each scanline of the MCU row separately.
|
|
|
++ *
|
|
|
++ * Note: We only do this if we are at the start of an MCU row, ie,
|
|
|
++ * we don't want to reprocess a row suspended by the output.
|
|
|
++ */
|
|
|
++ if (MCU_col_num == 0) {
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ compi = compptr->component_index;
|
|
|
++ if (diff->iMCU_row_num < last_iMCU_row)
|
|
|
++ samp_rows = compptr->v_samp_factor;
|
|
|
++ else {
|
|
|
++ /* NB: can't use last_row_height here, since may not be set! */
|
|
|
++ samp_rows =
|
|
|
++ (int)(compptr->height_in_blocks % compptr->v_samp_factor);
|
|
|
++ if (samp_rows == 0) samp_rows = compptr->v_samp_factor;
|
|
|
++ else {
|
|
|
++ /* Fill dummy difference rows at the bottom edge with zeros, which
|
|
|
++ * will encode to the smallest amount of data.
|
|
|
++ */
|
|
|
++ for (samp_row = samp_rows; samp_row < compptr->v_samp_factor;
|
|
|
++ samp_row++)
|
|
|
++ memset(diff->diff_buf[compi][samp_row], 0,
|
|
|
++ jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor) * sizeof(JDIFF));
|
|
|
++ }
|
|
|
++ }
|
|
|
++ samps_across = compptr->width_in_blocks;
|
|
|
++
|
|
|
++ for (samp_row = 0; samp_row < samp_rows; samp_row++) {
|
|
|
++ (*losslessc->scaler_scale) (cinfo,
|
|
|
++ input_buf[compi][samp_row],
|
|
|
++ diff->cur_row[compi],
|
|
|
++ samps_across);
|
|
|
++ (*losslessc->predict_difference[compi])
|
|
|
++ (cinfo, compi, diff->cur_row[compi], diff->prev_row[compi],
|
|
|
++ diff->diff_buf[compi][samp_row], samps_across);
|
|
|
++ SWAP_ROWS(diff->cur_row[compi], diff->prev_row[compi]);
|
|
|
++ }
|
|
|
++ }
|
|
|
++ }
|
|
|
++ /* Try to write the MCU row (or remaining portion of suspended MCU row). */
|
|
|
++ MCU_count =
|
|
|
++ (*cinfo->entropy->encode_mcus) (cinfo,
|
|
|
++ diff->diff_buf, yoffset, MCU_col_num,
|
|
|
++ cinfo->MCUs_per_row - MCU_col_num);
|
|
|
++ if (MCU_count != cinfo->MCUs_per_row - MCU_col_num) {
|
|
|
++ /* Suspension forced; update state counters and exit */
|
|
|
++ diff->MCU_vert_offset = yoffset;
|
|
|
++ diff->mcu_ctr += MCU_col_num;
|
|
|
++ return FALSE;
|
|
|
++ }
|
|
|
++ /* Completed an MCU row, but perhaps not an iMCU row */
|
|
|
++ diff->mcu_ctr = 0;
|
|
|
++ }
|
|
|
++ /* Completed the iMCU row, advance counters for next one */
|
|
|
++ diff->iMCU_row_num++;
|
|
|
++ start_iMCU_row(cinfo);
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++#ifdef FULL_SAMP_BUFFER_SUPPORTED
|
|
|
++
|
|
|
++/*
|
|
|
++ * Process some data in the first pass of a multi-pass case.
|
|
|
++ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
|
|
|
++ * per call, ie, v_samp_factor rows for each component in the image.
|
|
|
++ * This amount of data is read from the source buffer and saved into the
|
|
|
++ * virtual arrays.
|
|
|
++ *
|
|
|
++ * We must also emit the data to the compressor. This is conveniently
|
|
|
++ * done by calling compress_output() after we've loaded the current strip
|
|
|
++ * of the virtual arrays.
|
|
|
++ *
|
|
|
++ * NB: input_buf contains a plane for each component in image. All components
|
|
|
++ * are loaded into the virtual arrays in this pass. However, it may be that
|
|
|
++ * only a subset of the components are emitted to the compressor during
|
|
|
++ * this first pass; be careful about looking at the scan-dependent variables
|
|
|
++ * (MCU dimensions, etc).
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(boolean)
|
|
|
++compress_first_pass(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
++ JDIMENSION samps_across;
|
|
|
++ int ci, samp_row, samp_rows;
|
|
|
++ _JSAMPARRAY buffer;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ /* Align the virtual buffer for this component. */
|
|
|
++ buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
++ ((j_common_ptr)cinfo, diff->whole_image[ci],
|
|
|
++ diff->iMCU_row_num * compptr->v_samp_factor,
|
|
|
++ (JDIMENSION)compptr->v_samp_factor, TRUE);
|
|
|
++
|
|
|
++ /* Count non-dummy sample rows in this iMCU row. */
|
|
|
++ if (diff->iMCU_row_num < last_iMCU_row)
|
|
|
++ samp_rows = compptr->v_samp_factor;
|
|
|
++ else {
|
|
|
++ /* NB: can't use last_row_height here, since may not be set! */
|
|
|
++ samp_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
|
|
|
++ if (samp_rows == 0) samp_rows = compptr->v_samp_factor;
|
|
|
++ }
|
|
|
++ samps_across = compptr->width_in_blocks;
|
|
|
++
|
|
|
++ /* Perform point transform scaling and prediction/differencing for all
|
|
|
++ * non-dummy rows in this iMCU row. Each call on these functions
|
|
|
++ * processes a complete row of samples.
|
|
|
++ */
|
|
|
++ for (samp_row = 0; samp_row < samp_rows; samp_row++) {
|
|
|
++ memcpy(buffer[samp_row], input_buf[ci][samp_row],
|
|
|
++ samps_across * sizeof(_JSAMPLE));
|
|
|
++ }
|
|
|
++ }
|
|
|
++ /* NB: compress_output will increment iMCU_row_num if successful.
|
|
|
++ * A suspension return will result in redoing all the work above next time.
|
|
|
++ */
|
|
|
++
|
|
|
++ /* Emit data to the compressor, sharing code with subsequent passes */
|
|
|
++ return compress_output(cinfo, input_buf);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Process some data in subsequent passes of a multi-pass case.
|
|
|
++ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
|
|
|
++ * per call, ie, v_samp_factor rows for each component in the scan.
|
|
|
++ * The data is obtained from the virtual arrays and fed to the compressor.
|
|
|
++ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
|
|
|
++ *
|
|
|
++ * NB: input_buf is ignored; it is likely to be a NULL pointer.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(boolean)
|
|
|
++compress_output(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++ int ci, compi;
|
|
|
++ _JSAMPARRAY buffer[MAX_COMPS_IN_SCAN];
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ /* Align the virtual buffers for the components used in this scan.
|
|
|
++ * NB: during first pass, this is safe only because the buffers will
|
|
|
++ * already be aligned properly, so jmemmgr.c won't need to do any I/O.
|
|
|
++ */
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ compi = compptr->component_index;
|
|
|
++ buffer[compi] = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
++ ((j_common_ptr)cinfo, diff->whole_image[compi],
|
|
|
++ diff->iMCU_row_num * compptr->v_samp_factor,
|
|
|
++ (JDIMENSION)compptr->v_samp_factor, FALSE);
|
|
|
++ }
|
|
|
++
|
|
|
++ return compress_data(cinfo, buffer);
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* FULL_SAMP_BUFFER_SUPPORTED */
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize difference buffer controller.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++_jinit_c_diff_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
++{
|
|
|
++ my_diff_ptr diff;
|
|
|
++ int ci, row;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ diff = (my_diff_ptr)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ sizeof(my_diff_controller));
|
|
|
++ cinfo->coef = (struct jpeg_c_coef_controller *)diff;
|
|
|
++ diff->pub.start_pass = start_pass_diff;
|
|
|
++
|
|
|
++ /* Create the prediction row buffers. */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ diff->cur_row[ci] = *(_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
++ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)1);
|
|
|
++ diff->prev_row[ci] = *(_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
++ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)1);
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Create the difference buffer. */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ diff->diff_buf[ci] =
|
|
|
++ ALLOC_DARRAY(JPOOL_IMAGE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)compptr->v_samp_factor);
|
|
|
++ /* Prefill difference rows with zeros. We do this because only actual
|
|
|
++ * data is placed in the buffers during prediction/differencing, leaving
|
|
|
++ * any dummy differences at the right edge as zeros, which will encode
|
|
|
++ * to the smallest amount of data.
|
|
|
++ */
|
|
|
++ for (row = 0; row < compptr->v_samp_factor; row++)
|
|
|
++ memset(diff->diff_buf[ci][row], 0,
|
|
|
++ jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor) * sizeof(JDIFF));
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Create the sample buffer. */
|
|
|
++ if (need_full_buffer) {
|
|
|
++#ifdef FULL_SAMP_BUFFER_SUPPORTED
|
|
|
++ /* Allocate a full-image virtual array for each component, */
|
|
|
++ /* padded to a multiple of samp_factor differences in each direction. */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ diff->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
|
|
|
++ ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)jround_up((long)compptr->height_in_blocks,
|
|
|
++ (long)compptr->v_samp_factor),
|
|
|
++ (JDIMENSION)compptr->v_samp_factor);
|
|
|
++ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
++#endif
|
|
|
++ } else
|
|
|
++ diff->whole_image[0] = NULL; /* flag for no virtual arrays */
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* C_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jchuff.c b/media/libjpeg/jchuff.c
|
|
|
+--- a/media/libjpeg/jchuff.c
|
|
|
++++ b/media/libjpeg/jchuff.c
|
|
|
+@@ -1,18 +1,21 @@
|
|
|
+ /*
|
|
|
+ * jchuff.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009-2011, 2014-2016, 2018-2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.
|
|
|
+ * Copyright (C) 2015, Matthieu Darbois.
|
|
|
+ * Copyright (C) 2018, Matthias Räncker.
|
|
|
+ * Copyright (C) 2020, Arm Limited.
|
|
|
++ * Copyright (C) 2022, Felix Hanau.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains Huffman entropy encoding routines.
|
|
|
+ *
|
|
|
+ * Much of the complexity here has to do with supporting output suspension.
|
|
|
+ * If the data destination module demands suspension, we want to be able to
|
|
|
+ * back up to the start of the current MCU. To do this, we copy state
|
|
|
+@@ -21,53 +24,23 @@
|
|
|
+ *
|
|
|
+ * NOTE: All referenced figures are from
|
|
|
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ #include "jsimd.h"
|
|
|
++#else
|
|
|
++#include "jchuff.h" /* Declarations shared with jc*huff.c */
|
|
|
++#endif
|
|
|
+ #include <limits.h>
|
|
|
+-
|
|
|
+-/*
|
|
|
+- * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
|
|
+- * used for bit counting rather than the lookup table. This will reduce the
|
|
|
+- * memory footprint by 64k, which is important for some mobile applications
|
|
|
+- * that create many isolated instances of libjpeg-turbo (web browsers, for
|
|
|
+- * instance.) This may improve performance on some mobile platforms as well.
|
|
|
+- * This feature is enabled by default only on Arm processors, because some x86
|
|
|
+- * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
|
|
+- * shown to have a significant performance impact even on the x86 chips that
|
|
|
+- * have a fast implementation of it. When building for Armv6, you can
|
|
|
+- * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
|
|
+- * flags (this defines __thumb__).
|
|
|
+- */
|
|
|
+-
|
|
|
+-/* NOTE: Both GCC and Clang define __GNUC__ */
|
|
|
+-#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
|
|
+- defined(_M_ARM) || defined(_M_ARM64)
|
|
|
+-#if !defined(__thumb__) || defined(__thumb2__)
|
|
|
+-#define USE_CLZ_INTRINSIC
|
|
|
+-#endif
|
|
|
+-#endif
|
|
|
+-
|
|
|
+-#ifdef USE_CLZ_INTRINSIC
|
|
|
+-#if defined(_MSC_VER) && !defined(__clang__)
|
|
|
+-#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
|
|
|
+-#else
|
|
|
+-#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
|
|
+-#endif
|
|
|
+-#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
|
|
+-#else
|
|
|
+-#include "jpeg_nbits_table.h"
|
|
|
+-#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
|
|
+-#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
|
|
+-#endif
|
|
|
++#include "jpeg_nbits.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Expanded entropy encoder object for Huffman encoding.
|
|
|
+ *
|
|
|
+ * The savable_state subrecord contains fields that change within an MCU,
|
|
|
+ * but must not be updated permanently until we complete the MCU.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -96,17 +69,19 @@ typedef bit_buf_type simd_bit_buf_type;
|
|
|
+ #else
|
|
|
+ #error Cannot determine word size
|
|
|
+ #endif
|
|
|
+ #define SIMD_BIT_BUF_SIZE (sizeof(simd_bit_buf_type) * 8)
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ union {
|
|
|
+ bit_buf_type c;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ simd_bit_buf_type simd;
|
|
|
++#endif
|
|
|
+ } put_buffer; /* current bit accumulation buffer */
|
|
|
+ int free_bits; /* # of bits available in it */
|
|
|
+ /* (Neon GAS: # of bits now in it) */
|
|
|
+ int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
|
|
|
+ } savable_state;
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_entropy_encoder pub; /* public fields */
|
|
|
+@@ -121,31 +96,35 @@ typedef struct {
|
|
|
+ c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];
|
|
|
+ c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];
|
|
|
+
|
|
|
+ #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */
|
|
|
+ long *dc_count_ptrs[NUM_HUFF_TBLS];
|
|
|
+ long *ac_count_ptrs[NUM_HUFF_TBLS];
|
|
|
+ #endif
|
|
|
+
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ int simd;
|
|
|
++#endif
|
|
|
+ } huff_entropy_encoder;
|
|
|
+
|
|
|
+ typedef huff_entropy_encoder *huff_entropy_ptr;
|
|
|
+
|
|
|
+ /* Working state while writing an MCU.
|
|
|
+ * This struct contains all the fields that are needed by subroutines.
|
|
|
+ */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ JOCTET *next_output_byte; /* => next byte to write in buffer */
|
|
|
+ size_t free_in_buffer; /* # of byte spaces remaining in buffer */
|
|
|
+ savable_state cur; /* Current bit buffer & DC state */
|
|
|
+ j_compress_ptr cinfo; /* dump_buffer needs access to this */
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ int simd;
|
|
|
++#endif
|
|
|
+ } working_state;
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+ METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data);
|
|
|
+ METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);
|
|
|
+ #ifdef ENTROPY_OPT_SUPPORTED
|
|
|
+ METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo,
|
|
|
+@@ -174,17 +153,19 @@ start_pass_huff(j_compress_ptr cinfo, bo
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+ entropy->pub.encode_mcu = encode_mcu_huff;
|
|
|
+ entropy->pub.finish_pass = finish_pass_huff;
|
|
|
+ }
|
|
|
+
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ entropy->simd = jsimd_can_huff_encode_one_block();
|
|
|
++#endif
|
|
|
+
|
|
|
+ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+ dctbl = compptr->dc_tbl_no;
|
|
|
+ actbl = compptr->ac_tbl_no;
|
|
|
+ if (gather_statistics) {
|
|
|
+ #ifdef ENTROPY_OPT_SUPPORTED
|
|
|
+ /* Check for invalid table indexes */
|
|
|
+@@ -214,39 +195,42 @@ start_pass_huff(j_compress_ptr cinfo, bo
|
|
|
+ jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
|
|
|
+ &entropy->ac_derived_tbls[actbl]);
|
|
|
+ }
|
|
|
+ /* Initialize DC predictions to 0 */
|
|
|
+ entropy->saved.last_dc_val[ci] = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Initialize bit buffer to empty */
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (entropy->simd) {
|
|
|
+ entropy->saved.put_buffer.simd = 0;
|
|
|
+ #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
|
|
|
+ entropy->saved.free_bits = 0;
|
|
|
+ #else
|
|
|
+ entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
|
|
|
+ #endif
|
|
|
+- } else {
|
|
|
++ } else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ entropy->saved.put_buffer.c = 0;
|
|
|
+ entropy->saved.free_bits = BIT_BUF_SIZE;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Initialize restart stuff */
|
|
|
+ entropy->restarts_to_go = cinfo->restart_interval;
|
|
|
+ entropy->next_restart_num = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Compute the derived values for a Huffman table.
|
|
|
+ * This routine also performs some validation checks on the table.
|
|
|
+ *
|
|
|
+- * Note this is also used by jcphuff.c.
|
|
|
++ * Note this is also used by jcphuff.c and jclhuff.c.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
|
|
|
+ c_derived_tbl **pdtbl)
|
|
|
+ {
|
|
|
+ JHUFF_TBL *htbl;
|
|
|
+ c_derived_tbl *dtbl;
|
|
|
+@@ -312,22 +296,22 @@ jpeg_make_c_derived_tbl(j_compress_ptr c
|
|
|
+
|
|
|
+ /* Set all codeless symbols to have code length 0;
|
|
|
+ * this lets us detect duplicate VAL entries here, and later
|
|
|
+ * allows emit_bits to detect any attempt to emit such symbols.
|
|
|
+ */
|
|
|
+ memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));
|
|
|
+ memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));
|
|
|
+
|
|
|
+- /* This is also a convenient place to check for out-of-range
|
|
|
+- * and duplicated VAL entries. We allow 0..255 for AC symbols
|
|
|
+- * but only 0..15 for DC. (We could constrain them further
|
|
|
+- * based on data depth and mode, but this seems enough.)
|
|
|
++ /* This is also a convenient place to check for out-of-range and duplicated
|
|
|
++ * VAL entries. We allow 0..255 for AC symbols but only 0..15 for DC in
|
|
|
++ * lossy mode and 0..16 for DC in lossless mode. (We could constrain them
|
|
|
++ * further based on data depth and mode, but this seems enough.)
|
|
|
+ */
|
|
|
+- maxsymbol = isDC ? 15 : 255;
|
|
|
++ maxsymbol = isDC ? (cinfo->master->lossless ? 16 : 15) : 255;
|
|
|
+
|
|
|
+ for (p = 0; p < lastp; p++) {
|
|
|
+ i = htbl->huffval[p];
|
|
|
+ if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
|
|
|
+ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
|
|
|
+ dtbl->ehufco[i] = huffcode[p];
|
|
|
+ dtbl->ehufsi[i] = huffsize[p];
|
|
|
+ }
|
|
|
+@@ -494,24 +478,27 @@ dump_buffer(working_state *state)
|
|
|
+
|
|
|
+ LOCAL(boolean)
|
|
|
+ flush_bits(working_state *state)
|
|
|
+ {
|
|
|
+ JOCTET _buffer[BUFSIZE], *buffer, temp;
|
|
|
+ simd_bit_buf_type put_buffer; int put_bits;
|
|
|
+ int localbuf = 0;
|
|
|
+
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (state->simd) {
|
|
|
+ #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
|
|
|
+ put_bits = state->cur.free_bits;
|
|
|
+ #else
|
|
|
+ put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
|
|
|
+ #endif
|
|
|
+ put_buffer = state->cur.put_buffer.simd;
|
|
|
+- } else {
|
|
|
++ } else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ put_bits = BIT_BUF_SIZE - state->cur.free_bits;
|
|
|
+ put_buffer = state->cur.put_buffer.c;
|
|
|
+ }
|
|
|
+
|
|
|
+ LOAD_BUFFER()
|
|
|
+
|
|
|
+ while (put_bits >= 8) {
|
|
|
+ put_bits -= 8;
|
|
|
+@@ -519,33 +506,38 @@ flush_bits(working_state *state)
|
|
|
+ EMIT_BYTE(temp)
|
|
|
+ }
|
|
|
+ if (put_bits) {
|
|
|
+ /* fill partial byte with ones */
|
|
|
+ temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));
|
|
|
+ EMIT_BYTE(temp)
|
|
|
+ }
|
|
|
+
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (state->simd) { /* and reset bit buffer to empty */
|
|
|
+ state->cur.put_buffer.simd = 0;
|
|
|
+ #if defined(__aarch64__) && !defined(NEON_INTRINSICS)
|
|
|
+ state->cur.free_bits = 0;
|
|
|
+ #else
|
|
|
+ state->cur.free_bits = SIMD_BIT_BUF_SIZE;
|
|
|
+ #endif
|
|
|
+- } else {
|
|
|
++ } else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ state->cur.put_buffer.c = 0;
|
|
|
+ state->cur.free_bits = BIT_BUF_SIZE;
|
|
|
+ }
|
|
|
+ STORE_BUFFER()
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
++#ifdef WITH_SIMD
|
|
|
++
|
|
|
+ /* Encode a single block's worth of coefficients */
|
|
|
+
|
|
|
+ LOCAL(boolean)
|
|
|
+ encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
|
|
|
+ c_derived_tbl *dctbl, c_derived_tbl *actbl)
|
|
|
+ {
|
|
|
+ JOCTET _buffer[BUFSIZE], *buffer;
|
|
|
+ int localbuf = 0;
|
|
|
+@@ -555,24 +547,27 @@ encode_one_block_simd(working_state *sta
|
|
|
+ buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val,
|
|
|
+ dctbl, actbl);
|
|
|
+
|
|
|
+ STORE_BUFFER()
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
++#endif
|
|
|
++
|
|
|
+ LOCAL(boolean)
|
|
|
+ encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
|
|
|
+ c_derived_tbl *dctbl, c_derived_tbl *actbl)
|
|
|
+ {
|
|
|
+ int temp, nbits, free_bits;
|
|
|
+ bit_buf_type put_buffer;
|
|
|
+ JOCTET _buffer[BUFSIZE], *buffer;
|
|
|
+ int localbuf = 0;
|
|
|
++ int max_coef_bits = state->cinfo->data_precision + 2;
|
|
|
+
|
|
|
+ free_bits = state->cur.free_bits;
|
|
|
+ put_buffer = state->cur.put_buffer.c;
|
|
|
+ LOAD_BUFFER()
|
|
|
+
|
|
|
+ /* Encode the DC coefficient difference per section F.1.2.1 */
|
|
|
+
|
|
|
+ temp = block[0] - last_dc_val;
|
|
|
+@@ -583,16 +578,21 @@ encode_one_block(working_state *state, J
|
|
|
+ * Agner Fog. This code assumes we are on a two's complement machine.
|
|
|
+ */
|
|
|
+ nbits = temp >> (CHAR_BIT * sizeof(int) - 1);
|
|
|
+ temp += nbits;
|
|
|
+ nbits ^= temp;
|
|
|
+
|
|
|
+ /* Find the number of bits needed for the magnitude of the coefficient */
|
|
|
+ nbits = JPEG_NBITS(nbits);
|
|
|
++ /* Check for out-of-range coefficient values.
|
|
|
++ * Since we're encoding a difference, the range limit is twice as much.
|
|
|
++ */
|
|
|
++ if (nbits > max_coef_bits + 1)
|
|
|
++ ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
|
|
|
+
|
|
|
+ /* Emit the Huffman-coded symbol for the number of bits.
|
|
|
+ * Emit that number of bits of the value, if positive,
|
|
|
+ * or the complement of its magnitude, if negative.
|
|
|
+ */
|
|
|
+ PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])
|
|
|
+
|
|
|
+ /* Encode the AC coefficients per section F.1.2.2 */
|
|
|
+@@ -608,16 +608,19 @@ encode_one_block(working_state *state, J
|
|
|
+ if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
|
|
|
+ r += 16; \
|
|
|
+ } else { \
|
|
|
+ /* Branch-less absolute value, bitwise complement, etc., same as above */ \
|
|
|
+ nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \
|
|
|
+ temp += nbits; \
|
|
|
+ nbits ^= temp; \
|
|
|
+ nbits = JPEG_NBITS_NONZERO(nbits); \
|
|
|
++ /* Check for out-of-range coefficient values */ \
|
|
|
++ if (nbits > max_coef_bits) \
|
|
|
++ ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); \
|
|
|
+ /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
|
|
|
+ while (r >= 16 * 16) { \
|
|
|
+ r -= 16 * 16; \
|
|
|
+ PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \
|
|
|
+ } \
|
|
|
+ /* Emit Huffman symbol for run length / number of bits */ \
|
|
|
+ r += nbits; \
|
|
|
+ PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \
|
|
|
+@@ -689,39 +692,44 @@ encode_mcu_huff(j_compress_ptr cinfo, JB
|
|
|
+ int blkn, ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+
|
|
|
+ /* Load up working state */
|
|
|
+ state.next_output_byte = cinfo->dest->next_output_byte;
|
|
|
+ state.free_in_buffer = cinfo->dest->free_in_buffer;
|
|
|
+ state.cur = entropy->saved;
|
|
|
+ state.cinfo = cinfo;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ state.simd = entropy->simd;
|
|
|
++#endif
|
|
|
+
|
|
|
+ /* Emit restart marker if needed */
|
|
|
+ if (cinfo->restart_interval) {
|
|
|
+ if (entropy->restarts_to_go == 0)
|
|
|
+ if (!emit_restart(&state, entropy->next_restart_num))
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Encode the MCU data blocks */
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (entropy->simd) {
|
|
|
+ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
|
|
+ ci = cinfo->MCU_membership[blkn];
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+ if (!encode_one_block_simd(&state,
|
|
|
+ MCU_data[blkn][0], state.cur.last_dc_val[ci],
|
|
|
+ entropy->dc_derived_tbls[compptr->dc_tbl_no],
|
|
|
+ entropy->ac_derived_tbls[compptr->ac_tbl_no]))
|
|
|
+ return FALSE;
|
|
|
+ /* Update last_dc_val */
|
|
|
+ state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
|
|
|
+ }
|
|
|
+- } else {
|
|
|
++ } else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
|
|
+ ci = cinfo->MCU_membership[blkn];
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+ if (!encode_one_block(&state,
|
|
|
+ MCU_data[blkn][0], state.cur.last_dc_val[ci],
|
|
|
+ entropy->dc_derived_tbls[compptr->dc_tbl_no],
|
|
|
+ entropy->ac_derived_tbls[compptr->ac_tbl_no]))
|
|
|
+ return FALSE;
|
|
|
+@@ -759,17 +767,19 @@ finish_pass_huff(j_compress_ptr cinfo)
|
|
|
+ huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
|
|
|
+ working_state state;
|
|
|
+
|
|
|
+ /* Load up working state ... flush_bits needs it */
|
|
|
+ state.next_output_byte = cinfo->dest->next_output_byte;
|
|
|
+ state.free_in_buffer = cinfo->dest->free_in_buffer;
|
|
|
+ state.cur = entropy->saved;
|
|
|
+ state.cinfo = cinfo;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ state.simd = entropy->simd;
|
|
|
++#endif
|
|
|
+
|
|
|
+ /* Flush out the last data */
|
|
|
+ if (!flush_bits(&state))
|
|
|
+ ERREXIT(cinfo, JERR_CANT_SUSPEND);
|
|
|
+
|
|
|
+ /* Update state */
|
|
|
+ cinfo->dest->next_output_byte = state.next_output_byte;
|
|
|
+ cinfo->dest->free_in_buffer = state.free_in_buffer;
|
|
|
+@@ -795,33 +805,34 @@ finish_pass_huff(j_compress_ptr cinfo)
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
|
|
|
+ long dc_counts[], long ac_counts[])
|
|
|
+ {
|
|
|
+ register int temp;
|
|
|
+ register int nbits;
|
|
|
+ register int k, r;
|
|
|
++ int max_coef_bits = cinfo->data_precision + 2;
|
|
|
+
|
|
|
+ /* Encode the DC coefficient difference per section F.1.2.1 */
|
|
|
+
|
|
|
+ temp = block[0] - last_dc_val;
|
|
|
+ if (temp < 0)
|
|
|
+ temp = -temp;
|
|
|
+
|
|
|
+ /* Find the number of bits needed for the magnitude of the coefficient */
|
|
|
+ nbits = 0;
|
|
|
+ while (temp) {
|
|
|
+ nbits++;
|
|
|
+ temp >>= 1;
|
|
|
+ }
|
|
|
+ /* Check for out-of-range coefficient values.
|
|
|
+ * Since we're encoding a difference, the range limit is twice as much.
|
|
|
+ */
|
|
|
+- if (nbits > MAX_COEF_BITS + 1)
|
|
|
++ if (nbits > max_coef_bits + 1)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_DCT_COEF);
|
|
|
+
|
|
|
+ /* Count the Huffman symbol for the number of bits */
|
|
|
+ dc_counts[nbits]++;
|
|
|
+
|
|
|
+ /* Encode the AC coefficients per section F.1.2.2 */
|
|
|
+
|
|
|
+ r = 0; /* r = run length of zeros */
|
|
|
+@@ -840,17 +851,17 @@ htest_one_block(j_compress_ptr cinfo, JC
|
|
|
+ if (temp < 0)
|
|
|
+ temp = -temp;
|
|
|
+
|
|
|
+ /* Find the number of bits needed for the magnitude of the coefficient */
|
|
|
+ nbits = 1; /* there must be at least one 1 bit */
|
|
|
+ while ((temp >>= 1))
|
|
|
+ nbits++;
|
|
|
+ /* Check for out-of-range coefficient values */
|
|
|
+- if (nbits > MAX_COEF_BITS)
|
|
|
++ if (nbits > max_coef_bits)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_DCT_COEF);
|
|
|
+
|
|
|
+ /* Count Huffman symbol for run length / number of bits */
|
|
|
+ ac_counts[(r << 4) + nbits]++;
|
|
|
+
|
|
|
+ r = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+@@ -895,17 +906,17 @@ encode_mcu_gather(j_compress_ptr cinfo,
|
|
|
+ }
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Generate the best Huffman code table for the given counts, fill htbl.
|
|
|
+- * Note this is also used by jcphuff.c.
|
|
|
++ * Note this is also used by jcphuff.c and jclhuff.c.
|
|
|
+ *
|
|
|
+ * The JPEG standard requires that no symbol be assigned a codeword of all
|
|
|
+ * one bits (so that padding bits added at the end of a compressed segment
|
|
|
+ * can't look like a valid code). Because of the canonical ordering of
|
|
|
+ * codewords, this just means that there must be an unused slot in the
|
|
|
+ * longest codeword length category. Annex K (Clause K.2) of
|
|
|
+ * Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot
|
|
|
+ * by pretending that symbol 256 is a valid symbol with count 1. In theory
|
|
|
+@@ -927,67 +938,87 @@ encode_mcu_gather(j_compress_ptr cinfo,
|
|
|
+ * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
|
|
|
+ {
|
|
|
+ #define MAX_CLEN 32 /* assumed maximum initial code length */
|
|
|
+ UINT8 bits[MAX_CLEN + 1]; /* bits[k] = # of symbols with code length k */
|
|
|
++ int bit_pos[MAX_CLEN + 1]; /* # of symbols with smaller code length */
|
|
|
+ int codesize[257]; /* codesize[k] = code length of symbol k */
|
|
|
++ int nz_index[257]; /* index of nonzero symbol in the original freq
|
|
|
++ array */
|
|
|
+ int others[257]; /* next symbol in current branch of tree */
|
|
|
+ int c1, c2;
|
|
|
+ int p, i, j;
|
|
|
+- long v;
|
|
|
++ int num_nz_symbols;
|
|
|
++ long v, v2;
|
|
|
+
|
|
|
+ /* This algorithm is explained in section K.2 of the JPEG standard */
|
|
|
+
|
|
|
+ memset(bits, 0, sizeof(bits));
|
|
|
+ memset(codesize, 0, sizeof(codesize));
|
|
|
+ for (i = 0; i < 257; i++)
|
|
|
+ others[i] = -1; /* init links to empty */
|
|
|
+
|
|
|
+ freq[256] = 1; /* make sure 256 has a nonzero count */
|
|
|
+ /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
|
|
|
+ * that no real symbol is given code-value of all ones, because 256
|
|
|
+ * will be placed last in the largest codeword category.
|
|
|
+ */
|
|
|
+
|
|
|
++ /* Group nonzero frequencies together so we can more easily find the
|
|
|
++ * smallest.
|
|
|
++ */
|
|
|
++ num_nz_symbols = 0;
|
|
|
++ for (i = 0; i < 257; i++) {
|
|
|
++ if (freq[i]) {
|
|
|
++ nz_index[num_nz_symbols] = i;
|
|
|
++ freq[num_nz_symbols] = freq[i];
|
|
|
++ num_nz_symbols++;
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
+ /* Huffman's basic algorithm to assign optimal code lengths to symbols */
|
|
|
+
|
|
|
+ for (;;) {
|
|
|
+- /* Find the smallest nonzero frequency, set c1 = its symbol */
|
|
|
+- /* In case of ties, take the larger symbol number */
|
|
|
++ /* Find the two smallest nonzero frequencies; set c1, c2 = their symbols */
|
|
|
++ /* In case of ties, take the larger symbol number. Since we have grouped
|
|
|
++ * the nonzero symbols together, checking for zero symbols is not
|
|
|
++ * necessary.
|
|
|
++ */
|
|
|
+ c1 = -1;
|
|
|
+- v = 1000000000L;
|
|
|
+- for (i = 0; i <= 256; i++) {
|
|
|
+- if (freq[i] && freq[i] <= v) {
|
|
|
+- v = freq[i];
|
|
|
+- c1 = i;
|
|
|
+- }
|
|
|
+- }
|
|
|
+-
|
|
|
+- /* Find the next smallest nonzero frequency, set c2 = its symbol */
|
|
|
+- /* In case of ties, take the larger symbol number */
|
|
|
+ c2 = -1;
|
|
|
+ v = 1000000000L;
|
|
|
+- for (i = 0; i <= 256; i++) {
|
|
|
+- if (freq[i] && freq[i] <= v && i != c1) {
|
|
|
+- v = freq[i];
|
|
|
+- c2 = i;
|
|
|
++ v2 = 1000000000L;
|
|
|
++ for (i = 0; i < num_nz_symbols; i++) {
|
|
|
++ if (freq[i] <= v2) {
|
|
|
++ if (freq[i] <= v) {
|
|
|
++ c2 = c1;
|
|
|
++ v2 = v;
|
|
|
++ v = freq[i];
|
|
|
++ c1 = i;
|
|
|
++ } else {
|
|
|
++ v2 = freq[i];
|
|
|
++ c2 = i;
|
|
|
++ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Done if we've merged everything into one frequency */
|
|
|
+ if (c2 < 0)
|
|
|
+ break;
|
|
|
+
|
|
|
+ /* Else merge the two counts/trees */
|
|
|
+ freq[c1] += freq[c2];
|
|
|
+- freq[c2] = 0;
|
|
|
++ /* Set the frequency to a very high value instead of zero, so we don't have
|
|
|
++ * to check for zero values.
|
|
|
++ */
|
|
|
++ freq[c2] = 1000000001L;
|
|
|
+
|
|
|
+ /* Increment the codesize of everything in c1's tree branch */
|
|
|
+ codesize[c1]++;
|
|
|
+ while (others[c1] >= 0) {
|
|
|
+ c1 = others[c1];
|
|
|
+ codesize[c1]++;
|
|
|
+ }
|
|
|
+
|
|
|
+@@ -997,25 +1028,34 @@ jpeg_gen_optimal_table(j_compress_ptr ci
|
|
|
+ codesize[c2]++;
|
|
|
+ while (others[c2] >= 0) {
|
|
|
+ c2 = others[c2];
|
|
|
+ codesize[c2]++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Now count the number of symbols of each code length */
|
|
|
+- for (i = 0; i <= 256; i++) {
|
|
|
+- if (codesize[i]) {
|
|
|
+- /* The JPEG standard seems to think that this can't happen, */
|
|
|
+- /* but I'm paranoid... */
|
|
|
+- if (codesize[i] > MAX_CLEN)
|
|
|
+- ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
|
|
|
++ for (i = 0; i < num_nz_symbols; i++) {
|
|
|
++ /* The JPEG standard seems to think that this can't happen, */
|
|
|
++ /* but I'm paranoid... */
|
|
|
++ if (codesize[i] > MAX_CLEN)
|
|
|
++ ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
|
|
|
++
|
|
|
++ bits[codesize[i]]++;
|
|
|
++ }
|
|
|
+
|
|
|
+- bits[codesize[i]]++;
|
|
|
+- }
|
|
|
++ /* Count the number of symbols with a length smaller than i bits, so we can
|
|
|
++ * construct the symbol table more efficiently. Note that this includes the
|
|
|
++ * pseudo-symbol 256, but since it is the last symbol, it will not affect the
|
|
|
++ * table.
|
|
|
++ */
|
|
|
++ p = 0;
|
|
|
++ for (i = 1; i <= MAX_CLEN; i++) {
|
|
|
++ bit_pos[i] = p;
|
|
|
++ p += bits[i];
|
|
|
+ }
|
|
|
+
|
|
|
+ /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
|
|
|
+ * Huffman procedure assigned any such lengths, we must adjust the coding.
|
|
|
+ * Here is what Rec. ITU-T T.81 | ISO/IEC 10918-1 says about how this next
|
|
|
+ * bit works: Since symbols are paired for the longest Huffman code, the
|
|
|
+ * symbols are removed from this length category two at a time. The prefix
|
|
|
+ * for the pair (which is one bit shorter) is allocated to one of the pair;
|
|
|
+@@ -1045,24 +1085,19 @@ jpeg_gen_optimal_table(j_compress_ptr ci
|
|
|
+ /* Return final symbol counts (only for lengths 0..16) */
|
|
|
+ memcpy(htbl->bits, bits, sizeof(htbl->bits));
|
|
|
+
|
|
|
+ /* Return a list of the symbols sorted by code length */
|
|
|
+ /* It's not real clear to me why we don't need to consider the codelength
|
|
|
+ * changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think
|
|
|
+ * this works.
|
|
|
+ */
|
|
|
+- p = 0;
|
|
|
+- for (i = 1; i <= MAX_CLEN; i++) {
|
|
|
+- for (j = 0; j <= 255; j++) {
|
|
|
+- if (codesize[j] == i) {
|
|
|
+- htbl->huffval[p] = (UINT8)j;
|
|
|
+- p++;
|
|
|
+- }
|
|
|
+- }
|
|
|
++ for (i = 0; i < num_nz_symbols - 1; i++) {
|
|
|
++ htbl->huffval[bit_pos[codesize[i]]] = (UINT8)nz_index[i];
|
|
|
++ bit_pos[codesize[i]]++;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Set sent_table FALSE so updated table will be written to JPEG file. */
|
|
|
+ htbl->sent_table = FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+diff --git a/media/libjpeg/jchuff.h b/media/libjpeg/jchuff.h
|
|
|
+--- a/media/libjpeg/jchuff.h
|
|
|
++++ b/media/libjpeg/jchuff.h
|
|
|
+@@ -14,22 +14,16 @@
|
|
|
+ */
|
|
|
+
|
|
|
+ /* The legal range of a DCT coefficient is
|
|
|
+ * -1024 .. +1023 for 8-bit data;
|
|
|
+ * -16384 .. +16383 for 12-bit data.
|
|
|
+ * Hence the magnitude should always fit in 10 or 14 bits respectively.
|
|
|
+ */
|
|
|
+
|
|
|
+-#if BITS_IN_JSAMPLE == 8
|
|
|
+-#define MAX_COEF_BITS 10
|
|
|
+-#else
|
|
|
+-#define MAX_COEF_BITS 14
|
|
|
+-#endif
|
|
|
+-
|
|
|
+ /* The progressive Huffman encoder uses an unsigned 16-bit data type to store
|
|
|
+ * absolute values of coefficients, because it is possible to inject a
|
|
|
+ * coefficient value of -32768 into the encoder by attempting to transform a
|
|
|
+ * malformed 12-bit JPEG image, and the absolute value of -32768 would overflow
|
|
|
+ * a signed 16-bit integer.
|
|
|
+ */
|
|
|
+ typedef unsigned short UJCOEF;
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jcinit.c b/media/libjpeg/jcinit.c
|
|
|
+--- a/media/libjpeg/jcinit.c
|
|
|
++++ b/media/libjpeg/jcinit.c
|
|
|
+@@ -1,76 +1,145 @@
|
|
|
+ /*
|
|
|
+ * jcinit.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains initialization logic for the JPEG compressor.
|
|
|
+ * This routine is in charge of selecting the modules to be executed and
|
|
|
+ * making an initialization call to each one.
|
|
|
+ *
|
|
|
+ * Logically, this code belongs in jcmaster.c. It's split out because
|
|
|
+ * linking this routine implies linking the entire compression library.
|
|
|
+ * For a transcoding-only application, we want to be able to use jcmaster.c
|
|
|
+ * without linking in the whole library.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Master selection of compression modules.
|
|
|
+ * This is done once at the start of processing an image. We determine
|
|
|
+ * which modules will be used and give them appropriate initialization calls.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jinit_compress_master(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ /* Initialize master control (includes parameter checking/processing) */
|
|
|
+ jinit_c_master_control(cinfo, FALSE /* full compression */);
|
|
|
+
|
|
|
+ /* Preprocessing */
|
|
|
+ if (!cinfo->raw_data_in) {
|
|
|
+- jinit_color_converter(cinfo);
|
|
|
+- jinit_downsampler(cinfo);
|
|
|
+- jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++ if (cinfo->data_precision == 16) {
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ j16init_color_converter(cinfo);
|
|
|
++ j16init_downsampler(cinfo);
|
|
|
++ j16init_c_prep_controller(cinfo,
|
|
|
++ FALSE /* never need full buffer here */);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ } else if (cinfo->data_precision == 12) {
|
|
|
++ j12init_color_converter(cinfo);
|
|
|
++ j12init_downsampler(cinfo);
|
|
|
++ j12init_c_prep_controller(cinfo,
|
|
|
++ FALSE /* never need full buffer here */);
|
|
|
++ } else {
|
|
|
++ jinit_color_converter(cinfo);
|
|
|
++ jinit_downsampler(cinfo);
|
|
|
++ jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++ }
|
|
|
+ }
|
|
|
+- /* Forward DCT */
|
|
|
+- jinit_forward_dct(cinfo);
|
|
|
+- /* Entropy encoding: either Huffman or arithmetic coding. */
|
|
|
+- if (cinfo->arith_code) {
|
|
|
+-#ifdef C_ARITH_CODING_SUPPORTED
|
|
|
+- jinit_arith_encoder(cinfo);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless) {
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ /* Prediction, sample differencing, and point transform */
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ j16init_lossless_compressor(cinfo);
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_lossless_compressor(cinfo);
|
|
|
++ else
|
|
|
++ jinit_lossless_compressor(cinfo);
|
|
|
++ /* Entropy encoding: either Huffman or arithmetic coding. */
|
|
|
++ if (cinfo->arith_code) {
|
|
|
++ ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
|
|
++ } else {
|
|
|
++ jinit_lhuff_encoder(cinfo);
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Need a full-image difference buffer in any multi-pass mode. */
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ j16init_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
|
|
|
++ cinfo->optimize_coding));
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
|
|
|
++ cinfo->optimize_coding));
|
|
|
++ else
|
|
|
++ jinit_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
|
|
|
++ cinfo->optimize_coding));
|
|
|
+ #else
|
|
|
+- ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+- if (cinfo->progressive_mode) {
|
|
|
+-#ifdef C_PROGRESSIVE_SUPPORTED
|
|
|
+- jinit_phuff_encoder(cinfo);
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ /* Forward DCT */
|
|
|
++ if (cinfo->data_precision == 12)
|
|
|
++ j12init_forward_dct(cinfo);
|
|
|
++ else
|
|
|
++ jinit_forward_dct(cinfo);
|
|
|
++ /* Entropy encoding: either Huffman or arithmetic coding. */
|
|
|
++ if (cinfo->arith_code) {
|
|
|
++#ifdef C_ARITH_CODING_SUPPORTED
|
|
|
++ jinit_arith_encoder(cinfo);
|
|
|
+ #else
|
|
|
+- ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
++ ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
|
|
++#endif
|
|
|
++ } else {
|
|
|
++ if (cinfo->progressive_mode) {
|
|
|
++#ifdef C_PROGRESSIVE_SUPPORTED
|
|
|
++ jinit_phuff_encoder(cinfo);
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+- } else
|
|
|
+- jinit_huff_encoder(cinfo);
|
|
|
++ } else
|
|
|
++ jinit_huff_encoder(cinfo);
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Need a full-image coefficient buffer in any multi-pass mode. */
|
|
|
++ if (cinfo->data_precision == 12)
|
|
|
++ j12init_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
|
|
|
++ cinfo->optimize_coding));
|
|
|
++ else
|
|
|
++ jinit_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
|
|
|
++ cinfo->optimize_coding));
|
|
|
+ }
|
|
|
+
|
|
|
+- /* Need a full-image coefficient buffer in any multi-pass mode. */
|
|
|
+- jinit_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
|
|
|
+- cinfo->optimize_coding));
|
|
|
+- jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ j16init_c_main_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_c_main_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++ else
|
|
|
++ jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
+
|
|
|
+ jinit_marker_writer(cinfo);
|
|
|
+
|
|
|
+ /* We can now tell the memory manager to allocate virtual arrays. */
|
|
|
+ (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
|
|
|
+
|
|
|
+ /* Write the datastream header (SOI) immediately.
|
|
|
+ * Frame and scan headers are postponed till later.
|
|
|
+diff --git a/media/libjpeg/jclhuff.c b/media/libjpeg/jclhuff.c
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jclhuff.c
|
|
|
+@@ -0,0 +1,587 @@
|
|
|
++/*
|
|
|
++ * jclhuff.c
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains Huffman entropy encoding routines for lossless JPEG.
|
|
|
++ *
|
|
|
++ * Much of the complexity here has to do with supporting output suspension.
|
|
|
++ * If the data destination module demands suspension, we want to be able to
|
|
|
++ * back up to the start of the current MCU. To do this, we copy state
|
|
|
++ * variables into local working storage, and update them back to the
|
|
|
++ * permanent JPEG objects only upon successful completion of an MCU.
|
|
|
++ */
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jinclude.h"
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jlossls.h" /* Private declarations for lossless codec */
|
|
|
++#include "jchuff.h" /* Declarations shared with jc*huff.c */
|
|
|
++
|
|
|
++
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++/* The legal range of a spatial difference is
|
|
|
++ * -32767 .. +32768.
|
|
|
++ * Hence the magnitude should always fit in 16 bits.
|
|
|
++ */
|
|
|
++
|
|
|
++#define MAX_DIFF_BITS 16
|
|
|
++
|
|
|
++
|
|
|
++/* Expanded entropy encoder object for Huffman encoding in lossless mode.
|
|
|
++ *
|
|
|
++ * The savable_state subrecord contains fields that change within an MCU,
|
|
|
++ * but must not be updated permanently until we complete the MCU.
|
|
|
++ */
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ size_t put_buffer; /* current bit-accumulation buffer */
|
|
|
++ int put_bits; /* # of bits now in it */
|
|
|
++} savable_state;
|
|
|
++
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ int ci, yoffset, MCU_width;
|
|
|
++} lhe_input_ptr_info;
|
|
|
++
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_entropy_encoder pub; /* public fields */
|
|
|
++
|
|
|
++ savable_state saved; /* Bit buffer at start of MCU */
|
|
|
++
|
|
|
++ /* These fields are NOT loaded into local working state. */
|
|
|
++ unsigned int restarts_to_go; /* MCUs left in this restart interval */
|
|
|
++ int next_restart_num; /* next restart number to write (0-7) */
|
|
|
++
|
|
|
++ /* Pointers to derived tables (these workspaces have image lifespan) */
|
|
|
++ c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
|
|
|
++
|
|
|
++ /* Pointers to derived tables to be used for each data unit within an MCU */
|
|
|
++ c_derived_tbl *cur_tbls[C_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */
|
|
|
++ long *count_ptrs[NUM_HUFF_TBLS];
|
|
|
++
|
|
|
++ /* Pointers to stats tables to be used for each data unit within an MCU */
|
|
|
++ long *cur_counts[C_MAX_BLOCKS_IN_MCU];
|
|
|
++#endif
|
|
|
++
|
|
|
++ /* Pointers to the proper input difference row for each group of data units
|
|
|
++ * within an MCU. For each component, there are Vi groups of Hi data units.
|
|
|
++ */
|
|
|
++ JDIFFROW input_ptr[C_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++ /* Number of input pointers in use for the current MCU. This is the sum
|
|
|
++ * of all Vi in the MCU.
|
|
|
++ */
|
|
|
++ int num_input_ptrs;
|
|
|
++
|
|
|
++ /* Information used for positioning the input pointers within the input
|
|
|
++ * difference rows.
|
|
|
++ */
|
|
|
++ lhe_input_ptr_info input_ptr_info[C_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++ /* Index of the proper input pointer for each data unit within an MCU */
|
|
|
++ int input_ptr_index[C_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++} lhuff_entropy_encoder;
|
|
|
++
|
|
|
++typedef lhuff_entropy_encoder *lhuff_entropy_ptr;
|
|
|
++
|
|
|
++/* Working state while writing an MCU.
|
|
|
++ * This struct contains all the fields that are needed by subroutines.
|
|
|
++ */
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ JOCTET *next_output_byte; /* => next byte to write in buffer */
|
|
|
++ size_t free_in_buffer; /* # of byte spaces remaining in buffer */
|
|
|
++ savable_state cur; /* Current bit buffer & DC state */
|
|
|
++ j_compress_ptr cinfo; /* dump_buffer needs access to this */
|
|
|
++} working_state;
|
|
|
++
|
|
|
++
|
|
|
++/* Forward declarations */
|
|
|
++METHODDEF(JDIMENSION) encode_mcus_huff(j_compress_ptr cinfo,
|
|
|
++ JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num,
|
|
|
++ JDIMENSION MCU_col_num,
|
|
|
++ JDIMENSION nMCU);
|
|
|
++METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED
|
|
|
++METHODDEF(JDIMENSION) encode_mcus_gather(j_compress_ptr cinfo,
|
|
|
++ JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num,
|
|
|
++ JDIMENSION MCU_col_num,
|
|
|
++ JDIMENSION nMCU);
|
|
|
++METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo);
|
|
|
++#endif
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for a Huffman-compressed scan.
|
|
|
++ * If gather_statistics is TRUE, we do not output anything during the scan,
|
|
|
++ * just count the Huffman symbols used and generate Huffman code tables.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_pass_lhuff(j_compress_ptr cinfo, boolean gather_statistics)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ int ci, dctbl, sampn, ptrn, yoffset, xoffset;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ if (gather_statistics) {
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED
|
|
|
++ entropy->pub.encode_mcus = encode_mcus_gather;
|
|
|
++ entropy->pub.finish_pass = finish_pass_gather;
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
++#endif
|
|
|
++ } else {
|
|
|
++ entropy->pub.encode_mcus = encode_mcus_huff;
|
|
|
++ entropy->pub.finish_pass = finish_pass_huff;
|
|
|
++ }
|
|
|
++
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ dctbl = compptr->dc_tbl_no;
|
|
|
++ if (gather_statistics) {
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED
|
|
|
++ /* Check for invalid table indexes */
|
|
|
++ /* (make_c_derived_tbl does this in the other path) */
|
|
|
++ if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
|
|
|
++ ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
|
|
|
++ /* Allocate and zero the statistics tables */
|
|
|
++ /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
|
|
|
++ if (entropy->count_ptrs[dctbl] == NULL)
|
|
|
++ entropy->count_ptrs[dctbl] = (long *)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ 257 * sizeof(long));
|
|
|
++ memset(entropy->count_ptrs[dctbl], 0, 257 * sizeof(long));
|
|
|
++#endif
|
|
|
++ } else {
|
|
|
++ /* Compute derived values for Huffman tables */
|
|
|
++ /* We may do this more than once for a table, but it's not expensive */
|
|
|
++ jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
|
|
|
++ &entropy->derived_tbls[dctbl]);
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Precalculate encoding info for each sample in an MCU of this scan */
|
|
|
++ for (sampn = 0, ptrn = 0; sampn < cinfo->blocks_in_MCU;) {
|
|
|
++ compptr = cinfo->cur_comp_info[cinfo->MCU_membership[sampn]];
|
|
|
++ ci = compptr->component_index;
|
|
|
++ for (yoffset = 0; yoffset < compptr->MCU_height; yoffset++, ptrn++) {
|
|
|
++ /* Precalculate the setup info for each input pointer */
|
|
|
++ entropy->input_ptr_info[ptrn].ci = ci;
|
|
|
++ entropy->input_ptr_info[ptrn].yoffset = yoffset;
|
|
|
++ entropy->input_ptr_info[ptrn].MCU_width = compptr->MCU_width;
|
|
|
++ for (xoffset = 0; xoffset < compptr->MCU_width; xoffset++, sampn++) {
|
|
|
++ /* Precalculate the input pointer index for each sample */
|
|
|
++ entropy->input_ptr_index[sampn] = ptrn;
|
|
|
++ /* Precalculate which tables to use for each sample */
|
|
|
++ entropy->cur_tbls[sampn] = entropy->derived_tbls[compptr->dc_tbl_no];
|
|
|
++ entropy->cur_counts[sampn] = entropy->count_ptrs[compptr->dc_tbl_no];
|
|
|
++ }
|
|
|
++ }
|
|
|
++ }
|
|
|
++ entropy->num_input_ptrs = ptrn;
|
|
|
++
|
|
|
++ /* Initialize bit buffer to empty */
|
|
|
++ entropy->saved.put_buffer = 0;
|
|
|
++ entropy->saved.put_bits = 0;
|
|
|
++
|
|
|
++ /* Initialize restart stuff */
|
|
|
++ entropy->restarts_to_go = cinfo->restart_interval;
|
|
|
++ entropy->next_restart_num = 0;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/* Outputting bytes to the file */
|
|
|
++
|
|
|
++/* Emit a byte, taking 'action' if must suspend. */
|
|
|
++#define emit_byte(state, val, action) { \
|
|
|
++ *(state)->next_output_byte++ = (JOCTET)(val); \
|
|
|
++ if (--(state)->free_in_buffer == 0) \
|
|
|
++ if (!dump_buffer(state)) \
|
|
|
++ { action; } \
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++LOCAL(boolean)
|
|
|
++dump_buffer(working_state *state)
|
|
|
++/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
|
|
|
++{
|
|
|
++ struct jpeg_destination_mgr *dest = state->cinfo->dest;
|
|
|
++
|
|
|
++ if (!(*dest->empty_output_buffer) (state->cinfo))
|
|
|
++ return FALSE;
|
|
|
++ /* After a successful buffer dump, must reset buffer pointers */
|
|
|
++ state->next_output_byte = dest->next_output_byte;
|
|
|
++ state->free_in_buffer = dest->free_in_buffer;
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/* Outputting bits to the file */
|
|
|
++
|
|
|
++/* Only the right 24 bits of put_buffer are used; the valid bits are
|
|
|
++ * left-justified in this part. At most 16 bits can be passed to emit_bits
|
|
|
++ * in one call, and we never retain more than 7 bits in put_buffer
|
|
|
++ * between calls, so 24 bits are sufficient.
|
|
|
++ */
|
|
|
++
|
|
|
++INLINE
|
|
|
++LOCAL(boolean)
|
|
|
++emit_bits(working_state *state, unsigned int code, int size)
|
|
|
++/* Emit some bits; return TRUE if successful, FALSE if must suspend */
|
|
|
++{
|
|
|
++ /* This routine is heavily used, so it's worth coding tightly. */
|
|
|
++ register size_t put_buffer = (size_t)code;
|
|
|
++ register int put_bits = state->cur.put_bits;
|
|
|
++
|
|
|
++ /* if size is 0, caller used an invalid Huffman table entry */
|
|
|
++ if (size == 0)
|
|
|
++ ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
|
|
|
++
|
|
|
++ put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
|
|
|
++
|
|
|
++ put_bits += size; /* new number of bits in buffer */
|
|
|
++
|
|
|
++ put_buffer <<= 24 - put_bits; /* align incoming bits */
|
|
|
++
|
|
|
++ put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
|
|
|
++
|
|
|
++ while (put_bits >= 8) {
|
|
|
++ int c = (int)((put_buffer >> 16) & 0xFF);
|
|
|
++
|
|
|
++ emit_byte(state, c, return FALSE);
|
|
|
++ if (c == 0xFF) { /* need to stuff a zero byte? */
|
|
|
++ emit_byte(state, 0, return FALSE);
|
|
|
++ }
|
|
|
++ put_buffer <<= 8;
|
|
|
++ put_bits -= 8;
|
|
|
++ }
|
|
|
++
|
|
|
++ state->cur.put_buffer = put_buffer; /* update state variables */
|
|
|
++ state->cur.put_bits = put_bits;
|
|
|
++
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++LOCAL(boolean)
|
|
|
++flush_bits(working_state *state)
|
|
|
++{
|
|
|
++ if (!emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */
|
|
|
++ return FALSE;
|
|
|
++ state->cur.put_buffer = 0; /* and reset bit-buffer to empty */
|
|
|
++ state->cur.put_bits = 0;
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Emit a restart marker & resynchronize predictions.
|
|
|
++ */
|
|
|
++
|
|
|
++LOCAL(boolean)
|
|
|
++emit_restart(working_state *state, int restart_num)
|
|
|
++{
|
|
|
++ if (!flush_bits(state))
|
|
|
++ return FALSE;
|
|
|
++
|
|
|
++ emit_byte(state, 0xFF, return FALSE);
|
|
|
++ emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
|
|
|
++
|
|
|
++ /* The restart counter is not updated until we successfully write the MCU. */
|
|
|
++
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Encode and output nMCU MCUs' worth of Huffman-compressed differences.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(JDIMENSION)
|
|
|
++encode_mcus_huff(j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
|
|
|
++ JDIMENSION nMCU)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ working_state state;
|
|
|
++ int sampn, ci, yoffset, MCU_width, ptrn;
|
|
|
++ JDIMENSION mcu_num;
|
|
|
++
|
|
|
++ /* Load up working state */
|
|
|
++ state.next_output_byte = cinfo->dest->next_output_byte;
|
|
|
++ state.free_in_buffer = cinfo->dest->free_in_buffer;
|
|
|
++ state.cur = entropy->saved;
|
|
|
++ state.cinfo = cinfo;
|
|
|
++
|
|
|
++ /* Emit restart marker if needed */
|
|
|
++ if (cinfo->restart_interval) {
|
|
|
++ if (entropy->restarts_to_go == 0)
|
|
|
++ if (!emit_restart(&state, entropy->next_restart_num))
|
|
|
++ return 0;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Set input pointer locations based on MCU_col_num */
|
|
|
++ for (ptrn = 0; ptrn < entropy->num_input_ptrs; ptrn++) {
|
|
|
++ ci = entropy->input_ptr_info[ptrn].ci;
|
|
|
++ yoffset = entropy->input_ptr_info[ptrn].yoffset;
|
|
|
++ MCU_width = entropy->input_ptr_info[ptrn].MCU_width;
|
|
|
++ entropy->input_ptr[ptrn] =
|
|
|
++ diff_buf[ci][MCU_row_num + yoffset] + (MCU_col_num * MCU_width);
|
|
|
++ }
|
|
|
++
|
|
|
++ for (mcu_num = 0; mcu_num < nMCU; mcu_num++) {
|
|
|
++
|
|
|
++ /* Inner loop handles the samples in the MCU */
|
|
|
++ for (sampn = 0; sampn < cinfo->blocks_in_MCU; sampn++) {
|
|
|
++ register int temp, temp2;
|
|
|
++ register int nbits;
|
|
|
++ c_derived_tbl *dctbl = entropy->cur_tbls[sampn];
|
|
|
++
|
|
|
++ /* Encode the difference per section H.1.2.2 */
|
|
|
++
|
|
|
++ /* Input the sample difference */
|
|
|
++ temp = *entropy->input_ptr[entropy->input_ptr_index[sampn]]++;
|
|
|
++
|
|
|
++ if (temp & 0x8000) { /* instead of temp < 0 */
|
|
|
++ temp = (-temp) & 0x7FFF; /* absolute value, mod 2^16 */
|
|
|
++ if (temp == 0) /* special case: magnitude = 32768 */
|
|
|
++ temp2 = temp = 0x8000;
|
|
|
++ temp2 = ~temp; /* one's complement of magnitude */
|
|
|
++ } else {
|
|
|
++ temp &= 0x7FFF; /* abs value mod 2^16 */
|
|
|
++ temp2 = temp; /* magnitude */
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Find the number of bits needed for the magnitude of the difference */
|
|
|
++ nbits = 0;
|
|
|
++ while (temp) {
|
|
|
++ nbits++;
|
|
|
++ temp >>= 1;
|
|
|
++ }
|
|
|
++ /* Check for out-of-range difference values.
|
|
|
++ */
|
|
|
++ if (nbits > MAX_DIFF_BITS)
|
|
|
++ ERREXIT(cinfo, JERR_BAD_DCT_COEF);
|
|
|
++
|
|
|
++ /* Emit the Huffman-coded symbol for the number of bits */
|
|
|
++ if (!emit_bits(&state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
|
|
|
++ return mcu_num;
|
|
|
++
|
|
|
++ /* Emit that number of bits of the value, if positive, */
|
|
|
++ /* or the complement of its magnitude, if negative. */
|
|
|
++ if (nbits && /* emit_bits rejects calls with size 0 */
|
|
|
++ nbits != 16) /* special case: no bits should be emitted */
|
|
|
++ if (!emit_bits(&state, (unsigned int)temp2, nbits))
|
|
|
++ return mcu_num;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Completed MCU, so update state */
|
|
|
++ cinfo->dest->next_output_byte = state.next_output_byte;
|
|
|
++ cinfo->dest->free_in_buffer = state.free_in_buffer;
|
|
|
++ entropy->saved = state.cur;
|
|
|
++
|
|
|
++ /* Update restart-interval state too */
|
|
|
++ if (cinfo->restart_interval) {
|
|
|
++ if (entropy->restarts_to_go == 0) {
|
|
|
++ entropy->restarts_to_go = cinfo->restart_interval;
|
|
|
++ entropy->next_restart_num++;
|
|
|
++ entropy->next_restart_num &= 7;
|
|
|
++ }
|
|
|
++ entropy->restarts_to_go--;
|
|
|
++ }
|
|
|
++
|
|
|
++ }
|
|
|
++
|
|
|
++ return nMCU;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Finish up at the end of a Huffman-compressed scan.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++finish_pass_huff(j_compress_ptr cinfo)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ working_state state;
|
|
|
++
|
|
|
++ /* Load up working state ... flush_bits needs it */
|
|
|
++ state.next_output_byte = cinfo->dest->next_output_byte;
|
|
|
++ state.free_in_buffer = cinfo->dest->free_in_buffer;
|
|
|
++ state.cur = entropy->saved;
|
|
|
++ state.cinfo = cinfo;
|
|
|
++
|
|
|
++ /* Flush out the last data */
|
|
|
++ if (!flush_bits(&state))
|
|
|
++ ERREXIT(cinfo, JERR_CANT_SUSPEND);
|
|
|
++
|
|
|
++ /* Update state */
|
|
|
++ cinfo->dest->next_output_byte = state.next_output_byte;
|
|
|
++ cinfo->dest->free_in_buffer = state.free_in_buffer;
|
|
|
++ entropy->saved = state.cur;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Huffman coding optimization.
|
|
|
++ *
|
|
|
++ * We first scan the supplied data and count the number of uses of each symbol
|
|
|
++ * that is to be Huffman-coded. (This process MUST agree with the code above.)
|
|
|
++ * Then we build a Huffman coding tree for the observed counts.
|
|
|
++ * Symbols which are not needed at all for the particular image are not
|
|
|
++ * assigned any code, which saves space in the DHT marker as well as in
|
|
|
++ * the compressed data.
|
|
|
++ */
|
|
|
++
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED
|
|
|
++
|
|
|
++/*
|
|
|
++ * Trial-encode nMCU MCUs' worth of Huffman-compressed differences.
|
|
|
++ * No data is actually output, so no suspension return is possible.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(JDIMENSION)
|
|
|
++encode_mcus_gather(j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
|
|
|
++ JDIMENSION nMCU)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ int sampn, ci, yoffset, MCU_width, ptrn;
|
|
|
++ JDIMENSION mcu_num;
|
|
|
++
|
|
|
++ /* Take care of restart intervals if needed */
|
|
|
++ if (cinfo->restart_interval) {
|
|
|
++ if (entropy->restarts_to_go == 0) {
|
|
|
++ /* Update restart state */
|
|
|
++ entropy->restarts_to_go = cinfo->restart_interval;
|
|
|
++ }
|
|
|
++ entropy->restarts_to_go--;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Set input pointer locations based on MCU_col_num */
|
|
|
++ for (ptrn = 0; ptrn < entropy->num_input_ptrs; ptrn++) {
|
|
|
++ ci = entropy->input_ptr_info[ptrn].ci;
|
|
|
++ yoffset = entropy->input_ptr_info[ptrn].yoffset;
|
|
|
++ MCU_width = entropy->input_ptr_info[ptrn].MCU_width;
|
|
|
++ entropy->input_ptr[ptrn] =
|
|
|
++ diff_buf[ci][MCU_row_num + yoffset] + (MCU_col_num * MCU_width);
|
|
|
++ }
|
|
|
++
|
|
|
++ for (mcu_num = 0; mcu_num < nMCU; mcu_num++) {
|
|
|
++
|
|
|
++ /* Inner loop handles the samples in the MCU */
|
|
|
++ for (sampn = 0; sampn < cinfo->blocks_in_MCU; sampn++) {
|
|
|
++ register int temp;
|
|
|
++ register int nbits;
|
|
|
++ long *counts = entropy->cur_counts[sampn];
|
|
|
++
|
|
|
++ /* Encode the difference per section H.1.2.2 */
|
|
|
++
|
|
|
++ /* Input the sample difference */
|
|
|
++ temp = *entropy->input_ptr[entropy->input_ptr_index[sampn]]++;
|
|
|
++
|
|
|
++ if (temp & 0x8000) { /* instead of temp < 0 */
|
|
|
++ temp = (-temp) & 0x7FFF; /* absolute value, mod 2^16 */
|
|
|
++ if (temp == 0) /* special case: magnitude = 32768 */
|
|
|
++ temp = 0x8000;
|
|
|
++ } else
|
|
|
++ temp &= 0x7FFF; /* abs value mod 2^16 */
|
|
|
++
|
|
|
++ /* Find the number of bits needed for the magnitude of the difference */
|
|
|
++ nbits = 0;
|
|
|
++ while (temp) {
|
|
|
++ nbits++;
|
|
|
++ temp >>= 1;
|
|
|
++ }
|
|
|
++ /* Check for out-of-range difference values.
|
|
|
++ */
|
|
|
++ if (nbits > MAX_DIFF_BITS)
|
|
|
++ ERREXIT(cinfo, JERR_BAD_DCT_COEF);
|
|
|
++
|
|
|
++ /* Count the Huffman symbol for the number of bits */
|
|
|
++ counts[nbits]++;
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ return nMCU;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Finish up a statistics-gathering pass and create the new Huffman tables.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++finish_pass_gather(j_compress_ptr cinfo)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ int ci, dctbl;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++ JHUFF_TBL **htblptr;
|
|
|
++ boolean did_dc[NUM_HUFF_TBLS];
|
|
|
++
|
|
|
++ /* It's important not to apply jpeg_gen_optimal_table more than once
|
|
|
++ * per table, because it clobbers the input frequency counts!
|
|
|
++ */
|
|
|
++ memset(did_dc, 0, sizeof(did_dc));
|
|
|
++
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ dctbl = compptr->dc_tbl_no;
|
|
|
++ if (!did_dc[dctbl]) {
|
|
|
++ htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl];
|
|
|
++ if (*htblptr == NULL)
|
|
|
++ *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
|
|
|
++ jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[dctbl]);
|
|
|
++ did_dc[dctbl] = TRUE;
|
|
|
++ }
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++#endif /* ENTROPY_OPT_SUPPORTED */
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Module initialization routine for Huffman entropy encoding.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++jinit_lhuff_encoder(j_compress_ptr cinfo)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy;
|
|
|
++ int i;
|
|
|
++
|
|
|
++ entropy = (lhuff_entropy_ptr)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ sizeof(lhuff_entropy_encoder));
|
|
|
++ cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
|
|
|
++ entropy->pub.start_pass = start_pass_lhuff;
|
|
|
++
|
|
|
++ /* Mark tables unallocated */
|
|
|
++ for (i = 0; i < NUM_HUFF_TBLS; i++) {
|
|
|
++ entropy->derived_tbls[i] = NULL;
|
|
|
++#ifdef ENTROPY_OPT_SUPPORTED
|
|
|
++ entropy->count_ptrs[i] = NULL;
|
|
|
++#endif
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* C_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jclossls.c b/media/libjpeg/jclossls.c
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jclossls.c
|
|
|
+@@ -0,0 +1,319 @@
|
|
|
++/*
|
|
|
++ * jclossls.c
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1998, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains prediction, sample differencing, and point transform
|
|
|
++ * routines for the lossless JPEG compressor.
|
|
|
++ */
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jinclude.h"
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jlossls.h"
|
|
|
++
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++
|
|
|
++/************************** Sample differencing **************************/
|
|
|
++
|
|
|
++/*
|
|
|
++ * In order to avoid a performance penalty for checking which predictor is
|
|
|
++ * being used and which row is being processed for each call of the
|
|
|
++ * differencer, and to promote optimization, we have separate differencing
|
|
|
++ * functions for each predictor selection value.
|
|
|
++ *
|
|
|
++ * We are able to avoid duplicating source code by implementing the predictors
|
|
|
++ * and differencers as macros. Each of the differencing functions is simply a
|
|
|
++ * wrapper around a DIFFERENCE macro with the appropriate PREDICTOR macro
|
|
|
++ * passed as an argument.
|
|
|
++ */
|
|
|
++
|
|
|
++/* Forward declarations */
|
|
|
++LOCAL(void) reset_predictor(j_compress_ptr cinfo, int ci);
|
|
|
++
|
|
|
++
|
|
|
++/* Predictor for the first column of the first row: 2^(P-Pt-1) */
|
|
|
++#define INITIAL_PREDICTORx (1 << (cinfo->data_precision - cinfo->Al - 1))
|
|
|
++
|
|
|
++/* Predictor for the first column of the remaining rows: Rb */
|
|
|
++#define INITIAL_PREDICTOR2 prev_row[0]
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * 1-Dimensional differencer routine.
|
|
|
++ *
|
|
|
++ * This macro implements the 1-D horizontal predictor (1). INITIAL_PREDICTOR
|
|
|
++ * is used as the special case predictor for the first column, which must be
|
|
|
++ * either INITIAL_PREDICTOR2 or INITIAL_PREDICTORx. The remaining samples
|
|
|
++ * use PREDICTOR1.
|
|
|
++ */
|
|
|
++
|
|
|
++#define DIFFERENCE_1D(INITIAL_PREDICTOR) \
|
|
|
++ lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct; \
|
|
|
++ boolean restart = FALSE; \
|
|
|
++ int samp, Ra; \
|
|
|
++ \
|
|
|
++ samp = *input_buf++; \
|
|
|
++ *diff_buf++ = samp - INITIAL_PREDICTOR; \
|
|
|
++ \
|
|
|
++ while (--width) { \
|
|
|
++ Ra = samp; \
|
|
|
++ samp = *input_buf++; \
|
|
|
++ *diff_buf++ = samp - PREDICTOR1; \
|
|
|
++ } \
|
|
|
++ \
|
|
|
++ /* Account for restart interval (no-op if not using restarts) */ \
|
|
|
++ if (cinfo->restart_interval) { \
|
|
|
++ if (--(losslessc->restart_rows_to_go[ci]) == 0) { \
|
|
|
++ reset_predictor(cinfo, ci); \
|
|
|
++ restart = TRUE; \
|
|
|
++ } \
|
|
|
++ }
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * 2-Dimensional differencer routine.
|
|
|
++ *
|
|
|
++ * This macro implements the 2-D predictors (#2-7). PREDICTOR2 is
|
|
|
++ * used as the special case predictor for the first column. The remaining
|
|
|
++ * samples use PREDICTOR, which is a function of Ra, Rb, and Rc.
|
|
|
++ *
|
|
|
++ * Rb is read from prev_row, Rc is the previous column's Rb, and Ra is the
|
|
|
++ * previous sample read from input_buf. The only buffer written by this macro
|
|
|
++ * is diff_buf; prev_row and input_buf are only read.
|
|
|
++ */
|
|
|
++
|
|
|
++#define DIFFERENCE_2D(PREDICTOR) \
|
|
|
++ lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct; \
|
|
|
++ int samp, Ra, Rb, Rc; \
|
|
|
++ \
|
|
|
++ Rb = *prev_row++; \
|
|
|
++ samp = *input_buf++; \
|
|
|
++ *diff_buf++ = samp - PREDICTOR2; \
|
|
|
++ \
|
|
|
++ while (--width) { \
|
|
|
++ Rc = Rb; \
|
|
|
++ Rb = *prev_row++; \
|
|
|
++ Ra = samp; \
|
|
|
++ samp = *input_buf++; \
|
|
|
++ *diff_buf++ = samp - PREDICTOR; \
|
|
|
++ } \
|
|
|
++ \
|
|
|
++ /* Account for restart interval (no-op if not using restarts) */ \
|
|
|
++ if (cinfo->restart_interval) { \
|
|
|
++ if (--losslessc->restart_rows_to_go[ci] == 0) \
|
|
|
++ reset_predictor(cinfo, ci); \
|
|
|
++ }
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Differencers for the second and subsequent rows in a scan or restart
|
|
|
++ * interval. The first sample in the row is differenced using the vertical
|
|
|
++ * predictor (2). The rest of the samples are differenced using the predictor
|
|
|
++ * specified in the scan header.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference1(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_1D(INITIAL_PREDICTOR2);
|
|
|
++ (void)(restart);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference2(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_2D(PREDICTOR2);
|
|
|
++ (void)(Ra);
|
|
|
++ (void)(Rc);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference3(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_2D(PREDICTOR3);
|
|
|
++ (void)(Ra);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference4(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_2D(PREDICTOR4);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference5(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_2D(PREDICTOR5);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference6(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_2D(PREDICTOR6);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference7(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_2D(PREDICTOR7);
|
|
|
++ (void)(Rc);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Differencer for the first row in a scan or restart interval. The first
|
|
|
++ * sample in the row is differenced using the special predictor constant
|
|
|
++ * x = 2 ^ (P-Pt-1). The rest of the samples are differenced using the
|
|
|
++ * 1-D horizontal predictor (1).
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_difference_first_row(j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ DIFFERENCE_1D(INITIAL_PREDICTORx);
|
|
|
++
|
|
|
++ /*
|
|
|
++ * Now that we have differenced the first row, we want to use the
|
|
|
++ * differencer that corresponds to the predictor specified in the
|
|
|
++ * scan header.
|
|
|
++ *
|
|
|
++ * Note that we don't do this if we have just reset the predictor
|
|
|
++ * for a new restart interval.
|
|
|
++ */
|
|
|
++ if (!restart) {
|
|
|
++ switch (cinfo->Ss) {
|
|
|
++ case 1:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference1;
|
|
|
++ break;
|
|
|
++ case 2:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference2;
|
|
|
++ break;
|
|
|
++ case 3:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference3;
|
|
|
++ break;
|
|
|
++ case 4:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference4;
|
|
|
++ break;
|
|
|
++ case 5:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference5;
|
|
|
++ break;
|
|
|
++ case 6:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference6;
|
|
|
++ break;
|
|
|
++ case 7:
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference7;
|
|
|
++ break;
|
|
|
++ }
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++/*
|
|
|
++ * Reset predictor at the start of a pass or restart interval.
|
|
|
++ */
|
|
|
++
|
|
|
++LOCAL(void)
|
|
|
++reset_predictor(j_compress_ptr cinfo, int ci)
|
|
|
++{
|
|
|
++ lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct;
|
|
|
++
|
|
|
++ /* Initialize restart counter */
|
|
|
++ losslessc->restart_rows_to_go[ci] =
|
|
|
++ cinfo->restart_interval / cinfo->MCUs_per_row;
|
|
|
++
|
|
|
++ /* Set difference function to first row function */
|
|
|
++ losslessc->predict_difference[ci] = jpeg_difference_first_row;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/********************** Sample downscaling by 2^Pt ***********************/
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++simple_downscale(j_compress_ptr cinfo,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW output_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ do {
|
|
|
++ *output_buf++ = (_JSAMPLE)RIGHT_SHIFT(*input_buf++, cinfo->Al);
|
|
|
++ } while (--width);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++noscale(j_compress_ptr cinfo,
|
|
|
++ _JSAMPROW input_buf, _JSAMPROW output_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ memcpy(output_buf, input_buf, width * sizeof(_JSAMPLE));
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for a processing pass.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_pass_lossless(j_compress_ptr cinfo)
|
|
|
++{
|
|
|
++ lossless_comp_ptr losslessc = (lossless_comp_ptr)cinfo->fdct;
|
|
|
++ int ci;
|
|
|
++
|
|
|
++ /* Set scaler function based on Pt */
|
|
|
++ if (cinfo->Al)
|
|
|
++ losslessc->scaler_scale = simple_downscale;
|
|
|
++ else
|
|
|
++ losslessc->scaler_scale = noscale;
|
|
|
++
|
|
|
++ /* Check that the restart interval is an integer multiple of the number
|
|
|
++ * of MCUs in an MCU row.
|
|
|
++ */
|
|
|
++ if (cinfo->restart_interval % cinfo->MCUs_per_row != 0)
|
|
|
++ ERREXIT2(cinfo, JERR_BAD_RESTART,
|
|
|
++ cinfo->restart_interval, cinfo->MCUs_per_row);
|
|
|
++
|
|
|
++ /* Set predictors for start of pass */
|
|
|
++ for (ci = 0; ci < cinfo->num_components; ci++)
|
|
|
++ reset_predictor(cinfo, ci);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize the lossless compressor.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++_jinit_lossless_compressor(j_compress_ptr cinfo)
|
|
|
++{
|
|
|
++ lossless_comp_ptr losslessc;
|
|
|
++
|
|
|
++ /* Create subobject in permanent pool */
|
|
|
++ losslessc = (lossless_comp_ptr)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
|
|
|
++ sizeof(jpeg_lossless_compressor));
|
|
|
++ cinfo->fdct = (struct jpeg_forward_dct *)losslessc;
|
|
|
++ losslessc->pub.start_pass = start_pass_lossless;
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* C_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jcmainct.c b/media/libjpeg/jcmainct.c
|
|
|
+--- a/media/libjpeg/jcmainct.c
|
|
|
++++ b/media/libjpeg/jcmainct.c
|
|
|
+@@ -1,51 +1,56 @@
|
|
|
+ /*
|
|
|
+ * jcmainct.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+- * It was modified by The libjpeg-turbo Project to include only code relevant
|
|
|
+- * to libjpeg-turbo.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the main buffer controller for compression.
|
|
|
+ * The main buffer lies between the pre-processor and the JPEG
|
|
|
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Private buffer controller object */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_c_main_controller pub; /* public fields */
|
|
|
+
|
|
|
+ JDIMENSION cur_iMCU_row; /* number of current iMCU row */
|
|
|
+ JDIMENSION rowgroup_ctr; /* counts row groups received in iMCU row */
|
|
|
+ boolean suspended; /* remember if we suspended output */
|
|
|
+ J_BUF_MODE pass_mode; /* current operating mode */
|
|
|
+
|
|
|
+ /* If using just a strip buffer, this points to the entire set of buffers
|
|
|
+ * (we allocate one for each component). In the full-image case, this
|
|
|
+ * points to the currently accessible strips of the virtual arrays.
|
|
|
+ */
|
|
|
+- JSAMPARRAY buffer[MAX_COMPONENTS];
|
|
|
++ _JSAMPARRAY buffer[MAX_COMPONENTS];
|
|
|
+ } my_main_controller;
|
|
|
+
|
|
|
+ typedef my_main_controller *my_main_ptr;
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+ METHODDEF(void) process_data_simple_main(j_compress_ptr cinfo,
|
|
|
+- JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY input_buf,
|
|
|
+ JDIMENSION *in_row_ctr,
|
|
|
+ JDIMENSION in_rows_avail);
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize for a processing pass.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -60,49 +65,49 @@ start_pass_main(j_compress_ptr cinfo, J_
|
|
|
+
|
|
|
+ if (pass_mode != JBUF_PASS_THRU)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+
|
|
|
+ main_ptr->cur_iMCU_row = 0; /* initialize counters */
|
|
|
+ main_ptr->rowgroup_ctr = 0;
|
|
|
+ main_ptr->suspended = FALSE;
|
|
|
+ main_ptr->pass_mode = pass_mode; /* save mode for use by process_data */
|
|
|
+- main_ptr->pub.process_data = process_data_simple_main;
|
|
|
++ main_ptr->pub._process_data = process_data_simple_main;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data.
|
|
|
+ * This routine handles the simple pass-through mode,
|
|
|
+ * where we have only a strip buffer.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-process_data_simple_main(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
++process_data_simple_main(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
+ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
++ JDIMENSION data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
|
|
|
+ /* Read input data if we haven't filled the main buffer yet */
|
|
|
+- if (main_ptr->rowgroup_ctr < DCTSIZE)
|
|
|
+- (*cinfo->prep->pre_process_data) (cinfo, input_buf, in_row_ctr,
|
|
|
+- in_rows_avail, main_ptr->buffer,
|
|
|
+- &main_ptr->rowgroup_ctr,
|
|
|
+- (JDIMENSION)DCTSIZE);
|
|
|
++ if (main_ptr->rowgroup_ctr < data_unit)
|
|
|
++ (*cinfo->prep->_pre_process_data) (cinfo, input_buf, in_row_ctr,
|
|
|
++ in_rows_avail, main_ptr->buffer,
|
|
|
++ &main_ptr->rowgroup_ctr, data_unit);
|
|
|
+
|
|
|
+ /* If we don't have a full iMCU row buffered, return to application for
|
|
|
+ * more data. Note that preprocessor will always pad to fill the iMCU row
|
|
|
+ * at the bottom of the image.
|
|
|
+ */
|
|
|
+- if (main_ptr->rowgroup_ctr != DCTSIZE)
|
|
|
++ if (main_ptr->rowgroup_ctr != data_unit)
|
|
|
+ return;
|
|
|
+
|
|
|
+ /* Send the completed row to the compressor */
|
|
|
+- if (!(*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
|
|
|
++ if (!(*cinfo->coef->_compress_data) (cinfo, main_ptr->buffer)) {
|
|
|
+ /* If compressor did not consume the whole row, then we must need to
|
|
|
+ * suspend processing and return to the application. In this situation
|
|
|
+ * we pretend we didn't yet consume the last input row; otherwise, if
|
|
|
+ * it happened to be the last row of the image, the application would
|
|
|
+ * think we were done.
|
|
|
+ */
|
|
|
+ if (!main_ptr->suspended) {
|
|
|
+ (*in_row_ctr)--;
|
|
|
+@@ -123,21 +128,25 @@ process_data_simple_main(j_compress_ptr
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize main buffer controller.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
++_jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
+
|
|
|
+ main_ptr = (my_main_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_main_controller));
|
|
|
+ cinfo->main = (struct jpeg_c_main_controller *)main_ptr;
|
|
|
+ main_ptr->pub.start_pass = start_pass_main;
|
|
|
+
|
|
|
+ /* We don't need to create a buffer in raw-data mode. */
|
|
|
+@@ -148,15 +157,17 @@ jinit_c_main_controller(j_compress_ptr c
|
|
|
+ * may be of a different size.
|
|
|
+ */
|
|
|
+ if (need_full_buffer) {
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+ } else {
|
|
|
+ /* Allocate a strip buffer for each component */
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+- main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
|
|
|
++ main_ptr->buffer[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- compptr->width_in_blocks * DCTSIZE,
|
|
|
+- (JDIMENSION)(compptr->v_samp_factor * DCTSIZE));
|
|
|
++ compptr->width_in_blocks * data_unit,
|
|
|
++ (JDIMENSION)(compptr->v_samp_factor * data_unit));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jcmarker.c b/media/libjpeg/jcmarker.c
|
|
|
+--- a/media/libjpeg/jcmarker.c
|
|
|
++++ b/media/libjpeg/jcmarker.c
|
|
|
+@@ -1,26 +1,28 @@
|
|
|
+ /*
|
|
|
+ * jcmarker.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1998, Thomas G. Lane.
|
|
|
+ * Modified 2003-2010 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2010, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains routines to write JPEG datastream markers.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ typedef enum { /* JPEG marker codes */
|
|
|
+ M_SOF0 = 0xc0,
|
|
|
+ M_SOF1 = 0xc1,
|
|
|
+ M_SOF2 = 0xc2,
|
|
|
+ M_SOF3 = 0xc3,
|
|
|
+
|
|
|
+@@ -492,35 +494,36 @@ write_file_header(j_compress_ptr cinfo)
|
|
|
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
|
|
|
+ * This avoids compatibility problems with incorrect implementations that
|
|
|
+ * try to error-check the quant table numbers as soon as they see the SOF.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ write_frame_header(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+- int ci, prec;
|
|
|
++ int ci, prec = 0;
|
|
|
+ boolean is_baseline;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+
|
|
|
+- /* Emit DQT for each quantization table.
|
|
|
+- * Note that emit_dqt() suppresses any duplicate tables.
|
|
|
+- */
|
|
|
+- prec = 0;
|
|
|
+- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+- ci++, compptr++) {
|
|
|
+- prec += emit_dqt(cinfo, compptr->quant_tbl_no);
|
|
|
++ if (!cinfo->master->lossless) {
|
|
|
++ /* Emit DQT for each quantization table.
|
|
|
++ * Note that emit_dqt() suppresses any duplicate tables.
|
|
|
++ */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ prec += emit_dqt(cinfo, compptr->quant_tbl_no);
|
|
|
++ }
|
|
|
++ /* now prec is nonzero iff there are any 16-bit quant tables. */
|
|
|
+ }
|
|
|
+- /* now prec is nonzero iff there are any 16-bit quant tables. */
|
|
|
+
|
|
|
+ /* Check for a non-baseline specification.
|
|
|
+ * Note we assume that Huffman table numbers won't be changed later.
|
|
|
+ */
|
|
|
+ if (cinfo->arith_code || cinfo->progressive_mode ||
|
|
|
+- cinfo->data_precision != 8) {
|
|
|
++ cinfo->master->lossless || cinfo->data_precision != 8) {
|
|
|
+ is_baseline = FALSE;
|
|
|
+ } else {
|
|
|
+ is_baseline = TRUE;
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
|
|
|
+ is_baseline = FALSE;
|
|
|
+ }
|
|
|
+@@ -535,16 +538,18 @@ write_frame_header(j_compress_ptr cinfo)
|
|
|
+ if (cinfo->arith_code) {
|
|
|
+ if (cinfo->progressive_mode)
|
|
|
+ emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
|
|
|
+ else
|
|
|
+ emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */
|
|
|
+ } else {
|
|
|
+ if (cinfo->progressive_mode)
|
|
|
+ emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */
|
|
|
++ else if (cinfo->master->lossless)
|
|
|
++ emit_sof(cinfo, M_SOF3); /* SOF code for lossless Huffman */
|
|
|
+ else if (is_baseline)
|
|
|
+ emit_sof(cinfo, M_SOF0); /* SOF code for baseline implementation */
|
|
|
+ else
|
|
|
+ emit_sof(cinfo, M_SOF1); /* SOF code for non-baseline Huffman file */
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+@@ -569,20 +574,21 @@ write_scan_header(j_compress_ptr cinfo)
|
|
|
+ emit_dac(cinfo);
|
|
|
+ } else {
|
|
|
+ /* Emit Huffman tables.
|
|
|
+ * Note that emit_dht() suppresses any duplicate tables.
|
|
|
+ */
|
|
|
+ for (i = 0; i < cinfo->comps_in_scan; i++) {
|
|
|
+ compptr = cinfo->cur_comp_info[i];
|
|
|
+ /* DC needs no table for refinement scan */
|
|
|
+- if (cinfo->Ss == 0 && cinfo->Ah == 0)
|
|
|
++ if ((cinfo->Ss == 0 && cinfo->Ah == 0) || cinfo->master->lossless)
|
|
|
+ emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
|
|
|
+- /* AC needs no table when not present */
|
|
|
+- if (cinfo->Se)
|
|
|
++ /* AC needs no table when not present, and lossless mode uses only DC
|
|
|
++ tables. */
|
|
|
++ if (cinfo->Se && !cinfo->master->lossless)
|
|
|
+ emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Emit DRI if required --- note that DRI value could change for each scan.
|
|
|
+ * We avoid wasting space with unnecessary DRIs, however.
|
|
|
+ */
|
|
|
+ if (cinfo->restart_interval != marker->last_restart_interval) {
|
|
|
+diff --git a/media/libjpeg/jcmaster.c b/media/libjpeg/jcmaster.c
|
|
|
+--- a/media/libjpeg/jcmaster.c
|
|
|
++++ b/media/libjpeg/jcmaster.c
|
|
|
+@@ -1,61 +1,32 @@
|
|
|
+ /*
|
|
|
+ * jcmaster.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * Modified 2003-2010 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2010, 2016, 2018, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2016, 2018, 2022-2024, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains master control logic for the JPEG compressor.
|
|
|
+ * These routines are concerned with parameter validation, initial setup,
|
|
|
+ * and inter-pass control (determining the number of passes and the work
|
|
|
+ * to be done in each pass).
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
+-
|
|
|
+-
|
|
|
+-/* Private state */
|
|
|
+-
|
|
|
+-typedef enum {
|
|
|
+- main_pass, /* input data, also do first output step */
|
|
|
+- huff_opt_pass, /* Huffman code optimization pass */
|
|
|
+- output_pass /* data output pass */
|
|
|
+-} c_pass_type;
|
|
|
+-
|
|
|
+-typedef struct {
|
|
|
+- struct jpeg_comp_master pub; /* public fields */
|
|
|
+-
|
|
|
+- c_pass_type pass_type; /* the type of the current pass */
|
|
|
+-
|
|
|
+- int pass_number; /* # of passes completed */
|
|
|
+- int total_passes; /* total # of passes needed */
|
|
|
+-
|
|
|
+- int scan_number; /* current index in scan_info[] */
|
|
|
+-
|
|
|
+- /*
|
|
|
+- * This is here so we can add libjpeg-turbo version/build information to the
|
|
|
+- * global string table without introducing a new global symbol. Adding this
|
|
|
+- * information to the global string table allows one to examine a binary
|
|
|
+- * object and determine which version of libjpeg-turbo it was built from or
|
|
|
+- * linked against.
|
|
|
+- */
|
|
|
+- const char *jpeg_version;
|
|
|
+-
|
|
|
+-} my_comp_master;
|
|
|
+-
|
|
|
+-typedef my_comp_master *my_master_ptr;
|
|
|
++#include "jpegapicomp.h"
|
|
|
++#include "jcmaster.h"
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Support routines that do various essential calculations.
|
|
|
+ */
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ /*
|
|
|
+@@ -63,33 +34,143 @@ typedef my_comp_master *my_master_ptr;
|
|
|
+ * NOTE: this is exported for possible use by application.
|
|
|
+ * Hence it mustn't do anything that can't be done twice.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo)
|
|
|
+ /* Do computations that are needed before master selection phase */
|
|
|
+ {
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++
|
|
|
+ /* Hardwire it to "no scaling" */
|
|
|
+ cinfo->jpeg_width = cinfo->image_width;
|
|
|
+ cinfo->jpeg_height = cinfo->image_height;
|
|
|
+- cinfo->min_DCT_h_scaled_size = DCTSIZE;
|
|
|
+- cinfo->min_DCT_v_scaled_size = DCTSIZE;
|
|
|
++ cinfo->min_DCT_h_scaled_size = data_unit;
|
|
|
++ cinfo->min_DCT_v_scaled_size = data_unit;
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
++LOCAL(boolean)
|
|
|
++using_std_huff_tables(j_compress_ptr cinfo)
|
|
|
++{
|
|
|
++ int i;
|
|
|
++
|
|
|
++ static const UINT8 bits_dc_luminance[17] = {
|
|
|
++ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
|
|
|
++ };
|
|
|
++ static const UINT8 val_dc_luminance[] = {
|
|
|
++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
|
|
|
++ };
|
|
|
++
|
|
|
++ static const UINT8 bits_dc_chrominance[17] = {
|
|
|
++ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
|
|
|
++ };
|
|
|
++ static const UINT8 val_dc_chrominance[] = {
|
|
|
++ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
|
|
|
++ };
|
|
|
++
|
|
|
++ static const UINT8 bits_ac_luminance[17] = {
|
|
|
++ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d
|
|
|
++ };
|
|
|
++ static const UINT8 val_ac_luminance[] = {
|
|
|
++ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
|
|
|
++ 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
|
|
|
++ 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
|
|
|
++ 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
|
|
|
++ 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
|
|
|
++ 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
|
|
|
++ 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
|
|
|
++ 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
|
|
|
++ 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
|
|
|
++ 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
|
|
|
++ 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
|
|
|
++ 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
|
|
|
++ 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
|
|
|
++ 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
|
|
|
++ 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
|
|
|
++ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
|
|
|
++ 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
|
|
|
++ 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
|
|
|
++ 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
|
|
|
++ 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
|
|
|
++ 0xf9, 0xfa
|
|
|
++ };
|
|
|
++
|
|
|
++ static const UINT8 bits_ac_chrominance[17] = {
|
|
|
++ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77
|
|
|
++ };
|
|
|
++ static const UINT8 val_ac_chrominance[] = {
|
|
|
++ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
|
|
|
++ 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
|
|
|
++ 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
|
|
|
++ 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
|
|
|
++ 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
|
|
|
++ 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
|
|
|
++ 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
|
|
|
++ 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
|
|
|
++ 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
|
|
|
++ 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
|
|
|
++ 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
|
|
|
++ 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
|
|
|
++ 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
|
|
|
++ 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
|
|
|
++ 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
|
|
|
++ 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
|
|
|
++ 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
|
|
|
++ 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
|
|
|
++ 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
|
|
|
++ 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
|
|
|
++ 0xf9, 0xfa
|
|
|
++ };
|
|
|
++
|
|
|
++ if (cinfo->dc_huff_tbl_ptrs[0] == NULL ||
|
|
|
++ cinfo->ac_huff_tbl_ptrs[0] == NULL ||
|
|
|
++ cinfo->dc_huff_tbl_ptrs[1] == NULL ||
|
|
|
++ cinfo->ac_huff_tbl_ptrs[1] == NULL)
|
|
|
++ return FALSE;
|
|
|
++
|
|
|
++ for (i = 2; i < NUM_HUFF_TBLS; i++) {
|
|
|
++ if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
|
|
|
++ cinfo->ac_huff_tbl_ptrs[i] != NULL)
|
|
|
++ return FALSE;
|
|
|
++ }
|
|
|
++
|
|
|
++ if (memcmp(cinfo->dc_huff_tbl_ptrs[0]->bits, bits_dc_luminance,
|
|
|
++ sizeof(bits_dc_luminance)) ||
|
|
|
++ memcmp(cinfo->dc_huff_tbl_ptrs[0]->huffval, val_dc_luminance,
|
|
|
++ sizeof(val_dc_luminance)) ||
|
|
|
++ memcmp(cinfo->ac_huff_tbl_ptrs[0]->bits, bits_ac_luminance,
|
|
|
++ sizeof(bits_ac_luminance)) ||
|
|
|
++ memcmp(cinfo->ac_huff_tbl_ptrs[0]->huffval, val_ac_luminance,
|
|
|
++ sizeof(val_ac_luminance)) ||
|
|
|
++ memcmp(cinfo->dc_huff_tbl_ptrs[1]->bits, bits_dc_chrominance,
|
|
|
++ sizeof(bits_dc_chrominance)) ||
|
|
|
++ memcmp(cinfo->dc_huff_tbl_ptrs[1]->huffval, val_dc_chrominance,
|
|
|
++ sizeof(val_dc_chrominance)) ||
|
|
|
++ memcmp(cinfo->ac_huff_tbl_ptrs[1]->bits, bits_ac_chrominance,
|
|
|
++ sizeof(bits_ac_chrominance)) ||
|
|
|
++ memcmp(cinfo->ac_huff_tbl_ptrs[1]->huffval, val_ac_chrominance,
|
|
|
++ sizeof(val_ac_chrominance)))
|
|
|
++ return FALSE;
|
|
|
++
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
+ LOCAL(void)
|
|
|
+ initial_setup(j_compress_ptr cinfo, boolean transcode_only)
|
|
|
+ /* Do computations that are needed before master selection phase */
|
|
|
+ {
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ long samplesperrow;
|
|
|
+ JDIMENSION jd_samplesperrow;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ #if JPEG_LIB_VERSION >= 80
|
|
|
+ if (!transcode_only)
|
|
|
+ #endif
|
|
|
+ jpeg_calc_jpeg_dimensions(cinfo);
|
|
|
+ #endif
|
|
|
+
|
|
|
+@@ -104,18 +185,22 @@ initial_setup(j_compress_ptr cinfo, bool
|
|
|
+ ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION);
|
|
|
+
|
|
|
+ /* Width of an input scanline must be representable as JDIMENSION. */
|
|
|
+ samplesperrow = (long)cinfo->image_width * (long)cinfo->input_components;
|
|
|
+ jd_samplesperrow = (JDIMENSION)samplesperrow;
|
|
|
+ if ((long)jd_samplesperrow != samplesperrow)
|
|
|
+ ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
|
|
|
+
|
|
|
+- /* For now, precision must match compiled-in value... */
|
|
|
+- if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ if (cinfo->data_precision != 8 && cinfo->data_precision != 12 &&
|
|
|
++ cinfo->data_precision != 16)
|
|
|
++#else
|
|
|
++ if (cinfo->data_precision != 8 && cinfo->data_precision != 12)
|
|
|
++#endif
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
+
|
|
|
+ /* Check that number of components won't exceed internal array sizes */
|
|
|
+ if (cinfo->num_components > MAX_COMPONENTS)
|
|
|
+ ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
|
|
|
+ MAX_COMPONENTS);
|
|
|
+
|
|
|
+ /* Compute maximum sampling factors; check factor validity */
|
|
|
+@@ -136,48 +221,52 @@ initial_setup(j_compress_ptr cinfo, bool
|
|
|
+
|
|
|
+ /* Compute dimensions of components */
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ /* Fill in the correct component_index value; don't rely on application */
|
|
|
+ compptr->component_index = ci;
|
|
|
+ /* For compression, we never do DCT scaling. */
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+- compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
|
|
|
++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = data_unit;
|
|
|
+ #else
|
|
|
+- compptr->DCT_scaled_size = DCTSIZE;
|
|
|
++ compptr->DCT_scaled_size = data_unit;
|
|
|
+ #endif
|
|
|
+- /* Size in DCT blocks */
|
|
|
++ /* Size in data units */
|
|
|
+ compptr->width_in_blocks = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor,
|
|
|
+- (long)(cinfo->max_h_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_h_samp_factor * data_unit));
|
|
|
+ compptr->height_in_blocks = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor,
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_v_samp_factor * data_unit));
|
|
|
+ /* Size in samples */
|
|
|
+ compptr->downsampled_width = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor,
|
|
|
+ (long)cinfo->max_h_samp_factor);
|
|
|
+ compptr->downsampled_height = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor,
|
|
|
+ (long)cinfo->max_v_samp_factor);
|
|
|
+ /* Mark component needed (this flag isn't actually used for compression) */
|
|
|
+ compptr->component_needed = TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Compute number of fully interleaved MCU rows (number of times that
|
|
|
+- * main controller will call coefficient controller).
|
|
|
++ * main controller will call coefficient or difference controller).
|
|
|
+ */
|
|
|
+ cinfo->total_iMCU_rows = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_height,
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_v_samp_factor * data_unit));
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+-#ifdef C_MULTISCAN_FILES_SUPPORTED
|
|
|
++#if defined(C_MULTISCAN_FILES_SUPPORTED) || defined(C_LOSSLESS_SUPPORTED)
|
|
|
++#define NEED_SCAN_SCRIPT
|
|
|
++#endif
|
|
|
++
|
|
|
++#ifdef NEED_SCAN_SCRIPT
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ validate_script(j_compress_ptr cinfo)
|
|
|
+ /* Verify that the scan script in cinfo->scan_info[] is valid; also
|
|
|
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ const jpeg_scan_info *scanptr;
|
|
|
+@@ -188,32 +277,48 @@ validate_script(j_compress_ptr cinfo)
|
|
|
+ int *last_bitpos_ptr;
|
|
|
+ int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
|
|
|
+ /* -1 until that coefficient has been seen; then last Al for it */
|
|
|
+ #endif
|
|
|
+
|
|
|
+ if (cinfo->num_scans <= 0)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
|
|
|
+
|
|
|
++#ifndef C_MULTISCAN_FILES_SUPPORTED
|
|
|
++ if (cinfo->num_scans > 1)
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
++#endif
|
|
|
++
|
|
|
++ scanptr = cinfo->scan_info;
|
|
|
++ if (scanptr->Ss != 0 && scanptr->Se == 0) {
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ cinfo->master->lossless = TRUE;
|
|
|
++ cinfo->progressive_mode = FALSE;
|
|
|
++ for (ci = 0; ci < cinfo->num_components; ci++)
|
|
|
++ component_sent[ci] = FALSE;
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
++#endif
|
|
|
++ }
|
|
|
+ /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
|
|
|
+ * for progressive JPEG, no scan can have this.
|
|
|
+ */
|
|
|
+- scanptr = cinfo->scan_info;
|
|
|
+- if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2 - 1) {
|
|
|
++ else if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2 - 1) {
|
|
|
+ #ifdef C_PROGRESSIVE_SUPPORTED
|
|
|
+ cinfo->progressive_mode = TRUE;
|
|
|
++ cinfo->master->lossless = FALSE;
|
|
|
+ last_bitpos_ptr = &last_bitpos[0][0];
|
|
|
+ for (ci = 0; ci < cinfo->num_components; ci++)
|
|
|
+ for (coefi = 0; coefi < DCTSIZE2; coefi++)
|
|
|
+ *last_bitpos_ptr++ = -1;
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+- cinfo->progressive_mode = FALSE;
|
|
|
++ cinfo->progressive_mode = cinfo->master->lossless = FALSE;
|
|
|
+ for (ci = 0; ci < cinfo->num_components; ci++)
|
|
|
+ component_sent[ci] = FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
|
|
|
+ /* Validate component indexes */
|
|
|
+ ncomps = scanptr->comps_in_scan;
|
|
|
+ if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
|
|
|
+@@ -235,23 +340,20 @@ validate_script(j_compress_ptr cinfo)
|
|
|
+ #ifdef C_PROGRESSIVE_SUPPORTED
|
|
|
+ /* Rec. ITU-T T.81 | ISO/IEC 10918-1 simply gives the ranges 0..13 for Ah
|
|
|
+ * and Al, but that seems wrong: the upper bound ought to depend on data
|
|
|
+ * precision. Perhaps they really meant 0..N+1 for N-bit precision.
|
|
|
+ * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
|
|
|
+ * out-of-range reconstructed DC values during the first DC scan,
|
|
|
+ * which might cause problems for some decoders.
|
|
|
+ */
|
|
|
+-#if BITS_IN_JSAMPLE == 8
|
|
|
+-#define MAX_AH_AL 10
|
|
|
+-#else
|
|
|
+-#define MAX_AH_AL 13
|
|
|
+-#endif
|
|
|
++ int max_Ah_Al = cinfo->data_precision == 12 ? 13 : 10;
|
|
|
++
|
|
|
+ if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
|
|
|
+- Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
|
|
|
++ Ah < 0 || Ah > max_Ah_Al || Al < 0 || Al > max_Ah_Al)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
+ if (Ss == 0) {
|
|
|
+ if (Se != 0) /* DC and AC together not OK */
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
+ } else {
|
|
|
+ if (ncomps != 1) /* AC scans must be for only one component */
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
+ }
|
|
|
+@@ -269,19 +371,35 @@ validate_script(j_compress_ptr cinfo)
|
|
|
+ if (Ah != last_bitpos_ptr[coefi] || Al != Ah - 1)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
+ }
|
|
|
+ last_bitpos_ptr[coefi] = Al;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+- /* For sequential JPEG, all progression parameters must be these: */
|
|
|
+- if (Ss != 0 || Se != DCTSIZE2 - 1 || Ah != 0 || Al != 0)
|
|
|
+- ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ if (cinfo->master->lossless) {
|
|
|
++ /* The JPEG spec simply gives the range 0..15 for Al (Pt), but that
|
|
|
++ * seems wrong: the upper bound ought to depend on data precision.
|
|
|
++ * Perhaps they really meant 0..N-1 for N-bit precision, which is what
|
|
|
++ * we allow here. Values greater than or equal to the data precision
|
|
|
++ * will result in a blank image.
|
|
|
++ */
|
|
|
++ if (Ss < 1 || Ss > 7 || /* predictor selection value */
|
|
|
++ Se != 0 || Ah != 0 ||
|
|
|
++ Al < 0 || Al >= cinfo->data_precision) /* point transform */
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
++ } else
|
|
|
++#endif
|
|
|
++ {
|
|
|
++ /* For sequential JPEG, all progression parameters must be these: */
|
|
|
++ if (Ss != 0 || Se != DCTSIZE2 - 1 || Ah != 0 || Al != 0)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
|
|
|
++ }
|
|
|
+ /* Make sure components are not sent twice */
|
|
|
+ for (ci = 0; ci < ncomps; ci++) {
|
|
|
+ thisi = scanptr->component_index[ci];
|
|
|
+ if (component_sent[thisi])
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
|
|
|
+ component_sent[thisi] = TRUE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+@@ -303,26 +421,26 @@ validate_script(j_compress_ptr cinfo)
|
|
|
+ } else {
|
|
|
+ for (ci = 0; ci < cinfo->num_components; ci++) {
|
|
|
+ if (!component_sent[ci])
|
|
|
+ ERREXIT(cinfo, JERR_MISSING_DATA);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+-#endif /* C_MULTISCAN_FILES_SUPPORTED */
|
|
|
++#endif /* NEED_SCAN_SCRIPT */
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ select_scan_parameters(j_compress_ptr cinfo)
|
|
|
+ /* Set up the scan parameters for the current scan */
|
|
|
+ {
|
|
|
+ int ci;
|
|
|
+
|
|
|
+-#ifdef C_MULTISCAN_FILES_SUPPORTED
|
|
|
++#ifdef NEED_SCAN_SCRIPT
|
|
|
+ if (cinfo->scan_info != NULL) {
|
|
|
+ /* Prepare for current scan --- the script is already validated */
|
|
|
+ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
+ const jpeg_scan_info *scanptr = cinfo->scan_info + master->scan_number;
|
|
|
+
|
|
|
+ cinfo->comps_in_scan = scanptr->comps_in_scan;
|
|
|
+ for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
|
|
|
+ cinfo->cur_comp_info[ci] =
|
|
|
+@@ -338,46 +456,49 @@ select_scan_parameters(j_compress_ptr ci
|
|
|
+ /* Prepare for single sequential-JPEG scan containing all components */
|
|
|
+ if (cinfo->num_components > MAX_COMPS_IN_SCAN)
|
|
|
+ ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
|
|
|
+ MAX_COMPS_IN_SCAN);
|
|
|
+ cinfo->comps_in_scan = cinfo->num_components;
|
|
|
+ for (ci = 0; ci < cinfo->num_components; ci++) {
|
|
|
+ cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
|
|
|
+ }
|
|
|
+- cinfo->Ss = 0;
|
|
|
+- cinfo->Se = DCTSIZE2 - 1;
|
|
|
+- cinfo->Ah = 0;
|
|
|
+- cinfo->Al = 0;
|
|
|
++ if (!cinfo->master->lossless) {
|
|
|
++ cinfo->Ss = 0;
|
|
|
++ cinfo->Se = DCTSIZE2 - 1;
|
|
|
++ cinfo->Ah = 0;
|
|
|
++ cinfo->Al = 0;
|
|
|
++ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ per_scan_setup(j_compress_ptr cinfo)
|
|
|
+ /* Do computations that are needed before processing a JPEG scan */
|
|
|
+ /* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
|
|
|
+ {
|
|
|
+ int ci, mcublks, tmp;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ if (cinfo->comps_in_scan == 1) {
|
|
|
+
|
|
|
+ /* Noninterleaved (single-component) scan */
|
|
|
+ compptr = cinfo->cur_comp_info[0];
|
|
|
+
|
|
|
+ /* Overall image size in MCUs */
|
|
|
+ cinfo->MCUs_per_row = compptr->width_in_blocks;
|
|
|
+ cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
|
|
|
+
|
|
|
+ /* For noninterleaved scan, always one block per MCU */
|
|
|
+ compptr->MCU_width = 1;
|
|
|
+ compptr->MCU_height = 1;
|
|
|
+ compptr->MCU_blocks = 1;
|
|
|
+- compptr->MCU_sample_width = DCTSIZE;
|
|
|
++ compptr->MCU_sample_width = data_unit;
|
|
|
+ compptr->last_col_width = 1;
|
|
|
+ /* For noninterleaved scans, it is convenient to define last_row_height
|
|
|
+ * as the number of block rows present in the last iMCU row.
|
|
|
+ */
|
|
|
+ tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
|
|
|
+ if (tmp == 0) tmp = compptr->v_samp_factor;
|
|
|
+ compptr->last_row_height = tmp;
|
|
|
+
|
|
|
+@@ -390,30 +511,30 @@ per_scan_setup(j_compress_ptr cinfo)
|
|
|
+ /* Interleaved (multi-component) scan */
|
|
|
+ if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
|
|
|
+ ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
|
|
|
+ MAX_COMPS_IN_SCAN);
|
|
|
+
|
|
|
+ /* Overall image size in MCUs */
|
|
|
+ cinfo->MCUs_per_row = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_width,
|
|
|
+- (long)(cinfo->max_h_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_h_samp_factor * data_unit));
|
|
|
+ cinfo->MCU_rows_in_scan = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->_jpeg_height,
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_v_samp_factor * data_unit));
|
|
|
+
|
|
|
+ cinfo->blocks_in_MCU = 0;
|
|
|
+
|
|
|
+ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+ /* Sampling factors give # of blocks of component in each MCU */
|
|
|
+ compptr->MCU_width = compptr->h_samp_factor;
|
|
|
+ compptr->MCU_height = compptr->v_samp_factor;
|
|
|
+ compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
|
|
|
+- compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE;
|
|
|
++ compptr->MCU_sample_width = compptr->MCU_width * data_unit;
|
|
|
+ /* Figure number of non-dummy blocks in last MCU column & row */
|
|
|
+ tmp = (int)(compptr->width_in_blocks % compptr->MCU_width);
|
|
|
+ if (tmp == 0) tmp = compptr->MCU_width;
|
|
|
+ compptr->last_col_width = tmp;
|
|
|
+ tmp = (int)(compptr->height_in_blocks % compptr->MCU_height);
|
|
|
+ if (tmp == 0) tmp = compptr->MCU_height;
|
|
|
+ compptr->last_row_height = tmp;
|
|
|
+ /* Prepare array describing MCU composition */
|
|
|
+@@ -475,17 +596,18 @@ prepare_for_pass(j_compress_ptr cinfo)
|
|
|
+ master->pub.call_pass_startup = TRUE;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ #ifdef ENTROPY_OPT_SUPPORTED
|
|
|
+ case huff_opt_pass:
|
|
|
+ /* Do Huffman optimization for a scan after the first one. */
|
|
|
+ select_scan_parameters(cinfo);
|
|
|
+ per_scan_setup(cinfo);
|
|
|
+- if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) {
|
|
|
++ if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code ||
|
|
|
++ cinfo->master->lossless) {
|
|
|
+ (*cinfo->entropy->start_pass) (cinfo, TRUE);
|
|
|
+ (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
|
|
|
+ master->pub.call_pass_startup = FALSE;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ /* Special case: Huffman DC refinement scans need no Huffman table
|
|
|
+ * and therefore we can skip the optimization pass for them.
|
|
|
+ */
|
|
|
+@@ -584,43 +706,72 @@ finish_pass_master(j_compress_ptr cinfo)
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize master compression control.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only)
|
|
|
+ {
|
|
|
+- my_master_ptr master;
|
|
|
++ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
++ boolean empty_huff_tables = TRUE;
|
|
|
++ int i;
|
|
|
+
|
|
|
+- master = (my_master_ptr)
|
|
|
+- (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- sizeof(my_comp_master));
|
|
|
+- cinfo->master = (struct jpeg_comp_master *)master;
|
|
|
+ master->pub.prepare_for_pass = prepare_for_pass;
|
|
|
+ master->pub.pass_startup = pass_startup;
|
|
|
+ master->pub.finish_pass = finish_pass_master;
|
|
|
+ master->pub.is_last_pass = FALSE;
|
|
|
+
|
|
|
+- /* Validate parameters, determine derived values */
|
|
|
+- initial_setup(cinfo, transcode_only);
|
|
|
+-
|
|
|
+ if (cinfo->scan_info != NULL) {
|
|
|
+-#ifdef C_MULTISCAN_FILES_SUPPORTED
|
|
|
++#ifdef NEED_SCAN_SCRIPT
|
|
|
+ validate_script(cinfo);
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+ cinfo->progressive_mode = FALSE;
|
|
|
+ cinfo->num_scans = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+- if (cinfo->progressive_mode && !cinfo->arith_code) /* TEMPORARY HACK ??? */
|
|
|
+- cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
|
|
|
++ /* Disable smoothing and subsampling in lossless mode, since those are lossy
|
|
|
++ * algorithms. Set the JPEG colorspace to the input colorspace. Disable raw
|
|
|
++ * (downsampled) data input, because it isn't particularly useful without
|
|
|
++ * subsampling and has not been tested in lossless mode.
|
|
|
++ */
|
|
|
++ if (cinfo->master->lossless) {
|
|
|
++ int ci;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ cinfo->raw_data_in = FALSE;
|
|
|
++ cinfo->smoothing_factor = 0;
|
|
|
++ jpeg_default_colorspace(cinfo);
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++)
|
|
|
++ compptr->h_samp_factor = compptr->v_samp_factor = 1;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Validate parameters, determine derived values */
|
|
|
++ initial_setup(cinfo, transcode_only);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless || /* TEMPORARY HACK ??? */
|
|
|
++ (cinfo->progressive_mode && !cinfo->arith_code))
|
|
|
++ cinfo->optimize_coding = TRUE; /* assume default tables no good for
|
|
|
++ progressive mode or lossless mode */
|
|
|
++ for (i = 0; i < NUM_HUFF_TBLS; i++) {
|
|
|
++ if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
|
|
|
++ cinfo->ac_huff_tbl_ptrs[i] != NULL) {
|
|
|
++ empty_huff_tables = FALSE;
|
|
|
++ break;
|
|
|
++ }
|
|
|
++ }
|
|
|
++ if (cinfo->data_precision == 12 && !cinfo->arith_code &&
|
|
|
++ !cinfo->optimize_coding &&
|
|
|
++ (empty_huff_tables || using_std_huff_tables(cinfo)))
|
|
|
++ cinfo->optimize_coding = TRUE; /* assume default tables no good for 12-bit
|
|
|
++ data precision */
|
|
|
+
|
|
|
+ /* Initialize my private state */
|
|
|
+ if (transcode_only) {
|
|
|
+ /* no main pass in transcoding */
|
|
|
+ if (cinfo->optimize_coding)
|
|
|
+ master->pass_type = huff_opt_pass;
|
|
|
+ else
|
|
|
+ master->pass_type = output_pass;
|
|
|
+diff --git a/media/libjpeg/jcmaster.h b/media/libjpeg/jcmaster.h
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jcmaster.h
|
|
|
+@@ -0,0 +1,43 @@
|
|
|
++/*
|
|
|
++ * jcmaster.h
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1991-1995, Thomas G. Lane.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2016, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains master control structure for the JPEG compressor.
|
|
|
++ */
|
|
|
++
|
|
|
++/* Private state */
|
|
|
++
|
|
|
++typedef enum {
|
|
|
++ main_pass, /* input data, also do first output step */
|
|
|
++ huff_opt_pass, /* Huffman code optimization pass */
|
|
|
++ output_pass /* data output pass */
|
|
|
++} c_pass_type;
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_comp_master pub; /* public fields */
|
|
|
++
|
|
|
++ c_pass_type pass_type; /* the type of the current pass */
|
|
|
++
|
|
|
++ int pass_number; /* # of passes completed */
|
|
|
++ int total_passes; /* total # of passes needed */
|
|
|
++
|
|
|
++ int scan_number; /* current index in scan_info[] */
|
|
|
++
|
|
|
++ /*
|
|
|
++ * This is here so we can add libjpeg-turbo version/build information to the
|
|
|
++ * global string table without introducing a new global symbol. Adding this
|
|
|
++ * information to the global string table allows one to examine a binary
|
|
|
++ * object and determine which version of libjpeg-turbo it was built from or
|
|
|
++ * linked against.
|
|
|
++ */
|
|
|
++ const char *jpeg_version;
|
|
|
++
|
|
|
++} my_comp_master;
|
|
|
++
|
|
|
++typedef my_comp_master *my_master_ptr;
|
|
|
+diff --git a/media/libjpeg/jconfig.h b/media/libjpeg/jconfig.h
|
|
|
+--- a/media/libjpeg/jconfig.h
|
|
|
++++ b/media/libjpeg/jconfig.h
|
|
|
+@@ -1,37 +1,39 @@
|
|
|
+ /* Version ID for the JPEG library.
|
|
|
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
|
|
|
+ */
|
|
|
+ #define JPEG_LIB_VERSION 62
|
|
|
+
|
|
|
+ /* libjpeg-turbo version */
|
|
|
+-#define LIBJPEG_TURBO_VERSION 2.1.5.1
|
|
|
++#define LIBJPEG_TURBO_VERSION 3.0.3
|
|
|
+
|
|
|
+ /* libjpeg-turbo version in integer form */
|
|
|
+-#define LIBJPEG_TURBO_VERSION_NUMBER 2001005
|
|
|
++#define LIBJPEG_TURBO_VERSION_NUMBER 3000003
|
|
|
+
|
|
|
+-/* Support arithmetic encoding */
|
|
|
++/* Support arithmetic encoding when using 8-bit samples */
|
|
|
+ /* #undef C_ARITH_CODING_SUPPORTED */
|
|
|
+
|
|
|
+-/* Support arithmetic decoding */
|
|
|
++/* Support arithmetic decoding when using 8-bit samples */
|
|
|
+ /* #undef D_ARITH_CODING_SUPPORTED */
|
|
|
+
|
|
|
+ /* Support in-memory source/destination managers */
|
|
|
+ #define MEM_SRCDST_SUPPORTED 1
|
|
|
+
|
|
|
+ /* Use accelerated SIMD routines. */
|
|
|
+ #define WITH_SIMD 1
|
|
|
+
|
|
|
+-/*
|
|
|
+- * Define BITS_IN_JSAMPLE as either
|
|
|
+- * 8 for 8-bit sample values (the usual setting)
|
|
|
+- * 12 for 12-bit sample values
|
|
|
+- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
|
|
|
+- * JPEG standard, and the IJG code does not support anything else!
|
|
|
+- * We do not support run-time selection of data precision, sorry.
|
|
|
++/* This version of libjpeg-turbo supports run-time selection of data precision,
|
|
|
++ * so BITS_IN_JSAMPLE is no longer used to specify the data precision at build
|
|
|
++ * time. However, some downstream software expects the macro to be defined.
|
|
|
++ * Since 12-bit data precision is an opt-in feature that requires explicitly
|
|
|
++ * calling 12-bit-specific libjpeg API functions and using 12-bit-specific data
|
|
|
++ * types, the unmodified portion of the libjpeg API still behaves as if it were
|
|
|
++ * built for 8-bit precision, and JSAMPLE is still literally an 8-bit data
|
|
|
++ * type. Thus, it is correct to define BITS_IN_JSAMPLE to 8 here.
|
|
|
+ */
|
|
|
+-
|
|
|
+-#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */
|
|
|
++#ifndef BITS_IN_JSAMPLE
|
|
|
++#define BITS_IN_JSAMPLE 8
|
|
|
++#endif
|
|
|
+
|
|
|
+ /* Define if your (broken) compiler shifts signed values as if they were
|
|
|
+ unsigned. */
|
|
|
+ /* #undef RIGHT_SHIFT_IS_UNSIGNED */
|
|
|
+diff --git a/media/libjpeg/jconfigint.h b/media/libjpeg/jconfigint.h
|
|
|
+--- a/media/libjpeg/jconfigint.h
|
|
|
++++ b/media/libjpeg/jconfigint.h
|
|
|
+@@ -1,27 +1,30 @@
|
|
|
+ /* libjpeg-turbo build number */
|
|
|
+-#define BUILD "20230208"
|
|
|
++#define BUILD "20240508"
|
|
|
++
|
|
|
++/* How to hide global symbols. */
|
|
|
++#define HIDDEN __attribute__((visibility("hidden")))
|
|
|
+
|
|
|
+ /* Need to use Mozilla-specific function inlining. */
|
|
|
+ #include "mozilla/Attributes.h"
|
|
|
+ #define INLINE MOZ_ALWAYS_INLINE
|
|
|
+
|
|
|
+ /* How to obtain thread-local storage */
|
|
|
+ #if defined(_MSC_VER)
|
|
|
+ #define THREAD_LOCAL __declspec(thread)
|
|
|
+ #else
|
|
|
+ #define THREAD_LOCAL __thread
|
|
|
+ #endif
|
|
|
+
|
|
|
+ /* Define to the full name of this package. */
|
|
|
+-#define PACKAGE_NAME "libjpeg-turbo"
|
|
|
++#define PACKAGE_NAME "libjpeg-turbo"
|
|
|
+
|
|
|
+ /* Version number of package */
|
|
|
+-#define VERSION "2.1.5.1"
|
|
|
++#define VERSION "3.0.3"
|
|
|
+
|
|
|
+ /* The size of `size_t', as computed by sizeof. */
|
|
|
+ #ifdef HAVE_64BIT_BUILD
|
|
|
+ #define SIZEOF_SIZE_T 8
|
|
|
+ #else
|
|
|
+ #define SIZEOF_SIZE_T 4
|
|
|
+ #endif
|
|
|
+
|
|
|
+@@ -47,8 +50,37 @@
|
|
|
+ #if __has_attribute(fallthrough)
|
|
|
+ #define FALLTHROUGH __attribute__((fallthrough));
|
|
|
+ #else
|
|
|
+ #define FALLTHROUGH
|
|
|
+ #endif
|
|
|
+ #else
|
|
|
+ #define FALLTHROUGH
|
|
|
+ #endif
|
|
|
++
|
|
|
++/*
|
|
|
++ * Define BITS_IN_JSAMPLE as either
|
|
|
++ * 8 for 8-bit sample values (the usual setting)
|
|
|
++ * 12 for 12-bit sample values
|
|
|
++ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
|
|
|
++ * JPEG standard, and the IJG code does not support anything else!
|
|
|
++ */
|
|
|
++
|
|
|
++#ifndef BITS_IN_JSAMPLE
|
|
|
++#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */
|
|
|
++#endif
|
|
|
++
|
|
|
++#undef C_ARITH_CODING_SUPPORTED
|
|
|
++#undef D_ARITH_CODING_SUPPORTED
|
|
|
++#undef WITH_SIMD
|
|
|
++
|
|
|
++#if BITS_IN_JSAMPLE == 8
|
|
|
++
|
|
|
++/* Support arithmetic encoding */
|
|
|
++/* #undef C_ARITH_CODING_SUPPORTED */
|
|
|
++
|
|
|
++/* Support arithmetic decoding */
|
|
|
++/* #undef D_ARITH_CODING_SUPPORTED */
|
|
|
++
|
|
|
++/* Use accelerated SIMD routines. */
|
|
|
++#define WITH_SIMD 1
|
|
|
++
|
|
|
++#endif
|
|
|
+diff --git a/media/libjpeg/jcparam.c b/media/libjpeg/jcparam.c
|
|
|
+--- a/media/libjpeg/jcparam.c
|
|
|
++++ b/media/libjpeg/jcparam.c
|
|
|
+@@ -1,16 +1,18 @@
|
|
|
+ /*
|
|
|
+ * jcparam.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1998, Thomas G. Lane.
|
|
|
+ * Modified 2003-2008 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009-2011, 2018, D. R. Commander.
|
|
|
++ * Copyright (C) 2009-2011, 2018, 2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains optional default-setting code for the JPEG compressor.
|
|
|
+ * Applications do not have to use this file, but those that don't use it
|
|
|
+ * must know a lot more about the innards of the JPEG code.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -197,17 +199,16 @@ jpeg_set_defaults(j_compress_ptr cinfo)
|
|
|
+ MAX_COMPONENTS * sizeof(jpeg_component_info));
|
|
|
+
|
|
|
+ /* Initialize everything not dependent on the color space */
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ cinfo->scale_num = 1; /* 1:1 scaling */
|
|
|
+ cinfo->scale_denom = 1;
|
|
|
+ #endif
|
|
|
+- cinfo->data_precision = BITS_IN_JSAMPLE;
|
|
|
+ /* Set up two quantization tables using default quality of 75 */
|
|
|
+ jpeg_set_quality(cinfo, 75, TRUE);
|
|
|
+ /* Set up two Huffman tables */
|
|
|
+ std_huff_tables((j_common_ptr)cinfo);
|
|
|
+
|
|
|
+ /* Initialize default arithmetic coding conditioning */
|
|
|
+ for (i = 0; i < NUM_ARITH_TBLS; i++) {
|
|
|
+ cinfo->arith_dc_L[i] = 0;
|
|
|
+@@ -227,17 +228,17 @@ jpeg_set_defaults(j_compress_ptr cinfo)
|
|
|
+
|
|
|
+ /* By default, don't do extra passes to optimize entropy coding */
|
|
|
+ cinfo->optimize_coding = FALSE;
|
|
|
+ /* The standard Huffman tables are only valid for 8-bit data precision.
|
|
|
+ * If the precision is higher, force optimization on so that usable
|
|
|
+ * tables will be computed. This test can be removed if default tables
|
|
|
+ * are supplied that are valid for the desired precision.
|
|
|
+ */
|
|
|
+- if (cinfo->data_precision > 8)
|
|
|
++ if (cinfo->data_precision == 12 && !cinfo->arith_code)
|
|
|
+ cinfo->optimize_coding = TRUE;
|
|
|
+
|
|
|
+ /* By default, use the simpler non-cosited sampling alignment */
|
|
|
+ cinfo->CCIR601_sampling = FALSE;
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ /* By default, apply fancy downsampling */
|
|
|
+ cinfo->do_fancy_downsampling = TRUE;
|
|
|
+@@ -291,17 +292,20 @@ jpeg_default_colorspace(j_compress_ptr c
|
|
|
+ case JCS_EXT_BGR:
|
|
|
+ case JCS_EXT_BGRX:
|
|
|
+ case JCS_EXT_XBGR:
|
|
|
+ case JCS_EXT_XRGB:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+ case JCS_EXT_BGRA:
|
|
|
+ case JCS_EXT_ABGR:
|
|
|
+ case JCS_EXT_ARGB:
|
|
|
+- jpeg_set_colorspace(cinfo, JCS_YCbCr);
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ jpeg_set_colorspace(cinfo, JCS_RGB);
|
|
|
++ else
|
|
|
++ jpeg_set_colorspace(cinfo, JCS_YCbCr);
|
|
|
+ break;
|
|
|
+ case JCS_YCbCr:
|
|
|
+ jpeg_set_colorspace(cinfo, JCS_YCbCr);
|
|
|
+ break;
|
|
|
+ case JCS_CMYK:
|
|
|
+ jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
|
|
|
+ break;
|
|
|
+ case JCS_YCCK:
|
|
|
+@@ -470,16 +474,21 @@ jpeg_simple_progression(j_compress_ptr c
|
|
|
+ int ncomps = cinfo->num_components;
|
|
|
+ int nscans;
|
|
|
+ jpeg_scan_info *scanptr;
|
|
|
+
|
|
|
+ /* Safety check to ensure start_compress not called yet. */
|
|
|
+ if (cinfo->global_state != CSTATE_START)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+
|
|
|
++ if (cinfo->master->lossless) {
|
|
|
++ cinfo->master->lossless = FALSE;
|
|
|
++ jpeg_default_colorspace(cinfo);
|
|
|
++ }
|
|
|
++
|
|
|
+ /* Figure space needed for script. Calculation must match code below! */
|
|
|
+ if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
|
|
|
+ /* Custom script for YCbCr color images. */
|
|
|
+ nscans = 10;
|
|
|
+ } else {
|
|
|
+ /* All-purpose script for other color spaces. */
|
|
|
+ if (ncomps > MAX_COMPS_IN_SCAN)
|
|
|
+ nscans = 6 * ncomps; /* 2 DC + 4 AC scans per component */
|
|
|
+@@ -534,8 +543,43 @@ jpeg_simple_progression(j_compress_ptr c
|
|
|
+ scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
|
|
|
+ /* Successive approximation final pass */
|
|
|
+ scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
|
|
|
+ scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ #endif /* C_PROGRESSIVE_SUPPORTED */
|
|
|
++
|
|
|
++
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++/*
|
|
|
++ * Enable lossless mode.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++jpeg_enable_lossless(j_compress_ptr cinfo, int predictor_selection_value,
|
|
|
++ int point_transform)
|
|
|
++{
|
|
|
++ /* Safety check to ensure start_compress not called yet. */
|
|
|
++ if (cinfo->global_state != CSTATE_START)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
++
|
|
|
++ cinfo->master->lossless = TRUE;
|
|
|
++ cinfo->Ss = predictor_selection_value;
|
|
|
++ cinfo->Se = 0;
|
|
|
++ cinfo->Ah = 0;
|
|
|
++ cinfo->Al = point_transform;
|
|
|
++
|
|
|
++ /* The JPEG spec simply gives the range 0..15 for Al (Pt), but that seems
|
|
|
++ * wrong: the upper bound ought to depend on data precision. Perhaps they
|
|
|
++ * really meant 0..N-1 for N-bit precision, which is what we allow here.
|
|
|
++ * Values greater than or equal to the data precision will result in a blank
|
|
|
++ * image.
|
|
|
++ */
|
|
|
++ if (cinfo->Ss < 1 || cinfo->Ss > 7 ||
|
|
|
++ cinfo->Al < 0 || cinfo->Al >= cinfo->data_precision)
|
|
|
++ ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
|
|
|
++ cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* C_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jcphuff.c b/media/libjpeg/jcphuff.c
|
|
|
+--- a/media/libjpeg/jcphuff.c
|
|
|
++++ b/media/libjpeg/jcphuff.c
|
|
|
+@@ -1,82 +1,55 @@
|
|
|
+ /*
|
|
|
+ * jcphuff.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1995-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2011, 2015, 2018, 2021-2022, 2024, D. R. Commander.
|
|
|
+ * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
|
|
|
+ * Copyright (C) 2020, Arm Limited.
|
|
|
+ * Copyright (C) 2021, Alex Richardson.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains Huffman entropy encoding routines for progressive JPEG.
|
|
|
+ *
|
|
|
+ * We do not support output suspension in this module, since the library
|
|
|
+ * currently does not allow multiple-scan files to be written with output
|
|
|
+ * suspension.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ #include "jsimd.h"
|
|
|
++#else
|
|
|
++#include "jchuff.h" /* Declarations shared with jc*huff.c */
|
|
|
++#endif
|
|
|
+ #include <limits.h>
|
|
|
+
|
|
|
+ #ifdef HAVE_INTRIN_H
|
|
|
+ #include <intrin.h>
|
|
|
+ #ifdef _MSC_VER
|
|
|
+ #ifdef HAVE_BITSCANFORWARD64
|
|
|
+ #pragma intrinsic(_BitScanForward64)
|
|
|
+ #endif
|
|
|
+ #ifdef HAVE_BITSCANFORWARD
|
|
|
+ #pragma intrinsic(_BitScanForward)
|
|
|
+ #endif
|
|
|
+ #endif
|
|
|
+ #endif
|
|
|
+
|
|
|
+ #ifdef C_PROGRESSIVE_SUPPORTED
|
|
|
+
|
|
|
+-/*
|
|
|
+- * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
|
|
+- * used for bit counting rather than the lookup table. This will reduce the
|
|
|
+- * memory footprint by 64k, which is important for some mobile applications
|
|
|
+- * that create many isolated instances of libjpeg-turbo (web browsers, for
|
|
|
+- * instance.) This may improve performance on some mobile platforms as well.
|
|
|
+- * This feature is enabled by default only on Arm processors, because some x86
|
|
|
+- * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
|
|
+- * shown to have a significant performance impact even on the x86 chips that
|
|
|
+- * have a fast implementation of it. When building for Armv6, you can
|
|
|
+- * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
|
|
+- * flags (this defines __thumb__).
|
|
|
+- */
|
|
|
+-
|
|
|
+-/* NOTE: Both GCC and Clang define __GNUC__ */
|
|
|
+-#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
|
|
+- defined(_M_ARM) || defined(_M_ARM64)
|
|
|
+-#if !defined(__thumb__) || defined(__thumb2__)
|
|
|
+-#define USE_CLZ_INTRINSIC
|
|
|
+-#endif
|
|
|
+-#endif
|
|
|
+-
|
|
|
+-#ifdef USE_CLZ_INTRINSIC
|
|
|
+-#if defined(_MSC_VER) && !defined(__clang__)
|
|
|
+-#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
|
|
|
+-#else
|
|
|
+-#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
|
|
+-#endif
|
|
|
+-#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
|
|
+-#else
|
|
|
+-#include "jpeg_nbits_table.h"
|
|
|
+-#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
|
|
+-#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
|
|
+-#endif
|
|
|
++#include "jpeg_nbits.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Expanded entropy encoder object for progressive Huffman encoding. */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_entropy_encoder pub; /* public fields */
|
|
|
+
|
|
|
+ /* Pointer to routine to prepare data for encode_mcu_AC_first() */
|
|
|
+@@ -218,28 +191,32 @@ start_pass_phuff(j_compress_ptr cinfo, b
|
|
|
+ /* We assume jcmaster.c already validated the scan parameters. */
|
|
|
+
|
|
|
+ /* Select execution routines */
|
|
|
+ if (cinfo->Ah == 0) {
|
|
|
+ if (is_DC_band)
|
|
|
+ entropy->pub.encode_mcu = encode_mcu_DC_first;
|
|
|
+ else
|
|
|
+ entropy->pub.encode_mcu = encode_mcu_AC_first;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_encode_mcu_AC_first_prepare())
|
|
|
+ entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
|
|
|
+ } else {
|
|
|
+ if (is_DC_band)
|
|
|
+ entropy->pub.encode_mcu = encode_mcu_DC_refine;
|
|
|
+ else {
|
|
|
+ entropy->pub.encode_mcu = encode_mcu_AC_refine;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_encode_mcu_AC_refine_prepare())
|
|
|
+ entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
|
|
|
+ /* AC refinement needs a correction bit buffer */
|
|
|
+ if (entropy->bit_buffer == NULL)
|
|
|
+ entropy->bit_buffer = (char *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ MAX_CORR_BITS * sizeof(char));
|
|
|
+ }
|
|
|
+ }
|
|
|
+@@ -484,16 +461,17 @@ encode_mcu_DC_first(j_compress_ptr cinfo
|
|
|
+ phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
|
|
|
+ register int temp, temp2, temp3;
|
|
|
+ register int nbits;
|
|
|
+ int blkn, ci;
|
|
|
+ int Al = cinfo->Al;
|
|
|
+ JBLOCKROW block;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ ISHIFT_TEMPS
|
|
|
++ int max_coef_bits = cinfo->data_precision + 2;
|
|
|
+
|
|
|
+ entropy->next_output_byte = cinfo->dest->next_output_byte;
|
|
|
+ entropy->free_in_buffer = cinfo->dest->free_in_buffer;
|
|
|
+
|
|
|
+ /* Emit restart marker if needed */
|
|
|
+ if (cinfo->restart_interval)
|
|
|
+ if (entropy->restarts_to_go == 0)
|
|
|
+ emit_restart(entropy, entropy->next_restart_num);
|
|
|
+@@ -526,17 +504,17 @@ encode_mcu_DC_first(j_compress_ptr cinfo
|
|
|
+ /* For a negative input, want temp2 = bitwise complement of abs(input) */
|
|
|
+ temp2 = temp ^ temp3;
|
|
|
+
|
|
|
+ /* Find the number of bits needed for the magnitude of the coefficient */
|
|
|
+ nbits = JPEG_NBITS(temp);
|
|
|
+ /* Check for out-of-range coefficient values.
|
|
|
+ * Since we're encoding a difference, the range limit is twice as much.
|
|
|
+ */
|
|
|
+- if (nbits > MAX_COEF_BITS + 1)
|
|
|
++ if (nbits > max_coef_bits + 1)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_DCT_COEF);
|
|
|
+
|
|
|
+ /* Count/emit the Huffman-coded symbol for the number of bits */
|
|
|
+ emit_symbol(entropy, compptr->dc_tbl_no, nbits);
|
|
|
+
|
|
|
+ /* Emit that number of bits of the value, if positive, */
|
|
|
+ /* or the complement of its magnitude, if negative. */
|
|
|
+ if (nbits) /* emit_bits rejects calls with size 0 */
|
|
|
+@@ -637,17 +615,17 @@ label \
|
|
|
+ while (r > 15) { \
|
|
|
+ emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
|
|
|
+ r -= 16; \
|
|
|
+ } \
|
|
|
+ \
|
|
|
+ /* Find the number of bits needed for the magnitude of the coefficient */ \
|
|
|
+ nbits = JPEG_NBITS_NONZERO(temp); /* there must be at least one 1 bit */ \
|
|
|
+ /* Check for out-of-range coefficient values */ \
|
|
|
+- if (nbits > MAX_COEF_BITS) \
|
|
|
++ if (nbits > max_coef_bits) \
|
|
|
+ ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
|
|
|
+ \
|
|
|
+ /* Count/emit Huffman symbol for run length / number of bits */ \
|
|
|
+ emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
|
|
|
+ \
|
|
|
+ /* Emit that number of bits of the value, if positive, */ \
|
|
|
+ /* or the complement of its magnitude, if negative. */ \
|
|
|
+ emit_bits(entropy, (unsigned int)temp2, nbits); \
|
|
|
+@@ -665,16 +643,17 @@ encode_mcu_AC_first(j_compress_ptr cinfo
|
|
|
+ register int nbits, r;
|
|
|
+ int Sl = cinfo->Se - cinfo->Ss + 1;
|
|
|
+ int Al = cinfo->Al;
|
|
|
+ UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
|
|
|
+ UJCOEF *values;
|
|
|
+ const UJCOEF *cvalue;
|
|
|
+ size_t zerobits;
|
|
|
+ size_t bits[8 / SIZEOF_SIZE_T];
|
|
|
++ int max_coef_bits = cinfo->data_precision + 2;
|
|
|
+
|
|
|
+ entropy->next_output_byte = cinfo->dest->next_output_byte;
|
|
|
+ entropy->free_in_buffer = cinfo->dest->free_in_buffer;
|
|
|
+
|
|
|
+ /* Emit restart marker if needed */
|
|
|
+ if (cinfo->restart_interval)
|
|
|
+ if (entropy->restarts_to_go == 0)
|
|
|
+ emit_restart(entropy, entropy->next_restart_num);
|
|
|
+diff --git a/media/libjpeg/jcprepct.c b/media/libjpeg/jcprepct.c
|
|
|
+--- a/media/libjpeg/jcprepct.c
|
|
|
++++ b/media/libjpeg/jcprepct.c
|
|
|
+@@ -1,13 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jcprepct.c
|
|
|
+ *
|
|
|
+- * This file is part of the Independent JPEG Group's software:
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the compression preprocessing controller.
|
|
|
+ * This controller manages the color conversion, downsampling,
|
|
|
+ * and edge expansion steps.
|
|
|
+@@ -15,18 +17,21 @@
|
|
|
+ * Most of the complexity here is associated with buffering input rows
|
|
|
+ * as required by the downsampler. See the comments at the head of
|
|
|
+ * jcsample.c for the downsampler's needs.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* At present, jcsample.c can request context rows only for smoothing.
|
|
|
+ * In the future, we might also need context rows for CCIR601 sampling
|
|
|
+ * or other more-complex downsampling procedures. The code to support
|
|
|
+ * context rows should be compiled only if needed.
|
|
|
+ */
|
|
|
+ #ifdef INPUT_SMOOTHING_SUPPORTED
|
|
|
+ #define CONTEXT_ROWS_SUPPORTED
|
|
|
+ #endif
|
|
|
+@@ -54,17 +59,17 @@
|
|
|
+ /* Private buffer controller object */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_c_prep_controller pub; /* public fields */
|
|
|
+
|
|
|
+ /* Downsampling input buffer. This buffer holds color-converted data
|
|
|
+ * until we have enough to do a downsample step.
|
|
|
+ */
|
|
|
+- JSAMPARRAY color_buf[MAX_COMPONENTS];
|
|
|
++ _JSAMPARRAY color_buf[MAX_COMPONENTS];
|
|
|
+
|
|
|
+ JDIMENSION rows_to_go; /* counts rows remaining in source image */
|
|
|
+ int next_buf_row; /* index of next row to store in color_buf */
|
|
|
+
|
|
|
+ #ifdef CONTEXT_ROWS_SUPPORTED /* only needed for context case */
|
|
|
+ int this_row_group; /* starting row index of group to process */
|
|
|
+ int next_buf_stop; /* downsample when we reach this index */
|
|
|
+ #endif
|
|
|
+@@ -101,85 +106,87 @@ start_pass_prep(j_compress_ptr cinfo, J_
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Expand an image vertically from height input_rows to height output_rows,
|
|
|
+ * by duplicating the bottom row.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+-expand_bottom_edge(JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows,
|
|
|
++expand_bottom_edge(_JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows,
|
|
|
+ int output_rows)
|
|
|
+ {
|
|
|
+ register int row;
|
|
|
+
|
|
|
+ for (row = input_rows; row < output_rows; row++) {
|
|
|
+- jcopy_sample_rows(image_data, input_rows - 1, image_data, row, 1,
|
|
|
+- num_cols);
|
|
|
++ _jcopy_sample_rows(image_data, input_rows - 1, image_data, row, 1,
|
|
|
++ num_cols);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data in the simple no-context case.
|
|
|
+ *
|
|
|
+ * Preprocessor output data is counted in "row groups". A row group
|
|
|
+ * is defined to be v_samp_factor sample rows of each component.
|
|
|
+ * Downsampling will produce this much data from each max_v_samp_factor
|
|
|
+ * input rows.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-pre_process_data(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
++pre_process_data(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
+ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
|
|
|
+ JDIMENSION out_row_groups_avail)
|
|
|
+ {
|
|
|
+ my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
|
|
|
+ int numrows, ci;
|
|
|
+ JDIMENSION inrows;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ while (*in_row_ctr < in_rows_avail &&
|
|
|
+ *out_row_group_ctr < out_row_groups_avail) {
|
|
|
+ /* Do color conversion to fill the conversion buffer. */
|
|
|
+ inrows = in_rows_avail - *in_row_ctr;
|
|
|
+ numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
|
|
|
+ numrows = (int)MIN((JDIMENSION)numrows, inrows);
|
|
|
+- (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
|
|
|
+- prep->color_buf,
|
|
|
+- (JDIMENSION)prep->next_buf_row,
|
|
|
+- numrows);
|
|
|
++ (*cinfo->cconvert->_color_convert) (cinfo, input_buf + *in_row_ctr,
|
|
|
++ prep->color_buf,
|
|
|
++ (JDIMENSION)prep->next_buf_row,
|
|
|
++ numrows);
|
|
|
+ *in_row_ctr += numrows;
|
|
|
+ prep->next_buf_row += numrows;
|
|
|
+ prep->rows_to_go -= numrows;
|
|
|
+ /* If at bottom of image, pad to fill the conversion buffer. */
|
|
|
+ if (prep->rows_to_go == 0 &&
|
|
|
+ prep->next_buf_row < cinfo->max_v_samp_factor) {
|
|
|
+ for (ci = 0; ci < cinfo->num_components; ci++) {
|
|
|
+ expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
|
|
|
+ prep->next_buf_row, cinfo->max_v_samp_factor);
|
|
|
+ }
|
|
|
+ prep->next_buf_row = cinfo->max_v_samp_factor;
|
|
|
+ }
|
|
|
+ /* If we've filled the conversion buffer, empty it. */
|
|
|
+ if (prep->next_buf_row == cinfo->max_v_samp_factor) {
|
|
|
+- (*cinfo->downsample->downsample) (cinfo,
|
|
|
+- prep->color_buf, (JDIMENSION)0,
|
|
|
+- output_buf, *out_row_group_ctr);
|
|
|
++ (*cinfo->downsample->_downsample) (cinfo,
|
|
|
++ prep->color_buf, (JDIMENSION)0,
|
|
|
++ output_buf, *out_row_group_ctr);
|
|
|
+ prep->next_buf_row = 0;
|
|
|
+ (*out_row_group_ctr)++;
|
|
|
+ }
|
|
|
+ /* If at bottom of image, pad the output to a full iMCU height.
|
|
|
+ * Note we assume the caller is providing a one-iMCU-height output buffer!
|
|
|
+ */
|
|
|
+ if (prep->rows_to_go == 0 && *out_row_group_ctr < out_row_groups_avail) {
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+- expand_bottom_edge(output_buf[ci], compptr->width_in_blocks * DCTSIZE,
|
|
|
++ expand_bottom_edge(output_buf[ci],
|
|
|
++ compptr->width_in_blocks * data_unit,
|
|
|
+ (int)(*out_row_group_ctr * compptr->v_samp_factor),
|
|
|
+ (int)(out_row_groups_avail * compptr->v_samp_factor));
|
|
|
+ }
|
|
|
+ *out_row_group_ctr = out_row_groups_avail;
|
|
|
+ break; /* can exit outer loop without test */
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+@@ -187,43 +194,43 @@ pre_process_data(j_compress_ptr cinfo, J
|
|
|
+
|
|
|
+ #ifdef CONTEXT_ROWS_SUPPORTED
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data in the context case.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-pre_process_context(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
++pre_process_context(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
+ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
|
|
|
++ _JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
|
|
|
+ JDIMENSION out_row_groups_avail)
|
|
|
+ {
|
|
|
+ my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
|
|
|
+ int numrows, ci;
|
|
|
+ int buf_height = cinfo->max_v_samp_factor * 3;
|
|
|
+ JDIMENSION inrows;
|
|
|
+
|
|
|
+ while (*out_row_group_ctr < out_row_groups_avail) {
|
|
|
+ if (*in_row_ctr < in_rows_avail) {
|
|
|
+ /* Do color conversion to fill the conversion buffer. */
|
|
|
+ inrows = in_rows_avail - *in_row_ctr;
|
|
|
+ numrows = prep->next_buf_stop - prep->next_buf_row;
|
|
|
+ numrows = (int)MIN((JDIMENSION)numrows, inrows);
|
|
|
+- (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
|
|
|
+- prep->color_buf,
|
|
|
+- (JDIMENSION)prep->next_buf_row,
|
|
|
+- numrows);
|
|
|
++ (*cinfo->cconvert->_color_convert) (cinfo, input_buf + *in_row_ctr,
|
|
|
++ prep->color_buf,
|
|
|
++ (JDIMENSION)prep->next_buf_row,
|
|
|
++ numrows);
|
|
|
+ /* Pad at top of image, if first time through */
|
|
|
+ if (prep->rows_to_go == cinfo->image_height) {
|
|
|
+ for (ci = 0; ci < cinfo->num_components; ci++) {
|
|
|
+ int row;
|
|
|
+ for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
|
|
|
+- jcopy_sample_rows(prep->color_buf[ci], 0, prep->color_buf[ci],
|
|
|
+- -row, 1, cinfo->image_width);
|
|
|
++ _jcopy_sample_rows(prep->color_buf[ci], 0, prep->color_buf[ci],
|
|
|
++ -row, 1, cinfo->image_width);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ *in_row_ctr += numrows;
|
|
|
+ prep->next_buf_row += numrows;
|
|
|
+ prep->rows_to_go -= numrows;
|
|
|
+ } else {
|
|
|
+ /* Return for more data, unless we are at the bottom of the image. */
|
|
|
+@@ -235,19 +242,19 @@ pre_process_context(j_compress_ptr cinfo
|
|
|
+ expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
|
|
|
+ prep->next_buf_row, prep->next_buf_stop);
|
|
|
+ }
|
|
|
+ prep->next_buf_row = prep->next_buf_stop;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* If we've gotten enough data, downsample a row group. */
|
|
|
+ if (prep->next_buf_row == prep->next_buf_stop) {
|
|
|
+- (*cinfo->downsample->downsample) (cinfo, prep->color_buf,
|
|
|
+- (JDIMENSION)prep->this_row_group,
|
|
|
+- output_buf, *out_row_group_ctr);
|
|
|
++ (*cinfo->downsample->_downsample) (cinfo, prep->color_buf,
|
|
|
++ (JDIMENSION)prep->this_row_group,
|
|
|
++ output_buf, *out_row_group_ctr);
|
|
|
+ (*out_row_group_ctr)++;
|
|
|
+ /* Advance pointers with wraparound as necessary. */
|
|
|
+ prep->this_row_group += cinfo->max_v_samp_factor;
|
|
|
+ if (prep->this_row_group >= buf_height)
|
|
|
+ prep->this_row_group = 0;
|
|
|
+ if (prep->next_buf_row >= buf_height)
|
|
|
+ prep->next_buf_row = 0;
|
|
|
+ prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
|
|
|
+@@ -262,40 +269,41 @@ pre_process_context(j_compress_ptr cinfo
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ create_context_buffer(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
|
|
|
+ int rgroup_height = cinfo->max_v_samp_factor;
|
|
|
+ int ci, i;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- JSAMPARRAY true_buffer, fake_buffer;
|
|
|
++ _JSAMPARRAY true_buffer, fake_buffer;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ /* Grab enough space for fake row pointers for all the components;
|
|
|
+ * we need five row groups' worth of pointers for each component.
|
|
|
+ */
|
|
|
+- fake_buffer = (JSAMPARRAY)
|
|
|
++ fake_buffer = (_JSAMPARRAY)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (cinfo->num_components * 5 * rgroup_height) *
|
|
|
+- sizeof(JSAMPROW));
|
|
|
++ sizeof(_JSAMPROW));
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ /* Allocate the actual buffer space (3 row groups) for this component.
|
|
|
+ * We make the buffer wide enough to allow the downsampler to edge-expand
|
|
|
+ * horizontally within the buffer, if it so chooses.
|
|
|
+ */
|
|
|
+- true_buffer = (*cinfo->mem->alloc_sarray)
|
|
|
++ true_buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (JDIMENSION)(((long)compptr->width_in_blocks * DCTSIZE *
|
|
|
++ (JDIMENSION)(((long)compptr->width_in_blocks * data_unit *
|
|
|
+ cinfo->max_h_samp_factor) / compptr->h_samp_factor),
|
|
|
+ (JDIMENSION)(3 * rgroup_height));
|
|
|
+ /* Copy true buffer row pointers into the middle of the fake row array */
|
|
|
+ memcpy(fake_buffer + rgroup_height, true_buffer,
|
|
|
+- 3 * rgroup_height * sizeof(JSAMPROW));
|
|
|
++ 3 * rgroup_height * sizeof(_JSAMPROW));
|
|
|
+ /* Fill in the above and below wraparound pointers */
|
|
|
+ for (i = 0; i < rgroup_height; i++) {
|
|
|
+ fake_buffer[i] = true_buffer[2 * rgroup_height + i];
|
|
|
+ fake_buffer[4 * rgroup_height + i] = true_buffer[i];
|
|
|
+ }
|
|
|
+ prep->color_buf[ci] = fake_buffer + rgroup_height;
|
|
|
+ fake_buffer += 5 * rgroup_height; /* point to space for next component */
|
|
|
+ }
|
|
|
+@@ -304,21 +312,25 @@ create_context_buffer(j_compress_ptr cin
|
|
|
+ #endif /* CONTEXT_ROWS_SUPPORTED */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize preprocessing controller.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
++_jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer)
|
|
|
+ {
|
|
|
+ my_prep_ptr prep;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
+
|
|
|
+ if (need_full_buffer) /* safety check */
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+
|
|
|
+ prep = (my_prep_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_prep_controller));
|
|
|
+ cinfo->prep = (struct jpeg_c_prep_controller *)prep;
|
|
|
+@@ -326,26 +338,28 @@ jinit_c_prep_controller(j_compress_ptr c
|
|
|
+
|
|
|
+ /* Allocate the color conversion buffer.
|
|
|
+ * We make the buffer wide enough to allow the downsampler to edge-expand
|
|
|
+ * horizontally within the buffer, if it so chooses.
|
|
|
+ */
|
|
|
+ if (cinfo->downsample->need_context_rows) {
|
|
|
+ /* Set up to provide context rows */
|
|
|
+ #ifdef CONTEXT_ROWS_SUPPORTED
|
|
|
+- prep->pub.pre_process_data = pre_process_context;
|
|
|
++ prep->pub._pre_process_data = pre_process_context;
|
|
|
+ create_context_buffer(cinfo);
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+ /* No context, just make it tall enough for one row group */
|
|
|
+- prep->pub.pre_process_data = pre_process_data;
|
|
|
++ prep->pub._pre_process_data = pre_process_data;
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+- prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
|
|
|
++ prep->color_buf[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (JDIMENSION)(((long)compptr->width_in_blocks * DCTSIZE *
|
|
|
++ (JDIMENSION)(((long)compptr->width_in_blocks * data_unit *
|
|
|
+ cinfo->max_h_samp_factor) / compptr->h_samp_factor),
|
|
|
+ (JDIMENSION)cinfo->max_v_samp_factor);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jcsample.c b/media/libjpeg/jcsample.c
|
|
|
+--- a/media/libjpeg/jcsample.c
|
|
|
++++ b/media/libjpeg/jcsample.c
|
|
|
+@@ -1,17 +1,19 @@
|
|
|
+ /*
|
|
|
+ * jcsample.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1996, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+ * Copyright (C) 2014, MIPS Technologies, Inc., California.
|
|
|
+- * Copyright (C) 2015, 2019, D. R. Commander.
|
|
|
++ * Copyright (C) 2015, 2019, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains downsampling routines.
|
|
|
+ *
|
|
|
+ * Downsampling input data is counted in "row groups". A row group
|
|
|
+ * is defined to be max_v_samp_factor pixel rows of each component,
|
|
|
+ * from which the downsampler produces v_samp_factor sample rows.
|
|
|
+@@ -49,23 +51,26 @@
|
|
|
+ * where SF = (smoothing_factor / 1024).
|
|
|
+ * Currently, smoothing is only supported for 2h2v sampling factors.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+ #include "jsimd.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Pointer to routine to downsample a single component */
|
|
|
+ typedef void (*downsample1_ptr) (j_compress_ptr cinfo,
|
|
|
+ jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data,
|
|
|
+- JSAMPARRAY output_data);
|
|
|
++ _JSAMPARRAY input_data,
|
|
|
++ _JSAMPARRAY output_data);
|
|
|
+
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_downsampler pub; /* public fields */
|
|
|
+
|
|
|
+ /* Downsampling method pointers, one per component */
|
|
|
+ downsample1_ptr methods[MAX_COMPONENTS];
|
|
|
+@@ -86,21 +91,21 @@ start_pass_downsample(j_compress_ptr cin
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Expand a component horizontally from width input_cols to width output_cols,
|
|
|
+ * by duplicating the rightmost samples.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+-expand_right_edge(JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
|
|
|
++expand_right_edge(_JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
|
|
|
+ JDIMENSION output_cols)
|
|
|
+ {
|
|
|
+- register JSAMPROW ptr;
|
|
|
+- register JSAMPLE pixval;
|
|
|
++ register _JSAMPROW ptr;
|
|
|
++ register _JSAMPLE pixval;
|
|
|
+ register int count;
|
|
|
+ int row;
|
|
|
+ int numcols = (int)(output_cols - input_cols);
|
|
|
+
|
|
|
+ if (numcols > 0) {
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ ptr = image_data[row] + input_cols;
|
|
|
+ pixval = ptr[-1];
|
|
|
+@@ -113,24 +118,24 @@ expand_right_edge(JSAMPARRAY image_data,
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Do downsampling for a whole row group (all components).
|
|
|
+ *
|
|
|
+ * In this version we simply downsample each component independently.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-sep_downsample(j_compress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_index, JSAMPIMAGE output_buf,
|
|
|
++sep_downsample(j_compress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_index, _JSAMPIMAGE output_buf,
|
|
|
+ JDIMENSION out_row_group_index)
|
|
|
+ {
|
|
|
+ my_downsample_ptr downsample = (my_downsample_ptr)cinfo->downsample;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- JSAMPARRAY in_ptr, out_ptr;
|
|
|
++ _JSAMPARRAY in_ptr, out_ptr;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ in_ptr = input_buf[ci] + in_row_index;
|
|
|
+ out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor);
|
|
|
+ (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
|
|
|
+ }
|
|
|
+ }
|
|
|
+@@ -140,22 +145,23 @@ sep_downsample(j_compress_ptr cinfo, JSA
|
|
|
+ * Downsample pixel values of a single component.
|
|
|
+ * One row group is processed per call.
|
|
|
+ * This version handles arbitrary integral sampling ratios, without smoothing.
|
|
|
+ * Note that this version is not actually used for customary sampling ratios.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY output_data)
|
|
|
+ {
|
|
|
+ int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
|
|
|
+ JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */
|
|
|
+- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
|
|
|
+- JSAMPROW inptr, outptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++ JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
|
|
|
++ _JSAMPROW inptr, outptr;
|
|
|
+ JLONG outvalue;
|
|
|
+
|
|
|
+ h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
|
|
|
+ v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
|
|
|
+ numpix = h_expand * v_expand;
|
|
|
+ numpix2 = numpix / 2;
|
|
|
+
|
|
|
+ /* Expand input data enough to let all the output samples be generated
|
|
|
+@@ -172,39 +178,41 @@ int_downsample(j_compress_ptr cinfo, jpe
|
|
|
+ outcol++, outcol_h += h_expand) {
|
|
|
+ outvalue = 0;
|
|
|
+ for (v = 0; v < v_expand; v++) {
|
|
|
+ inptr = input_data[inrow + v] + outcol_h;
|
|
|
+ for (h = 0; h < h_expand; h++) {
|
|
|
+ outvalue += (JLONG)(*inptr++);
|
|
|
+ }
|
|
|
+ }
|
|
|
+- *outptr++ = (JSAMPLE)((outvalue + numpix2) / numpix);
|
|
|
++ *outptr++ = (_JSAMPLE)((outvalue + numpix2) / numpix);
|
|
|
+ }
|
|
|
+ inrow += v_expand;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Downsample pixel values of a single component.
|
|
|
+ * This version handles the special case of a full-size component,
|
|
|
+ * without smoothing.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ fullsize_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY output_data)
|
|
|
+ {
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++
|
|
|
+ /* Copy the data */
|
|
|
+- jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor,
|
|
|
+- cinfo->image_width);
|
|
|
++ _jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor,
|
|
|
++ cinfo->image_width);
|
|
|
+ /* Edge-expand */
|
|
|
+ expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
|
|
|
+- compptr->width_in_blocks * DCTSIZE);
|
|
|
++ compptr->width_in_blocks * data_unit);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Downsample pixel values of a single component.
|
|
|
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
|
|
|
+ * without smoothing.
|
|
|
+ *
|
|
|
+@@ -212,76 +220,78 @@ fullsize_downsample(j_compress_ptr cinfo
|
|
|
+ * integer, we do not want to always round 0.5 up to the next integer.
|
|
|
+ * If we did that, we'd introduce a noticeable bias towards larger values.
|
|
|
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
|
|
|
+ * alternate pixel locations (a simple ordered dither pattern).
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY output_data)
|
|
|
+ {
|
|
|
+ int outrow;
|
|
|
+ JDIMENSION outcol;
|
|
|
+- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++ JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
+ register int bias;
|
|
|
+
|
|
|
+ /* Expand input data enough to let all the output samples be generated
|
|
|
+ * by the standard loop. Special-casing padded output would be more
|
|
|
+ * efficient.
|
|
|
+ */
|
|
|
+ expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width,
|
|
|
+ output_cols * 2);
|
|
|
+
|
|
|
+ for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
|
|
|
+ outptr = output_data[outrow];
|
|
|
+ inptr = input_data[outrow];
|
|
|
+ bias = 0; /* bias = 0,1,0,1,... for successive samples */
|
|
|
+ for (outcol = 0; outcol < output_cols; outcol++) {
|
|
|
+- *outptr++ = (JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
|
|
|
++ *outptr++ = (_JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
|
|
|
+ bias ^= 1; /* 0=>1, 1=>0 */
|
|
|
+ inptr += 2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Downsample pixel values of a single component.
|
|
|
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
|
|
|
+ * without smoothing.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY output_data)
|
|
|
+ {
|
|
|
+ int inrow, outrow;
|
|
|
+ JDIMENSION outcol;
|
|
|
+- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
|
|
|
+- register JSAMPROW inptr0, inptr1, outptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++ JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
|
|
|
++ register _JSAMPROW inptr0, inptr1, outptr;
|
|
|
+ register int bias;
|
|
|
+
|
|
|
+ /* Expand input data enough to let all the output samples be generated
|
|
|
+ * by the standard loop. Special-casing padded output would be more
|
|
|
+ * efficient.
|
|
|
+ */
|
|
|
+ expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width,
|
|
|
+ output_cols * 2);
|
|
|
+
|
|
|
+ inrow = 0;
|
|
|
+ for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
|
|
|
+ outptr = output_data[outrow];
|
|
|
+ inptr0 = input_data[inrow];
|
|
|
+ inptr1 = input_data[inrow + 1];
|
|
|
+ bias = 1; /* bias = 1,2,1,2,... for successive samples */
|
|
|
+ for (outcol = 0; outcol < output_cols; outcol++) {
|
|
|
+- *outptr++ =
|
|
|
+- (JSAMPLE)((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
|
|
|
++ *outptr++ = (_JSAMPLE)
|
|
|
++ ((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
|
|
|
+ bias ^= 3; /* 1=>2, 2=>1 */
|
|
|
+ inptr0 += 2; inptr1 += 2;
|
|
|
+ }
|
|
|
+ inrow += 2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+@@ -290,22 +300,23 @@ h2v2_downsample(j_compress_ptr cinfo, jp
|
|
|
+ /*
|
|
|
+ * Downsample pixel values of a single component.
|
|
|
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
|
|
|
+ * with smoothing. One row of context is required.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY output_data)
|
|
|
+ {
|
|
|
+ int inrow, outrow;
|
|
|
+ JDIMENSION colctr;
|
|
|
+- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
|
|
|
+- register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++ JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
|
|
|
++ register _JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
|
|
|
+ JLONG membersum, neighsum, memberscale, neighscale;
|
|
|
+
|
|
|
+ /* Expand input data enough to let all the output samples be generated
|
|
|
+ * by the standard loop. Special-casing padded output would be more
|
|
|
+ * efficient.
|
|
|
+ */
|
|
|
+ expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
|
|
|
+ cinfo->image_width, output_cols * 2);
|
|
|
+@@ -336,64 +347,65 @@ h2v2_smooth_downsample(j_compress_ptr ci
|
|
|
+
|
|
|
+ /* Special case for first column: pretend column -1 is same as column 0 */
|
|
|
+ membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
|
|
|
+ neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
|
|
|
+ inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2];
|
|
|
+ neighsum += neighsum;
|
|
|
+ neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
|
|
|
+ membersum = membersum * memberscale + neighsum * neighscale;
|
|
|
+- *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
|
|
|
++ *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
|
|
|
+ inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
|
|
|
+
|
|
|
+ for (colctr = output_cols - 2; colctr > 0; colctr--) {
|
|
|
+ /* sum of pixels directly mapped to this output element */
|
|
|
+ membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
|
|
|
+ /* sum of edge-neighbor pixels */
|
|
|
+ neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
|
|
|
+ inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2];
|
|
|
+ /* The edge-neighbors count twice as much as corner-neighbors */
|
|
|
+ neighsum += neighsum;
|
|
|
+ /* Add in the corner-neighbors */
|
|
|
+ neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2];
|
|
|
+ /* form final output scaled up by 2^16 */
|
|
|
+ membersum = membersum * memberscale + neighsum * neighscale;
|
|
|
+ /* round, descale and output it */
|
|
|
+- *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
|
|
|
++ *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
|
|
|
+ inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Special case for last column */
|
|
|
+ membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
|
|
|
+ neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
|
|
|
+ inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1];
|
|
|
+ neighsum += neighsum;
|
|
|
+ neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
|
|
|
+ membersum = membersum * memberscale + neighsum * neighscale;
|
|
|
+- *outptr = (JSAMPLE)((membersum + 32768) >> 16);
|
|
|
++ *outptr = (_JSAMPLE)((membersum + 32768) >> 16);
|
|
|
+
|
|
|
+ inrow += 2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Downsample pixel values of a single component.
|
|
|
+ * This version handles the special case of a full-size component,
|
|
|
+ * with smoothing. One row of context is required.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY output_data)
|
|
|
+ {
|
|
|
+ int outrow;
|
|
|
+ JDIMENSION colctr;
|
|
|
+- JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
|
|
|
+- register JSAMPROW inptr, above_ptr, below_ptr, outptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
++ JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
|
|
|
++ register _JSAMPROW inptr, above_ptr, below_ptr, outptr;
|
|
|
+ JLONG membersum, neighsum, memberscale, neighscale;
|
|
|
+ int colsum, lastcolsum, nextcolsum;
|
|
|
+
|
|
|
+ /* Expand input data enough to let all the output samples be generated
|
|
|
+ * by the standard loop. Special-casing padded output would be more
|
|
|
+ * efficient.
|
|
|
+ */
|
|
|
+ expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
|
|
|
+@@ -415,60 +427,63 @@ fullsize_smooth_downsample(j_compress_pt
|
|
|
+ below_ptr = input_data[outrow + 1];
|
|
|
+
|
|
|
+ /* Special case for first column */
|
|
|
+ colsum = (*above_ptr++) + (*below_ptr++) + inptr[0];
|
|
|
+ membersum = *inptr++;
|
|
|
+ nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
|
|
|
+ neighsum = colsum + (colsum - membersum) + nextcolsum;
|
|
|
+ membersum = membersum * memberscale + neighsum * neighscale;
|
|
|
+- *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
|
|
|
++ *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
|
|
|
+ lastcolsum = colsum; colsum = nextcolsum;
|
|
|
+
|
|
|
+ for (colctr = output_cols - 2; colctr > 0; colctr--) {
|
|
|
+ membersum = *inptr++;
|
|
|
+ above_ptr++; below_ptr++;
|
|
|
+ nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
|
|
|
+ neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
|
|
|
+ membersum = membersum * memberscale + neighsum * neighscale;
|
|
|
+- *outptr++ = (JSAMPLE)((membersum + 32768) >> 16);
|
|
|
++ *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
|
|
|
+ lastcolsum = colsum; colsum = nextcolsum;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Special case for last column */
|
|
|
+ membersum = *inptr;
|
|
|
+ neighsum = lastcolsum + (colsum - membersum) + colsum;
|
|
|
+ membersum = membersum * memberscale + neighsum * neighscale;
|
|
|
+- *outptr = (JSAMPLE)((membersum + 32768) >> 16);
|
|
|
++ *outptr = (_JSAMPLE)((membersum + 32768) >> 16);
|
|
|
+
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ #endif /* INPUT_SMOOTHING_SUPPORTED */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Module initialization routine for downsampling.
|
|
|
+ * Note that we must select a routine for each component.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_downsampler(j_compress_ptr cinfo)
|
|
|
++_jinit_downsampler(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_downsample_ptr downsample;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ boolean smoothok = TRUE;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ downsample = (my_downsample_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_downsampler));
|
|
|
+ cinfo->downsample = (struct jpeg_downsampler *)downsample;
|
|
|
+ downsample->pub.start_pass = start_pass_downsample;
|
|
|
+- downsample->pub.downsample = sep_downsample;
|
|
|
++ downsample->pub._downsample = sep_downsample;
|
|
|
+ downsample->pub.need_context_rows = FALSE;
|
|
|
+
|
|
|
+ if (cinfo->CCIR601_sampling)
|
|
|
+ ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
|
|
|
+
|
|
|
+ /* Verify we can handle the sampling factors, and set up method pointers */
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+@@ -479,44 +494,50 @@ jinit_downsampler(j_compress_ptr cinfo)
|
|
|
+ downsample->methods[ci] = fullsize_smooth_downsample;
|
|
|
+ downsample->pub.need_context_rows = TRUE;
|
|
|
+ } else
|
|
|
+ #endif
|
|
|
+ downsample->methods[ci] = fullsize_downsample;
|
|
|
+ } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
|
|
|
+ compptr->v_samp_factor == cinfo->max_v_samp_factor) {
|
|
|
+ smoothok = FALSE;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v1_downsample())
|
|
|
+ downsample->methods[ci] = jsimd_h2v1_downsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ downsample->methods[ci] = h2v1_downsample;
|
|
|
+ } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
|
|
|
+ compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
|
|
|
+ #ifdef INPUT_SMOOTHING_SUPPORTED
|
|
|
+ if (cinfo->smoothing_factor) {
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_can_h2v2_smooth_downsample())
|
|
|
+ downsample->methods[ci] = jsimd_h2v2_smooth_downsample;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+ downsample->methods[ci] = h2v2_smooth_downsample;
|
|
|
+ downsample->pub.need_context_rows = TRUE;
|
|
|
+ } else
|
|
|
+ #endif
|
|
|
+ {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v2_downsample())
|
|
|
+ downsample->methods[ci] = jsimd_h2v2_downsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ downsample->methods[ci] = h2v2_downsample;
|
|
|
+ }
|
|
|
+ } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
|
|
|
+ (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
|
|
|
+ smoothok = FALSE;
|
|
|
+ downsample->methods[ci] = int_downsample;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
|
|
|
+ }
|
|
|
+
|
|
|
+ #ifdef INPUT_SMOOTHING_SUPPORTED
|
|
|
+ if (cinfo->smoothing_factor && !smoothok)
|
|
|
+ TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
|
|
|
+ #endif
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jctrans.c b/media/libjpeg/jctrans.c
|
|
|
+--- a/media/libjpeg/jctrans.c
|
|
|
++++ b/media/libjpeg/jctrans.c
|
|
|
+@@ -12,17 +12,17 @@
|
|
|
+ * This file contains library routines for transcoding compression,
|
|
|
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
|
|
|
+ * The routines in jcapimin.c will also be needed by a transcoder.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+ LOCAL(void) transencode_master_selection(j_compress_ptr cinfo,
|
|
|
+ jvirt_barray_ptr *coef_arrays);
|
|
|
+ LOCAL(void) transencode_coef_controller(j_compress_ptr cinfo,
|
|
|
+ jvirt_barray_ptr *coef_arrays);
|
|
|
+
|
|
|
+@@ -37,16 +37,19 @@ LOCAL(void) transencode_coef_controller(
|
|
|
+ * the time write_coefficients is called; indeed, if the virtual arrays
|
|
|
+ * were requested from this compression object's memory manager, they
|
|
|
+ * typically will be realized during this routine and filled afterwards.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_write_coefficients(j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays)
|
|
|
+ {
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ if (cinfo->global_state != CSTATE_START)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+ /* Mark all tables to be written */
|
|
|
+ jpeg_suppress_tables(cinfo, FALSE);
|
|
|
+ /* (Re)initialize error mgr and destination modules */
|
|
|
+ (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
|
|
|
+ (*cinfo->dest->init_destination) (cinfo);
|
|
|
+ /* Perform master selection of active modules */
|
|
|
+@@ -67,16 +70,19 @@ jpeg_write_coefficients(j_compress_ptr c
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo)
|
|
|
+ {
|
|
|
+ JQUANT_TBL **qtblptr;
|
|
|
+ jpeg_component_info *incomp, *outcomp;
|
|
|
+ JQUANT_TBL *c_quant, *slot_quant;
|
|
|
+ int tblno, ci, coefi;
|
|
|
+
|
|
|
++ if (srcinfo->master->lossless)
|
|
|
++ ERREXIT(dstinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ /* Safety check to ensure start_compress not called yet. */
|
|
|
+ if (dstinfo->global_state != CSTATE_START)
|
|
|
+ ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
|
|
|
+ /* Copy fundamental image dimensions */
|
|
|
+ dstinfo->image_width = srcinfo->image_width;
|
|
|
+ dstinfo->image_height = srcinfo->image_height;
|
|
|
+ dstinfo->input_components = srcinfo->num_components;
|
|
|
+ dstinfo->in_color_space = srcinfo->jpeg_color_space;
|
|
|
+@@ -359,16 +365,23 @@ compress_output(j_compress_ptr cinfo, JS
|
|
|
+ }
|
|
|
+ /* Completed the iMCU row, advance counters for next one */
|
|
|
+ coef->iMCU_row_num++;
|
|
|
+ start_iMCU_row(cinfo);
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
++METHODDEF(boolean)
|
|
|
++compress_output_12(j_compress_ptr cinfo, J12SAMPIMAGE input_buf)
|
|
|
++{
|
|
|
++ return compress_output(cinfo, (JSAMPIMAGE)input_buf);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
+ /*
|
|
|
+ * Initialize coefficient buffer controller.
|
|
|
+ *
|
|
|
+ * Each passed coefficient array must be the right size for that
|
|
|
+ * coefficient: width_in_blocks wide and height_in_blocks high,
|
|
|
+ * with unitheight at least v_samp_factor.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -381,16 +394,17 @@ transencode_coef_controller(j_compress_p
|
|
|
+ int i;
|
|
|
+
|
|
|
+ coef = (my_coef_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_coef_controller));
|
|
|
+ cinfo->coef = (struct jpeg_c_coef_controller *)coef;
|
|
|
+ coef->pub.start_pass = start_pass_coef;
|
|
|
+ coef->pub.compress_data = compress_output;
|
|
|
++ coef->pub.compress_data_12 = compress_output_12;
|
|
|
+
|
|
|
+ /* Save pointer to virtual arrays */
|
|
|
+ coef->whole_image = coef_arrays;
|
|
|
+
|
|
|
+ /* Allocate and pre-zero space for dummy DCT blocks. */
|
|
|
+ buffer = (JBLOCKROW)
|
|
|
+ (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
|
|
|
+diff --git a/media/libjpeg/jdapimin.c b/media/libjpeg/jdapimin.c
|
|
|
+--- a/media/libjpeg/jdapimin.c
|
|
|
++++ b/media/libjpeg/jdapimin.c
|
|
|
+@@ -1,15 +1,17 @@
|
|
|
+ /*
|
|
|
+ * jdapimin.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1998, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2016, 2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2016, 2022, 2024, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains application interface code for the decompression half
|
|
|
+ * of the JPEG library. These are the "minimum" API routines that may be
|
|
|
+ * needed in either the normal full-decompression case or the
|
|
|
+ * transcoding-only case.
|
|
|
+ *
|
|
|
+@@ -77,16 +79,18 @@ jpeg_CreateDecompress(j_decompress_ptr c
|
|
|
+ * for COM, APPn markers before calling jpeg_read_header.
|
|
|
+ */
|
|
|
+ cinfo->marker_list = NULL;
|
|
|
+ jinit_marker_reader(cinfo);
|
|
|
+
|
|
|
+ /* And initialize the overall input controller. */
|
|
|
+ jinit_input_controller(cinfo);
|
|
|
+
|
|
|
++ cinfo->data_precision = BITS_IN_JSAMPLE;
|
|
|
++
|
|
|
+ /* OK, I'm ready */
|
|
|
+ cinfo->global_state = DSTATE_START;
|
|
|
+
|
|
|
+ /* The master struct is used to store extension parameters, so we allocate it
|
|
|
+ * here.
|
|
|
+ */
|
|
|
+ cinfo->master = (struct jpeg_decomp_master *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
|
|
|
+@@ -151,23 +155,29 @@ default_decompress_parms(j_decompress_pt
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ /* Saw no special markers, try to guess from the component IDs */
|
|
|
+ int cid0 = cinfo->comp_info[0].component_id;
|
|
|
+ int cid1 = cinfo->comp_info[1].component_id;
|
|
|
+ int cid2 = cinfo->comp_info[2].component_id;
|
|
|
+
|
|
|
+- if (cid0 == 1 && cid1 == 2 && cid2 == 3)
|
|
|
+- cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
|
|
|
+- else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
|
|
|
++ if (cid0 == 1 && cid1 == 2 && cid2 == 3) {
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ cinfo->jpeg_color_space = JCS_RGB; /* assume RGB w/out marker */
|
|
|
++ else
|
|
|
++ cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
|
|
|
++ } else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
|
|
|
+ cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
|
|
|
+ else {
|
|
|
+ TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
|
|
|
+- cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ cinfo->jpeg_color_space = JCS_RGB; /* assume it's RGB */
|
|
|
++ else
|
|
|
++ cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* Always guess RGB is proper output colorspace. */
|
|
|
+ cinfo->out_color_space = JCS_RGB;
|
|
|
+ break;
|
|
|
+
|
|
|
+ case 4:
|
|
|
+ if (cinfo->saw_Adobe_marker) {
|
|
|
+diff --git a/media/libjpeg/jdapistd.c b/media/libjpeg/jdapistd.c
|
|
|
+--- a/media/libjpeg/jdapistd.c
|
|
|
++++ b/media/libjpeg/jdapistd.c
|
|
|
+@@ -1,36 +1,43 @@
|
|
|
+ /*
|
|
|
+ * jdapistd.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2010, 2015-2020, 2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2015-2020, 2022-2023, D. R. Commander.
|
|
|
+ * Copyright (C) 2015, Google, Inc.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains application interface code for the decompression half
|
|
|
+ * of the JPEG library. These are the "standard" API routines that are
|
|
|
+ * used in the normal full-decompression case. They are not used by a
|
|
|
+ * transcoding-only application. Note that if an application links in
|
|
|
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
|
|
|
+ * We thus must separate this file from jdapimin.c to avoid linking the
|
|
|
+ * whole decompression library into a transcoder.
|
|
|
+ */
|
|
|
+
|
|
|
+ #include "jinclude.h"
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
+ #include "jdmainct.h"
|
|
|
+ #include "jdcoefct.h"
|
|
|
++#else
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jpeglib.h"
|
|
|
++#endif
|
|
|
+ #include "jdmaster.h"
|
|
|
+ #include "jdmerge.h"
|
|
|
+ #include "jdsample.h"
|
|
|
+ #include "jmemsys.h"
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE == 8
|
|
|
++
|
|
|
+ /* Forward declarations */
|
|
|
+ LOCAL(boolean) output_pass_setup(j_decompress_ptr cinfo);
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Decompression initialization.
|
|
|
+ * jpeg_read_header must be completed before calling this.
|
|
|
+ *
|
|
|
+@@ -116,58 +123,80 @@ output_pass_setup(j_decompress_ptr cinfo
|
|
|
+ /* Call progress monitor hook if present */
|
|
|
+ if (cinfo->progress != NULL) {
|
|
|
+ cinfo->progress->pass_counter = (long)cinfo->output_scanline;
|
|
|
+ cinfo->progress->pass_limit = (long)cinfo->output_height;
|
|
|
+ (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
|
|
|
+ }
|
|
|
+ /* Process some data */
|
|
|
+ last_scanline = cinfo->output_scanline;
|
|
|
+- (*cinfo->main->process_data) (cinfo, (JSAMPARRAY)NULL,
|
|
|
+- &cinfo->output_scanline, (JDIMENSION)0);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ (*cinfo->main->process_data_16) (cinfo, (J16SAMPARRAY)NULL,
|
|
|
++ &cinfo->output_scanline,
|
|
|
++ (JDIMENSION)0);
|
|
|
++ else
|
|
|
++#endif
|
|
|
++ if (cinfo->data_precision == 12)
|
|
|
++ (*cinfo->main->process_data_12) (cinfo, (J12SAMPARRAY)NULL,
|
|
|
++ &cinfo->output_scanline,
|
|
|
++ (JDIMENSION)0);
|
|
|
++ else
|
|
|
++ (*cinfo->main->process_data) (cinfo, (JSAMPARRAY)NULL,
|
|
|
++ &cinfo->output_scanline, (JDIMENSION)0);
|
|
|
+ if (cinfo->output_scanline == last_scanline)
|
|
|
+ return FALSE; /* No progress made, must suspend */
|
|
|
+ }
|
|
|
+ /* Finish up dummy pass, and set up for another one */
|
|
|
+ (*cinfo->master->finish_output_pass) (cinfo);
|
|
|
+ (*cinfo->master->prepare_for_output_pass) (cinfo);
|
|
|
+ cinfo->output_scanline = 0;
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif /* QUANT_2PASS_SUPPORTED */
|
|
|
+ }
|
|
|
+ /* Ready for application to drive output pass through
|
|
|
+- * jpeg_read_scanlines or jpeg_read_raw_data.
|
|
|
++ * _jpeg_read_scanlines or _jpeg_read_raw_data.
|
|
|
+ */
|
|
|
+ cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
++#endif /* BITS_IN_JSAMPLE == 8 */
|
|
|
++
|
|
|
++
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Enable partial scanline decompression
|
|
|
+ *
|
|
|
+ * Must be called after jpeg_start_decompress() and before any calls to
|
|
|
+- * jpeg_read_scanlines() or jpeg_skip_scanlines().
|
|
|
++ * _jpeg_read_scanlines() or _jpeg_skip_scanlines().
|
|
|
+ *
|
|
|
+ * Refer to libjpeg.txt for more information.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
|
|
|
+- JDIMENSION *width)
|
|
|
++_jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
|
|
|
++ JDIMENSION *width)
|
|
|
+ {
|
|
|
+ int ci, align, orig_downsampled_width;
|
|
|
+ JDIMENSION input_xoffset;
|
|
|
+ boolean reinit_upsampler = FALSE;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ #ifdef UPSAMPLE_MERGING_SUPPORTED
|
|
|
+ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
+ #endif
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ if ((cinfo->global_state != DSTATE_SCANNING &&
|
|
|
+ cinfo->global_state != DSTATE_BUFIMAGE) || cinfo->output_scanline != 0)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+
|
|
|
+ if (!xoffset || !width)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_CROP_SPEC);
|
|
|
+
|
|
|
+ /* xoffset and width must fall within the output image dimensions. */
|
|
|
+@@ -231,156 +260,170 @@ jpeg_crop_scanline(j_decompress_ptr cinf
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ int hsf = (cinfo->comps_in_scan == 1 && cinfo->num_components == 1) ?
|
|
|
+ 1 : compptr->h_samp_factor;
|
|
|
+
|
|
|
+ /* Set downsampled_width to the new output width. */
|
|
|
+ orig_downsampled_width = compptr->downsampled_width;
|
|
|
+ compptr->downsampled_width =
|
|
|
+- (JDIMENSION)jdiv_round_up((long)(cinfo->output_width *
|
|
|
+- compptr->h_samp_factor),
|
|
|
+- (long)cinfo->max_h_samp_factor);
|
|
|
++ (JDIMENSION)jdiv_round_up((long)cinfo->output_width *
|
|
|
++ (long)(compptr->h_samp_factor *
|
|
|
++ compptr->_DCT_scaled_size),
|
|
|
++ (long)(cinfo->max_h_samp_factor *
|
|
|
++ cinfo->_min_DCT_scaled_size));
|
|
|
+ if (compptr->downsampled_width < 2 && orig_downsampled_width >= 2)
|
|
|
+ reinit_upsampler = TRUE;
|
|
|
+
|
|
|
+ /* Set the first and last iMCU columns that we must decompress. These
|
|
|
+ * values will be used in multi-scan decompressions.
|
|
|
+ */
|
|
|
+ cinfo->master->first_MCU_col[ci] =
|
|
|
+ (JDIMENSION)(long)(*xoffset * hsf) / (long)align;
|
|
|
+ cinfo->master->last_MCU_col[ci] =
|
|
|
+ (JDIMENSION)jdiv_round_up((long)((*xoffset + cinfo->output_width) * hsf),
|
|
|
+ (long)align) - 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (reinit_upsampler) {
|
|
|
+ cinfo->master->jinit_upsampler_no_alloc = TRUE;
|
|
|
+- jinit_upsampler(cinfo);
|
|
|
++ _jinit_upsampler(cinfo);
|
|
|
+ cinfo->master->jinit_upsampler_no_alloc = FALSE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 */
|
|
|
++
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Read some scanlines of data from the JPEG decompressor.
|
|
|
+ *
|
|
|
+ * The return value will be the number of lines actually read.
|
|
|
+ * This may be less than the number requested in several cases,
|
|
|
+ * including bottom of image, data source suspension, and operating
|
|
|
+ * modes that emit multiple scanlines at a time.
|
|
|
+ *
|
|
|
+- * Note: we warn about excess calls to jpeg_read_scanlines() since
|
|
|
++ * Note: we warn about excess calls to _jpeg_read_scanlines() since
|
|
|
+ * this likely signals an application programmer error. However,
|
|
|
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(JDIMENSION)
|
|
|
+-jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
|
|
|
+- JDIMENSION max_lines)
|
|
|
++_jpeg_read_scanlines(j_decompress_ptr cinfo, _JSAMPARRAY scanlines,
|
|
|
++ JDIMENSION max_lines)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
+ JDIMENSION row_ctr;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ if (cinfo->global_state != DSTATE_SCANNING)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+ if (cinfo->output_scanline >= cinfo->output_height) {
|
|
|
+ WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Call progress monitor hook if present */
|
|
|
+ if (cinfo->progress != NULL) {
|
|
|
+ cinfo->progress->pass_counter = (long)cinfo->output_scanline;
|
|
|
+ cinfo->progress->pass_limit = (long)cinfo->output_height;
|
|
|
+ (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Process some data */
|
|
|
+ row_ctr = 0;
|
|
|
+- (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, max_lines);
|
|
|
++ (*cinfo->main->_process_data) (cinfo, scanlines, &row_ctr, max_lines);
|
|
|
+ cinfo->output_scanline += row_ctr;
|
|
|
+ return row_ctr;
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ return 0;
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+-/* Dummy color convert function used by jpeg_skip_scanlines() */
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
++
|
|
|
++/* Dummy color convert function used by _jpeg_skip_scanlines() */
|
|
|
+ LOCAL(void)
|
|
|
+-noop_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++noop_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+-/* Dummy quantize function used by jpeg_skip_scanlines() */
|
|
|
++/* Dummy quantize function used by _jpeg_skip_scanlines() */
|
|
|
+ LOCAL(void)
|
|
|
+-noop_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++noop_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+- * In some cases, it is best to call jpeg_read_scanlines() and discard the
|
|
|
++ * In some cases, it is best to call _jpeg_read_scanlines() and discard the
|
|
|
+ * output, rather than skipping the scanlines, because this allows us to
|
|
|
+ * maintain the internal state of the context-based upsampler. In these cases,
|
|
|
+ * we set up and tear down a dummy color converter in order to avoid valgrind
|
|
|
+ * errors and to achieve the best possible performance.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
|
|
|
+ {
|
|
|
+ JDIMENSION n;
|
|
|
+ #ifdef UPSAMPLE_MERGING_SUPPORTED
|
|
|
+ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
+ #endif
|
|
|
+- JSAMPLE dummy_sample[1] = { 0 };
|
|
|
+- JSAMPROW dummy_row = dummy_sample;
|
|
|
+- JSAMPARRAY scanlines = NULL;
|
|
|
+- void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++ _JSAMPLE dummy_sample[1] = { 0 };
|
|
|
++ _JSAMPROW dummy_row = dummy_sample;
|
|
|
++ _JSAMPARRAY scanlines = NULL;
|
|
|
++ void (*color_convert) (j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows) = NULL;
|
|
|
+- void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows) = NULL;
|
|
|
++ void (*color_quantize) (j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows) = NULL;
|
|
|
+
|
|
|
+- if (cinfo->cconvert && cinfo->cconvert->color_convert) {
|
|
|
+- color_convert = cinfo->cconvert->color_convert;
|
|
|
+- cinfo->cconvert->color_convert = noop_convert;
|
|
|
++ if (cinfo->cconvert && cinfo->cconvert->_color_convert) {
|
|
|
++ color_convert = cinfo->cconvert->_color_convert;
|
|
|
++ cinfo->cconvert->_color_convert = noop_convert;
|
|
|
+ /* This just prevents UBSan from complaining about adding 0 to a NULL
|
|
|
+ * pointer. The pointer isn't actually used.
|
|
|
+ */
|
|
|
+ scanlines = &dummy_row;
|
|
|
+ }
|
|
|
+
|
|
|
+- if (cinfo->cquantize && cinfo->cquantize->color_quantize) {
|
|
|
+- color_quantize = cinfo->cquantize->color_quantize;
|
|
|
+- cinfo->cquantize->color_quantize = noop_quantize;
|
|
|
++ if (cinfo->cquantize && cinfo->cquantize->_color_quantize) {
|
|
|
++ color_quantize = cinfo->cquantize->_color_quantize;
|
|
|
++ cinfo->cquantize->_color_quantize = noop_quantize;
|
|
|
+ }
|
|
|
+
|
|
|
+ #ifdef UPSAMPLE_MERGING_SUPPORTED
|
|
|
+ if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ scanlines = &upsample->spare_row;
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+
|
|
|
+ for (n = 0; n < num_lines; n++)
|
|
|
+- jpeg_read_scanlines(cinfo, scanlines, 1);
|
|
|
++ _jpeg_read_scanlines(cinfo, scanlines, 1);
|
|
|
+
|
|
|
+ if (color_convert)
|
|
|
+- cinfo->cconvert->color_convert = color_convert;
|
|
|
++ cinfo->cconvert->_color_convert = color_convert;
|
|
|
+
|
|
|
+ if (color_quantize)
|
|
|
+- cinfo->cquantize->color_quantize = color_quantize;
|
|
|
++ cinfo->cquantize->_color_quantize = color_quantize;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+- * Called by jpeg_skip_scanlines(). This partially skips a decompress block by
|
|
|
+- * incrementing the rowgroup counter.
|
|
|
++ * Called by _jpeg_skip_scanlines(). This partially skips a decompress block
|
|
|
++ * by incrementing the rowgroup counter.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows)
|
|
|
+ {
|
|
|
+ JDIMENSION rows_left;
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
+@@ -409,27 +452,33 @@ increment_simple_rowgroup_ctr(j_decompre
|
|
|
+ * num_lines would move beyond the end of the image, then the actual number of
|
|
|
+ * lines remaining in the image is returned. Otherwise, the return value will
|
|
|
+ * be equal to num_lines.
|
|
|
+ *
|
|
|
+ * Refer to libjpeg.txt for more information.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(JDIMENSION)
|
|
|
+-jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
|
|
|
++_jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
+ my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
|
|
|
+ JDIMENSION i, x;
|
|
|
+ int y;
|
|
|
+ JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
|
|
|
+ JDIMENSION lines_to_skip, lines_to_read;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ /* Two-pass color quantization is not supported. */
|
|
|
+ if (cinfo->quantize_colors && cinfo->two_pass_quantize)
|
|
|
+ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
+
|
|
|
+ if (cinfo->global_state != DSTATE_SCANNING)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+
|
|
|
+ /* Do not skip past the bottom of the image. */
|
|
|
+@@ -592,21 +641,27 @@ jpeg_skip_scanlines(j_decompress_ptr cin
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Alternate entry point to read raw data.
|
|
|
+ * Processes exactly one iMCU row per call, unless suspended.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(JDIMENSION)
|
|
|
+-jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
|
|
|
+- JDIMENSION max_lines)
|
|
|
++_jpeg_read_raw_data(j_decompress_ptr cinfo, _JSAMPIMAGE data,
|
|
|
++ JDIMENSION max_lines)
|
|
|
+ {
|
|
|
+ JDIMENSION lines_per_iMCU_row;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ if (cinfo->global_state != DSTATE_RAW_OK)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+ if (cinfo->output_scanline >= cinfo->output_height) {
|
|
|
+ WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Call progress monitor hook if present */
|
|
|
+@@ -617,24 +672,28 @@ jpeg_read_raw_data(j_decompress_ptr cinf
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Verify that at least one iMCU row can be returned. */
|
|
|
+ lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size;
|
|
|
+ if (max_lines < lines_per_iMCU_row)
|
|
|
+ ERREXIT(cinfo, JERR_BUFFER_SIZE);
|
|
|
+
|
|
|
+ /* Decompress directly into user's buffer. */
|
|
|
+- if (!(*cinfo->coef->decompress_data) (cinfo, data))
|
|
|
++ if (!(*cinfo->coef->_decompress_data) (cinfo, data))
|
|
|
+ return 0; /* suspension forced, can do nothing more */
|
|
|
+
|
|
|
+ /* OK, we processed one iMCU row. */
|
|
|
+ cinfo->output_scanline += lines_per_iMCU_row;
|
|
|
+ return lines_per_iMCU_row;
|
|
|
+ }
|
|
|
+
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 */
|
|
|
++
|
|
|
++
|
|
|
++#if BITS_IN_JSAMPLE == 8
|
|
|
+
|
|
|
+ /* Additional entry points for buffered-image mode. */
|
|
|
+
|
|
|
+ #ifdef D_MULTISCAN_FILES_SUPPORTED
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize for an output pass in buffered-image mode.
|
|
|
+ */
|
|
|
+@@ -682,8 +741,10 @@ jpeg_finish_output(j_decompress_ptr cinf
|
|
|
+ if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
|
|
|
+ return FALSE; /* Suspend, come back later */
|
|
|
+ }
|
|
|
+ cinfo->global_state = DSTATE_BUFIMAGE;
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+ #endif /* D_MULTISCAN_FILES_SUPPORTED */
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE == 8 */
|
|
|
+diff --git a/media/libjpeg/jdatadst.c b/media/libjpeg/jdatadst.c
|
|
|
+--- a/media/libjpeg/jdatadst.c
|
|
|
++++ b/media/libjpeg/jdatadst.c
|
|
|
+@@ -33,31 +33,29 @@ typedef struct {
|
|
|
+ JOCTET *buffer; /* start of buffer */
|
|
|
+ } my_destination_mgr;
|
|
|
+
|
|
|
+ typedef my_destination_mgr *my_dest_ptr;
|
|
|
+
|
|
|
+ #define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */
|
|
|
+
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ /* Expanded data destination object for memory output */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_destination_mgr pub; /* public fields */
|
|
|
+
|
|
|
+ unsigned char **outbuffer; /* target buffer */
|
|
|
+ unsigned long *outsize;
|
|
|
+ unsigned char *newbuffer; /* newly allocated buffer */
|
|
|
+ JOCTET *buffer; /* start of buffer */
|
|
|
+ size_t bufsize;
|
|
|
+ } my_mem_destination_mgr;
|
|
|
+
|
|
|
+ typedef my_mem_destination_mgr *my_mem_dest_ptr;
|
|
|
+-#endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize destination --- called by jpeg_start_compress
|
|
|
+ * before any data is actually written.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+@@ -69,23 +67,21 @@ init_destination(j_compress_ptr cinfo)
|
|
|
+ dest->buffer = (JOCTET *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ OUTPUT_BUF_SIZE * sizeof(JOCTET));
|
|
|
+
|
|
|
+ dest->pub.next_output_byte = dest->buffer;
|
|
|
+ dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
|
|
|
+ }
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ METHODDEF(void)
|
|
|
+ init_mem_destination(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ /* no work necessary here */
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Empty the output buffer --- called whenever buffer fills up.
|
|
|
+ *
|
|
|
+ * In typical applications, this should write the entire output buffer
|
|
|
+ * (ignoring the current state of next_output_byte & free_in_buffer),
|
|
|
+ * reset the pointer & count to the start of the buffer, and return TRUE
|
|
|
+@@ -116,17 +112,16 @@ empty_output_buffer(j_compress_ptr cinfo
|
|
|
+ ERREXIT(cinfo, JERR_FILE_WRITE);
|
|
|
+
|
|
|
+ dest->pub.next_output_byte = dest->buffer;
|
|
|
+ dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ METHODDEF(boolean)
|
|
|
+ empty_mem_output_buffer(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ size_t nextsize;
|
|
|
+ JOCTET *nextbuffer;
|
|
|
+ my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
|
|
|
+
|
|
|
+ /* Try to allocate new buffer with double size */
|
|
|
+@@ -145,17 +140,16 @@ empty_mem_output_buffer(j_compress_ptr c
|
|
|
+ dest->pub.next_output_byte = nextbuffer + dest->bufsize;
|
|
|
+ dest->pub.free_in_buffer = dest->bufsize;
|
|
|
+
|
|
|
+ dest->buffer = nextbuffer;
|
|
|
+ dest->bufsize = nextsize;
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Terminate destination --- called by jpeg_finish_compress
|
|
|
+ * after all data has been written. Usually needs to flush buffer.
|
|
|
+ *
|
|
|
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
|
|
|
+ * application must deal with any cleanup that should happen even
|
|
|
+@@ -174,26 +168,24 @@ term_destination(j_compress_ptr cinfo)
|
|
|
+ ERREXIT(cinfo, JERR_FILE_WRITE);
|
|
|
+ }
|
|
|
+ fflush(dest->outfile);
|
|
|
+ /* Make sure we wrote the output file OK */
|
|
|
+ if (ferror(dest->outfile))
|
|
|
+ ERREXIT(cinfo, JERR_FILE_WRITE);
|
|
|
+ }
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ METHODDEF(void)
|
|
|
+ term_mem_destination(j_compress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
|
|
|
+
|
|
|
+ *dest->outbuffer = dest->buffer;
|
|
|
+ *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer);
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Prepare for output to a stdio stream.
|
|
|
+ * The caller must have already opened the stream, and is responsible
|
|
|
+ * for closing it after finishing compression.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -222,17 +214,16 @@ jpeg_stdio_dest(j_compress_ptr cinfo, FI
|
|
|
+ dest = (my_dest_ptr)cinfo->dest;
|
|
|
+ dest->pub.init_destination = init_destination;
|
|
|
+ dest->pub.empty_output_buffer = empty_output_buffer;
|
|
|
+ dest->pub.term_destination = term_destination;
|
|
|
+ dest->outfile = outfile;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ /*
|
|
|
+ * Prepare for output to a memory buffer.
|
|
|
+ * The caller may supply an own initial buffer with appropriate size.
|
|
|
+ * Otherwise, or when the actual data output exceeds the given size,
|
|
|
+ * the library adapts the buffer size as necessary.
|
|
|
+ * The standard library functions malloc/free are used for allocating
|
|
|
+ * larger memory, so the buffer is available to the application after
|
|
|
+ * finishing compression, and then the application is responsible for
|
|
|
+@@ -279,9 +270,8 @@ jpeg_mem_dest(j_compress_ptr cinfo, unsi
|
|
|
+ if (dest->newbuffer == NULL)
|
|
|
+ ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
|
|
|
+ *outsize = OUTPUT_BUF_SIZE;
|
|
|
+ }
|
|
|
+
|
|
|
+ dest->pub.next_output_byte = dest->buffer = *outbuffer;
|
|
|
+ dest->pub.free_in_buffer = dest->bufsize = *outsize;
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+diff --git a/media/libjpeg/jdatasrc.c b/media/libjpeg/jdatasrc.c
|
|
|
+--- a/media/libjpeg/jdatasrc.c
|
|
|
++++ b/media/libjpeg/jdatasrc.c
|
|
|
+@@ -51,23 +51,21 @@ init_source(j_decompress_ptr cinfo)
|
|
|
+
|
|
|
+ /* We reset the empty-input-file flag for each image,
|
|
|
+ * but we don't clear the input buffer.
|
|
|
+ * This is correct behavior for reading a series of images from one source.
|
|
|
+ */
|
|
|
+ src->start_of_file = TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ METHODDEF(void)
|
|
|
+ init_mem_source(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ /* no work necessary here */
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Fill the input buffer --- called whenever buffer is emptied.
|
|
|
+ *
|
|
|
+ * In typical applications, this should read fresh data into the buffer
|
|
|
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
|
|
|
+ * reset the pointer & count to the start of the buffer, and return TRUE
|
|
|
+@@ -118,17 +116,16 @@ fill_input_buffer(j_decompress_ptr cinfo
|
|
|
+
|
|
|
+ src->pub.next_input_byte = src->buffer;
|
|
|
+ src->pub.bytes_in_buffer = nbytes;
|
|
|
+ src->start_of_file = FALSE;
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ METHODDEF(boolean)
|
|
|
+ fill_mem_input_buffer(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ static const JOCTET mybuffer[4] = {
|
|
|
+ (JOCTET)0xFF, (JOCTET)JPEG_EOI, 0, 0
|
|
|
+ };
|
|
|
+
|
|
|
+ /* The whole JPEG data is expected to reside in the supplied memory
|
|
|
+@@ -139,17 +136,16 @@ fill_mem_input_buffer(j_decompress_ptr c
|
|
|
+
|
|
|
+ /* Insert a fake EOI marker */
|
|
|
+
|
|
|
+ cinfo->src->next_input_byte = mybuffer;
|
|
|
+ cinfo->src->bytes_in_buffer = 2;
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Skip data --- used to skip over a potentially large amount of
|
|
|
+ * uninteresting data (such as an APPn marker).
|
|
|
+ *
|
|
|
+ * Writers of suspendable-input applications must note that skip_input_data
|
|
|
+ * is not granted the right to give a suspension return. If the skip extends
|
|
|
+@@ -248,17 +244,16 @@ jpeg_stdio_src(j_decompress_ptr cinfo, F
|
|
|
+ src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
|
|
|
+ src->pub.term_source = term_source;
|
|
|
+ src->infile = infile;
|
|
|
+ src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
|
|
|
+ src->pub.next_input_byte = NULL; /* until buffer loaded */
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ /*
|
|
|
+ * Prepare for input from a supplied memory buffer.
|
|
|
+ * The buffer must contain the whole JPEG data.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
|
|
|
+ unsigned long insize)
|
|
|
+@@ -287,9 +282,8 @@ jpeg_mem_src(j_decompress_ptr cinfo, con
|
|
|
+ src->init_source = init_mem_source;
|
|
|
+ src->fill_input_buffer = fill_mem_input_buffer;
|
|
|
+ src->skip_input_data = skip_input_data;
|
|
|
+ src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
|
|
|
+ src->term_source = term_source;
|
|
|
+ src->bytes_in_buffer = (size_t)insize;
|
|
|
+ src->next_input_byte = (const JOCTET *)inbuffer;
|
|
|
+ }
|
|
|
+-#endif
|
|
|
+diff --git a/media/libjpeg/jdcoefct.c b/media/libjpeg/jdcoefct.c
|
|
|
+--- a/media/libjpeg/jdcoefct.c
|
|
|
++++ b/media/libjpeg/jdcoefct.c
|
|
|
+@@ -1,44 +1,45 @@
|
|
|
+ /*
|
|
|
+ * jdcoefct.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1997, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+- * Copyright (C) 2010, 2015-2016, 2019-2020, 2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2015-2016, 2019-2020, 2022-2023, D. R. Commander.
|
|
|
+ * Copyright (C) 2015, 2020, Google, Inc.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the coefficient buffer controller for decompression.
|
|
|
+- * This controller is the top level of the JPEG decompressor proper.
|
|
|
++ * This controller is the top level of the lossy JPEG decompressor proper.
|
|
|
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
|
|
|
+ *
|
|
|
+ * In buffered-image mode, this controller is the interface between
|
|
|
+ * input-oriented processing and output-oriented processing.
|
|
|
+ * Also, the input side (only) is used when reading a file for transcoding.
|
|
|
+ */
|
|
|
+
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jdcoefct.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+ METHODDEF(int) decompress_onepass(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE output_buf);
|
|
|
++ _JSAMPIMAGE output_buf);
|
|
|
+ #ifdef D_MULTISCAN_FILES_SUPPORTED
|
|
|
+-METHODDEF(int) decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
|
|
|
++METHODDEF(int) decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
|
|
|
+ #endif
|
|
|
+ #ifdef BLOCK_SMOOTHING_SUPPORTED
|
|
|
+ LOCAL(boolean) smoothing_ok(j_decompress_ptr cinfo);
|
|
|
+ METHODDEF(int) decompress_smooth_data(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE output_buf);
|
|
|
++ _JSAMPIMAGE output_buf);
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize for an input processing pass.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+@@ -57,19 +58,19 @@ METHODDEF(void)
|
|
|
+ start_output_pass(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ #ifdef BLOCK_SMOOTHING_SUPPORTED
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+
|
|
|
+ /* If multipass, check to see whether to use block smoothing on this pass */
|
|
|
+ if (coef->pub.coef_arrays != NULL) {
|
|
|
+ if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
|
|
|
+- coef->pub.decompress_data = decompress_smooth_data;
|
|
|
++ coef->pub._decompress_data = decompress_smooth_data;
|
|
|
+ else
|
|
|
+- coef->pub.decompress_data = decompress_data;
|
|
|
++ coef->pub._decompress_data = decompress_data;
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+ cinfo->output_iMCU_row = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Decompress and return some data in the single-pass case.
|
|
|
+@@ -77,27 +78,27 @@ start_output_pass(j_decompress_ptr cinfo
|
|
|
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
|
|
|
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
|
|
|
+ *
|
|
|
+ * NB: output_buf contains a plane for each component in image,
|
|
|
+ * which we index according to the component's SOF position.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(int)
|
|
|
+-decompress_onepass(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
|
|
|
++decompress_onepass(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ JDIMENSION MCU_col_num; /* index of current MCU within row */
|
|
|
+ JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
|
|
|
+ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
+ int blkn, ci, xindex, yindex, yoffset, useful_width;
|
|
|
+- JSAMPARRAY output_ptr;
|
|
|
++ _JSAMPARRAY output_ptr;
|
|
|
+ JDIMENSION start_col, output_col;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- inverse_DCT_method_ptr inverse_DCT;
|
|
|
++ _inverse_DCT_method_ptr inverse_DCT;
|
|
|
+
|
|
|
+ /* Loop to process as much as one whole iMCU row */
|
|
|
+ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
|
|
|
+ yoffset++) {
|
|
|
+ for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
|
|
|
+ MCU_col_num++) {
|
|
|
+ /* Try to fetch an MCU. Entropy decoder expects buffer to be zeroed. */
|
|
|
+ jzero_far((void *)coef->MCU_buffer[0],
|
|
|
+@@ -124,17 +125,17 @@ decompress_onepass(j_decompress_ptr cinf
|
|
|
+ blkn = 0; /* index of current DCT block within MCU */
|
|
|
+ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+ /* Don't bother to IDCT an uninteresting component. */
|
|
|
+ if (!compptr->component_needed) {
|
|
|
+ blkn += compptr->MCU_blocks;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+- inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
|
|
|
++ inverse_DCT = cinfo->idct->_inverse_DCT[compptr->component_index];
|
|
|
+ useful_width = (MCU_col_num < last_MCU_col) ?
|
|
|
+ compptr->MCU_width : compptr->last_col_width;
|
|
|
+ output_ptr = output_buf[compptr->component_index] +
|
|
|
+ yoffset * compptr->_DCT_scaled_size;
|
|
|
+ start_col = (MCU_col_num - cinfo->master->first_iMCU_col) *
|
|
|
+ compptr->MCU_sample_width;
|
|
|
+ for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
|
|
|
+ if (cinfo->input_iMCU_row < last_iMCU_row ||
|
|
|
+@@ -257,28 +258,28 @@ consume_data(j_decompress_ptr cinfo)
|
|
|
+ * Decompress and return some data in the multi-pass case.
|
|
|
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
|
|
|
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
|
|
|
+ *
|
|
|
+ * NB: output_buf contains a plane for each component in image.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(int)
|
|
|
+-decompress_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
|
|
|
++decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
+ JDIMENSION block_num;
|
|
|
+ int ci, block_row, block_rows;
|
|
|
+ JBLOCKARRAY buffer;
|
|
|
+ JBLOCKROW buffer_ptr;
|
|
|
+- JSAMPARRAY output_ptr;
|
|
|
++ _JSAMPARRAY output_ptr;
|
|
|
+ JDIMENSION output_col;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- inverse_DCT_method_ptr inverse_DCT;
|
|
|
++ _inverse_DCT_method_ptr inverse_DCT;
|
|
|
+
|
|
|
+ /* Force some input to be done if we are getting ahead of the input. */
|
|
|
+ while (cinfo->input_scan_number < cinfo->output_scan_number ||
|
|
|
+ (cinfo->input_scan_number == cinfo->output_scan_number &&
|
|
|
+ cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
|
|
|
+ if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
|
|
|
+ return JPEG_SUSPENDED;
|
|
|
+ }
|
|
|
+@@ -297,17 +298,17 @@ decompress_data(j_decompress_ptr cinfo,
|
|
|
+ /* Count non-dummy DCT block rows in this iMCU row. */
|
|
|
+ if (cinfo->output_iMCU_row < last_iMCU_row)
|
|
|
+ block_rows = compptr->v_samp_factor;
|
|
|
+ else {
|
|
|
+ /* NB: can't use last_row_height here; it is input-side-dependent! */
|
|
|
+ block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
|
|
|
+ if (block_rows == 0) block_rows = compptr->v_samp_factor;
|
|
|
+ }
|
|
|
+- inverse_DCT = cinfo->idct->inverse_DCT[ci];
|
|
|
++ inverse_DCT = cinfo->idct->_inverse_DCT[ci];
|
|
|
+ output_ptr = output_buf[ci];
|
|
|
+ /* Loop over all DCT blocks to be processed. */
|
|
|
+ for (block_row = 0; block_row < block_rows; block_row++) {
|
|
|
+ buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
|
|
|
+ output_col = 0;
|
|
|
+ for (block_num = cinfo->master->first_MCU_col[ci];
|
|
|
+ block_num <= cinfo->master->last_MCU_col[ci]; block_num++) {
|
|
|
+ (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)buffer_ptr, output_ptr,
|
|
|
+@@ -420,29 +421,30 @@ smoothing_ok(j_decompress_ptr cinfo)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Variant of decompress_data for use when doing block smoothing.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(int)
|
|
|
+-decompress_smooth_data(j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
|
|
|
++decompress_smooth_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
|
|
|
+ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
+ JDIMENSION block_num, last_block_column;
|
|
|
+- int ci, block_row, block_rows, access_rows;
|
|
|
++ int ci, block_row, block_rows, access_rows, image_block_row,
|
|
|
++ image_block_rows;
|
|
|
+ JBLOCKARRAY buffer;
|
|
|
+ JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
|
|
|
+ JBLOCKROW next_block_row, next_next_block_row;
|
|
|
+- JSAMPARRAY output_ptr;
|
|
|
++ _JSAMPARRAY output_ptr;
|
|
|
+ JDIMENSION output_col;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- inverse_DCT_method_ptr inverse_DCT;
|
|
|
++ _inverse_DCT_method_ptr inverse_DCT;
|
|
|
+ boolean change_dc;
|
|
|
+ JCOEF *workspace;
|
|
|
+ int *coef_bits;
|
|
|
+ JQUANT_TBL *quanttbl;
|
|
|
+ JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num;
|
|
|
+ int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12,
|
|
|
+ DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24,
|
|
|
+ DC25;
|
|
|
+@@ -491,16 +493,17 @@ decompress_smooth_data(j_decompress_ptr
|
|
|
+ if (cinfo->output_iMCU_row > 1) {
|
|
|
+ access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */
|
|
|
+ buffer = (*cinfo->mem->access_virt_barray)
|
|
|
+ ((j_common_ptr)cinfo, coef->whole_image[ci],
|
|
|
+ (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor,
|
|
|
+ (JDIMENSION)access_rows, FALSE);
|
|
|
+ buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
|
|
|
+ } else if (cinfo->output_iMCU_row > 0) {
|
|
|
++ access_rows += compptr->v_samp_factor; /* prior iMCU row too */
|
|
|
+ buffer = (*cinfo->mem->access_virt_barray)
|
|
|
+ ((j_common_ptr)cinfo, coef->whole_image[ci],
|
|
|
+ (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
|
|
|
+ (JDIMENSION)access_rows, FALSE);
|
|
|
+ buffer += compptr->v_samp_factor; /* point to current iMCU row */
|
|
|
+ } else {
|
|
|
+ buffer = (*cinfo->mem->access_virt_barray)
|
|
|
+ ((j_common_ptr)cinfo, coef->whole_image[ci],
|
|
|
+@@ -530,42 +533,43 @@ decompress_smooth_data(j_decompress_ptr
|
|
|
+ Q11 = quanttbl->quantval[Q11_POS];
|
|
|
+ Q02 = quanttbl->quantval[Q02_POS];
|
|
|
+ if (change_dc) {
|
|
|
+ Q03 = quanttbl->quantval[Q03_POS];
|
|
|
+ Q12 = quanttbl->quantval[Q12_POS];
|
|
|
+ Q21 = quanttbl->quantval[Q21_POS];
|
|
|
+ Q30 = quanttbl->quantval[Q30_POS];
|
|
|
+ }
|
|
|
+- inverse_DCT = cinfo->idct->inverse_DCT[ci];
|
|
|
++ inverse_DCT = cinfo->idct->_inverse_DCT[ci];
|
|
|
+ output_ptr = output_buf[ci];
|
|
|
+ /* Loop over all DCT blocks to be processed. */
|
|
|
++ image_block_rows = block_rows * cinfo->total_iMCU_rows;
|
|
|
+ for (block_row = 0; block_row < block_rows; block_row++) {
|
|
|
++ image_block_row = cinfo->output_iMCU_row * block_rows + block_row;
|
|
|
+ buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
|
|
|
+
|
|
|
+- if (block_row > 0 || cinfo->output_iMCU_row > 0)
|
|
|
++ if (image_block_row > 0)
|
|
|
+ prev_block_row =
|
|
|
+ buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
|
|
|
+ else
|
|
|
+ prev_block_row = buffer_ptr;
|
|
|
+
|
|
|
+- if (block_row > 1 || cinfo->output_iMCU_row > 1)
|
|
|
++ if (image_block_row > 1)
|
|
|
+ prev_prev_block_row =
|
|
|
+ buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
|
|
|
+ else
|
|
|
+ prev_prev_block_row = prev_block_row;
|
|
|
+
|
|
|
+- if (block_row < block_rows - 1 || cinfo->output_iMCU_row < last_iMCU_row)
|
|
|
++ if (image_block_row < image_block_rows - 1)
|
|
|
+ next_block_row =
|
|
|
+ buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
|
|
|
+ else
|
|
|
+ next_block_row = buffer_ptr;
|
|
|
+
|
|
|
+- if (block_row < block_rows - 2 ||
|
|
|
+- cinfo->output_iMCU_row + 1 < last_iMCU_row)
|
|
|
++ if (image_block_row < image_block_rows - 2)
|
|
|
+ next_next_block_row =
|
|
|
+ buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
|
|
|
+ else
|
|
|
+ next_next_block_row = next_block_row;
|
|
|
+
|
|
|
+ /* We fetch the surrounding DC values using a sliding-register approach.
|
|
|
+ * Initialize all 25 here so as to do the right thing on narrow pics.
|
|
|
+ */
|
|
|
+@@ -578,21 +582,21 @@ decompress_smooth_data(j_decompress_ptr
|
|
|
+ last_block_column = compptr->width_in_blocks - 1;
|
|
|
+ for (block_num = cinfo->master->first_MCU_col[ci];
|
|
|
+ block_num <= cinfo->master->last_MCU_col[ci]; block_num++) {
|
|
|
+ /* Fetch current DCT block into workspace so we can modify it. */
|
|
|
+ jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1);
|
|
|
+ /* Update DC values */
|
|
|
+ if (block_num == cinfo->master->first_MCU_col[ci] &&
|
|
|
+ block_num < last_block_column) {
|
|
|
+- DC04 = (int)prev_prev_block_row[1][0];
|
|
|
+- DC09 = (int)prev_block_row[1][0];
|
|
|
+- DC14 = (int)buffer_ptr[1][0];
|
|
|
+- DC19 = (int)next_block_row[1][0];
|
|
|
+- DC24 = (int)next_next_block_row[1][0];
|
|
|
++ DC04 = DC05 = (int)prev_prev_block_row[1][0];
|
|
|
++ DC09 = DC10 = (int)prev_block_row[1][0];
|
|
|
++ DC14 = DC15 = (int)buffer_ptr[1][0];
|
|
|
++ DC19 = DC20 = (int)next_block_row[1][0];
|
|
|
++ DC24 = DC25 = (int)next_next_block_row[1][0];
|
|
|
+ }
|
|
|
+ if (block_num + 1 < last_block_column) {
|
|
|
+ DC05 = (int)prev_prev_block_row[2][0];
|
|
|
+ DC10 = (int)prev_block_row[2][0];
|
|
|
+ DC15 = (int)buffer_ptr[2][0];
|
|
|
+ DC20 = (int)next_block_row[2][0];
|
|
|
+ DC25 = (int)next_next_block_row[2][0];
|
|
|
+ }
|
|
|
+@@ -805,20 +809,23 @@ decompress_smooth_data(j_decompress_ptr
|
|
|
+ #endif /* BLOCK_SMOOTHING_SUPPORTED */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize coefficient buffer controller.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
++_jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
+ {
|
|
|
+ my_coef_ptr coef;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ coef = (my_coef_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_coef_controller));
|
|
|
+ cinfo->coef = (struct jpeg_d_coef_controller *)coef;
|
|
|
+ coef->pub.start_input_pass = start_input_pass;
|
|
|
+ coef->pub.start_output_pass = start_output_pass;
|
|
|
+ #ifdef BLOCK_SMOOTHING_SUPPORTED
|
|
|
+ coef->coef_bits_latch = NULL;
|
|
|
+@@ -845,34 +852,34 @@ jinit_d_coef_controller(j_decompress_ptr
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE,
|
|
|
+ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
+ (long)compptr->h_samp_factor),
|
|
|
+ (JDIMENSION)jround_up((long)compptr->height_in_blocks,
|
|
|
+ (long)compptr->v_samp_factor),
|
|
|
+ (JDIMENSION)access_rows);
|
|
|
+ }
|
|
|
+ coef->pub.consume_data = consume_data;
|
|
|
+- coef->pub.decompress_data = decompress_data;
|
|
|
++ coef->pub._decompress_data = decompress_data;
|
|
|
+ coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+ /* We only need a single-MCU buffer. */
|
|
|
+ JBLOCKROW buffer;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ buffer = (JBLOCKROW)
|
|
|
+ (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ D_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
|
|
|
+ for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
|
|
|
+ coef->MCU_buffer[i] = buffer + i;
|
|
|
+ }
|
|
|
+ coef->pub.consume_data = dummy_consume_data;
|
|
|
+- coef->pub.decompress_data = decompress_onepass;
|
|
|
++ coef->pub._decompress_data = decompress_onepass;
|
|
|
+ coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Allocate the workspace buffer */
|
|
|
+ coef->workspace = (JCOEF *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(JCOEF) * DCTSIZE2);
|
|
|
+ }
|
|
|
+diff --git a/media/libjpeg/jdcoefct.h b/media/libjpeg/jdcoefct.h
|
|
|
+--- a/media/libjpeg/jdcoefct.h
|
|
|
++++ b/media/libjpeg/jdcoefct.h
|
|
|
+@@ -1,24 +1,27 @@
|
|
|
+ /*
|
|
|
+ * jdcoefct.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1997, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+ * Copyright (C) 2020, Google, Inc.
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jpeglib.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Block smoothing is only applicable for progressive JPEG, so: */
|
|
|
+ #ifndef D_PROGRESSIVE_SUPPORTED
|
|
|
+ #undef BLOCK_SMOOTHING_SUPPORTED
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /* Private buffer controller object */
|
|
|
+
|
|
|
+@@ -76,8 +79,10 @@ start_iMCU_row(j_decompress_ptr cinfo)
|
|
|
+ coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
|
|
|
+ else
|
|
|
+ coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
|
|
|
+ }
|
|
|
+
|
|
|
+ coef->MCU_ctr = 0;
|
|
|
+ coef->MCU_vert_offset = 0;
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jdcol565.c b/media/libjpeg/jdcol565.c
|
|
|
+--- a/media/libjpeg/jdcol565.c
|
|
|
++++ b/media/libjpeg/jdcol565.c
|
|
|
+@@ -1,39 +1,40 @@
|
|
|
+ /*
|
|
|
+ * jdcol565.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * Modifications:
|
|
|
+ * Copyright (C) 2013, Linaro Limited.
|
|
|
+- * Copyright (C) 2014-2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2014-2015, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains output colorspace conversion routines.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* This file is included by jdcolor.c */
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-ycc_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++ycc_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int y, cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ register int *Crrtab = cconvert->Cr_r_tab;
|
|
|
+ register int *Cbbtab = cconvert->Cb_b_tab;
|
|
|
+ register JLONG *Crgtab = cconvert->Cr_g_tab;
|
|
|
+ register JLONG *Cbgtab = cconvert->Cb_g_tab;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ JLONG rgb;
|
|
|
+@@ -86,33 +87,37 @@ ycc_rgb565_convert_internal(j_decompress
|
|
|
+ r = range_limit[y + Crrtab[cr]];
|
|
|
+ g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
|
|
+ SCALEBITS))];
|
|
|
+ b = range_limit[y + Cbbtab[cb]];
|
|
|
+ rgb = PACK_SHORT_565(r, g, b);
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int y, cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ register int *Crrtab = cconvert->Cr_r_tab;
|
|
|
+ register int *Cbbtab = cconvert->Cb_b_tab;
|
|
|
+ register JLONG *Crgtab = cconvert->Cr_g_tab;
|
|
|
+ register JLONG *Cbgtab = cconvert->Cb_g_tab;
|
|
|
+ JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+@@ -172,27 +177,30 @@ ycc_rgb565D_convert_internal(j_decompres
|
|
|
+ g = range_limit[DITHER_565_G(y +
|
|
|
+ ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
|
|
+ SCALEBITS)), d0)];
|
|
|
+ b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
|
|
|
+ rgb = PACK_SHORT_565(r, g, b);
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-rgb_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++rgb_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ JLONG rgb;
|
|
|
+ unsigned int r, g, b;
|
|
|
+
|
|
|
+@@ -232,24 +240,24 @@ rgb_rgb565_convert_internal(j_decompress
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ JLONG rgb;
|
|
|
+ unsigned int r, g, b;
|
|
|
+
|
|
|
+@@ -291,21 +299,21 @@ rgb_rgb565D_convert_internal(j_decompres
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-gray_rgb565_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++gray_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ JLONG rgb;
|
|
|
+ unsigned int g;
|
|
|
+
|
|
|
+ inptr = input_buf[0][input_row++];
|
|
|
+@@ -331,23 +339,23 @@ gray_rgb565_convert_internal(j_decompres
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-gray_rgb565D_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++gray_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ JLONG rgb;
|
|
|
+ unsigned int g;
|
|
|
+
|
|
|
+ inptr = input_buf[0][input_row++];
|
|
|
+diff --git a/media/libjpeg/jdcolext.c b/media/libjpeg/jdcolext.c
|
|
|
+--- a/media/libjpeg/jdcolext.c
|
|
|
++++ b/media/libjpeg/jdcolext.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jdcolext.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009, 2011, 2015, 2023, D. R. Commander.
|
|
|
++ * Copyright (C) 2009, 2011, 2015, 2022-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains output colorspace conversion routines.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ /* This file is included by jdcolor.c */
|
|
|
+@@ -23,28 +23,29 @@
|
|
|
+ * as wide as the input buffer.
|
|
|
+ * A starting row offset is provided only for the input buffer. The caller
|
|
|
+ * can easily adjust the passed output_buf value to accommodate any row
|
|
|
+ * offset required on that side.
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-ycc_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++ycc_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int y, cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ register int *Crrtab = cconvert->Cr_r_tab;
|
|
|
+ register int *Cbbtab = cconvert->Cb_b_tab;
|
|
|
+ register JLONG *Crgtab = cconvert->Cr_g_tab;
|
|
|
+ register JLONG *Cbgtab = cconvert->Cb_g_tab;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr0 = input_buf[0][input_row];
|
|
|
+@@ -57,85 +58,88 @@ ycc_rgb_convert_internal(j_decompress_pt
|
|
|
+ cb = inptr1[col];
|
|
|
+ cr = inptr2[col];
|
|
|
+ /* Range-limiting is essential due to noise introduced by DCT losses. */
|
|
|
+ outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
|
|
|
+ outptr[RGB_GREEN] = range_limit[y +
|
|
|
+ ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
|
|
+ SCALEBITS))];
|
|
|
+ outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]];
|
|
|
+- /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */
|
|
|
+- /* alpha channel value */
|
|
|
++ /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
|
|
|
++ /* opaque alpha channel value */
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr += RGB_PIXELSIZE;
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
|
|
|
+ * This is provided to support applications that don't want to cope
|
|
|
+ * with grayscale as a separate case.
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-gray_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++gray_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr = input_buf[0][input_row++];
|
|
|
+ outptr = *output_buf++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+ outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
|
|
|
+- /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */
|
|
|
+- /* alpha channel value */
|
|
|
++ /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
|
|
|
++ /* opaque alpha channel value */
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr += RGB_PIXELSIZE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert RGB to extended RGB: just swap the order of source pixels
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-rgb_rgb_convert_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
++rgb_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf,
|
|
|
+ int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+- register JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr0 = input_buf[0][input_row];
|
|
|
+ inptr1 = input_buf[1][input_row];
|
|
|
+ inptr2 = input_buf[2][input_row];
|
|
|
+ input_row++;
|
|
|
+ outptr = *output_buf++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+ outptr[RGB_RED] = inptr0[col];
|
|
|
+ outptr[RGB_GREEN] = inptr1[col];
|
|
|
+ outptr[RGB_BLUE] = inptr2[col];
|
|
|
+- /* Set unused byte to MAXJSAMPLE so it can be interpreted as an opaque */
|
|
|
+- /* alpha channel value */
|
|
|
++ /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
|
|
|
++ /* opaque alpha channel value */
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr += RGB_PIXELSIZE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+diff --git a/media/libjpeg/jdcolor.c b/media/libjpeg/jdcolor.c
|
|
|
+--- a/media/libjpeg/jdcolor.c
|
|
|
++++ b/media/libjpeg/jdcolor.c
|
|
|
+@@ -1,74 +1,79 @@
|
|
|
+ /*
|
|
|
+ * jdcolor.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * Modified 2011 by Guido Vollbeding.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+- * Copyright (C) 2009, 2011-2012, 2014-2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2009, 2011-2012, 2014-2015, 2022, D. R. Commander.
|
|
|
+ * Copyright (C) 2013, Linaro Limited.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains output colorspace conversion routines.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+ #include "jsimd.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_color_deconverter pub; /* public fields */
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ /* Private state for YCC->RGB conversion */
|
|
|
+ int *Cr_r_tab; /* => table for Cr to R conversion */
|
|
|
+ int *Cb_b_tab; /* => table for Cb to B conversion */
|
|
|
+ JLONG *Cr_g_tab; /* => table for Cr to G conversion */
|
|
|
+ JLONG *Cb_g_tab; /* => table for Cb to G conversion */
|
|
|
+
|
|
|
+ /* Private state for RGB->Y conversion */
|
|
|
+ JLONG *rgb_y_tab; /* => table for RGB to Y conversion */
|
|
|
++#endif
|
|
|
+ } my_color_deconverter;
|
|
|
+
|
|
|
+ typedef my_color_deconverter *my_cconvert_ptr;
|
|
|
+
|
|
|
+
|
|
|
+ /**************** YCbCr -> RGB conversion: most common case **************/
|
|
|
+ /**************** RGB -> Y conversion: less common case **************/
|
|
|
+
|
|
|
+ /*
|
|
|
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
|
|
|
+- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
|
|
|
++ * normalized to the range 0.._MAXJSAMPLE rather than -0.5 .. 0.5.
|
|
|
+ * The conversion equations to be implemented are therefore
|
|
|
+ *
|
|
|
+ * R = Y + 1.40200 * Cr
|
|
|
+ * G = Y - 0.34414 * Cb - 0.71414 * Cr
|
|
|
+ * B = Y + 1.77200 * Cb
|
|
|
+ *
|
|
|
+ * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
|
|
+ *
|
|
|
+- * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
|
|
|
++ * where Cb and Cr represent the incoming values less _CENTERJSAMPLE.
|
|
|
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
|
|
|
+ *
|
|
|
+ * To avoid floating-point arithmetic, we represent the fractional constants
|
|
|
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
|
|
|
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
|
|
|
+ * Notice that Y, being an integral input, does not contribute any fraction
|
|
|
+ * so it need not participate in the rounding.
|
|
|
+ *
|
|
|
+ * For even more speed, we avoid doing any multiplications in the inner loop
|
|
|
+ * by precalculating the constants times Cb and Cr for all possible values.
|
|
|
+- * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
|
|
|
++ * For 8-bit samples this is very reasonable (only 256 entries per table);
|
|
|
+ * for 12-bit samples it is still acceptable. It's not very reasonable for
|
|
|
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
|
|
|
+ * colorspace anyway.
|
|
|
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
|
|
|
+ * values for the G calculation are left scaled up, since we must add them
|
|
|
+ * together before rounding.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -79,19 +84,19 @@ typedef my_color_deconverter *my_cconver
|
|
|
+ /* We allocate one big table for RGB->Y conversion and divide it up into
|
|
|
+ * three parts, instead of doing three alloc_small requests. This lets us
|
|
|
+ * use a single table base address, which can be held in a register in the
|
|
|
+ * inner loops on many machines (more than can hold all three addresses,
|
|
|
+ * anyway).
|
|
|
+ */
|
|
|
+
|
|
|
+ #define R_Y_OFF 0 /* offset to R => Y section */
|
|
|
+-#define G_Y_OFF (1 * (MAXJSAMPLE + 1)) /* offset to G => Y section */
|
|
|
+-#define B_Y_OFF (2 * (MAXJSAMPLE + 1)) /* etc. */
|
|
|
+-#define TABLE_SIZE (3 * (MAXJSAMPLE + 1))
|
|
|
++#define G_Y_OFF (1 * (_MAXJSAMPLE + 1)) /* offset to G => Y section */
|
|
|
++#define B_Y_OFF (2 * (_MAXJSAMPLE + 1)) /* etc. */
|
|
|
++#define TABLE_SIZE (3 * (_MAXJSAMPLE + 1))
|
|
|
+
|
|
|
+
|
|
|
+ /* Include inline routines for colorspace extensions */
|
|
|
+
|
|
|
+ #include "jdcolext.c"
|
|
|
+ #undef RGB_RED
|
|
|
+ #undef RGB_GREEN
|
|
|
+ #undef RGB_BLUE
|
|
|
+@@ -204,59 +209,63 @@ typedef my_color_deconverter *my_cconver
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize tables for YCC->RGB colorspace conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ build_ycc_rgb_table(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ int i;
|
|
|
+ JLONG x;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ cconvert->Cr_r_tab = (int *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(int));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(int));
|
|
|
+ cconvert->Cb_b_tab = (int *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(int));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(int));
|
|
|
+ cconvert->Cr_g_tab = (JLONG *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
+ cconvert->Cb_g_tab = (JLONG *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
+
|
|
|
+- for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
|
|
|
+- /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
|
|
|
+- /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
|
|
|
++ for (i = 0, x = -_CENTERJSAMPLE; i <= _MAXJSAMPLE; i++, x++) {
|
|
|
++ /* i is the actual input pixel value, in the range 0.._MAXJSAMPLE */
|
|
|
++ /* The Cb or Cr value we are thinking of is x = i - _CENTERJSAMPLE */
|
|
|
+ /* Cr=>R value is nearest int to 1.40200 * x */
|
|
|
+ cconvert->Cr_r_tab[i] = (int)
|
|
|
+ RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
|
|
|
+ /* Cb=>B value is nearest int to 1.77200 * x */
|
|
|
+ cconvert->Cb_b_tab[i] = (int)
|
|
|
+ RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
|
|
|
+ /* Cr=>G value is scaled-up -0.71414 * x */
|
|
|
+ cconvert->Cr_g_tab[i] = (-FIX(0.71414)) * x;
|
|
|
+ /* Cb=>G value is scaled-up -0.34414 * x */
|
|
|
+ /* We also add in ONE_HALF so that need not do it in inner loop */
|
|
|
+ cconvert->Cb_g_tab[i] = (-FIX(0.34414)) * x + ONE_HALF;
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert some rows of samples to the output colorspace.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++ycc_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
|
|
+ num_rows);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -295,77 +304,85 @@ ycc_rgb_convert(j_decompress_ptr cinfo,
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize for RGB->grayscale colorspace conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ build_rgb_y_table(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ JLONG *rgb_y_tab;
|
|
|
+ JLONG i;
|
|
|
+
|
|
|
+ /* Allocate and fill in the conversion tables. */
|
|
|
+ cconvert->rgb_y_tab = rgb_y_tab = (JLONG *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (TABLE_SIZE * sizeof(JLONG)));
|
|
|
+
|
|
|
+- for (i = 0; i <= MAXJSAMPLE; i++) {
|
|
|
++ for (i = 0; i <= _MAXJSAMPLE; i++) {
|
|
|
+ rgb_y_tab[i + R_Y_OFF] = FIX(0.29900) * i;
|
|
|
+ rgb_y_tab[i + G_Y_OFF] = FIX(0.58700) * i;
|
|
|
+ rgb_y_tab[i + B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert RGB to grayscale.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_gray_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++rgb_gray_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int r, g, b;
|
|
|
+ register JLONG *ctab = cconvert->rgb_y_tab;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr0 = input_buf[0][input_row];
|
|
|
+ inptr1 = input_buf[1][input_row];
|
|
|
+ inptr2 = input_buf[2][input_row];
|
|
|
+ input_row++;
|
|
|
+ outptr = *output_buf++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+ r = inptr0[col];
|
|
|
+ g = inptr1[col];
|
|
|
+ b = inptr2[col];
|
|
|
+ /* Y */
|
|
|
+- outptr[col] = (JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
+- ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
++ outptr[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
|
|
|
++ ctab[b + B_Y_OFF]) >> SCALEBITS);
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Color conversion for no colorspace change: just copy the data,
|
|
|
+ * converting from separate-planes to interleaved representation.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-null_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++null_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr;
|
|
|
++ register _JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr;
|
|
|
+ register JDIMENSION col;
|
|
|
+ register int num_components = cinfo->num_components;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ int ci;
|
|
|
+
|
|
|
+ if (num_components == 3) {
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr0 = input_buf[0][input_row];
|
|
|
+@@ -413,31 +430,31 @@ null_convert(j_decompress_ptr cinfo, JSA
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Color conversion for grayscale: just copy the data.
|
|
|
+ * This also works for YCbCr -> grayscale conversion, in which
|
|
|
+ * we just copy the Y (luminance) component and ignore chrominance.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-grayscale_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++grayscale_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+- jcopy_sample_rows(input_buf[0], (int)input_row, output_buf, 0, num_rows,
|
|
|
+- cinfo->output_width);
|
|
|
++ _jcopy_sample_rows(input_buf[0], (int)input_row, output_buf, 0, num_rows,
|
|
|
++ cinfo->output_width);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert grayscale to RGB
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-gray_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++gray_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
|
|
+ num_rows);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -471,18 +488,18 @@ gray_rgb_convert(j_decompress_ptr cinfo,
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Convert plain RGB to extended RGB
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++rgb_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
|
|
|
+ num_rows);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -519,27 +536,28 @@ rgb_rgb_convert(j_decompress_ptr cinfo,
|
|
|
+ /*
|
|
|
+ * Adobe-style YCCK->CMYK conversion.
|
|
|
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
|
|
|
+ * conversion as above, while passing K (black) unchanged.
|
|
|
+ * We assume build_ycc_rgb_table has been called.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-ycck_cmyk_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++ycck_cmyk_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
|
|
|
+ register int y, cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, inptr2, inptr3;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, inptr2, inptr3;
|
|
|
+ register JDIMENSION col;
|
|
|
+ JDIMENSION num_cols = cinfo->output_width;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ register int *Crrtab = cconvert->Cr_r_tab;
|
|
|
+ register int *Cbbtab = cconvert->Cb_b_tab;
|
|
|
+ register JLONG *Crgtab = cconvert->Cr_g_tab;
|
|
|
+ register JLONG *Cbgtab = cconvert->Cb_g_tab;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ while (--num_rows >= 0) {
|
|
|
+ inptr0 = input_buf[0][input_row];
|
|
|
+@@ -548,26 +566,29 @@ ycck_cmyk_convert(j_decompress_ptr cinfo
|
|
|
+ inptr3 = input_buf[3][input_row];
|
|
|
+ input_row++;
|
|
|
+ outptr = *output_buf++;
|
|
|
+ for (col = 0; col < num_cols; col++) {
|
|
|
+ y = inptr0[col];
|
|
|
+ cb = inptr1[col];
|
|
|
+ cr = inptr2[col];
|
|
|
+ /* Range-limiting is essential due to noise introduced by DCT losses. */
|
|
|
+- outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
|
|
|
+- outptr[1] = range_limit[MAXJSAMPLE - (y + /* green */
|
|
|
++ outptr[0] = range_limit[_MAXJSAMPLE - (y + Crrtab[cr])]; /* red */
|
|
|
++ outptr[1] = range_limit[_MAXJSAMPLE - (y + /* green */
|
|
|
+ ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
|
|
+ SCALEBITS)))];
|
|
|
+- outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
|
|
|
++ outptr[2] = range_limit[_MAXJSAMPLE - (y + Cbbtab[cb])]; /* blue */
|
|
|
+ /* K passes through unchanged */
|
|
|
+ outptr[3] = inptr3[col];
|
|
|
+ outptr += 4;
|
|
|
+ }
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * RGB565 conversion
|
|
|
+ */
|
|
|
+
|
|
|
+ #define PACK_SHORT_565_LE(r, g, b) \
|
|
|
+@@ -647,73 +668,73 @@ static INLINE boolean is_big_endian(void
|
|
|
+ #undef ycc_rgb565D_convert_internal
|
|
|
+ #undef rgb_rgb565_convert_internal
|
|
|
+ #undef rgb_rgb565D_convert_internal
|
|
|
+ #undef gray_rgb565_convert_internal
|
|
|
+ #undef gray_rgb565D_convert_internal
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++ycc_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ else
|
|
|
+ ycc_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-ycc_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++ycc_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ else
|
|
|
+ ycc_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++rgb_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ else
|
|
|
+ rgb_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-rgb_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++rgb_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ else
|
|
|
+ rgb_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-gray_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++gray_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ else
|
|
|
+ gray_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-gray_rgb565D_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
|
|
|
++gray_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ else
|
|
|
+ gray_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+@@ -728,21 +749,24 @@ start_pass_dcolor(j_decompress_ptr cinfo
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Module initialization routine for output colorspace conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_color_deconverter(j_decompress_ptr cinfo)
|
|
|
++_jinit_color_deconverter(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cconvert_ptr cconvert;
|
|
|
+ int ci;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ cconvert = (my_cconvert_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_color_deconverter));
|
|
|
+ cinfo->cconvert = (struct jpeg_color_deconverter *)cconvert;
|
|
|
+ cconvert->pub.start_pass = start_pass_dcolor;
|
|
|
+
|
|
|
+ /* Make sure num_components agrees with jpeg_color_space */
|
|
|
+ switch (cinfo->jpeg_color_space) {
|
|
|
+@@ -767,115 +791,135 @@ jinit_color_deconverter(j_decompress_ptr
|
|
|
+ if (cinfo->num_components < 1)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Set out_color_components and conversion method based on requested space.
|
|
|
+ * Also clear the component_needed flags for any unused components,
|
|
|
+ * so that earlier pipeline stages can avoid useless computation.
|
|
|
++ * NOTE: We do not allow any lossy color conversion algorithms in lossless
|
|
|
++ * mode.
|
|
|
+ */
|
|
|
+
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_GRAYSCALE:
|
|
|
++ if (cinfo->master->lossless &&
|
|
|
++ cinfo->jpeg_color_space != cinfo->out_color_space)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ cinfo->out_color_components = 1;
|
|
|
+ if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
|
|
|
+ cinfo->jpeg_color_space == JCS_YCbCr) {
|
|
|
+- cconvert->pub.color_convert = grayscale_convert;
|
|
|
++ cconvert->pub._color_convert = grayscale_convert;
|
|
|
+ /* For color->grayscale conversion, only the Y (0) component is needed */
|
|
|
+ for (ci = 1; ci < cinfo->num_components; ci++)
|
|
|
+ cinfo->comp_info[ci].component_needed = FALSE;
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_RGB) {
|
|
|
+- cconvert->pub.color_convert = rgb_gray_convert;
|
|
|
++ cconvert->pub._color_convert = rgb_gray_convert;
|
|
|
+ build_rgb_y_table(cinfo);
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_RGB:
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_BGR:
|
|
|
+ case JCS_EXT_BGRX:
|
|
|
+ case JCS_EXT_XBGR:
|
|
|
+ case JCS_EXT_XRGB:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+ case JCS_EXT_BGRA:
|
|
|
+ case JCS_EXT_ABGR:
|
|
|
+ case JCS_EXT_ARGB:
|
|
|
++ if (cinfo->master->lossless && cinfo->jpeg_color_space != JCS_RGB)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
|
|
|
+ if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_ycc_rgb())
|
|
|
+- cconvert->pub.color_convert = jsimd_ycc_rgb_convert;
|
|
|
+- else {
|
|
|
+- cconvert->pub.color_convert = ycc_rgb_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_ycc_rgb_convert;
|
|
|
++ else
|
|
|
++#endif
|
|
|
++ {
|
|
|
++ cconvert->pub._color_convert = ycc_rgb_convert;
|
|
|
+ build_ycc_rgb_table(cinfo);
|
|
|
+ }
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
|
|
+- cconvert->pub.color_convert = gray_rgb_convert;
|
|
|
++ cconvert->pub._color_convert = gray_rgb_convert;
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_RGB) {
|
|
|
+ if (rgb_red[cinfo->out_color_space] == 0 &&
|
|
|
+ rgb_green[cinfo->out_color_space] == 1 &&
|
|
|
+ rgb_blue[cinfo->out_color_space] == 2 &&
|
|
|
+ rgb_pixelsize[cinfo->out_color_space] == 3)
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ else
|
|
|
+- cconvert->pub.color_convert = rgb_rgb_convert;
|
|
|
++ cconvert->pub._color_convert = rgb_rgb_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_RGB565:
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ cinfo->out_color_components = 3;
|
|
|
+ if (cinfo->dither_mode == JDITHER_NONE) {
|
|
|
+ if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_ycc_rgb565())
|
|
|
+- cconvert->pub.color_convert = jsimd_ycc_rgb565_convert;
|
|
|
+- else {
|
|
|
+- cconvert->pub.color_convert = ycc_rgb565_convert;
|
|
|
++ cconvert->pub._color_convert = jsimd_ycc_rgb565_convert;
|
|
|
++ else
|
|
|
++#endif
|
|
|
++ {
|
|
|
++ cconvert->pub._color_convert = ycc_rgb565_convert;
|
|
|
+ build_ycc_rgb_table(cinfo);
|
|
|
+ }
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
|
|
+- cconvert->pub.color_convert = gray_rgb565_convert;
|
|
|
++ cconvert->pub._color_convert = gray_rgb565_convert;
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_RGB) {
|
|
|
+- cconvert->pub.color_convert = rgb_rgb565_convert;
|
|
|
++ cconvert->pub._color_convert = rgb_rgb565_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ } else {
|
|
|
+ /* only ordered dithering is supported */
|
|
|
+ if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
|
|
+- cconvert->pub.color_convert = ycc_rgb565D_convert;
|
|
|
++ cconvert->pub._color_convert = ycc_rgb565D_convert;
|
|
|
+ build_ycc_rgb_table(cinfo);
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
|
|
+- cconvert->pub.color_convert = gray_rgb565D_convert;
|
|
|
++ cconvert->pub._color_convert = gray_rgb565D_convert;
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_RGB) {
|
|
|
+- cconvert->pub.color_convert = rgb_rgb565D_convert;
|
|
|
++ cconvert->pub._color_convert = rgb_rgb565D_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ }
|
|
|
+ break;
|
|
|
+
|
|
|
+ case JCS_CMYK:
|
|
|
++ if (cinfo->master->lossless &&
|
|
|
++ cinfo->jpeg_color_space != cinfo->out_color_space)
|
|
|
++ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ cinfo->out_color_components = 4;
|
|
|
+ if (cinfo->jpeg_color_space == JCS_YCCK) {
|
|
|
+- cconvert->pub.color_convert = ycck_cmyk_convert;
|
|
|
++ cconvert->pub._color_convert = ycck_cmyk_convert;
|
|
|
+ build_ycc_rgb_table(cinfo);
|
|
|
+ } else if (cinfo->jpeg_color_space == JCS_CMYK) {
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+
|
|
|
+ default:
|
|
|
+ /* Permit null conversion to same output space */
|
|
|
+ if (cinfo->out_color_space == cinfo->jpeg_color_space) {
|
|
|
+ cinfo->out_color_components = cinfo->num_components;
|
|
|
+- cconvert->pub.color_convert = null_convert;
|
|
|
++ cconvert->pub._color_convert = null_convert;
|
|
|
+ } else /* unsupported non-null conversion */
|
|
|
+ ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (cinfo->quantize_colors)
|
|
|
+ cinfo->output_components = 1; /* single colormapped output component */
|
|
|
+ else
|
|
|
+ cinfo->output_components = cinfo->out_color_components;
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jdct.h b/media/libjpeg/jdct.h
|
|
|
+--- a/media/libjpeg/jdct.h
|
|
|
++++ b/media/libjpeg/jdct.h
|
|
|
+@@ -1,32 +1,34 @@
|
|
|
+ /*
|
|
|
+ * jdct.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2015, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This include file contains common declarations for the forward and
|
|
|
+ * inverse DCT modules. These declarations are private to the DCT managers
|
|
|
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
|
|
|
+ * The individual DCT algorithms are kept in separate files to ease
|
|
|
+ * machine-dependent tuning (e.g., assembly coding).
|
|
|
+ */
|
|
|
+
|
|
|
++#include "jsamplecomp.h"
|
|
|
++
|
|
|
+
|
|
|
+ /*
|
|
|
+ * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
|
|
|
+ * the DCT is to be performed in-place in that buffer. Type DCTELEM is int
|
|
|
+ * for 8-bit samples, JLONG for 12-bit samples. (NOTE: Floating-point DCT
|
|
|
+ * implementations use an array of type FAST_FLOAT, instead.)
|
|
|
+- * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
|
|
|
++ * The DCT inputs are expected to be signed (range +-_CENTERJSAMPLE).
|
|
|
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
|
|
|
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data. This
|
|
|
+ * convention improves accuracy in integer implementations and saves some
|
|
|
+ * work in floating-point ones.
|
|
|
+ * Quantization of the output coefficients is done by jcdctmgr.c. This
|
|
|
+ * step requires an unsigned type and also one with twice the bits.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -71,88 +73,99 @@ typedef MULTIPLIER IFAST_MULT_TYPE; /*
|
|
|
+ typedef JLONG IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */
|
|
|
+ #define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */
|
|
|
+ #endif
|
|
|
+ typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Each IDCT routine is responsible for range-limiting its results and
|
|
|
+- * converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
|
|
|
++ * converting them to unsigned form (0.._MAXJSAMPLE). The raw outputs could
|
|
|
+ * be quite far out of range if the input data is corrupt, so a bulletproof
|
|
|
+ * range-limiting step is required. We use a mask-and-table-lookup method
|
|
|
+ * to do the combined operations quickly. See the comments with
|
|
|
+ * prepare_range_limit_table (in jdmaster.c) for more info.
|
|
|
+ */
|
|
|
+
|
|
|
+-#define IDCT_range_limit(cinfo) ((cinfo)->sample_range_limit + CENTERJSAMPLE)
|
|
|
++#define IDCT_range_limit(cinfo) \
|
|
|
++ ((_JSAMPLE *)((cinfo)->sample_range_limit) + _CENTERJSAMPLE)
|
|
|
+
|
|
|
+-#define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
|
|
|
++#define RANGE_MASK (_MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
|
|
|
+
|
|
|
+
|
|
|
+ /* Extern declarations for the forward and inverse DCT routines. */
|
|
|
+
|
|
|
+-EXTERN(void) jpeg_fdct_islow(DCTELEM *data);
|
|
|
+-EXTERN(void) jpeg_fdct_ifast(DCTELEM *data);
|
|
|
++EXTERN(void) _jpeg_fdct_islow(DCTELEM *data);
|
|
|
++EXTERN(void) _jpeg_fdct_ifast(DCTELEM *data);
|
|
|
+ EXTERN(void) jpeg_fdct_float(FAST_FLOAT *data);
|
|
|
+
|
|
|
+-EXTERN(void) jpeg_idct_islow(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_ifast(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_float(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_7x7(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_6x6(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_5x5(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_4x4(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_3x3(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_2x2(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_1x1(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_9x9(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_10x10(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_11x11(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_12x12(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_13x13(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_14x14(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_15x15(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
+-EXTERN(void) jpeg_idct_16x16(j_decompress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_islow(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_ifast(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_float(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_7x7(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_6x6(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_5x5(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_4x4(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_3x3(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_2x2(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_1x1(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_9x9(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr, JCOEFPTR coef_block,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_10x10(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_11x11(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_12x12(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_13x13(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_14x14(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_15x15(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
++EXTERN(void) _jpeg_idct_16x16(j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Macros for handling fixed-point arithmetic; these are used by many
|
|
|
+ * but not all of the DCT/IDCT modules.
|
|
|
+ *
|
|
|
+ * All values are expected to be of type JLONG.
|
|
|
+ * Fractional constants are scaled left by CONST_BITS bits.
|
|
|
+diff --git a/media/libjpeg/jddctmgr.c b/media/libjpeg/jddctmgr.c
|
|
|
+--- a/media/libjpeg/jddctmgr.c
|
|
|
++++ b/media/libjpeg/jddctmgr.c
|
|
|
+@@ -21,17 +21,17 @@
|
|
|
+ * dequantization multiplier table needed by the IDCT routine.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+ #include "jdct.h" /* Private declarations for DCT subsystem */
|
|
|
+ #include "jsimddct.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The decompressor input side (jdinput.c) saves away the appropriate
|
|
|
+ * quantization table for each component at the start of the first scan
|
|
|
+ * involving that component. (This is necessary in order to correctly
|
|
|
+ * decode files that reuse Q-table slots.)
|
|
|
+ * When we are ready to make an output pass, the saved Q-table is converted
|
|
|
+@@ -95,142 +95,152 @@ typedef union {
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ start_pass(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_idct_ptr idct = (my_idct_ptr)cinfo->idct;
|
|
|
+ int ci, i;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ int method = 0;
|
|
|
+- inverse_DCT_method_ptr method_ptr = NULL;
|
|
|
++ _inverse_DCT_method_ptr method_ptr = NULL;
|
|
|
+ JQUANT_TBL *qtbl;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ /* Select the proper IDCT routine for this component's scaling */
|
|
|
+ switch (compptr->_DCT_scaled_size) {
|
|
|
+ #ifdef IDCT_SCALING_SUPPORTED
|
|
|
+ case 1:
|
|
|
+- method_ptr = jpeg_idct_1x1;
|
|
|
++ method_ptr = _jpeg_idct_1x1;
|
|
|
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 2:
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_idct_2x2())
|
|
|
+ method_ptr = jsimd_idct_2x2;
|
|
|
+ else
|
|
|
+- method_ptr = jpeg_idct_2x2;
|
|
|
++#endif
|
|
|
++ method_ptr = _jpeg_idct_2x2;
|
|
|
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 3:
|
|
|
+- method_ptr = jpeg_idct_3x3;
|
|
|
++ method_ptr = _jpeg_idct_3x3;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 4:
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_idct_4x4())
|
|
|
+ method_ptr = jsimd_idct_4x4;
|
|
|
+ else
|
|
|
+- method_ptr = jpeg_idct_4x4;
|
|
|
++#endif
|
|
|
++ method_ptr = _jpeg_idct_4x4;
|
|
|
+ method = JDCT_ISLOW; /* jidctred uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 5:
|
|
|
+- method_ptr = jpeg_idct_5x5;
|
|
|
++ method_ptr = _jpeg_idct_5x5;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 6:
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_can_idct_6x6())
|
|
|
+ method_ptr = jsimd_idct_6x6;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- method_ptr = jpeg_idct_6x6;
|
|
|
++ method_ptr = _jpeg_idct_6x6;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 7:
|
|
|
+- method_ptr = jpeg_idct_7x7;
|
|
|
++ method_ptr = _jpeg_idct_7x7;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ case DCTSIZE:
|
|
|
+ switch (cinfo->dct_method) {
|
|
|
+ #ifdef DCT_ISLOW_SUPPORTED
|
|
|
+ case JDCT_ISLOW:
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_idct_islow())
|
|
|
+ method_ptr = jsimd_idct_islow;
|
|
|
+ else
|
|
|
+- method_ptr = jpeg_idct_islow;
|
|
|
++#endif
|
|
|
++ method_ptr = _jpeg_idct_islow;
|
|
|
+ method = JDCT_ISLOW;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_IFAST_SUPPORTED
|
|
|
+ case JDCT_IFAST:
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_idct_ifast())
|
|
|
+ method_ptr = jsimd_idct_ifast;
|
|
|
+ else
|
|
|
+- method_ptr = jpeg_idct_ifast;
|
|
|
++#endif
|
|
|
++ method_ptr = _jpeg_idct_ifast;
|
|
|
+ method = JDCT_IFAST;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ #ifdef DCT_FLOAT_SUPPORTED
|
|
|
+ case JDCT_FLOAT:
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_idct_float())
|
|
|
+ method_ptr = jsimd_idct_float;
|
|
|
+ else
|
|
|
+- method_ptr = jpeg_idct_float;
|
|
|
++#endif
|
|
|
++ method_ptr = _jpeg_idct_float;
|
|
|
+ method = JDCT_FLOAT;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ default:
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+ #ifdef IDCT_SCALING_SUPPORTED
|
|
|
+ case 9:
|
|
|
+- method_ptr = jpeg_idct_9x9;
|
|
|
++ method_ptr = _jpeg_idct_9x9;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 10:
|
|
|
+- method_ptr = jpeg_idct_10x10;
|
|
|
++ method_ptr = _jpeg_idct_10x10;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 11:
|
|
|
+- method_ptr = jpeg_idct_11x11;
|
|
|
++ method_ptr = _jpeg_idct_11x11;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 12:
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_can_idct_12x12())
|
|
|
+ method_ptr = jsimd_idct_12x12;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+- method_ptr = jpeg_idct_12x12;
|
|
|
++ method_ptr = _jpeg_idct_12x12;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 13:
|
|
|
+- method_ptr = jpeg_idct_13x13;
|
|
|
++ method_ptr = _jpeg_idct_13x13;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 14:
|
|
|
+- method_ptr = jpeg_idct_14x14;
|
|
|
++ method_ptr = _jpeg_idct_14x14;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 15:
|
|
|
+- method_ptr = jpeg_idct_15x15;
|
|
|
++ method_ptr = _jpeg_idct_15x15;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ case 16:
|
|
|
+- method_ptr = jpeg_idct_16x16;
|
|
|
++ method_ptr = _jpeg_idct_16x16;
|
|
|
+ method = JDCT_ISLOW; /* jidctint uses islow-style table */
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ default:
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+- idct->pub.inverse_DCT[ci] = method_ptr;
|
|
|
++ idct->pub._inverse_DCT[ci] = method_ptr;
|
|
|
+ /* Create multiplier table from quant table.
|
|
|
+ * However, we can skip this if the component is uninteresting
|
|
|
+ * or if we already built the table. Also, if no quant table
|
|
|
+ * has yet been saved for the component, we leave the
|
|
|
+ * multiplier table all-zero; we'll be reading zeroes from the
|
|
|
+ * coefficient controller's buffer anyway.
|
|
|
+ */
|
|
|
+ if (!compptr->component_needed || idct->cur_method[ci] == method)
|
|
|
+@@ -322,22 +332,25 @@ start_pass(j_decompress_ptr cinfo)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize IDCT manager.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_inverse_dct(j_decompress_ptr cinfo)
|
|
|
++_jinit_inverse_dct(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_idct_ptr idct;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ idct = (my_idct_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_idct_controller));
|
|
|
+ cinfo->idct = (struct jpeg_inverse_dct *)idct;
|
|
|
+ idct->pub.start_pass = start_pass;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+diff --git a/media/libjpeg/jddiffct.c b/media/libjpeg/jddiffct.c
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jddiffct.c
|
|
|
+@@ -0,0 +1,403 @@
|
|
|
++/*
|
|
|
++ * jddiffct.c
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1994-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains the [un]difference buffer controller for decompression.
|
|
|
++ * This controller is the top level of the lossless JPEG decompressor proper.
|
|
|
++ * The difference buffer lies between the entropy decoding and
|
|
|
++ * prediction/undifferencing steps. The undifference buffer lies between the
|
|
|
++ * prediction/undifferencing and scaling steps.
|
|
|
++ *
|
|
|
++ * In buffered-image mode, this controller is the interface between
|
|
|
++ * input-oriented processing and output-oriented processing.
|
|
|
++ */
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jinclude.h"
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jlossls.h" /* Private declarations for lossless codec */
|
|
|
++
|
|
|
++
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++/* Private buffer controller object */
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_d_coef_controller pub; /* public fields */
|
|
|
++
|
|
|
++ /* These variables keep track of the current location of the input side. */
|
|
|
++ /* cinfo->input_iMCU_row is also used for this. */
|
|
|
++ JDIMENSION MCU_ctr; /* counts MCUs processed in current row */
|
|
|
++ unsigned int restart_rows_to_go; /* MCU rows left in this restart
|
|
|
++ interval */
|
|
|
++ unsigned int MCU_vert_offset; /* counts MCU rows within iMCU row */
|
|
|
++ unsigned int MCU_rows_per_iMCU_row; /* number of such rows needed */
|
|
|
++
|
|
|
++ /* The output side's location is represented by cinfo->output_iMCU_row. */
|
|
|
++
|
|
|
++ JDIFFARRAY diff_buf[MAX_COMPONENTS]; /* iMCU row of differences */
|
|
|
++ JDIFFARRAY undiff_buf[MAX_COMPONENTS]; /* iMCU row of undiff'd samples */
|
|
|
++
|
|
|
++#ifdef D_MULTISCAN_FILES_SUPPORTED
|
|
|
++ /* In multi-pass modes, we need a virtual sample array for each component. */
|
|
|
++ jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
|
|
|
++#endif
|
|
|
++} my_diff_controller;
|
|
|
++
|
|
|
++typedef my_diff_controller *my_diff_ptr;
|
|
|
++
|
|
|
++/* Forward declarations */
|
|
|
++METHODDEF(int) decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
|
|
|
++#ifdef D_MULTISCAN_FILES_SUPPORTED
|
|
|
++METHODDEF(int) output_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
|
|
|
++#endif
|
|
|
++
|
|
|
++
|
|
|
++LOCAL(void)
|
|
|
++start_iMCU_row(j_decompress_ptr cinfo)
|
|
|
++/* Reset within-iMCU-row counters for a new row (input side) */
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++
|
|
|
++ /* In an interleaved scan, an MCU row is the same as an iMCU row.
|
|
|
++ * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
|
|
|
++ * But at the bottom of the image, process only what's left.
|
|
|
++ */
|
|
|
++ if (cinfo->comps_in_scan > 1) {
|
|
|
++ diff->MCU_rows_per_iMCU_row = 1;
|
|
|
++ } else {
|
|
|
++ if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
|
|
|
++ diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
|
|
|
++ else
|
|
|
++ diff->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
|
|
|
++ }
|
|
|
++
|
|
|
++ diff->MCU_ctr = 0;
|
|
|
++ diff->MCU_vert_offset = 0;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for an input processing pass.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_input_pass(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++
|
|
|
++ /* Because it is hitching a ride on the jpeg_inverse_dct struct,
|
|
|
++ * start_pass_lossless() will be called at the start of the output pass.
|
|
|
++ * This ensures that it will be called at the start of the input pass as
|
|
|
++ * well.
|
|
|
++ */
|
|
|
++ (*cinfo->idct->start_pass) (cinfo);
|
|
|
++
|
|
|
++ /* Check that the restart interval is an integer multiple of the number
|
|
|
++ * of MCUs in an MCU row.
|
|
|
++ */
|
|
|
++ if (cinfo->restart_interval % cinfo->MCUs_per_row != 0)
|
|
|
++ ERREXIT2(cinfo, JERR_BAD_RESTART,
|
|
|
++ cinfo->restart_interval, cinfo->MCUs_per_row);
|
|
|
++
|
|
|
++ /* Initialize restart counter */
|
|
|
++ diff->restart_rows_to_go = cinfo->restart_interval / cinfo->MCUs_per_row;
|
|
|
++
|
|
|
++ cinfo->input_iMCU_row = 0;
|
|
|
++ start_iMCU_row(cinfo);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Check for a restart marker & resynchronize decoder, undifferencer.
|
|
|
++ * Returns FALSE if must suspend.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(boolean)
|
|
|
++process_restart(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++
|
|
|
++ if (!(*cinfo->entropy->process_restart) (cinfo))
|
|
|
++ return FALSE;
|
|
|
++
|
|
|
++ (*cinfo->idct->start_pass) (cinfo);
|
|
|
++
|
|
|
++ /* Reset restart counter */
|
|
|
++ diff->restart_rows_to_go = cinfo->restart_interval / cinfo->MCUs_per_row;
|
|
|
++
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for an output processing pass.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_output_pass(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ cinfo->output_iMCU_row = 0;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Decompress and return some data in the supplied buffer.
|
|
|
++ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
|
|
|
++ * Input and output must run in lockstep since we have only a one-MCU buffer.
|
|
|
++ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
|
|
|
++ *
|
|
|
++ * NB: output_buf contains a plane for each component in image,
|
|
|
++ * which we index according to the component's SOF position.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(int)
|
|
|
++decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++ lossless_decomp_ptr losslessd = (lossless_decomp_ptr)cinfo->idct;
|
|
|
++ JDIMENSION MCU_col_num; /* index of current MCU within row */
|
|
|
++ JDIMENSION MCU_count; /* number of MCUs decoded */
|
|
|
++ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
++ int ci, compi, row, prev_row;
|
|
|
++ unsigned int yoffset;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ /* Loop to process as much as one whole iMCU row */
|
|
|
++ for (yoffset = diff->MCU_vert_offset; yoffset < diff->MCU_rows_per_iMCU_row;
|
|
|
++ yoffset++) {
|
|
|
++
|
|
|
++ /* Process restart marker if needed; may have to suspend */
|
|
|
++ if (cinfo->restart_interval) {
|
|
|
++ if (diff->restart_rows_to_go == 0)
|
|
|
++ if (!process_restart(cinfo))
|
|
|
++ return JPEG_SUSPENDED;
|
|
|
++ }
|
|
|
++
|
|
|
++ MCU_col_num = diff->MCU_ctr;
|
|
|
++ /* Try to fetch an MCU row (or remaining portion of suspended MCU row). */
|
|
|
++ MCU_count =
|
|
|
++ (*cinfo->entropy->decode_mcus) (cinfo,
|
|
|
++ diff->diff_buf, yoffset, MCU_col_num,
|
|
|
++ cinfo->MCUs_per_row - MCU_col_num);
|
|
|
++ if (MCU_count != cinfo->MCUs_per_row - MCU_col_num) {
|
|
|
++ /* Suspension forced; update state counters and exit */
|
|
|
++ diff->MCU_vert_offset = yoffset;
|
|
|
++ diff->MCU_ctr += MCU_count;
|
|
|
++ return JPEG_SUSPENDED;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Account for restart interval (no-op if not using restarts) */
|
|
|
++ if (cinfo->restart_interval)
|
|
|
++ diff->restart_rows_to_go--;
|
|
|
++
|
|
|
++ /* Completed an MCU row, but perhaps not an iMCU row */
|
|
|
++ diff->MCU_ctr = 0;
|
|
|
++ }
|
|
|
++
|
|
|
++ /*
|
|
|
++ * Undifference and scale each scanline of the disassembled MCU row
|
|
|
++ * separately. We do not process dummy samples at the end of a scanline
|
|
|
++ * or dummy rows at the end of the image.
|
|
|
++ */
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ compi = compptr->component_index;
|
|
|
++ for (row = 0, prev_row = compptr->v_samp_factor - 1;
|
|
|
++ row < (cinfo->input_iMCU_row == last_iMCU_row ?
|
|
|
++ compptr->last_row_height : compptr->v_samp_factor);
|
|
|
++ prev_row = row, row++) {
|
|
|
++ (*losslessd->predict_undifference[compi])
|
|
|
++ (cinfo, compi, diff->diff_buf[compi][row],
|
|
|
++ diff->undiff_buf[compi][prev_row], diff->undiff_buf[compi][row],
|
|
|
++ compptr->width_in_blocks);
|
|
|
++ (*losslessd->scaler_scale) (cinfo, diff->undiff_buf[compi][row],
|
|
|
++ output_buf[compi][row],
|
|
|
++ compptr->width_in_blocks);
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Completed the iMCU row, advance counters for next one.
|
|
|
++ *
|
|
|
++ * NB: output_data will increment output_iMCU_row.
|
|
|
++ * This counter is not needed for the single-pass case
|
|
|
++ * or the input side of the multi-pass case.
|
|
|
++ */
|
|
|
++ if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
|
|
|
++ start_iMCU_row(cinfo);
|
|
|
++ return JPEG_ROW_COMPLETED;
|
|
|
++ }
|
|
|
++ /* Completed the scan */
|
|
|
++ (*cinfo->inputctl->finish_input_pass) (cinfo);
|
|
|
++ return JPEG_SCAN_COMPLETED;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Dummy consume-input routine for single-pass operation.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(int)
|
|
|
++dummy_consume_data(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ return JPEG_SUSPENDED; /* Always indicate nothing was done */
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++#ifdef D_MULTISCAN_FILES_SUPPORTED
|
|
|
++
|
|
|
++/*
|
|
|
++ * Consume input data and store it in the full-image sample buffer.
|
|
|
++ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
|
|
|
++ * ie, v_samp_factor rows for each component in the scan.
|
|
|
++ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(int)
|
|
|
++consume_data(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++ int ci, compi;
|
|
|
++ _JSAMPARRAY buffer[MAX_COMPS_IN_SCAN];
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ /* Align the virtual buffers for the components used in this scan. */
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ compi = compptr->component_index;
|
|
|
++ buffer[compi] = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
++ ((j_common_ptr)cinfo, diff->whole_image[compi],
|
|
|
++ cinfo->input_iMCU_row * compptr->v_samp_factor,
|
|
|
++ (JDIMENSION)compptr->v_samp_factor, TRUE);
|
|
|
++ }
|
|
|
++
|
|
|
++ return decompress_data(cinfo, buffer);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Output some data from the full-image sample buffer in the multi-pass case.
|
|
|
++ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
|
|
|
++ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
|
|
|
++ *
|
|
|
++ * NB: output_buf contains a plane for each component in image.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(int)
|
|
|
++output_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
|
|
|
++{
|
|
|
++ my_diff_ptr diff = (my_diff_ptr)cinfo->coef;
|
|
|
++ JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
|
|
|
++ int ci, samp_rows, row;
|
|
|
++ _JSAMPARRAY buffer;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ /* Force some input to be done if we are getting ahead of the input. */
|
|
|
++ while (cinfo->input_scan_number < cinfo->output_scan_number ||
|
|
|
++ (cinfo->input_scan_number == cinfo->output_scan_number &&
|
|
|
++ cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
|
|
|
++ if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
|
|
|
++ return JPEG_SUSPENDED;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* OK, output from the virtual arrays. */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ /* Align the virtual buffer for this component. */
|
|
|
++ buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
++ ((j_common_ptr)cinfo, diff->whole_image[ci],
|
|
|
++ cinfo->output_iMCU_row * compptr->v_samp_factor,
|
|
|
++ (JDIMENSION)compptr->v_samp_factor, FALSE);
|
|
|
++
|
|
|
++ if (cinfo->output_iMCU_row < last_iMCU_row)
|
|
|
++ samp_rows = compptr->v_samp_factor;
|
|
|
++ else {
|
|
|
++ /* NB: can't use last_row_height here; it is input-side-dependent! */
|
|
|
++ samp_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
|
|
|
++ if (samp_rows == 0) samp_rows = compptr->v_samp_factor;
|
|
|
++ }
|
|
|
++
|
|
|
++ for (row = 0; row < samp_rows; row++) {
|
|
|
++ memcpy(output_buf[ci][row], buffer[row],
|
|
|
++ compptr->width_in_blocks * sizeof(_JSAMPLE));
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
|
|
|
++ return JPEG_ROW_COMPLETED;
|
|
|
++ return JPEG_SCAN_COMPLETED;
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* D_MULTISCAN_FILES_SUPPORTED */
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize difference buffer controller.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++_jinit_d_diff_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
++{
|
|
|
++ my_diff_ptr diff;
|
|
|
++ int ci;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ diff = (my_diff_ptr)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ sizeof(my_diff_controller));
|
|
|
++ cinfo->coef = (struct jpeg_d_coef_controller *)diff;
|
|
|
++ diff->pub.start_input_pass = start_input_pass;
|
|
|
++ diff->pub.start_output_pass = start_output_pass;
|
|
|
++
|
|
|
++ /* Create the [un]difference buffers. */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ diff->diff_buf[ci] =
|
|
|
++ ALLOC_DARRAY(JPOOL_IMAGE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)compptr->v_samp_factor);
|
|
|
++ diff->undiff_buf[ci] =
|
|
|
++ ALLOC_DARRAY(JPOOL_IMAGE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)compptr->v_samp_factor);
|
|
|
++ }
|
|
|
++
|
|
|
++ if (need_full_buffer) {
|
|
|
++#ifdef D_MULTISCAN_FILES_SUPPORTED
|
|
|
++ /* Allocate a full-image virtual array for each component. */
|
|
|
++ int access_rows;
|
|
|
++
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ access_rows = compptr->v_samp_factor;
|
|
|
++ diff->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
|
|
|
++ ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
|
|
|
++ (JDIMENSION)jround_up((long)compptr->width_in_blocks,
|
|
|
++ (long)compptr->h_samp_factor),
|
|
|
++ (JDIMENSION)jround_up((long)compptr->height_in_blocks,
|
|
|
++ (long)compptr->v_samp_factor),
|
|
|
++ (JDIMENSION)access_rows);
|
|
|
++ }
|
|
|
++ diff->pub.consume_data = consume_data;
|
|
|
++ diff->pub._decompress_data = output_data;
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
++#endif
|
|
|
++ } else {
|
|
|
++ diff->pub.consume_data = dummy_consume_data;
|
|
|
++ diff->pub._decompress_data = decompress_data;
|
|
|
++ diff->whole_image[0] = NULL; /* flag for no virtual arrays */
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* D_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jdhuff.c b/media/libjpeg/jdhuff.c
|
|
|
+--- a/media/libjpeg/jdhuff.c
|
|
|
++++ b/media/libjpeg/jdhuff.c
|
|
|
+@@ -1,15 +1,17 @@
|
|
|
+ /*
|
|
|
+ * jdhuff.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009-2011, 2016, 2018-2019, D. R. Commander.
|
|
|
++ * Copyright (C) 2009-2011, 2016, 2018-2019, 2022, D. R. Commander.
|
|
|
+ * Copyright (C) 2018, Matthias Räncker.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains Huffman entropy decoding routines.
|
|
|
+ *
|
|
|
+ * Much of the complexity here has to do with supporting input suspension.
|
|
|
+ * If the data source module demands suspension, we want to be able to back
|
|
|
+@@ -19,18 +21,18 @@
|
|
|
+ *
|
|
|
+ * NOTE: All referenced figures are from
|
|
|
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jdhuff.h" /* Declarations shared with jdphuff.c */
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jdhuff.h" /* Declarations shared with jd*huff.c */
|
|
|
++#include "jpegapicomp.h"
|
|
|
+ #include "jstdhuff.c"
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Expanded entropy decoder object for Huffman decoding.
|
|
|
+ *
|
|
|
+ * The savable_state subrecord contains fields that change within an MCU,
|
|
|
+ * but must not be updated permanently until we complete the MCU.
|
|
|
+@@ -129,17 +131,17 @@ start_pass_huff_decoder(j_decompress_ptr
|
|
|
+ entropy->restarts_to_go = cinfo->restart_interval;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Compute the derived values for a Huffman table.
|
|
|
+ * This routine also performs some validation checks on the table.
|
|
|
+ *
|
|
|
+- * Note this is also used by jdphuff.c.
|
|
|
++ * Note this is also used by jdphuff.c and jdlhuff.c.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
|
|
|
+ d_derived_tbl **pdtbl)
|
|
|
+ {
|
|
|
+ JHUFF_TBL *htbl;
|
|
|
+ d_derived_tbl *dtbl;
|
|
|
+@@ -240,32 +242,32 @@ jpeg_make_d_derived_tbl(j_decompress_ptr
|
|
|
+ dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
|
|
|
+ lookbits++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Validate symbols as being reasonable.
|
|
|
+ * For AC tables, we make no check, but accept all byte values 0..255.
|
|
|
+- * For DC tables, we require the symbols to be in range 0..15.
|
|
|
+- * (Tighter bounds could be applied depending on the data depth and mode,
|
|
|
+- * but this is sufficient to ensure safe decoding.)
|
|
|
++ * For DC tables, we require the symbols to be in range 0..15 in lossy mode
|
|
|
++ * and 0..16 in lossless mode. (Tighter bounds could be applied depending on
|
|
|
++ * the data depth and mode, but this is sufficient to ensure safe decoding.)
|
|
|
+ */
|
|
|
+ if (isDC) {
|
|
|
+ for (i = 0; i < numsymbols; i++) {
|
|
|
+ int sym = htbl->huffval[i];
|
|
|
+- if (sym < 0 || sym > 15)
|
|
|
++ if (sym < 0 || sym > (cinfo->master->lossless ? 16 : 15))
|
|
|
+ ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+- * Out-of-line code for bit fetching (shared with jdphuff.c).
|
|
|
++ * Out-of-line code for bit fetching (shared with jdphuff.c and jdlhuff.c).
|
|
|
+ * See jdhuff.h for info about usage.
|
|
|
+ * Note: current values of get_buffer and bits_left are passed as parameters,
|
|
|
+ * but are returned in the corresponding fields of the state struct.
|
|
|
+ *
|
|
|
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
|
|
|
+ * of get_buffer to be used. (On machines with wider words, an even larger
|
|
|
+ * buffer could be used.) However, on some machines 32-bit shifts are
|
|
|
+ * quite slow and take time proportional to the number of places shifted.
|
|
|
+diff --git a/media/libjpeg/jdhuff.h b/media/libjpeg/jdhuff.h
|
|
|
+--- a/media/libjpeg/jdhuff.h
|
|
|
++++ b/media/libjpeg/jdhuff.h
|
|
|
+@@ -1,22 +1,25 @@
|
|
|
+ /*
|
|
|
+ * jdhuff.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
|
|
|
+ * Copyright (C) 2018, Matthias Räncker.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains declarations for Huffman entropy decoding routines
|
|
|
+- * that are shared between the sequential decoder (jdhuff.c) and the
|
|
|
+- * progressive decoder (jdphuff.c). No other modules need to see these.
|
|
|
++ * that are shared between the sequential decoder (jdhuff.c), the progressive
|
|
|
++ * decoder (jdphuff.c), and the lossless decoder (jdlhuff.c). No other modules
|
|
|
++ * need to see these.
|
|
|
+ */
|
|
|
+
|
|
|
+ #include "jconfigint.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Derived data constructed for each Huffman table */
|
|
|
+
|
|
|
+ #define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */
|
|
|
+diff --git a/media/libjpeg/jdinput.c b/media/libjpeg/jdinput.c
|
|
|
+--- a/media/libjpeg/jdinput.c
|
|
|
++++ b/media/libjpeg/jdinput.c
|
|
|
+@@ -1,29 +1,32 @@
|
|
|
+ /*
|
|
|
+ * jdinput.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 2010, 2016, 2018, 2022, D. R. Commander.
|
|
|
+ * Copyright (C) 2015, Google, Inc.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains input control logic for the JPEG decompressor.
|
|
|
+ * These routines are concerned with controlling the decompressor's input
|
|
|
+- * processing (marker reading and coefficient decoding). The actual input
|
|
|
+- * reading is done in jdmarker.c, jdhuff.c, and jdphuff.c.
|
|
|
++ * processing (marker reading and coefficient/difference decoding).
|
|
|
++ * The actual input reading is done in jdmarker.c, jdhuff.c, jdphuff.c,
|
|
|
++ * and jdlhuff.c.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Private state */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_input_controller pub; /* public fields */
|
|
|
+
|
|
|
+ boolean inheaders; /* TRUE until first SOS is reached */
|
|
|
+@@ -41,24 +44,30 @@ METHODDEF(int) consume_markers(j_decompr
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ initial_setup(j_decompress_ptr cinfo)
|
|
|
+ /* Called once, when first SOS marker is reached */
|
|
|
+ {
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ /* Make sure image isn't bigger than I can handle */
|
|
|
+ if ((long)cinfo->image_height > (long)JPEG_MAX_DIMENSION ||
|
|
|
+ (long)cinfo->image_width > (long)JPEG_MAX_DIMENSION)
|
|
|
+ ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION);
|
|
|
+
|
|
|
+ /* For now, precision must match compiled-in value... */
|
|
|
+- if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ if (cinfo->data_precision != 8 && cinfo->data_precision != 12 &&
|
|
|
++ cinfo->data_precision != 16)
|
|
|
++#else
|
|
|
++ if (cinfo->data_precision != 8 && cinfo->data_precision != 12)
|
|
|
++#endif
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
+
|
|
|
+ /* Check that number of components won't exceed internal array sizes */
|
|
|
+ if (cinfo->num_components > MAX_COMPONENTS)
|
|
|
+ ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
|
|
|
+ MAX_COMPONENTS);
|
|
|
+
|
|
|
+ /* Compute maximum sampling factors; check factor validity */
|
|
|
+@@ -73,46 +82,46 @@ initial_setup(j_decompress_ptr cinfo)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_SAMPLING);
|
|
|
+ cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
|
|
|
+ compptr->h_samp_factor);
|
|
|
+ cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
|
|
|
+ compptr->v_samp_factor);
|
|
|
+ }
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 80
|
|
|
+- cinfo->block_size = DCTSIZE;
|
|
|
++ cinfo->block_size = data_unit;
|
|
|
+ cinfo->natural_order = jpeg_natural_order;
|
|
|
+ cinfo->lim_Se = DCTSIZE2 - 1;
|
|
|
+ #endif
|
|
|
+
|
|
|
+- /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE.
|
|
|
+- * In the full decompressor, this will be overridden by jdmaster.c;
|
|
|
++ /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE in lossy
|
|
|
++ * mode. In the full decompressor, this will be overridden by jdmaster.c;
|
|
|
+ * but in the transcoder, jdmaster.c is not used, so we must do it here.
|
|
|
+ */
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+- cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = DCTSIZE;
|
|
|
++ cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = data_unit;
|
|
|
+ #else
|
|
|
+- cinfo->min_DCT_scaled_size = DCTSIZE;
|
|
|
++ cinfo->min_DCT_scaled_size = data_unit;
|
|
|
+ #endif
|
|
|
+
|
|
|
+ /* Compute dimensions of components */
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+- compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = DCTSIZE;
|
|
|
++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = data_unit;
|
|
|
+ #else
|
|
|
+- compptr->DCT_scaled_size = DCTSIZE;
|
|
|
++ compptr->DCT_scaled_size = data_unit;
|
|
|
+ #endif
|
|
|
+- /* Size in DCT blocks */
|
|
|
++ /* Size in data units */
|
|
|
+ compptr->width_in_blocks = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->image_width * (long)compptr->h_samp_factor,
|
|
|
+- (long)(cinfo->max_h_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_h_samp_factor * data_unit));
|
|
|
+ compptr->height_in_blocks = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->image_height * (long)compptr->v_samp_factor,
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_v_samp_factor * data_unit));
|
|
|
+ /* Set the first and last MCU columns to decompress from multi-scan images.
|
|
|
+ * By default, decompress all of the MCU columns.
|
|
|
+ */
|
|
|
+ cinfo->master->first_MCU_col[ci] = 0;
|
|
|
+ cinfo->master->last_MCU_col[ci] = compptr->width_in_blocks - 1;
|
|
|
+ /* downsampled_width and downsampled_height will also be overridden by
|
|
|
+ * jdmaster.c if we are doing full decompression. The transcoder library
|
|
|
+ * doesn't use these values, but the calling application might.
|
|
|
+@@ -128,51 +137,52 @@ initial_setup(j_decompress_ptr cinfo)
|
|
|
+ compptr->component_needed = TRUE;
|
|
|
+ /* Mark no quantization table yet saved for component */
|
|
|
+ compptr->quant_table = NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Compute number of fully interleaved MCU rows. */
|
|
|
+ cinfo->total_iMCU_rows = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->image_height,
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_v_samp_factor * data_unit));
|
|
|
+
|
|
|
+ /* Decide whether file contains multiple scans */
|
|
|
+ if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
|
|
|
+ cinfo->inputctl->has_multiple_scans = TRUE;
|
|
|
+ else
|
|
|
+ cinfo->inputctl->has_multiple_scans = FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ per_scan_setup(j_decompress_ptr cinfo)
|
|
|
+ /* Do computations that are needed before processing a JPEG scan */
|
|
|
+ /* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
|
|
|
+ {
|
|
|
+ int ci, mcublks, tmp;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
++ int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
|
|
|
+
|
|
|
+ if (cinfo->comps_in_scan == 1) {
|
|
|
+
|
|
|
+ /* Noninterleaved (single-component) scan */
|
|
|
+ compptr = cinfo->cur_comp_info[0];
|
|
|
+
|
|
|
+ /* Overall image size in MCUs */
|
|
|
+ cinfo->MCUs_per_row = compptr->width_in_blocks;
|
|
|
+ cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
|
|
|
+
|
|
|
+- /* For noninterleaved scan, always one block per MCU */
|
|
|
++ /* For noninterleaved scan, always one data unit per MCU */
|
|
|
+ compptr->MCU_width = 1;
|
|
|
+ compptr->MCU_height = 1;
|
|
|
+ compptr->MCU_blocks = 1;
|
|
|
+ compptr->MCU_sample_width = compptr->_DCT_scaled_size;
|
|
|
+ compptr->last_col_width = 1;
|
|
|
+ /* For noninterleaved scans, it is convenient to define last_row_height
|
|
|
+- * as the number of block rows present in the last iMCU row.
|
|
|
++ * as the number of data unit rows present in the last iMCU row.
|
|
|
+ */
|
|
|
+ tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
|
|
|
+ if (tmp == 0) tmp = compptr->v_samp_factor;
|
|
|
+ compptr->last_row_height = tmp;
|
|
|
+
|
|
|
+ /* Prepare array describing MCU composition */
|
|
|
+ cinfo->blocks_in_MCU = 1;
|
|
|
+ cinfo->MCU_membership[0] = 0;
|
|
|
+@@ -182,32 +192,32 @@ per_scan_setup(j_decompress_ptr cinfo)
|
|
|
+ /* Interleaved (multi-component) scan */
|
|
|
+ if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
|
|
|
+ ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
|
|
|
+ MAX_COMPS_IN_SCAN);
|
|
|
+
|
|
|
+ /* Overall image size in MCUs */
|
|
|
+ cinfo->MCUs_per_row = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->image_width,
|
|
|
+- (long)(cinfo->max_h_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_h_samp_factor * data_unit));
|
|
|
+ cinfo->MCU_rows_in_scan = (JDIMENSION)
|
|
|
+ jdiv_round_up((long)cinfo->image_height,
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ (long)(cinfo->max_v_samp_factor * data_unit));
|
|
|
+
|
|
|
+ cinfo->blocks_in_MCU = 0;
|
|
|
+
|
|
|
+ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
+ compptr = cinfo->cur_comp_info[ci];
|
|
|
+- /* Sampling factors give # of blocks of component in each MCU */
|
|
|
++ /* Sampling factors give # of data units of component in each MCU */
|
|
|
+ compptr->MCU_width = compptr->h_samp_factor;
|
|
|
+ compptr->MCU_height = compptr->v_samp_factor;
|
|
|
+ compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
|
|
|
+ compptr->MCU_sample_width = compptr->MCU_width *
|
|
|
+ compptr->_DCT_scaled_size;
|
|
|
+- /* Figure number of non-dummy blocks in last MCU column & row */
|
|
|
++ /* Figure number of non-dummy data units in last MCU column & row */
|
|
|
+ tmp = (int)(compptr->width_in_blocks % compptr->MCU_width);
|
|
|
+ if (tmp == 0) tmp = compptr->MCU_width;
|
|
|
+ compptr->last_col_width = tmp;
|
|
|
+ tmp = (int)(compptr->height_in_blocks % compptr->MCU_height);
|
|
|
+ if (tmp == 0) tmp = compptr->MCU_height;
|
|
|
+ compptr->last_row_height = tmp;
|
|
|
+ /* Prepare array describing MCU composition */
|
|
|
+ mcublks = compptr->MCU_blocks;
|
|
|
+@@ -276,44 +286,45 @@ latch_quant_tables(j_decompress_ptr cinf
|
|
|
+ * the entire decompressor (during jpeg_start_decompress).
|
|
|
+ * Subsequent calls come from consume_markers, below.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ start_input_pass(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ per_scan_setup(cinfo);
|
|
|
+- latch_quant_tables(cinfo);
|
|
|
++ if (!cinfo->master->lossless)
|
|
|
++ latch_quant_tables(cinfo);
|
|
|
+ (*cinfo->entropy->start_pass) (cinfo);
|
|
|
+ (*cinfo->coef->start_input_pass) (cinfo);
|
|
|
+ cinfo->inputctl->consume_input = cinfo->coef->consume_data;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Finish up after inputting a compressed-data scan.
|
|
|
+- * This is called by the coefficient controller after it's read all
|
|
|
+- * the expected data of the scan.
|
|
|
++ * This is called by the coefficient or difference controller after it's read
|
|
|
++ * all the expected data of the scan.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ finish_input_pass(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ cinfo->inputctl->consume_input = consume_markers;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Read JPEG markers before, between, or after compressed-data scans.
|
|
|
+ * Change state as necessary when a new scan is reached.
|
|
|
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
|
|
|
+ *
|
|
|
+ * The consume_input method pointer points either here or to the
|
|
|
+- * coefficient controller's consume_data routine, depending on whether
|
|
|
+- * we are reading a compressed data segment or inter-segment markers.
|
|
|
++ * coefficient or difference controller's consume_data routine, depending on
|
|
|
++ * whether we are reading a compressed data segment or inter-segment markers.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(int)
|
|
|
+ consume_markers(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_inputctl_ptr inputctl = (my_inputctl_ptr)cinfo->inputctl;
|
|
|
+ int val;
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jdlhuff.c b/media/libjpeg/jdlhuff.c
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jdlhuff.c
|
|
|
+@@ -0,0 +1,302 @@
|
|
|
++/*
|
|
|
++ * jdlhuff.c
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains Huffman entropy decoding routines for lossless JPEG.
|
|
|
++ *
|
|
|
++ * Much of the complexity here has to do with supporting input suspension.
|
|
|
++ * If the data source module demands suspension, we want to be able to back
|
|
|
++ * up to the start of the current MCU. To do this, we copy state variables
|
|
|
++ * into local working storage, and update them back to the permanent
|
|
|
++ * storage only upon successful completion of an MCU.
|
|
|
++ */
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jinclude.h"
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jlossls.h" /* Private declarations for lossless codec */
|
|
|
++#include "jdhuff.h" /* Declarations shared with jd*huff.c */
|
|
|
++
|
|
|
++
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ int ci, yoffset, MCU_width;
|
|
|
++} lhd_output_ptr_info;
|
|
|
++
|
|
|
++/*
|
|
|
++ * Expanded entropy decoder object for Huffman decoding in lossless mode.
|
|
|
++ */
|
|
|
++
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_entropy_decoder pub; /* public fields */
|
|
|
++
|
|
|
++ /* These fields are loaded into local variables at start of each MCU.
|
|
|
++ * In case of suspension, we exit WITHOUT updating them.
|
|
|
++ */
|
|
|
++ bitread_perm_state bitstate; /* Bit buffer at start of MCU */
|
|
|
++
|
|
|
++ /* Pointers to derived tables (these workspaces have image lifespan) */
|
|
|
++ d_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
|
|
|
++
|
|
|
++ /* Precalculated info set up by start_pass for use in decode_mcus: */
|
|
|
++
|
|
|
++ /* Pointers to derived tables to be used for each data unit within an MCU */
|
|
|
++ d_derived_tbl *cur_tbls[D_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++ /* Pointers to the proper output difference row for each group of data units
|
|
|
++ * within an MCU. For each component, there are Vi groups of Hi data units.
|
|
|
++ */
|
|
|
++ JDIFFROW output_ptr[D_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++ /* Number of output pointers in use for the current MCU. This is the sum
|
|
|
++ * of all Vi in the MCU.
|
|
|
++ */
|
|
|
++ int num_output_ptrs;
|
|
|
++
|
|
|
++ /* Information used for positioning the output pointers within the output
|
|
|
++ * difference rows.
|
|
|
++ */
|
|
|
++ lhd_output_ptr_info output_ptr_info[D_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++ /* Index of the proper output pointer for each data unit within an MCU */
|
|
|
++ int output_ptr_index[D_MAX_BLOCKS_IN_MCU];
|
|
|
++
|
|
|
++} lhuff_entropy_decoder;
|
|
|
++
|
|
|
++typedef lhuff_entropy_decoder *lhuff_entropy_ptr;
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for a Huffman-compressed scan.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_pass_lhuff_decoder(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ int ci, dctbl, sampn, ptrn, yoffset, xoffset;
|
|
|
++ jpeg_component_info *compptr;
|
|
|
++
|
|
|
++ for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
|
|
|
++ compptr = cinfo->cur_comp_info[ci];
|
|
|
++ dctbl = compptr->dc_tbl_no;
|
|
|
++ /* Make sure requested tables are present */
|
|
|
++ if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS ||
|
|
|
++ cinfo->dc_huff_tbl_ptrs[dctbl] == NULL)
|
|
|
++ ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
|
|
|
++ /* Compute derived values for Huffman tables */
|
|
|
++ /* We may do this more than once for a table, but it's not expensive */
|
|
|
++ jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl,
|
|
|
++ &entropy->derived_tbls[dctbl]);
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Precalculate decoding info for each sample in an MCU of this scan */
|
|
|
++ for (sampn = 0, ptrn = 0; sampn < cinfo->blocks_in_MCU;) {
|
|
|
++ compptr = cinfo->cur_comp_info[cinfo->MCU_membership[sampn]];
|
|
|
++ ci = compptr->component_index;
|
|
|
++ for (yoffset = 0; yoffset < compptr->MCU_height; yoffset++, ptrn++) {
|
|
|
++ /* Precalculate the setup info for each output pointer */
|
|
|
++ entropy->output_ptr_info[ptrn].ci = ci;
|
|
|
++ entropy->output_ptr_info[ptrn].yoffset = yoffset;
|
|
|
++ entropy->output_ptr_info[ptrn].MCU_width = compptr->MCU_width;
|
|
|
++ for (xoffset = 0; xoffset < compptr->MCU_width; xoffset++, sampn++) {
|
|
|
++ /* Precalculate the output pointer index for each sample */
|
|
|
++ entropy->output_ptr_index[sampn] = ptrn;
|
|
|
++ /* Precalculate which table to use for each sample */
|
|
|
++ entropy->cur_tbls[sampn] = entropy->derived_tbls[compptr->dc_tbl_no];
|
|
|
++ }
|
|
|
++ }
|
|
|
++ }
|
|
|
++ entropy->num_output_ptrs = ptrn;
|
|
|
++
|
|
|
++ /* Initialize bitread state variables */
|
|
|
++ entropy->bitstate.bits_left = 0;
|
|
|
++ entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
|
|
|
++ entropy->pub.insufficient_data = FALSE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Figure F.12: extend sign bit.
|
|
|
++ * On some machines, a shift and add will be faster than a table lookup.
|
|
|
++ */
|
|
|
++
|
|
|
++#define AVOID_TABLES
|
|
|
++#ifdef AVOID_TABLES
|
|
|
++
|
|
|
++#define NEG_1 ((unsigned int)-1)
|
|
|
++#define HUFF_EXTEND(x, s) \
|
|
|
++ ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
|
|
|
++
|
|
|
++#else
|
|
|
++
|
|
|
++#define HUFF_EXTEND(x, s) \
|
|
|
++ ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
|
|
|
++
|
|
|
++static const int extend_test[16] = { /* entry n is 2**(n-1) */
|
|
|
++ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
|
|
|
++ 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
|
|
|
++};
|
|
|
++
|
|
|
++static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
|
|
|
++ 0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
|
|
|
++ ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
|
|
|
++ ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
|
|
|
++ ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
|
|
|
++};
|
|
|
++
|
|
|
++#endif /* AVOID_TABLES */
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Check for a restart marker & resynchronize decoder.
|
|
|
++ * Returns FALSE if must suspend.
|
|
|
++ */
|
|
|
++
|
|
|
++LOCAL(boolean)
|
|
|
++process_restart(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++
|
|
|
++ /* Throw away any unused bits remaining in bit buffer; */
|
|
|
++ /* include any full bytes in next_marker's count of discarded bytes */
|
|
|
++ cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
|
|
|
++ entropy->bitstate.bits_left = 0;
|
|
|
++
|
|
|
++ /* Advance past the RSTn marker */
|
|
|
++ if (!(*cinfo->marker->read_restart_marker) (cinfo))
|
|
|
++ return FALSE;
|
|
|
++
|
|
|
++ /* Reset out-of-data flag, unless read_restart_marker left us smack up
|
|
|
++ * against a marker. In that case we will end up treating the next data
|
|
|
++ * segment as empty, and we can avoid producing bogus output pixels by
|
|
|
++ * leaving the flag set.
|
|
|
++ */
|
|
|
++ if (cinfo->unread_marker == 0)
|
|
|
++ entropy->pub.insufficient_data = FALSE;
|
|
|
++
|
|
|
++ return TRUE;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Decode and return nMCU MCUs' worth of Huffman-compressed differences.
|
|
|
++ * Each MCU is also disassembled and placed accordingly in diff_buf.
|
|
|
++ *
|
|
|
++ * MCU_col_num specifies the column of the first MCU being requested within
|
|
|
++ * the MCU row. This tells us where to position the output row pointers in
|
|
|
++ * diff_buf.
|
|
|
++ *
|
|
|
++ * Returns the number of MCUs decoded. This may be less than nMCU MCUs if
|
|
|
++ * data source requested suspension. In that case no changes have been made
|
|
|
++ * to permanent state. (Exception: some output differences may already have
|
|
|
++ * been assigned. This is harmless for this module, since we'll just
|
|
|
++ * re-assign them on the next call.)
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(JDIMENSION)
|
|
|
++decode_mcus(j_decompress_ptr cinfo, JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num, JDIMENSION MCU_col_num, JDIMENSION nMCU)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy = (lhuff_entropy_ptr)cinfo->entropy;
|
|
|
++ int sampn, ci, yoffset, MCU_width, ptrn;
|
|
|
++ JDIMENSION mcu_num;
|
|
|
++ BITREAD_STATE_VARS;
|
|
|
++
|
|
|
++ /* Set output pointer locations based on MCU_col_num */
|
|
|
++ for (ptrn = 0; ptrn < entropy->num_output_ptrs; ptrn++) {
|
|
|
++ ci = entropy->output_ptr_info[ptrn].ci;
|
|
|
++ yoffset = entropy->output_ptr_info[ptrn].yoffset;
|
|
|
++ MCU_width = entropy->output_ptr_info[ptrn].MCU_width;
|
|
|
++ entropy->output_ptr[ptrn] =
|
|
|
++ diff_buf[ci][MCU_row_num + yoffset] + (MCU_col_num * MCU_width);
|
|
|
++ }
|
|
|
++
|
|
|
++ /*
|
|
|
++ * If we've run out of data, zero out the buffers and return.
|
|
|
++ * By resetting the undifferencer, the output samples will be CENTERJSAMPLE.
|
|
|
++ *
|
|
|
++ * NB: We should find a way to do this without interacting with the
|
|
|
++ * undifferencer module directly.
|
|
|
++ */
|
|
|
++ if (entropy->pub.insufficient_data) {
|
|
|
++ for (ptrn = 0; ptrn < entropy->num_output_ptrs; ptrn++)
|
|
|
++ jzero_far((void FAR *)entropy->output_ptr[ptrn],
|
|
|
++ nMCU * entropy->output_ptr_info[ptrn].MCU_width *
|
|
|
++ sizeof(JDIFF));
|
|
|
++
|
|
|
++ (*cinfo->idct->start_pass) (cinfo);
|
|
|
++
|
|
|
++ } else {
|
|
|
++
|
|
|
++ /* Load up working state */
|
|
|
++ BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
|
|
|
++
|
|
|
++ /* Outer loop handles the number of MCUs requested */
|
|
|
++
|
|
|
++ for (mcu_num = 0; mcu_num < nMCU; mcu_num++) {
|
|
|
++
|
|
|
++ /* Inner loop handles the samples in the MCU */
|
|
|
++ for (sampn = 0; sampn < cinfo->blocks_in_MCU; sampn++) {
|
|
|
++ d_derived_tbl *dctbl = entropy->cur_tbls[sampn];
|
|
|
++ register int s, r;
|
|
|
++
|
|
|
++ /* Section H.2.2: decode the sample difference */
|
|
|
++ HUFF_DECODE(s, br_state, dctbl, return mcu_num, label1);
|
|
|
++ if (s) {
|
|
|
++ if (s == 16) /* special case: always output 32768 */
|
|
|
++ s = 32768;
|
|
|
++ else { /* normal case: fetch subsequent bits */
|
|
|
++ CHECK_BIT_BUFFER(br_state, s, return mcu_num);
|
|
|
++ r = GET_BITS(s);
|
|
|
++ s = HUFF_EXTEND(r, s);
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Output the sample difference */
|
|
|
++ *entropy->output_ptr[entropy->output_ptr_index[sampn]]++ = (JDIFF)s;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Completed MCU, so update state */
|
|
|
++ BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ return nMCU;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Module initialization routine for lossless mode Huffman entropy decoding.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++jinit_lhuff_decoder(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ lhuff_entropy_ptr entropy;
|
|
|
++ int i;
|
|
|
++
|
|
|
++ entropy = (lhuff_entropy_ptr)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ sizeof(lhuff_entropy_decoder));
|
|
|
++ cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
|
|
|
++ entropy->pub.start_pass = start_pass_lhuff_decoder;
|
|
|
++ entropy->pub.decode_mcus = decode_mcus;
|
|
|
++ entropy->pub.process_restart = process_restart;
|
|
|
++
|
|
|
++ /* Mark tables unallocated */
|
|
|
++ for (i = 0; i < NUM_HUFF_TBLS; i++) {
|
|
|
++ entropy->derived_tbls[i] = NULL;
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* D_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jdlossls.c b/media/libjpeg/jdlossls.c
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jdlossls.c
|
|
|
+@@ -0,0 +1,289 @@
|
|
|
++/*
|
|
|
++ * jdlossls.c
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1998, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains prediction, sample undifferencing, point transform, and
|
|
|
++ * sample scaling routines for the lossless JPEG decompressor.
|
|
|
++ */
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jinclude.h"
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jlossls.h"
|
|
|
++
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++
|
|
|
++/**************** Sample undifferencing (reconstruction) *****************/
|
|
|
++
|
|
|
++/*
|
|
|
++ * In order to avoid a performance penalty for checking which predictor is
|
|
|
++ * being used and which row is being processed for each call of the
|
|
|
++ * undifferencer, and to promote optimization, we have separate undifferencing
|
|
|
++ * functions for each predictor selection value.
|
|
|
++ *
|
|
|
++ * We are able to avoid duplicating source code by implementing the predictors
|
|
|
++ * and undifferencers as macros. Each of the undifferencing functions is
|
|
|
++ * simply a wrapper around an UNDIFFERENCE macro with the appropriate PREDICTOR
|
|
|
++ * macro passed as an argument.
|
|
|
++ */
|
|
|
++
|
|
|
++/* Predictor for the first column of the first row: 2^(P-Pt-1) */
|
|
|
++#define INITIAL_PREDICTORx (1 << (cinfo->data_precision - cinfo->Al - 1))
|
|
|
++
|
|
|
++/* Predictor for the first column of the remaining rows: Rb */
|
|
|
++#define INITIAL_PREDICTOR2 prev_row[0]
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * 1-Dimensional undifferencer routine.
|
|
|
++ *
|
|
|
++ * This macro implements the 1-D horizontal predictor (1). INITIAL_PREDICTOR
|
|
|
++ * is used as the special case predictor for the first column, which must be
|
|
|
++ * either INITIAL_PREDICTOR2 or INITIAL_PREDICTORx. The remaining samples
|
|
|
++ * use PREDICTOR1.
|
|
|
++ *
|
|
|
++ * The reconstructed sample is supposed to be calculated modulo 2^16, so we
|
|
|
++ * logically AND the result with 0xFFFF.
|
|
|
++ */
|
|
|
++
|
|
|
++#define UNDIFFERENCE_1D(INITIAL_PREDICTOR) \
|
|
|
++ int Ra; \
|
|
|
++ \
|
|
|
++ Ra = (*diff_buf++ + INITIAL_PREDICTOR) & 0xFFFF; \
|
|
|
++ *undiff_buf++ = Ra; \
|
|
|
++ \
|
|
|
++ while (--width) { \
|
|
|
++ Ra = (*diff_buf++ + PREDICTOR1) & 0xFFFF; \
|
|
|
++ *undiff_buf++ = Ra; \
|
|
|
++ }
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * 2-Dimensional undifferencer routine.
|
|
|
++ *
|
|
|
++ * This macro implements the 2-D horizontal predictors (#2-7). PREDICTOR2 is
|
|
|
++ * used as the special case predictor for the first column. The remaining
|
|
|
++ * samples use PREDICTOR, which is a function of Ra, Rb, and Rc.
|
|
|
++ *
|
|
|
++ * Because prev_row and undiff_buf may point to the same storage area (in an
|
|
|
++ * interleaved image with Vi=1, for example), we must take care to buffer Rb/Rc
|
|
|
++ * before writing the current reconstructed sample value into undiff_buf.
|
|
|
++ *
|
|
|
++ * The reconstructed sample is supposed to be calculated modulo 2^16, so we
|
|
|
++ * logically AND the result with 0xFFFF.
|
|
|
++ */
|
|
|
++
|
|
|
++#define UNDIFFERENCE_2D(PREDICTOR) \
|
|
|
++ int Ra, Rb, Rc; \
|
|
|
++ \
|
|
|
++ Rb = *prev_row++; \
|
|
|
++ Ra = (*diff_buf++ + PREDICTOR2) & 0xFFFF; \
|
|
|
++ *undiff_buf++ = Ra; \
|
|
|
++ \
|
|
|
++ while (--width) { \
|
|
|
++ Rc = Rb; \
|
|
|
++ Rb = *prev_row++; \
|
|
|
++ Ra = (*diff_buf++ + PREDICTOR) & 0xFFFF; \
|
|
|
++ *undiff_buf++ = Ra; \
|
|
|
++ }
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Undifferencers for the second and subsequent rows in a scan or restart
|
|
|
++ * interval. The first sample in the row is undifferenced using the vertical
|
|
|
++ * predictor (2). The rest of the samples are undifferenced using the
|
|
|
++ * predictor specified in the scan header.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference1(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_1D(INITIAL_PREDICTOR2);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference2(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_2D(PREDICTOR2);
|
|
|
++ (void)(Rc);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference3(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_2D(PREDICTOR3);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference4(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_2D(PREDICTOR4);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference5(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_2D(PREDICTOR5);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference6(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_2D(PREDICTOR6);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference7(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ UNDIFFERENCE_2D(PREDICTOR7);
|
|
|
++ (void)(Rc);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Undifferencer for the first row in a scan or restart interval. The first
|
|
|
++ * sample in the row is undifferenced using the special predictor constant
|
|
|
++ * x=2^(P-Pt-1). The rest of the samples are undifferenced using the
|
|
|
++ * 1-D horizontal predictor (1).
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++jpeg_undifference_first_row(j_decompress_ptr cinfo, int comp_index,
|
|
|
++ JDIFFROW diff_buf, JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ lossless_decomp_ptr losslessd = (lossless_decomp_ptr)cinfo->idct;
|
|
|
++
|
|
|
++ UNDIFFERENCE_1D(INITIAL_PREDICTORx);
|
|
|
++
|
|
|
++ /*
|
|
|
++ * Now that we have undifferenced the first row, we want to use the
|
|
|
++ * undifferencer that corresponds to the predictor specified in the
|
|
|
++ * scan header.
|
|
|
++ */
|
|
|
++ switch (cinfo->Ss) {
|
|
|
++ case 1:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference1;
|
|
|
++ break;
|
|
|
++ case 2:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference2;
|
|
|
++ break;
|
|
|
++ case 3:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference3;
|
|
|
++ break;
|
|
|
++ case 4:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference4;
|
|
|
++ break;
|
|
|
++ case 5:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference5;
|
|
|
++ break;
|
|
|
++ case 6:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference6;
|
|
|
++ break;
|
|
|
++ case 7:
|
|
|
++ losslessd->predict_undifference[comp_index] = jpeg_undifference7;
|
|
|
++ break;
|
|
|
++ }
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*********************** Sample upscaling by 2^Pt ************************/
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++simple_upscale(j_decompress_ptr cinfo,
|
|
|
++ JDIFFROW diff_buf, _JSAMPROW output_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ do {
|
|
|
++ *output_buf++ = (_JSAMPLE)(*diff_buf++ << cinfo->Al);
|
|
|
++ } while (--width);
|
|
|
++}
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++noscale(j_decompress_ptr cinfo,
|
|
|
++ JDIFFROW diff_buf, _JSAMPROW output_buf, JDIMENSION width)
|
|
|
++{
|
|
|
++ do {
|
|
|
++ *output_buf++ = (_JSAMPLE)(*diff_buf++);
|
|
|
++ } while (--width);
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize for an input processing pass.
|
|
|
++ */
|
|
|
++
|
|
|
++METHODDEF(void)
|
|
|
++start_pass_lossless(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ lossless_decomp_ptr losslessd = (lossless_decomp_ptr)cinfo->idct;
|
|
|
++ int ci;
|
|
|
++
|
|
|
++ /* Check that the scan parameters Ss, Se, Ah, Al are OK for lossless JPEG.
|
|
|
++ *
|
|
|
++ * Ss is the predictor selection value (psv). Legal values for sequential
|
|
|
++ * lossless JPEG are: 1 <= psv <= 7.
|
|
|
++ *
|
|
|
++ * Se and Ah are not used and should be zero.
|
|
|
++ *
|
|
|
++ * Al specifies the point transform (Pt).
|
|
|
++ * Legal values are: 0 <= Pt <= (data precision - 1).
|
|
|
++ */
|
|
|
++ if (cinfo->Ss < 1 || cinfo->Ss > 7 ||
|
|
|
++ cinfo->Se != 0 || cinfo->Ah != 0 ||
|
|
|
++ cinfo->Al < 0 || cinfo->Al >= cinfo->data_precision)
|
|
|
++ ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
|
|
|
++ cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
|
|
|
++
|
|
|
++ /* Set undifference functions to first row function */
|
|
|
++ for (ci = 0; ci < cinfo->num_components; ci++)
|
|
|
++ losslessd->predict_undifference[ci] = jpeg_undifference_first_row;
|
|
|
++
|
|
|
++ /* Set scaler function based on Pt */
|
|
|
++ if (cinfo->Al)
|
|
|
++ losslessd->scaler_scale = simple_upscale;
|
|
|
++ else
|
|
|
++ losslessd->scaler_scale = noscale;
|
|
|
++}
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Initialize the lossless decompressor.
|
|
|
++ */
|
|
|
++
|
|
|
++GLOBAL(void)
|
|
|
++_jinit_lossless_decompressor(j_decompress_ptr cinfo)
|
|
|
++{
|
|
|
++ lossless_decomp_ptr losslessd;
|
|
|
++
|
|
|
++ /* Create subobject in permanent pool */
|
|
|
++ losslessd = (lossless_decomp_ptr)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
|
|
|
++ sizeof(jpeg_lossless_decompressor));
|
|
|
++ cinfo->idct = (struct jpeg_inverse_dct *)losslessd;
|
|
|
++ losslessd->pub.start_pass = start_pass_lossless;
|
|
|
++}
|
|
|
++
|
|
|
++#endif /* D_LOSSLESS_SUPPORTED */
|
|
|
+diff --git a/media/libjpeg/jdmainct.c b/media/libjpeg/jdmainct.c
|
|
|
+--- a/media/libjpeg/jdmainct.c
|
|
|
++++ b/media/libjpeg/jdmainct.c
|
|
|
+@@ -1,61 +1,64 @@
|
|
|
+ /*
|
|
|
+ * jdmainct.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2010, 2016, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2016, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the main buffer controller for decompression.
|
|
|
+ * The main buffer lies between the JPEG decompressor proper and the
|
|
|
+ * post-processor; it holds downsampled data in the JPEG colorspace.
|
|
|
+ *
|
|
|
+ * Note that this code is bypassed in raw-data mode, since the application
|
|
|
+ * supplies the equivalent of the main buffer in that case.
|
|
|
+ */
|
|
|
+
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jdmainct.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /*
|
|
|
+ * In the current system design, the main buffer need never be a full-image
|
|
|
+- * buffer; any full-height buffers will be found inside the coefficient or
|
|
|
+- * postprocessing controllers. Nonetheless, the main controller is not
|
|
|
+- * trivial. Its responsibility is to provide context rows for upsampling/
|
|
|
+- * rescaling, and doing this in an efficient fashion is a bit tricky.
|
|
|
++ * buffer; any full-height buffers will be found inside the coefficient,
|
|
|
++ * difference, or postprocessing controllers. Nonetheless, the main controller
|
|
|
++ * is not trivial. Its responsibility is to provide context rows for
|
|
|
++ * upsampling/rescaling, and doing this in an efficient fashion is a bit
|
|
|
++ * tricky.
|
|
|
+ *
|
|
|
+ * Postprocessor input data is counted in "row groups". A row group
|
|
|
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
|
|
|
+ * sample rows of each component. (We require DCT_scaled_size values to be
|
|
|
+ * chosen such that these numbers are integers. In practice DCT_scaled_size
|
|
|
+ * values will likely be powers of two, so we actually have the stronger
|
|
|
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
|
|
|
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
|
|
|
+ * row group (times any additional scale factor that the upsampler is
|
|
|
+ * applying).
|
|
|
+ *
|
|
|
+- * The coefficient controller will deliver data to us one iMCU row at a time;
|
|
|
+- * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
|
|
|
+- * exactly min_DCT_scaled_size row groups. (This amount of data corresponds
|
|
|
+- * to one row of MCUs when the image is fully interleaved.) Note that the
|
|
|
+- * number of sample rows varies across components, but the number of row
|
|
|
+- * groups does not. Some garbage sample rows may be included in the last iMCU
|
|
|
+- * row at the bottom of the image.
|
|
|
++ * The coefficient or difference controller will deliver data to us one iMCU
|
|
|
++ * row at a time; each iMCU row contains v_samp_factor * DCT_scaled_size sample
|
|
|
++ * rows, or exactly min_DCT_scaled_size row groups. (This amount of data
|
|
|
++ * corresponds to one row of MCUs when the image is fully interleaved.) Note
|
|
|
++ * that the number of sample rows varies across components, but the number of
|
|
|
++ * row groups does not. Some garbage sample rows may be included in the last
|
|
|
++ * iMCU row at the bottom of the image.
|
|
|
+ *
|
|
|
+ * Depending on the vertical scaling algorithm used, the upsampler may need
|
|
|
+ * access to the sample row(s) above and below its current input row group.
|
|
|
+ * The upsampler is required to set need_context_rows TRUE at global selection
|
|
|
+ * time if so. When need_context_rows is FALSE, this controller can simply
|
|
|
+- * obtain one iMCU row at a time from the coefficient controller and dole it
|
|
|
+- * out as row groups to the postprocessor.
|
|
|
++ * obtain one iMCU row at a time from the coefficient or difference controller
|
|
|
++ * and dole it out as row groups to the postprocessor.
|
|
|
+ *
|
|
|
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
|
|
|
+ * passed to postprocessing contains at least one row group's worth of samples
|
|
|
+ * above and below the row group(s) being processed. Note that the context
|
|
|
+ * rows "above" the first passed row group appear at negative row offsets in
|
|
|
+ * the passed buffer. At the top and bottom of the image, the required
|
|
|
+ * context rows are manufactured by duplicating the first or last real sample
|
|
|
+ * row; this avoids having special cases in the upsampling inner loops.
|
|
|
+@@ -108,61 +111,62 @@
|
|
|
+ * rows when min_DCT_scaled_size is 1. That combination seems unlikely to
|
|
|
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
|
|
|
+ * want it quick and dirty, so a context-free upsampler is sufficient.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+ METHODDEF(void) process_data_simple_main(j_decompress_ptr cinfo,
|
|
|
+- JSAMPARRAY output_buf,
|
|
|
++ _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
+ METHODDEF(void) process_data_context_main(j_decompress_ptr cinfo,
|
|
|
+- JSAMPARRAY output_buf,
|
|
|
++ _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
+ #ifdef QUANT_2PASS_SUPPORTED
|
|
|
+ METHODDEF(void) process_data_crank_post(j_decompress_ptr cinfo,
|
|
|
+- JSAMPARRAY output_buf,
|
|
|
++ _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ alloc_funny_pointers(j_decompress_ptr cinfo)
|
|
|
+ /* Allocate space for the funny pointer lists.
|
|
|
+ * This is done only once, not once per pass.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ int ci, rgroup;
|
|
|
+ int M = cinfo->_min_DCT_scaled_size;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- JSAMPARRAY xbuf;
|
|
|
++ _JSAMPARRAY xbuf;
|
|
|
+
|
|
|
+ /* Get top-level space for component array pointers.
|
|
|
+ * We alloc both arrays with one call to save a few cycles.
|
|
|
+ */
|
|
|
+- main_ptr->xbuffer[0] = (JSAMPIMAGE)
|
|
|
++ main_ptr->xbuffer[0] = (_JSAMPIMAGE)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- cinfo->num_components * 2 * sizeof(JSAMPARRAY));
|
|
|
++ cinfo->num_components * 2 *
|
|
|
++ sizeof(_JSAMPARRAY));
|
|
|
+ main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
|
|
+ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
|
|
+ /* Get space for pointer lists --- M+4 row groups in each list.
|
|
|
+ * We alloc both pointer lists with one call to save a few cycles.
|
|
|
+ */
|
|
|
+- xbuf = (JSAMPARRAY)
|
|
|
++ xbuf = (_JSAMPARRAY)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- 2 * (rgroup * (M + 4)) * sizeof(JSAMPROW));
|
|
|
++ 2 * (rgroup * (M + 4)) * sizeof(_JSAMPROW));
|
|
|
+ xbuf += rgroup; /* want one row group at negative offsets */
|
|
|
+ main_ptr->xbuffer[0][ci] = xbuf;
|
|
|
+ xbuf += rgroup * (M + 4);
|
|
|
+ main_ptr->xbuffer[1][ci] = xbuf;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+@@ -174,17 +178,17 @@ make_funny_pointers(j_decompress_ptr cin
|
|
|
+ * This routine just fills in the curiously ordered lists.
|
|
|
+ * This will be repeated at the beginning of each pass.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ int ci, i, rgroup;
|
|
|
+ int M = cinfo->_min_DCT_scaled_size;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- JSAMPARRAY buf, xbuf0, xbuf1;
|
|
|
++ _JSAMPARRAY buf, xbuf0, xbuf1;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
|
|
+ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
|
|
+ xbuf0 = main_ptr->xbuffer[0][ci];
|
|
|
+ xbuf1 = main_ptr->xbuffer[1][ci];
|
|
|
+ /* First copy the workspace pointers as-is */
|
|
|
+@@ -214,17 +218,17 @@ set_bottom_pointers(j_decompress_ptr cin
|
|
|
+ /* Change the pointer lists to duplicate the last sample row at the bottom
|
|
|
+ * of the image. whichptr indicates which xbuffer holds the final iMCU row.
|
|
|
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ int ci, i, rgroup, iMCUheight, rows_left;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- JSAMPARRAY xbuf;
|
|
|
++ _JSAMPARRAY xbuf;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ /* Count sample rows in one iMCU row and in one row group */
|
|
|
+ iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size;
|
|
|
+ rgroup = iMCUheight / cinfo->_min_DCT_scaled_size;
|
|
|
+ /* Count nondummy sample rows remaining for this component */
|
|
|
+ rows_left = (int)(compptr->downsampled_height % (JDIMENSION)iMCUheight);
|
|
|
+@@ -253,113 +257,113 @@ set_bottom_pointers(j_decompress_ptr cin
|
|
|
+ METHODDEF(void)
|
|
|
+ start_pass_main(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+
|
|
|
+ switch (pass_mode) {
|
|
|
+ case JBUF_PASS_THRU:
|
|
|
+ if (cinfo->upsample->need_context_rows) {
|
|
|
+- main_ptr->pub.process_data = process_data_context_main;
|
|
|
++ main_ptr->pub._process_data = process_data_context_main;
|
|
|
+ make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
|
|
|
+ main_ptr->whichptr = 0; /* Read first iMCU row into xbuffer[0] */
|
|
|
+ main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
|
|
|
+ main_ptr->iMCU_row_ctr = 0;
|
|
|
+ } else {
|
|
|
+ /* Simple case with no context needed */
|
|
|
+- main_ptr->pub.process_data = process_data_simple_main;
|
|
|
++ main_ptr->pub._process_data = process_data_simple_main;
|
|
|
+ }
|
|
|
+ main_ptr->buffer_full = FALSE; /* Mark buffer empty */
|
|
|
+ main_ptr->rowgroup_ctr = 0;
|
|
|
+ break;
|
|
|
+ #ifdef QUANT_2PASS_SUPPORTED
|
|
|
+ case JBUF_CRANK_DEST:
|
|
|
+ /* For last pass of 2-pass quantization, just crank the postprocessor */
|
|
|
+- main_ptr->pub.process_data = process_data_crank_post;
|
|
|
++ main_ptr->pub._process_data = process_data_crank_post;
|
|
|
+ break;
|
|
|
+ #endif
|
|
|
+ default:
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data.
|
|
|
+ * This handles the simple case where no context is required.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-process_data_simple_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
|
|
|
++process_data_simple_main(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ JDIMENSION rowgroups_avail;
|
|
|
+
|
|
|
+ /* Read input data if we haven't filled the main buffer yet */
|
|
|
+ if (!main_ptr->buffer_full) {
|
|
|
+- if (!(*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer))
|
|
|
++ if (!(*cinfo->coef->_decompress_data) (cinfo, main_ptr->buffer))
|
|
|
+ return; /* suspension forced, can do nothing more */
|
|
|
+ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
|
|
|
+ }
|
|
|
+
|
|
|
+ /* There are always min_DCT_scaled_size row groups in an iMCU row. */
|
|
|
+ rowgroups_avail = (JDIMENSION)cinfo->_min_DCT_scaled_size;
|
|
|
+ /* Note: at the bottom of the image, we may pass extra garbage row groups
|
|
|
+ * to the postprocessor. The postprocessor has to check for bottom
|
|
|
+ * of image anyway (at row resolution), so no point in us doing it too.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Feed the postprocessor */
|
|
|
+- (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer,
|
|
|
+- &main_ptr->rowgroup_ctr, rowgroups_avail,
|
|
|
+- output_buf, out_row_ctr, out_rows_avail);
|
|
|
++ (*cinfo->post->_post_process_data) (cinfo, main_ptr->buffer,
|
|
|
++ &main_ptr->rowgroup_ctr, rowgroups_avail,
|
|
|
++ output_buf, out_row_ctr, out_rows_avail);
|
|
|
+
|
|
|
+ /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
|
|
|
+ if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
|
|
|
+ main_ptr->buffer_full = FALSE;
|
|
|
+ main_ptr->rowgroup_ctr = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data.
|
|
|
+ * This handles the case where context rows must be provided.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-process_data_context_main(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
|
|
|
++process_data_context_main(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+
|
|
|
+ /* Read input data if we haven't filled the main buffer yet */
|
|
|
+ if (!main_ptr->buffer_full) {
|
|
|
+- if (!(*cinfo->coef->decompress_data) (cinfo,
|
|
|
+- main_ptr->xbuffer[main_ptr->whichptr]))
|
|
|
++ if (!(*cinfo->coef->_decompress_data) (cinfo,
|
|
|
++ main_ptr->xbuffer[main_ptr->whichptr]))
|
|
|
+ return; /* suspension forced, can do nothing more */
|
|
|
+ main_ptr->buffer_full = TRUE; /* OK, we have an iMCU row to work with */
|
|
|
+ main_ptr->iMCU_row_ctr++; /* count rows received */
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Postprocessor typically will not swallow all the input data it is handed
|
|
|
+ * in one call (due to filling the output buffer first). Must be prepared
|
|
|
+ * to exit and restart. This switch lets us keep track of how far we got.
|
|
|
+ * Note that each case falls through to the next on successful completion.
|
|
|
+ */
|
|
|
+ switch (main_ptr->context_state) {
|
|
|
+ case CTX_POSTPONED_ROW:
|
|
|
+ /* Call postprocessor using previously set pointers for postponed row */
|
|
|
+- (*cinfo->post->post_process_data) (cinfo,
|
|
|
+- main_ptr->xbuffer[main_ptr->whichptr],
|
|
|
+- &main_ptr->rowgroup_ctr,
|
|
|
+- main_ptr->rowgroups_avail, output_buf,
|
|
|
+- out_row_ctr, out_rows_avail);
|
|
|
++ (*cinfo->post->_post_process_data) (cinfo,
|
|
|
++ main_ptr->xbuffer[main_ptr->whichptr],
|
|
|
++ &main_ptr->rowgroup_ctr,
|
|
|
++ main_ptr->rowgroups_avail, output_buf,
|
|
|
++ out_row_ctr, out_rows_avail);
|
|
|
+ if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
|
|
|
+ return; /* Need to suspend */
|
|
|
+ main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
|
|
|
+ if (*out_row_ctr >= out_rows_avail)
|
|
|
+ return; /* Postprocessor exactly filled output buf */
|
|
|
+ FALLTHROUGH /*FALLTHROUGH*/
|
|
|
+ case CTX_PREPARE_FOR_IMCU:
|
|
|
+ /* Prepare to process first M-1 row groups of this iMCU row */
|
|
|
+@@ -369,21 +373,21 @@ process_data_context_main(j_decompress_p
|
|
|
+ * the last sample row, and adjust rowgroups_avail to ignore padding rows.
|
|
|
+ */
|
|
|
+ if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows)
|
|
|
+ set_bottom_pointers(cinfo);
|
|
|
+ main_ptr->context_state = CTX_PROCESS_IMCU;
|
|
|
+ FALLTHROUGH /*FALLTHROUGH*/
|
|
|
+ case CTX_PROCESS_IMCU:
|
|
|
+ /* Call postprocessor using previously set pointers */
|
|
|
+- (*cinfo->post->post_process_data) (cinfo,
|
|
|
+- main_ptr->xbuffer[main_ptr->whichptr],
|
|
|
+- &main_ptr->rowgroup_ctr,
|
|
|
+- main_ptr->rowgroups_avail, output_buf,
|
|
|
+- out_row_ctr, out_rows_avail);
|
|
|
++ (*cinfo->post->_post_process_data) (cinfo,
|
|
|
++ main_ptr->xbuffer[main_ptr->whichptr],
|
|
|
++ &main_ptr->rowgroup_ctr,
|
|
|
++ main_ptr->rowgroups_avail, output_buf,
|
|
|
++ out_row_ctr, out_rows_avail);
|
|
|
+ if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
|
|
|
+ return; /* Need to suspend */
|
|
|
+ /* After the first iMCU, change wraparound pointers to normal state */
|
|
|
+ if (main_ptr->iMCU_row_ctr == 1)
|
|
|
+ set_wraparound_pointers(cinfo);
|
|
|
+ /* Prepare to load new iMCU row using other xbuffer list */
|
|
|
+ main_ptr->whichptr ^= 1; /* 0=>1 or 1=>0 */
|
|
|
+ main_ptr->buffer_full = FALSE;
|
|
|
+@@ -400,38 +404,41 @@ process_data_context_main(j_decompress_p
|
|
|
+ * Process some data.
|
|
|
+ * Final pass of two-pass quantization: just call the postprocessor.
|
|
|
+ * Source data will be the postprocessor controller's internal buffer.
|
|
|
+ */
|
|
|
+
|
|
|
+ #ifdef QUANT_2PASS_SUPPORTED
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-process_data_crank_post(j_decompress_ptr cinfo, JSAMPARRAY output_buf,
|
|
|
++process_data_crank_post(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+- (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE)NULL,
|
|
|
+- (JDIMENSION *)NULL, (JDIMENSION)0,
|
|
|
+- output_buf, out_row_ctr, out_rows_avail);
|
|
|
++ (*cinfo->post->_post_process_data) (cinfo, (_JSAMPIMAGE)NULL,
|
|
|
++ (JDIMENSION *)NULL, (JDIMENSION)0,
|
|
|
++ output_buf, out_row_ctr, out_rows_avail);
|
|
|
+ }
|
|
|
+
|
|
|
+ #endif /* QUANT_2PASS_SUPPORTED */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize main buffer controller.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
++_jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr;
|
|
|
+ int ci, rgroup, ngroups;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ main_ptr = (my_main_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_main_controller));
|
|
|
+ cinfo->main = (struct jpeg_d_main_controller *)main_ptr;
|
|
|
+ main_ptr->pub.start_pass = start_pass_main;
|
|
|
+
|
|
|
+ if (need_full_buffer) /* shouldn't happen */
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+@@ -447,14 +454,16 @@ jinit_d_main_controller(j_decompress_ptr
|
|
|
+ } else {
|
|
|
+ ngroups = cinfo->_min_DCT_scaled_size;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
|
|
+ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
|
|
+- main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
|
|
|
++ main_ptr->buffer[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ compptr->width_in_blocks * compptr->_DCT_scaled_size,
|
|
|
+ (JDIMENSION)(rgroup * ngroups));
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jdmainct.h b/media/libjpeg/jdmainct.h
|
|
|
+--- a/media/libjpeg/jdmainct.h
|
|
|
++++ b/media/libjpeg/jdmainct.h
|
|
|
+@@ -1,37 +1,42 @@
|
|
|
+ /*
|
|
|
+ * jdmainct.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Private buffer controller object */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_d_main_controller pub; /* public fields */
|
|
|
+
|
|
|
+ /* Pointer to allocated workspace (M or M+2 row groups). */
|
|
|
+- JSAMPARRAY buffer[MAX_COMPONENTS];
|
|
|
++ _JSAMPARRAY buffer[MAX_COMPONENTS];
|
|
|
+
|
|
|
+ boolean buffer_full; /* Have we gotten an iMCU row from decoder? */
|
|
|
+ JDIMENSION rowgroup_ctr; /* counts row groups output to postprocessor */
|
|
|
+
|
|
|
+ /* Remaining fields are only used in the context case. */
|
|
|
+
|
|
|
+ /* These are the master pointers to the funny-order pointer lists. */
|
|
|
+- JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */
|
|
|
++ _JSAMPIMAGE xbuffer[2]; /* pointers to weird pointer lists */
|
|
|
+
|
|
|
+ int whichptr; /* indicates which pointer set is now in use */
|
|
|
+ int context_state; /* process_data state machine status */
|
|
|
+ JDIMENSION rowgroups_avail; /* row groups available to postprocessor */
|
|
|
+ JDIMENSION iMCU_row_ctr; /* counts iMCU rows to detect image top/bot */
|
|
|
+ } my_main_controller;
|
|
|
+
|
|
|
+ typedef my_main_controller *my_main_ptr;
|
|
|
+@@ -48,24 +53,26 @@ set_wraparound_pointers(j_decompress_ptr
|
|
|
+ /* Set up the "wraparound" pointers at top and bottom of the pointer lists.
|
|
|
+ * This changes the pointer list state from top-of-image to the normal state.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
|
|
|
+ int ci, i, rgroup;
|
|
|
+ int M = cinfo->_min_DCT_scaled_size;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+- JSAMPARRAY xbuf0, xbuf1;
|
|
|
++ _JSAMPARRAY xbuf0, xbuf1;
|
|
|
+
|
|
|
+ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+ ci++, compptr++) {
|
|
|
+ rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
|
|
|
+ cinfo->_min_DCT_scaled_size; /* height of a row group of component */
|
|
|
+ xbuf0 = main_ptr->xbuffer[0][ci];
|
|
|
+ xbuf1 = main_ptr->xbuffer[1][ci];
|
|
|
+ for (i = 0; i < rgroup; i++) {
|
|
|
+ xbuf0[i - rgroup] = xbuf0[rgroup * (M + 1) + i];
|
|
|
+ xbuf1[i - rgroup] = xbuf1[rgroup * (M + 1) + i];
|
|
|
+ xbuf0[rgroup * (M + 2) + i] = xbuf0[i];
|
|
|
+ xbuf1[rgroup * (M + 2) + i] = xbuf1[i];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jdmarker.c b/media/libjpeg/jdmarker.c
|
|
|
+--- a/media/libjpeg/jdmarker.c
|
|
|
++++ b/media/libjpeg/jdmarker.c
|
|
|
+@@ -1,13 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jdmarker.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1998, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 2012, 2015, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains routines to decode JPEG datastream markers.
|
|
|
+ * Most of the complexity arises from our desire to support input
|
|
|
+ * suspension: if not all of the data for a marker is available,
|
|
|
+@@ -232,25 +234,27 @@ get_soi(j_decompress_ptr cinfo)
|
|
|
+
|
|
|
+ cinfo->marker->saw_SOI = TRUE;
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(boolean)
|
|
|
+-get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
|
|
|
++get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_lossless,
|
|
|
++ boolean is_arith)
|
|
|
+ /* Process a SOFn marker */
|
|
|
+ {
|
|
|
+ JLONG length;
|
|
|
+ int c, ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ INPUT_VARS(cinfo);
|
|
|
+
|
|
|
+ cinfo->progressive_mode = is_prog;
|
|
|
++ cinfo->master->lossless = is_lossless;
|
|
|
+ cinfo->arith_code = is_arith;
|
|
|
+
|
|
|
+ INPUT_2BYTES(cinfo, length, return FALSE);
|
|
|
+
|
|
|
+ INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
|
|
|
+ INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
|
|
|
+ INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
|
|
|
+ INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
|
|
|
+@@ -985,42 +989,50 @@ read_markers(j_decompress_ptr cinfo)
|
|
|
+ switch (cinfo->unread_marker) {
|
|
|
+ case M_SOI:
|
|
|
+ if (!get_soi(cinfo))
|
|
|
+ return JPEG_SUSPENDED;
|
|
|
+ break;
|
|
|
+
|
|
|
+ case M_SOF0: /* Baseline */
|
|
|
+ case M_SOF1: /* Extended sequential, Huffman */
|
|
|
+- if (!get_sof(cinfo, FALSE, FALSE))
|
|
|
++ if (!get_sof(cinfo, FALSE, FALSE, FALSE))
|
|
|
+ return JPEG_SUSPENDED;
|
|
|
+ break;
|
|
|
+
|
|
|
+ case M_SOF2: /* Progressive, Huffman */
|
|
|
+- if (!get_sof(cinfo, TRUE, FALSE))
|
|
|
++ if (!get_sof(cinfo, TRUE, FALSE, FALSE))
|
|
|
++ return JPEG_SUSPENDED;
|
|
|
++ break;
|
|
|
++
|
|
|
++ case M_SOF3: /* Lossless, Huffman */
|
|
|
++ if (!get_sof(cinfo, FALSE, TRUE, FALSE))
|
|
|
+ return JPEG_SUSPENDED;
|
|
|
+ break;
|
|
|
+
|
|
|
+ case M_SOF9: /* Extended sequential, arithmetic */
|
|
|
+- if (!get_sof(cinfo, FALSE, TRUE))
|
|
|
++ if (!get_sof(cinfo, FALSE, FALSE, TRUE))
|
|
|
+ return JPEG_SUSPENDED;
|
|
|
+ break;
|
|
|
+
|
|
|
+ case M_SOF10: /* Progressive, arithmetic */
|
|
|
+- if (!get_sof(cinfo, TRUE, TRUE))
|
|
|
++ if (!get_sof(cinfo, TRUE, FALSE, TRUE))
|
|
|
++ return JPEG_SUSPENDED;
|
|
|
++ break;
|
|
|
++
|
|
|
++ case M_SOF11: /* Lossless, arithmetic */
|
|
|
++ if (!get_sof(cinfo, FALSE, TRUE, TRUE))
|
|
|
+ return JPEG_SUSPENDED;
|
|
|
+ break;
|
|
|
+
|
|
|
+ /* Currently unsupported SOFn types */
|
|
|
+- case M_SOF3: /* Lossless, Huffman */
|
|
|
+ case M_SOF5: /* Differential sequential, Huffman */
|
|
|
+ case M_SOF6: /* Differential progressive, Huffman */
|
|
|
+ case M_SOF7: /* Differential lossless, Huffman */
|
|
|
+ case M_JPG: /* Reserved for JPEG extensions */
|
|
|
+- case M_SOF11: /* Lossless, arithmetic */
|
|
|
+ case M_SOF13: /* Differential sequential, arithmetic */
|
|
|
+ case M_SOF14: /* Differential progressive, arithmetic */
|
|
|
+ case M_SOF15: /* Differential lossless, arithmetic */
|
|
|
+ ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case M_SOS:
|
|
|
+ if (!get_sos(cinfo))
|
|
|
+diff --git a/media/libjpeg/jdmaster.c b/media/libjpeg/jdmaster.c
|
|
|
+--- a/media/libjpeg/jdmaster.c
|
|
|
++++ b/media/libjpeg/jdmaster.c
|
|
|
+@@ -1,43 +1,48 @@
|
|
|
+ /*
|
|
|
+ * jdmaster.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * Modified 2002-2009 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009-2011, 2016, 2019, 2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2009-2011, 2016, 2019, 2022-2023, D. R. Commander.
|
|
|
+ * Copyright (C) 2013, Linaro Limited.
|
|
|
+ * Copyright (C) 2015, Google, Inc.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains master control logic for the JPEG decompressor.
|
|
|
+ * These routines are concerned with selecting the modules to be executed
|
|
|
+ * and with determining the number of passes and the work to be done in each
|
|
|
+ * pass.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+ #include "jdmaster.h"
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Determine whether merged upsample/color conversion should be used.
|
|
|
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(boolean)
|
|
|
+ use_merged_upsample(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ #ifdef UPSAMPLE_MERGING_SUPPORTED
|
|
|
++ /* Colorspace conversion is not supported with lossless JPEG images */
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ return FALSE;
|
|
|
+ /* Merging is the equivalent of plain box-filter upsampling */
|
|
|
+ if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
|
|
|
+ return FALSE;
|
|
|
+ /* jdmerge.c only supports YCC=>RGB and YCC=>RGB565 color conversion */
|
|
|
+ if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
|
|
|
+ (cinfo->out_color_space != JCS_RGB &&
|
|
|
+ cinfo->out_color_space != JCS_RGB565 &&
|
|
|
+ cinfo->out_color_space != JCS_EXT_RGB &&
|
|
|
+@@ -92,164 +97,164 @@ jpeg_core_output_dimensions(j_decompress
|
|
|
+ /* Do computations that are needed before master selection phase.
|
|
|
+ * This function is used for transcoding and full decompression.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ #ifdef IDCT_SCALING_SUPPORTED
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+
|
|
|
+- /* Compute actual output image dimensions and DCT scaling choices. */
|
|
|
+- if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
|
|
|
+- /* Provide 1/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 1;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 1;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
|
|
|
+- /* Provide 2/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 2L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 2L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 2;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 2;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
|
|
|
+- /* Provide 3/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 3L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 3L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 3;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 3;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
|
|
|
+- /* Provide 4/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 4L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 4L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 4;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 4;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
|
|
|
+- /* Provide 5/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 5L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 5L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 5;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 5;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
|
|
|
+- /* Provide 6/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 6L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 6L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 6;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 6;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
|
|
|
+- /* Provide 7/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 7L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 7L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 7;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 7;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
|
|
|
+- /* Provide 8/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 8L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 8L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 8;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 8;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
|
|
|
+- /* Provide 9/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 9L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 9L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 9;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 9;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
|
|
|
+- /* Provide 10/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 10L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 10L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 10;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 10;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
|
|
|
+- /* Provide 11/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 11L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 11L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 11;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 11;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
|
|
|
+- /* Provide 12/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 12L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 12L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 12;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 12;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
|
|
|
+- /* Provide 13/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 13L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 13L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 13;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 13;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
|
|
|
+- /* Provide 14/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 14L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 14L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 14;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 14;
|
|
|
+- } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
|
|
|
+- /* Provide 15/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 15L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 15L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 15;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 15;
|
|
|
+- } else {
|
|
|
+- /* Provide 16/block_size scaling */
|
|
|
+- cinfo->output_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width * 16L, (long)DCTSIZE);
|
|
|
+- cinfo->output_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height * 16L, (long)DCTSIZE);
|
|
|
+- cinfo->_min_DCT_h_scaled_size = 16;
|
|
|
+- cinfo->_min_DCT_v_scaled_size = 16;
|
|
|
++ if (!cinfo->master->lossless) {
|
|
|
++ /* Compute actual output image dimensions and DCT scaling choices. */
|
|
|
++ if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
|
|
|
++ /* Provide 1/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 1;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 1;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
|
|
|
++ /* Provide 2/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 2L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 2L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 2;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 2;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
|
|
|
++ /* Provide 3/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 3L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 3L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 3;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 3;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
|
|
|
++ /* Provide 4/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 4L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 4L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 4;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 4;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
|
|
|
++ /* Provide 5/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 5L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 5L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 5;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 5;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
|
|
|
++ /* Provide 6/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 6L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 6L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 6;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 6;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
|
|
|
++ /* Provide 7/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 7L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 7L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 7;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 7;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
|
|
|
++ /* Provide 8/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 8L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 8L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 8;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 8;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
|
|
|
++ /* Provide 9/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 9L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 9L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 9;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 9;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
|
|
|
++ /* Provide 10/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 10L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 10L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 10;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 10;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
|
|
|
++ /* Provide 11/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 11L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 11L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 11;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 11;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
|
|
|
++ /* Provide 12/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 12L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 12L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 12;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 12;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
|
|
|
++ /* Provide 13/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 13L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 13L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 13;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 13;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
|
|
|
++ /* Provide 14/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 14L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 14L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 14;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 14;
|
|
|
++ } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
|
|
|
++ /* Provide 15/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 15L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 15L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 15;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 15;
|
|
|
++ } else {
|
|
|
++ /* Provide 16/block_size scaling */
|
|
|
++ cinfo->output_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width * 16L, (long)DCTSIZE);
|
|
|
++ cinfo->output_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height * 16L, (long)DCTSIZE);
|
|
|
++ cinfo->_min_DCT_h_scaled_size = 16;
|
|
|
++ cinfo->_min_DCT_v_scaled_size = 16;
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Recompute dimensions of components */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
|
|
|
++ compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
|
|
|
++ }
|
|
|
++ } else
|
|
|
++#endif /* !IDCT_SCALING_SUPPORTED */
|
|
|
++ {
|
|
|
++ /* Hardwire it to "no scaling" */
|
|
|
++ cinfo->output_width = cinfo->image_width;
|
|
|
++ cinfo->output_height = cinfo->image_height;
|
|
|
++ /* jdinput.c has already initialized DCT_scaled_size,
|
|
|
++ * and has computed unscaled downsampled_width and downsampled_height.
|
|
|
++ */
|
|
|
+ }
|
|
|
+-
|
|
|
+- /* Recompute dimensions of components */
|
|
|
+- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+- ci++, compptr++) {
|
|
|
+- compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
|
|
|
+- compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
|
|
|
+- }
|
|
|
+-
|
|
|
+-#else /* !IDCT_SCALING_SUPPORTED */
|
|
|
+-
|
|
|
+- /* Hardwire it to "no scaling" */
|
|
|
+- cinfo->output_width = cinfo->image_width;
|
|
|
+- cinfo->output_height = cinfo->image_height;
|
|
|
+- /* jdinput.c has already initialized DCT_scaled_size,
|
|
|
+- * and has computed unscaled downsampled_width and downsampled_height.
|
|
|
+- */
|
|
|
+-
|
|
|
+-#endif /* IDCT_SCALING_SUPPORTED */
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Compute output image dimensions and related values.
|
|
|
+ * NOTE: this is exported for possible use by application.
|
|
|
+ * Hence it mustn't do anything that can't be done twice.
|
|
|
+ * Also note that it may be called before the master module is initialized!
|
|
|
+@@ -268,65 +273,67 @@ jpeg_calc_output_dimensions(j_decompress
|
|
|
+ if (cinfo->global_state != DSTATE_READY)
|
|
|
+ ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
|
|
|
+
|
|
|
+ /* Compute core output image dimensions and DCT scaling choices. */
|
|
|
+ jpeg_core_output_dimensions(cinfo);
|
|
|
+
|
|
|
+ #ifdef IDCT_SCALING_SUPPORTED
|
|
|
+
|
|
|
+- /* In selecting the actual DCT scaling for each component, we try to
|
|
|
+- * scale up the chroma components via IDCT scaling rather than upsampling.
|
|
|
+- * This saves time if the upsampler gets to use 1:1 scaling.
|
|
|
+- * Note this code adapts subsampling ratios which are powers of 2.
|
|
|
+- */
|
|
|
+- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+- ci++, compptr++) {
|
|
|
+- int ssize = cinfo->_min_DCT_scaled_size;
|
|
|
+- while (ssize < DCTSIZE &&
|
|
|
+- ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
|
|
|
+- (compptr->h_samp_factor * ssize * 2) == 0) &&
|
|
|
+- ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
|
|
|
+- (compptr->v_samp_factor * ssize * 2) == 0)) {
|
|
|
+- ssize = ssize * 2;
|
|
|
++ if (!cinfo->master->lossless) {
|
|
|
++ /* In selecting the actual DCT scaling for each component, we try to
|
|
|
++ * scale up the chroma components via IDCT scaling rather than upsampling.
|
|
|
++ * This saves time if the upsampler gets to use 1:1 scaling.
|
|
|
++ * Note this code adapts subsampling ratios which are powers of 2.
|
|
|
++ */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ int ssize = cinfo->_min_DCT_scaled_size;
|
|
|
++ while (ssize < DCTSIZE &&
|
|
|
++ ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
|
|
|
++ (compptr->h_samp_factor * ssize * 2) == 0) &&
|
|
|
++ ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
|
|
|
++ (compptr->v_samp_factor * ssize * 2) == 0)) {
|
|
|
++ ssize = ssize * 2;
|
|
|
++ }
|
|
|
++#if JPEG_LIB_VERSION >= 70
|
|
|
++ compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
|
|
|
++#else
|
|
|
++ compptr->DCT_scaled_size = ssize;
|
|
|
++#endif
|
|
|
+ }
|
|
|
+-#if JPEG_LIB_VERSION >= 70
|
|
|
+- compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
|
|
|
+-#else
|
|
|
+- compptr->DCT_scaled_size = ssize;
|
|
|
+-#endif
|
|
|
+- }
|
|
|
+
|
|
|
+- /* Recompute downsampled dimensions of components;
|
|
|
+- * application needs to know these if using raw downsampled data.
|
|
|
+- */
|
|
|
+- for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
+- ci++, compptr++) {
|
|
|
+- /* Size in samples, after IDCT scaling */
|
|
|
+- compptr->downsampled_width = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_width *
|
|
|
+- (long)(compptr->h_samp_factor * compptr->_DCT_scaled_size),
|
|
|
+- (long)(cinfo->max_h_samp_factor * DCTSIZE));
|
|
|
+- compptr->downsampled_height = (JDIMENSION)
|
|
|
+- jdiv_round_up((long)cinfo->image_height *
|
|
|
+- (long)(compptr->v_samp_factor * compptr->_DCT_scaled_size),
|
|
|
+- (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ /* Recompute downsampled dimensions of components;
|
|
|
++ * application needs to know these if using raw downsampled data.
|
|
|
++ */
|
|
|
++ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
|
|
|
++ ci++, compptr++) {
|
|
|
++ /* Size in samples, after IDCT scaling */
|
|
|
++ compptr->downsampled_width = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_width *
|
|
|
++ (long)(compptr->h_samp_factor *
|
|
|
++ compptr->_DCT_scaled_size),
|
|
|
++ (long)(cinfo->max_h_samp_factor * DCTSIZE));
|
|
|
++ compptr->downsampled_height = (JDIMENSION)
|
|
|
++ jdiv_round_up((long)cinfo->image_height *
|
|
|
++ (long)(compptr->v_samp_factor *
|
|
|
++ compptr->_DCT_scaled_size),
|
|
|
++ (long)(cinfo->max_v_samp_factor * DCTSIZE));
|
|
|
++ }
|
|
|
++ } else
|
|
|
++#endif /* IDCT_SCALING_SUPPORTED */
|
|
|
++ {
|
|
|
++ /* Hardwire it to "no scaling" */
|
|
|
++ cinfo->output_width = cinfo->image_width;
|
|
|
++ cinfo->output_height = cinfo->image_height;
|
|
|
++ /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
|
|
|
++ * and has computed unscaled downsampled_width and downsampled_height.
|
|
|
++ */
|
|
|
+ }
|
|
|
+
|
|
|
+-#else /* !IDCT_SCALING_SUPPORTED */
|
|
|
+-
|
|
|
+- /* Hardwire it to "no scaling" */
|
|
|
+- cinfo->output_width = cinfo->image_width;
|
|
|
+- cinfo->output_height = cinfo->image_height;
|
|
|
+- /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
|
|
|
+- * and has computed unscaled downsampled_width and downsampled_height.
|
|
|
+- */
|
|
|
+-
|
|
|
+-#endif /* IDCT_SCALING_SUPPORTED */
|
|
|
+-
|
|
|
+ /* Report number of components in selected colorspace. */
|
|
|
+ /* Probably this should be in the color conversion module... */
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_GRAYSCALE:
|
|
|
+ cinfo->out_color_components = 1;
|
|
|
+ break;
|
|
|
+ case JCS_RGB:
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+@@ -404,37 +411,93 @@ jpeg_calc_output_dimensions(j_decompress
|
|
|
+ * sample_range_limit + CENTERJSAMPLE.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ prepare_range_limit_table(j_decompress_ptr cinfo)
|
|
|
+ /* Allocate and fill in the sample_range_limit table */
|
|
|
+ {
|
|
|
+ JSAMPLE *table;
|
|
|
++ J12SAMPLE *table12;
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ J16SAMPLE *table16;
|
|
|
++#endif
|
|
|
+ int i;
|
|
|
+
|
|
|
+- table = (JSAMPLE *)
|
|
|
+- (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
|
|
|
+- table += (MAXJSAMPLE + 1); /* allow negative subscripts of simple table */
|
|
|
+- cinfo->sample_range_limit = table;
|
|
|
+- /* First segment of "simple" table: limit[x] = 0 for x < 0 */
|
|
|
+- memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
|
|
|
+- /* Main part of "simple" table: limit[x] = x */
|
|
|
+- for (i = 0; i <= MAXJSAMPLE; i++)
|
|
|
+- table[i] = (JSAMPLE)i;
|
|
|
+- table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */
|
|
|
+- /* End of simple table, rest of first half of post-IDCT table */
|
|
|
+- for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
|
|
|
+- table[i] = MAXJSAMPLE;
|
|
|
+- /* Second half of post-IDCT table */
|
|
|
+- memset(table + (2 * (MAXJSAMPLE + 1)), 0,
|
|
|
+- (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
|
|
|
+- memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
|
|
|
+- cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
|
|
|
++ if (cinfo->data_precision == 16) {
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ table16 = (J16SAMPLE *)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ (5 * (MAXJ16SAMPLE + 1) + CENTERJ16SAMPLE) *
|
|
|
++ sizeof(J16SAMPLE));
|
|
|
++ table16 += (MAXJ16SAMPLE + 1); /* allow negative subscripts of simple
|
|
|
++ table */
|
|
|
++ cinfo->sample_range_limit = (JSAMPLE *)table16;
|
|
|
++ /* First segment of "simple" table: limit[x] = 0 for x < 0 */
|
|
|
++ memset(table16 - (MAXJ16SAMPLE + 1), 0,
|
|
|
++ (MAXJ16SAMPLE + 1) * sizeof(J16SAMPLE));
|
|
|
++ /* Main part of "simple" table: limit[x] = x */
|
|
|
++ for (i = 0; i <= MAXJ16SAMPLE; i++)
|
|
|
++ table16[i] = (J16SAMPLE)i;
|
|
|
++ table16 += CENTERJ16SAMPLE; /* Point to where post-IDCT table starts */
|
|
|
++ /* End of simple table, rest of first half of post-IDCT table */
|
|
|
++ for (i = CENTERJ16SAMPLE; i < 2 * (MAXJ16SAMPLE + 1); i++)
|
|
|
++ table16[i] = MAXJ16SAMPLE;
|
|
|
++ /* Second half of post-IDCT table */
|
|
|
++ memset(table16 + (2 * (MAXJ16SAMPLE + 1)), 0,
|
|
|
++ (2 * (MAXJ16SAMPLE + 1) - CENTERJ16SAMPLE) * sizeof(J16SAMPLE));
|
|
|
++ memcpy(table16 + (4 * (MAXJ16SAMPLE + 1) - CENTERJ16SAMPLE),
|
|
|
++ cinfo->sample_range_limit, CENTERJ16SAMPLE * sizeof(J16SAMPLE));
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ } else if (cinfo->data_precision == 12) {
|
|
|
++ table12 = (J12SAMPLE *)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ (5 * (MAXJ12SAMPLE + 1) + CENTERJ12SAMPLE) *
|
|
|
++ sizeof(J12SAMPLE));
|
|
|
++ table12 += (MAXJ12SAMPLE + 1); /* allow negative subscripts of simple
|
|
|
++ table */
|
|
|
++ cinfo->sample_range_limit = (JSAMPLE *)table12;
|
|
|
++ /* First segment of "simple" table: limit[x] = 0 for x < 0 */
|
|
|
++ memset(table12 - (MAXJ12SAMPLE + 1), 0,
|
|
|
++ (MAXJ12SAMPLE + 1) * sizeof(J12SAMPLE));
|
|
|
++ /* Main part of "simple" table: limit[x] = x */
|
|
|
++ for (i = 0; i <= MAXJ12SAMPLE; i++)
|
|
|
++ table12[i] = (J12SAMPLE)i;
|
|
|
++ table12 += CENTERJ12SAMPLE; /* Point to where post-IDCT table starts */
|
|
|
++ /* End of simple table, rest of first half of post-IDCT table */
|
|
|
++ for (i = CENTERJ12SAMPLE; i < 2 * (MAXJ12SAMPLE + 1); i++)
|
|
|
++ table12[i] = MAXJ12SAMPLE;
|
|
|
++ /* Second half of post-IDCT table */
|
|
|
++ memset(table12 + (2 * (MAXJ12SAMPLE + 1)), 0,
|
|
|
++ (2 * (MAXJ12SAMPLE + 1) - CENTERJ12SAMPLE) * sizeof(J12SAMPLE));
|
|
|
++ memcpy(table12 + (4 * (MAXJ12SAMPLE + 1) - CENTERJ12SAMPLE),
|
|
|
++ cinfo->sample_range_limit, CENTERJ12SAMPLE * sizeof(J12SAMPLE));
|
|
|
++ } else {
|
|
|
++ table = (JSAMPLE *)
|
|
|
++ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
++ (5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
|
|
|
++ table += (MAXJSAMPLE + 1); /* allow negative subscripts of simple table */
|
|
|
++ cinfo->sample_range_limit = table;
|
|
|
++ /* First segment of "simple" table: limit[x] = 0 for x < 0 */
|
|
|
++ memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
|
|
|
++ /* Main part of "simple" table: limit[x] = x */
|
|
|
++ for (i = 0; i <= MAXJSAMPLE; i++)
|
|
|
++ table[i] = (JSAMPLE)i;
|
|
|
++ table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */
|
|
|
++ /* End of simple table, rest of first half of post-IDCT table */
|
|
|
++ for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
|
|
|
++ table[i] = MAXJSAMPLE;
|
|
|
++ /* Second half of post-IDCT table */
|
|
|
++ memset(table + (2 * (MAXJSAMPLE + 1)), 0,
|
|
|
++ (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
|
|
|
++ memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
|
|
|
++ cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
|
|
|
++ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Master selection of decompression modules.
|
|
|
+ * This is done once at jpeg_start_decompress time. We determine
|
|
|
+ * which modules will be used and give them appropriate initialization calls.
|
|
|
+ * We also initialize the decompressor input side to begin consuming data.
|
|
|
+@@ -447,16 +510,27 @@ prepare_range_limit_table(j_decompress_p
|
|
|
+ LOCAL(void)
|
|
|
+ master_selection(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_master_ptr master = (my_master_ptr)cinfo->master;
|
|
|
+ boolean use_c_buffer;
|
|
|
+ long samplesperrow;
|
|
|
+ JDIMENSION jd_samplesperrow;
|
|
|
+
|
|
|
++ /* Disable IDCT scaling and raw (downsampled) data output in lossless mode.
|
|
|
++ * IDCT scaling is not useful in lossless mode, and it must be disabled in
|
|
|
++ * order to properly calculate the output dimensions. Raw data output isn't
|
|
|
++ * particularly useful without subsampling and has not been tested in
|
|
|
++ * lossless mode.
|
|
|
++ */
|
|
|
++ if (cinfo->master->lossless) {
|
|
|
++ cinfo->raw_data_out = FALSE;
|
|
|
++ cinfo->scale_num = cinfo->scale_denom = 1;
|
|
|
++ }
|
|
|
++
|
|
|
+ /* Initialize dimensions and other stuff */
|
|
|
+ jpeg_calc_output_dimensions(cinfo);
|
|
|
+ prepare_range_limit_table(cinfo);
|
|
|
+
|
|
|
+ /* Width of an output scanline must be representable as JDIMENSION. */
|
|
|
+ samplesperrow = (long)cinfo->output_width *
|
|
|
+ (long)cinfo->out_color_components;
|
|
|
+ jd_samplesperrow = (JDIMENSION)samplesperrow;
|
|
|
+@@ -475,92 +549,183 @@ master_selection(j_decompress_ptr cinfo)
|
|
|
+ cinfo->enable_1pass_quant = FALSE;
|
|
|
+ cinfo->enable_external_quant = FALSE;
|
|
|
+ cinfo->enable_2pass_quant = FALSE;
|
|
|
+ }
|
|
|
+ if (cinfo->quantize_colors) {
|
|
|
+ if (cinfo->raw_data_out)
|
|
|
+ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
+ /* 2-pass quantizer only works in 3-component color space. */
|
|
|
+- if (cinfo->out_color_components != 3) {
|
|
|
++ if (cinfo->out_color_components != 3 ||
|
|
|
++ cinfo->out_color_space == JCS_RGB565) {
|
|
|
+ cinfo->enable_1pass_quant = TRUE;
|
|
|
+ cinfo->enable_external_quant = FALSE;
|
|
|
+ cinfo->enable_2pass_quant = FALSE;
|
|
|
+ cinfo->colormap = NULL;
|
|
|
+ } else if (cinfo->colormap != NULL) {
|
|
|
+ cinfo->enable_external_quant = TRUE;
|
|
|
+ } else if (cinfo->two_pass_quantize) {
|
|
|
+ cinfo->enable_2pass_quant = TRUE;
|
|
|
+ } else {
|
|
|
+ cinfo->enable_1pass_quant = TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (cinfo->enable_1pass_quant) {
|
|
|
+ #ifdef QUANT_1PASS_SUPPORTED
|
|
|
+- jinit_1pass_quantizer(cinfo);
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_1pass_quantizer(cinfo);
|
|
|
++ else
|
|
|
++ jinit_1pass_quantizer(cinfo);
|
|
|
+ master->quantizer_1pass = cinfo->cquantize;
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+
|
|
|
+ /* We use the 2-pass code to map to external colormaps. */
|
|
|
+ if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
|
|
|
+ #ifdef QUANT_2PASS_SUPPORTED
|
|
|
+- jinit_2pass_quantizer(cinfo);
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_2pass_quantizer(cinfo);
|
|
|
++ else
|
|
|
++ jinit_2pass_quantizer(cinfo);
|
|
|
+ master->quantizer_2pass = cinfo->cquantize;
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ /* If both quantizers are initialized, the 2-pass one is left active;
|
|
|
+ * this is necessary for starting with quantization to an external map.
|
|
|
+ */
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Post-processing: in particular, color conversion first */
|
|
|
+ if (!cinfo->raw_data_out) {
|
|
|
+ if (master->using_merged_upsample) {
|
|
|
+ #ifdef UPSAMPLE_MERGING_SUPPORTED
|
|
|
+- jinit_merged_upsampler(cinfo); /* does color conversion too */
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_merged_upsampler(cinfo); /* does color conversion too */
|
|
|
++ else
|
|
|
++ jinit_merged_upsampler(cinfo); /* does color conversion too */
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+- jinit_color_deconverter(cinfo);
|
|
|
+- jinit_upsampler(cinfo);
|
|
|
++ if (cinfo->data_precision == 16) {
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ j16init_color_deconverter(cinfo);
|
|
|
++ j16init_upsampler(cinfo);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ } else if (cinfo->data_precision == 12) {
|
|
|
++ j12init_color_deconverter(cinfo);
|
|
|
++ j12init_upsampler(cinfo);
|
|
|
++ } else {
|
|
|
++ jinit_color_deconverter(cinfo);
|
|
|
++ jinit_upsampler(cinfo);
|
|
|
++ }
|
|
|
+ }
|
|
|
+- jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ j16init_d_post_controller(cinfo, cinfo->enable_2pass_quant);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_d_post_controller(cinfo, cinfo->enable_2pass_quant);
|
|
|
++ else
|
|
|
++ jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
|
|
|
+ }
|
|
|
+- /* Inverse DCT */
|
|
|
+- jinit_inverse_dct(cinfo);
|
|
|
+- /* Entropy decoding: either Huffman or arithmetic coding. */
|
|
|
+- if (cinfo->arith_code) {
|
|
|
+-#ifdef D_ARITH_CODING_SUPPORTED
|
|
|
+- jinit_arith_decoder(cinfo);
|
|
|
++
|
|
|
++ if (cinfo->master->lossless) {
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ /* Prediction, sample undifferencing, point transform, and sample size
|
|
|
++ * scaling
|
|
|
++ */
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ j16init_lossless_decompressor(cinfo);
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_lossless_decompressor(cinfo);
|
|
|
++ else
|
|
|
++ jinit_lossless_decompressor(cinfo);
|
|
|
++ /* Entropy decoding: either Huffman or arithmetic coding. */
|
|
|
++ if (cinfo->arith_code) {
|
|
|
++ ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
|
|
++ } else {
|
|
|
++ jinit_lhuff_decoder(cinfo);
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Initialize principal buffer controllers. */
|
|
|
++ use_c_buffer = cinfo->inputctl->has_multiple_scans ||
|
|
|
++ cinfo->buffered_image;
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ j16init_d_diff_controller(cinfo, use_c_buffer);
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_d_diff_controller(cinfo, use_c_buffer);
|
|
|
++ else
|
|
|
++ jinit_d_diff_controller(cinfo, use_c_buffer);
|
|
|
+ #else
|
|
|
+- ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else {
|
|
|
+- if (cinfo->progressive_mode) {
|
|
|
+-#ifdef D_PROGRESSIVE_SUPPORTED
|
|
|
+- jinit_phuff_decoder(cinfo);
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++ /* Inverse DCT */
|
|
|
++ if (cinfo->data_precision == 12)
|
|
|
++ j12init_inverse_dct(cinfo);
|
|
|
++ else
|
|
|
++ jinit_inverse_dct(cinfo);
|
|
|
++ /* Entropy decoding: either Huffman or arithmetic coding. */
|
|
|
++ if (cinfo->arith_code) {
|
|
|
++#ifdef D_ARITH_CODING_SUPPORTED
|
|
|
++ jinit_arith_decoder(cinfo);
|
|
|
+ #else
|
|
|
+- ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
++ ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
|
|
|
++#endif
|
|
|
++ } else {
|
|
|
++ if (cinfo->progressive_mode) {
|
|
|
++#ifdef D_PROGRESSIVE_SUPPORTED
|
|
|
++ jinit_phuff_decoder(cinfo);
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+- } else
|
|
|
+- jinit_huff_decoder(cinfo);
|
|
|
++ } else
|
|
|
++ jinit_huff_decoder(cinfo);
|
|
|
++ }
|
|
|
++
|
|
|
++ /* Initialize principal buffer controllers. */
|
|
|
++ use_c_buffer = cinfo->inputctl->has_multiple_scans ||
|
|
|
++ cinfo->buffered_image;
|
|
|
++ if (cinfo->data_precision == 12)
|
|
|
++ j12init_d_coef_controller(cinfo, use_c_buffer);
|
|
|
++ else
|
|
|
++ jinit_d_coef_controller(cinfo, use_c_buffer);
|
|
|
+ }
|
|
|
+
|
|
|
+- /* Initialize principal buffer controllers. */
|
|
|
+- use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
|
|
|
+- jinit_d_coef_controller(cinfo, use_c_buffer);
|
|
|
+-
|
|
|
+- if (!cinfo->raw_data_out)
|
|
|
+- jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++ if (!cinfo->raw_data_out) {
|
|
|
++ if (cinfo->data_precision == 16)
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ j16init_d_main_controller(cinfo,
|
|
|
++ FALSE /* never need full buffer here */);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++#endif
|
|
|
++ else if (cinfo->data_precision == 12)
|
|
|
++ j12init_d_main_controller(cinfo,
|
|
|
++ FALSE /* never need full buffer here */);
|
|
|
++ else
|
|
|
++ jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
|
|
|
++ }
|
|
|
+
|
|
|
+ /* We can now tell the memory manager to allocate virtual arrays. */
|
|
|
+ (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
|
|
|
+
|
|
|
+ /* Initialize input side of decompressor to consume first scan. */
|
|
|
+ (*cinfo->inputctl->start_input_pass) (cinfo);
|
|
|
+
|
|
|
+ /* Set the first and last iMCU columns to decompress from single-scan images.
|
|
|
+diff --git a/media/libjpeg/jdmerge.c b/media/libjpeg/jdmerge.c
|
|
|
+--- a/media/libjpeg/jdmerge.c
|
|
|
++++ b/media/libjpeg/jdmerge.c
|
|
|
+@@ -1,16 +1,16 @@
|
|
|
+ /*
|
|
|
+ * jdmerge.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+- * Copyright (C) 2009, 2011, 2014-2015, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2009, 2011, 2014-2015, 2020, 2022, D. R. Commander.
|
|
|
+ * Copyright (C) 2013, Linaro Limited.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains code for merged upsampling/color conversion.
|
|
|
+ *
|
|
|
+ * This file combines functions from jdsample.c and jdcolor.c;
|
|
|
+ * read those files first to understand what's going on.
|
|
|
+@@ -162,30 +162,30 @@ build_ycc_rgb_table(j_decompress_ptr cin
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ int i;
|
|
|
+ JLONG x;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ upsample->Cr_r_tab = (int *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(int));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(int));
|
|
|
+ upsample->Cb_b_tab = (int *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(int));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(int));
|
|
|
+ upsample->Cr_g_tab = (JLONG *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
+ upsample->Cb_g_tab = (JLONG *)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
++ (_MAXJSAMPLE + 1) * sizeof(JLONG));
|
|
|
+
|
|
|
+- for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
|
|
|
+- /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
|
|
|
+- /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
|
|
|
++ for (i = 0, x = -_CENTERJSAMPLE; i <= _MAXJSAMPLE; i++, x++) {
|
|
|
++ /* i is the actual input pixel value, in the range 0.._MAXJSAMPLE */
|
|
|
++ /* The Cb or Cr value we are thinking of is x = i - _CENTERJSAMPLE */
|
|
|
+ /* Cr=>R value is nearest int to 1.40200 * x */
|
|
|
+ upsample->Cr_r_tab[i] = (int)
|
|
|
+ RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
|
|
|
+ /* Cb=>B value is nearest int to 1.77200 * x */
|
|
|
+ upsample->Cb_b_tab[i] = (int)
|
|
|
+ RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
|
|
|
+ /* Cr=>G value is scaled-up -0.71414 * x */
|
|
|
+ upsample->Cr_g_tab[i] = (-FIX(0.71414)) * x;
|
|
|
+@@ -214,33 +214,33 @@ start_pass_merged_upsample(j_decompress_
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Control routine to do upsampling (and color conversion).
|
|
|
+ *
|
|
|
+ * The control routine just handles the row buffering considerations.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-merged_2v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++merged_2v_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+- JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ /* 2:1 vertical sampling case: may need a spare row. */
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+- JSAMPROW work_ptrs[2];
|
|
|
++ _JSAMPROW work_ptrs[2];
|
|
|
+ JDIMENSION num_rows; /* number of rows returned to caller */
|
|
|
+
|
|
|
+ if (upsample->spare_full) {
|
|
|
+ /* If we have a spare row saved from a previous cycle, just return it. */
|
|
|
+ JDIMENSION size = upsample->out_row_width;
|
|
|
+ if (cinfo->out_color_space == JCS_RGB565)
|
|
|
+ size = cinfo->output_width * 2;
|
|
|
+- jcopy_sample_rows(&upsample->spare_row, 0, output_buf + *out_row_ctr, 0, 1,
|
|
|
+- size);
|
|
|
++ _jcopy_sample_rows(&upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
|
|
|
++ 1, size);
|
|
|
+ num_rows = 1;
|
|
|
+ upsample->spare_full = FALSE;
|
|
|
+ } else {
|
|
|
+ /* Figure number of rows to return to caller. */
|
|
|
+ num_rows = 2;
|
|
|
+ /* Not more than the distance to the end of the image. */
|
|
|
+ if (num_rows > upsample->rows_to_go)
|
|
|
+ num_rows = upsample->rows_to_go;
|
|
|
+@@ -265,19 +265,19 @@ merged_2v_upsample(j_decompress_ptr cinf
|
|
|
+ upsample->rows_to_go -= num_rows;
|
|
|
+ /* When the buffer is emptied, declare this input row group consumed */
|
|
|
+ if (!upsample->spare_full)
|
|
|
+ (*in_row_group_ctr)++;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-merged_1v_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++merged_1v_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+- JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ /* 1:1 vertical sampling case: much easier, never need a spare row. */
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+
|
|
|
+ /* Just do the upsampling. */
|
|
|
+ (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
|
|
|
+ output_buf + *out_row_ctr);
|
|
|
+@@ -297,18 +297,18 @@ merged_1v_upsample(j_decompress_ptr cinf
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
++h2v1_merged_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -342,18 +342,18 @@ h2v1_merged_upsample(j_decompress_ptr ci
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
++h2v2_merged_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ switch (cinfo->out_color_space) {
|
|
|
+ case JCS_EXT_RGB:
|
|
|
+ extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ break;
|
|
|
+ case JCS_EXT_RGBX:
|
|
|
+ case JCS_EXT_RGBA:
|
|
|
+@@ -469,57 +469,57 @@ static INLINE boolean is_big_endian(void
|
|
|
+ int test_value = 1;
|
|
|
+ if (*(char *)&test_value != 1)
|
|
|
+ return TRUE;
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-h2v1_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
++h2v1_merged_upsample_565(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ else
|
|
|
+ h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-h2v1_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
++h2v1_merged_upsample_565D(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ else
|
|
|
+ h2v1_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-h2v2_merged_upsample_565(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
++h2v2_merged_upsample_565(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ else
|
|
|
+ h2v2_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-h2v2_merged_upsample_565D(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
++h2v2_merged_upsample_565D(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ if (is_big_endian())
|
|
|
+ h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ else
|
|
|
+ h2v2_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr,
|
|
|
+ output_buf);
|
|
|
+ }
|
|
|
+@@ -529,51 +529,58 @@ h2v2_merged_upsample_565D(j_decompress_p
|
|
|
+ * Module initialization routine for merged upsampling/color conversion.
|
|
|
+ *
|
|
|
+ * NB: this is called under the conditions determined by use_merged_upsample()
|
|
|
+ * in jdmaster.c. That routine MUST correspond to the actual capabilities
|
|
|
+ * of this module; no safety checks are made here.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_merged_upsampler(j_decompress_ptr cinfo)
|
|
|
++_jinit_merged_upsampler(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ upsample = (my_merged_upsample_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_merged_upsampler));
|
|
|
+ cinfo->upsample = (struct jpeg_upsampler *)upsample;
|
|
|
+ upsample->pub.start_pass = start_pass_merged_upsample;
|
|
|
+ upsample->pub.need_context_rows = FALSE;
|
|
|
+
|
|
|
+ upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
|
|
|
+
|
|
|
+ if (cinfo->max_v_samp_factor == 2) {
|
|
|
+- upsample->pub.upsample = merged_2v_upsample;
|
|
|
++ upsample->pub._upsample = merged_2v_upsample;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v2_merged_upsample())
|
|
|
+ upsample->upmethod = jsimd_h2v2_merged_upsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ upsample->upmethod = h2v2_merged_upsample;
|
|
|
+ if (cinfo->out_color_space == JCS_RGB565) {
|
|
|
+ if (cinfo->dither_mode != JDITHER_NONE) {
|
|
|
+ upsample->upmethod = h2v2_merged_upsample_565D;
|
|
|
+ } else {
|
|
|
+ upsample->upmethod = h2v2_merged_upsample_565;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* Allocate a spare row buffer */
|
|
|
+- upsample->spare_row = (JSAMPROW)
|
|
|
++ upsample->spare_row = (_JSAMPROW)
|
|
|
+ (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (size_t)(upsample->out_row_width * sizeof(JSAMPLE)));
|
|
|
++ (size_t)(upsample->out_row_width * sizeof(_JSAMPLE)));
|
|
|
+ } else {
|
|
|
+- upsample->pub.upsample = merged_1v_upsample;
|
|
|
++ upsample->pub._upsample = merged_1v_upsample;
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v1_merged_upsample())
|
|
|
+ upsample->upmethod = jsimd_h2v1_merged_upsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ upsample->upmethod = h2v1_merged_upsample;
|
|
|
+ if (cinfo->out_color_space == JCS_RGB565) {
|
|
|
+ if (cinfo->dither_mode != JDITHER_NONE) {
|
|
|
+ upsample->upmethod = h2v1_merged_upsample_565D;
|
|
|
+ } else {
|
|
|
+ upsample->upmethod = h2v1_merged_upsample_565;
|
|
|
+ }
|
|
|
+ }
|
|
|
+diff --git a/media/libjpeg/jdmerge.h b/media/libjpeg/jdmerge.h
|
|
|
+--- a/media/libjpeg/jdmerge.h
|
|
|
++++ b/media/libjpeg/jdmerge.h
|
|
|
+@@ -1,46 +1,47 @@
|
|
|
+ /*
|
|
|
+ * jdmerge.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+ #ifdef UPSAMPLE_MERGING_SUPPORTED
|
|
|
+
|
|
|
+
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_upsampler pub; /* public fields */
|
|
|
+
|
|
|
+ /* Pointer to routine to do actual upsampling/conversion of one row group */
|
|
|
+- void (*upmethod) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
|
|
|
++ void (*upmethod) (j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf);
|
|
|
+
|
|
|
+ /* Private state for YCC->RGB conversion */
|
|
|
+ int *Cr_r_tab; /* => table for Cr to R conversion */
|
|
|
+ int *Cb_b_tab; /* => table for Cb to B conversion */
|
|
|
+ JLONG *Cr_g_tab; /* => table for Cr to G conversion */
|
|
|
+ JLONG *Cb_g_tab; /* => table for Cb to G conversion */
|
|
|
+
|
|
|
+ /* For 2:1 vertical sampling, we produce two output rows at a time.
|
|
|
+ * We need a "spare" row buffer to hold the second output row if the
|
|
|
+ * application provides just a one-row buffer; we also use the spare
|
|
|
+ * to discard the dummy last row if the image height is odd.
|
|
|
+ */
|
|
|
+- JSAMPROW spare_row;
|
|
|
++ _JSAMPROW spare_row;
|
|
|
+ boolean spare_full; /* T if spare buffer is occupied */
|
|
|
+
|
|
|
+ JDIMENSION out_row_width; /* samples per output row */
|
|
|
+ JDIMENSION rows_to_go; /* counts rows remaining in image */
|
|
|
+ } my_merged_upsampler;
|
|
|
+
|
|
|
+ typedef my_merged_upsampler *my_merged_upsample_ptr;
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jdmrg565.c b/media/libjpeg/jdmrg565.c
|
|
|
+--- a/media/libjpeg/jdmrg565.c
|
|
|
++++ b/media/libjpeg/jdmrg565.c
|
|
|
+@@ -1,37 +1,38 @@
|
|
|
+ /*
|
|
|
+ * jdmrg565.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 2013, Linaro Limited.
|
|
|
+- * Copyright (C) 2014-2015, 2018, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2014-2015, 2018, 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains code for merged upsampling/color conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo,
|
|
|
++ _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_group_ctr,
|
|
|
+- JSAMPARRAY output_buf)
|
|
|
++ _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ register int y, cred, cgreen, cblue;
|
|
|
+ int cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ JDIMENSION col;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *Crrtab = upsample->Cr_r_tab;
|
|
|
+ int *Cbbtab = upsample->Cb_b_tab;
|
|
|
+ JLONG *Crgtab = upsample->Cr_g_tab;
|
|
|
+ JLONG *Cbgtab = upsample->Cb_g_tab;
|
|
|
+ unsigned int r, g, b;
|
|
|
+ JLONG rgb;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+@@ -81,28 +82,28 @@ h2v1_merged_upsample_565_internal(j_deco
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+ h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE input_buf,
|
|
|
++ _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_group_ctr,
|
|
|
+- JSAMPARRAY output_buf)
|
|
|
++ _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ register int y, cred, cgreen, cblue;
|
|
|
+ int cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ JDIMENSION col;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *Crrtab = upsample->Cr_r_tab;
|
|
|
+ int *Cbbtab = upsample->Cb_b_tab;
|
|
|
+ JLONG *Crgtab = upsample->Cr_g_tab;
|
|
|
+ JLONG *Cbgtab = upsample->Cb_g_tab;
|
|
|
+ JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
|
|
|
+ unsigned int r, g, b;
|
|
|
+ JLONG rgb;
|
|
|
+ SHIFT_TEMPS
|
|
|
+@@ -154,28 +155,28 @@ h2v1_merged_upsample_565D_internal(j_dec
|
|
|
+ rgb = PACK_SHORT_565(r, g, b);
|
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_group_ctr,
|
|
|
+- JSAMPARRAY output_buf)
|
|
|
++ _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ register int y, cred, cgreen, cblue;
|
|
|
+ int cb, cr;
|
|
|
+- register JSAMPROW outptr0, outptr1;
|
|
|
+- JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr0, outptr1;
|
|
|
++ _JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
|
|
+ JDIMENSION col;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *Crrtab = upsample->Cr_r_tab;
|
|
|
+ int *Cbbtab = upsample->Cb_b_tab;
|
|
|
+ JLONG *Crgtab = upsample->Cr_g_tab;
|
|
|
+ JLONG *Cbgtab = upsample->Cb_g_tab;
|
|
|
+ unsigned int r, g, b;
|
|
|
+ JLONG rgb;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+@@ -250,28 +251,28 @@ h2v2_merged_upsample_565_internal(j_deco
|
|
|
+ *(INT16 *)outptr1 = (INT16)rgb;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+ h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE input_buf,
|
|
|
++ _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_group_ctr,
|
|
|
+- JSAMPARRAY output_buf)
|
|
|
++ _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ register int y, cred, cgreen, cblue;
|
|
|
+ int cb, cr;
|
|
|
+- register JSAMPROW outptr0, outptr1;
|
|
|
+- JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr0, outptr1;
|
|
|
++ _JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
|
|
+ JDIMENSION col;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *Crrtab = upsample->Cr_r_tab;
|
|
|
+ int *Cbbtab = upsample->Cb_b_tab;
|
|
|
+ JLONG *Crgtab = upsample->Cr_g_tab;
|
|
|
+ JLONG *Cbgtab = upsample->Cb_g_tab;
|
|
|
+ JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
|
|
|
+ JLONG d1 = dither_matrix[(cinfo->output_scanline + 1) & DITHER_MASK];
|
|
|
+ unsigned int r, g, b;
|
|
|
+ JLONG rgb;
|
|
|
+diff --git a/media/libjpeg/jdmrgext.c b/media/libjpeg/jdmrgext.c
|
|
|
+--- a/media/libjpeg/jdmrgext.c
|
|
|
++++ b/media/libjpeg/jdmrgext.c
|
|
|
+@@ -1,43 +1,43 @@
|
|
|
+ /*
|
|
|
+ * jdmrgext.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2011, 2015, 2020, 2023, D. R. Commander.
|
|
|
++ * Copyright (C) 2011, 2015, 2020, 2022-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains code for merged upsampling/color conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ /* This file is included by jdmerge.c */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-h2v1_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++h2v1_merged_upsample_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_group_ctr,
|
|
|
+- JSAMPARRAY output_buf)
|
|
|
++ _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ register int y, cred, cgreen, cblue;
|
|
|
+ int cb, cr;
|
|
|
+- register JSAMPROW outptr;
|
|
|
+- JSAMPROW inptr0, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr;
|
|
|
++ _JSAMPROW inptr0, inptr1, inptr2;
|
|
|
+ JDIMENSION col;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *Crrtab = upsample->Cr_r_tab;
|
|
|
+ int *Cbbtab = upsample->Cb_b_tab;
|
|
|
+ JLONG *Crgtab = upsample->Cr_g_tab;
|
|
|
+ JLONG *Cbgtab = upsample->Cb_g_tab;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ inptr0 = input_buf[0][in_row_group_ctr];
|
|
|
+ inptr1 = input_buf[1][in_row_group_ctr];
|
|
|
+@@ -52,64 +52,64 @@ h2v1_merged_upsample_internal(j_decompre
|
|
|
+ cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
|
|
+ cblue = Cbbtab[cb];
|
|
|
+ /* Fetch 2 Y values and emit 2 pixels */
|
|
|
+ y = *inptr0++;
|
|
|
+ outptr[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr += RGB_PIXELSIZE;
|
|
|
+ y = *inptr0++;
|
|
|
+ outptr[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr += RGB_PIXELSIZE;
|
|
|
+ }
|
|
|
+ /* If image width is odd, do the last output column separately */
|
|
|
+ if (cinfo->output_width & 1) {
|
|
|
+ cb = *inptr1;
|
|
|
+ cr = *inptr2;
|
|
|
+ cred = Crrtab[cr];
|
|
|
+ cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
|
|
+ cblue = Cbbtab[cb];
|
|
|
+ y = *inptr0;
|
|
|
+ outptr[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ */
|
|
|
+
|
|
|
+ INLINE
|
|
|
+ LOCAL(void)
|
|
|
+-h2v2_merged_upsample_internal(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++h2v2_merged_upsample_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_group_ctr,
|
|
|
+- JSAMPARRAY output_buf)
|
|
|
++ _JSAMPARRAY output_buf)
|
|
|
+ {
|
|
|
+ my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
|
|
|
+ register int y, cred, cgreen, cblue;
|
|
|
+ int cb, cr;
|
|
|
+- register JSAMPROW outptr0, outptr1;
|
|
|
+- JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
|
|
++ register _JSAMPROW outptr0, outptr1;
|
|
|
++ _JSAMPROW inptr00, inptr01, inptr1, inptr2;
|
|
|
+ JDIMENSION col;
|
|
|
+ /* copy these pointers into registers if possible */
|
|
|
+- register JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *Crrtab = upsample->Cr_r_tab;
|
|
|
+ int *Cbbtab = upsample->Cb_b_tab;
|
|
|
+ JLONG *Crgtab = upsample->Cr_g_tab;
|
|
|
+ JLONG *Cbgtab = upsample->Cb_g_tab;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ inptr00 = input_buf[0][in_row_group_ctr * 2];
|
|
|
+ inptr01 = input_buf[0][in_row_group_ctr * 2 + 1];
|
|
|
+@@ -126,59 +126,59 @@ h2v2_merged_upsample_internal(j_decompre
|
|
|
+ cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
|
|
+ cblue = Cbbtab[cb];
|
|
|
+ /* Fetch 4 Y values and emit 4 pixels */
|
|
|
+ y = *inptr00++;
|
|
|
+ outptr0[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr0[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr0[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr0[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr0[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr0 += RGB_PIXELSIZE;
|
|
|
+ y = *inptr00++;
|
|
|
+ outptr0[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr0[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr0[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr0[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr0[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr0 += RGB_PIXELSIZE;
|
|
|
+ y = *inptr01++;
|
|
|
+ outptr1[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr1[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr1[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr1[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr1[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr1 += RGB_PIXELSIZE;
|
|
|
+ y = *inptr01++;
|
|
|
+ outptr1[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr1[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr1[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr1[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr1[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ outptr1 += RGB_PIXELSIZE;
|
|
|
+ }
|
|
|
+ /* If image width is odd, do the last output column separately */
|
|
|
+ if (cinfo->output_width & 1) {
|
|
|
+ cb = *inptr1;
|
|
|
+ cr = *inptr2;
|
|
|
+ cred = Crrtab[cr];
|
|
|
+ cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
|
|
+ cblue = Cbbtab[cb];
|
|
|
+ y = *inptr00;
|
|
|
+ outptr0[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr0[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr0[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr0[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr0[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ y = *inptr01;
|
|
|
+ outptr1[RGB_RED] = range_limit[y + cred];
|
|
|
+ outptr1[RGB_GREEN] = range_limit[y + cgreen];
|
|
|
+ outptr1[RGB_BLUE] = range_limit[y + cblue];
|
|
|
+ #ifdef RGB_ALPHA
|
|
|
+- outptr1[RGB_ALPHA] = MAXJSAMPLE;
|
|
|
++ outptr1[RGB_ALPHA] = _MAXJSAMPLE;
|
|
|
+ #endif
|
|
|
+ }
|
|
|
+ }
|
|
|
+diff --git a/media/libjpeg/jdphuff.c b/media/libjpeg/jdphuff.c
|
|
|
+--- a/media/libjpeg/jdphuff.c
|
|
|
++++ b/media/libjpeg/jdphuff.c
|
|
|
+@@ -1,13 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jdphuff.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1995-1997, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright (C) 2015-2016, 2018-2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains Huffman entropy decoding routines for progressive JPEG.
|
|
|
+ *
|
|
|
+ * Much of the complexity here has to do with supporting input suspension.
|
|
|
+@@ -18,17 +20,17 @@
|
|
|
+ *
|
|
|
+ * NOTE: All referenced figures are from
|
|
|
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jdhuff.h" /* Declarations shared with jdhuff.c */
|
|
|
++#include "jdhuff.h" /* Declarations shared with jd*huff.c */
|
|
|
+ #include <limits.h>
|
|
|
+
|
|
|
+
|
|
|
+ #ifdef D_PROGRESSIVE_SUPPORTED
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Expanded entropy decoder object for progressive Huffman decoding.
|
|
|
+ *
|
|
|
+diff --git a/media/libjpeg/jdpostct.c b/media/libjpeg/jdpostct.c
|
|
|
+--- a/media/libjpeg/jdpostct.c
|
|
|
++++ b/media/libjpeg/jdpostct.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jdpostct.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1996, Thomas G. Lane.
|
|
|
+- * It was modified by The libjpeg-turbo Project to include only code relevant
|
|
|
+- * to libjpeg-turbo.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains the decompression postprocessing controller.
|
|
|
+ * This controller manages the upsampling, color conversion, and color
|
|
|
+ * quantization/reduction steps; specifically, it controls the buffering
|
|
|
+ * between upsample/color conversion and color quantization/reduction.
|
|
|
+ *
|
|
|
+@@ -17,259 +17,275 @@
|
|
|
+ * work to do, and it just hands off to the upsample/color conversion code.
|
|
|
+ * An integrated upsample/convert/quantize process would replace this module
|
|
|
+ * entirely.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /* Private buffer controller object */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_d_post_controller pub; /* public fields */
|
|
|
+
|
|
|
+ /* Color quantization source buffer: this holds output data from
|
|
|
+ * the upsample/color conversion step to be passed to the quantizer.
|
|
|
+ * For two-pass color quantization, we need a full-image buffer;
|
|
|
+ * for one-pass operation, a strip buffer is sufficient.
|
|
|
+ */
|
|
|
+ jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */
|
|
|
+- JSAMPARRAY buffer; /* strip buffer, or current strip of virtual */
|
|
|
++ _JSAMPARRAY buffer; /* strip buffer, or current strip of virtual */
|
|
|
+ JDIMENSION strip_height; /* buffer size in rows */
|
|
|
+ /* for two-pass mode only: */
|
|
|
+ JDIMENSION starting_row; /* row # of first row in current strip */
|
|
|
+ JDIMENSION next_row; /* index of next row to fill/empty in strip */
|
|
|
+ } my_post_controller;
|
|
|
+
|
|
|
+ typedef my_post_controller *my_post_ptr;
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ METHODDEF(void) post_process_1pass(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE input_buf,
|
|
|
++ _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+ JDIMENSION in_row_groups_avail,
|
|
|
+- JSAMPARRAY output_buf,
|
|
|
++ _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
+-#ifdef QUANT_2PASS_SUPPORTED
|
|
|
++#endif
|
|
|
++#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
|
|
|
+ METHODDEF(void) post_process_prepass(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE input_buf,
|
|
|
++ _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+ JDIMENSION in_row_groups_avail,
|
|
|
+- JSAMPARRAY output_buf,
|
|
|
++ _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
+ METHODDEF(void) post_process_2pass(j_decompress_ptr cinfo,
|
|
|
+- JSAMPIMAGE input_buf,
|
|
|
++ _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+ JDIMENSION in_row_groups_avail,
|
|
|
+- JSAMPARRAY output_buf,
|
|
|
++ _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize for a processing pass.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ start_pass_dpost(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
|
|
|
+ {
|
|
|
+ my_post_ptr post = (my_post_ptr)cinfo->post;
|
|
|
+
|
|
|
+ switch (pass_mode) {
|
|
|
+ case JBUF_PASS_THRU:
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ if (cinfo->quantize_colors) {
|
|
|
+ /* Single-pass processing with color quantization. */
|
|
|
+- post->pub.post_process_data = post_process_1pass;
|
|
|
++ post->pub._post_process_data = post_process_1pass;
|
|
|
+ /* We could be doing buffered-image output before starting a 2-pass
|
|
|
+ * color quantization; in that case, jinit_d_post_controller did not
|
|
|
+ * allocate a strip buffer. Use the virtual-array buffer as workspace.
|
|
|
+ */
|
|
|
+ if (post->buffer == NULL) {
|
|
|
+- post->buffer = (*cinfo->mem->access_virt_sarray)
|
|
|
++ post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
+ ((j_common_ptr)cinfo, post->whole_image,
|
|
|
+ (JDIMENSION)0, post->strip_height, TRUE);
|
|
|
+ }
|
|
|
+- } else {
|
|
|
++ } else
|
|
|
++#endif
|
|
|
++ {
|
|
|
+ /* For single-pass processing without color quantization,
|
|
|
+ * I have no work to do; just call the upsampler directly.
|
|
|
+ */
|
|
|
+- post->pub.post_process_data = cinfo->upsample->upsample;
|
|
|
++ post->pub._post_process_data = cinfo->upsample->_upsample;
|
|
|
+ }
|
|
|
+ break;
|
|
|
+-#ifdef QUANT_2PASS_SUPPORTED
|
|
|
++#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
|
|
|
+ case JBUF_SAVE_AND_PASS:
|
|
|
+ /* First pass of 2-pass quantization */
|
|
|
+ if (post->whole_image == NULL)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+- post->pub.post_process_data = post_process_prepass;
|
|
|
++ post->pub._post_process_data = post_process_prepass;
|
|
|
+ break;
|
|
|
+ case JBUF_CRANK_DEST:
|
|
|
+ /* Second pass of 2-pass quantization */
|
|
|
+ if (post->whole_image == NULL)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+- post->pub.post_process_data = post_process_2pass;
|
|
|
++ post->pub._post_process_data = post_process_2pass;
|
|
|
+ break;
|
|
|
+-#endif /* QUANT_2PASS_SUPPORTED */
|
|
|
++#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
|
|
|
+ default:
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ post->starting_row = post->next_row = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data in the one-pass (strip buffer) case.
|
|
|
+ * This is used for color precision reduction as well as one-pass quantization.
|
|
|
+ */
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
++
|
|
|
+ METHODDEF(void)
|
|
|
+-post_process_1pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++post_process_1pass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+- JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+ my_post_ptr post = (my_post_ptr)cinfo->post;
|
|
|
+ JDIMENSION num_rows, max_rows;
|
|
|
+
|
|
|
+ /* Fill the buffer, but not more than what we can dump out in one go. */
|
|
|
+ /* Note we rely on the upsampler to detect bottom of image. */
|
|
|
+ max_rows = out_rows_avail - *out_row_ctr;
|
|
|
+ if (max_rows > post->strip_height)
|
|
|
+ max_rows = post->strip_height;
|
|
|
+ num_rows = 0;
|
|
|
+- (*cinfo->upsample->upsample) (cinfo, input_buf, in_row_group_ctr,
|
|
|
+- in_row_groups_avail, post->buffer, &num_rows,
|
|
|
+- max_rows);
|
|
|
++ (*cinfo->upsample->_upsample) (cinfo, input_buf, in_row_group_ctr,
|
|
|
++ in_row_groups_avail, post->buffer, &num_rows,
|
|
|
++ max_rows);
|
|
|
+ /* Quantize and emit data. */
|
|
|
+- (*cinfo->cquantize->color_quantize) (cinfo, post->buffer,
|
|
|
+- output_buf + *out_row_ctr,
|
|
|
+- (int)num_rows);
|
|
|
++ (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer,
|
|
|
++ output_buf + *out_row_ctr,
|
|
|
++ (int)num_rows);
|
|
|
+ *out_row_ctr += num_rows;
|
|
|
+ }
|
|
|
+
|
|
|
++#endif
|
|
|
+
|
|
|
+-#ifdef QUANT_2PASS_SUPPORTED
|
|
|
++
|
|
|
++#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data in the first pass of 2-pass quantization.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-post_process_prepass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++post_process_prepass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+- JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+ my_post_ptr post = (my_post_ptr)cinfo->post;
|
|
|
+ JDIMENSION old_next_row, num_rows;
|
|
|
+
|
|
|
+ /* Reposition virtual buffer if at start of strip. */
|
|
|
+ if (post->next_row == 0) {
|
|
|
+- post->buffer = (*cinfo->mem->access_virt_sarray)
|
|
|
++ post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
+ ((j_common_ptr)cinfo, post->whole_image,
|
|
|
+ post->starting_row, post->strip_height, TRUE);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Upsample some data (up to a strip height's worth). */
|
|
|
+ old_next_row = post->next_row;
|
|
|
+- (*cinfo->upsample->upsample) (cinfo, input_buf, in_row_group_ctr,
|
|
|
+- in_row_groups_avail, post->buffer,
|
|
|
+- &post->next_row, post->strip_height);
|
|
|
++ (*cinfo->upsample->_upsample) (cinfo, input_buf, in_row_group_ctr,
|
|
|
++ in_row_groups_avail, post->buffer,
|
|
|
++ &post->next_row, post->strip_height);
|
|
|
+
|
|
|
+ /* Allow quantizer to scan new data. No data is emitted, */
|
|
|
+ /* but we advance out_row_ctr so outer loop can tell when we're done. */
|
|
|
+ if (post->next_row > old_next_row) {
|
|
|
+ num_rows = post->next_row - old_next_row;
|
|
|
+- (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
|
|
|
+- (JSAMPARRAY)NULL, (int)num_rows);
|
|
|
++ (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer + old_next_row,
|
|
|
++ (_JSAMPARRAY)NULL, (int)num_rows);
|
|
|
+ *out_row_ctr += num_rows;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Advance if we filled the strip. */
|
|
|
+ if (post->next_row >= post->strip_height) {
|
|
|
+ post->starting_row += post->strip_height;
|
|
|
+ post->next_row = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Process some data in the second pass of 2-pass quantization.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-post_process_2pass(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++post_process_2pass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+- JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+ my_post_ptr post = (my_post_ptr)cinfo->post;
|
|
|
+ JDIMENSION num_rows, max_rows;
|
|
|
+
|
|
|
+ /* Reposition virtual buffer if at start of strip. */
|
|
|
+ if (post->next_row == 0) {
|
|
|
+- post->buffer = (*cinfo->mem->access_virt_sarray)
|
|
|
++ post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
|
|
|
+ ((j_common_ptr)cinfo, post->whole_image,
|
|
|
+ post->starting_row, post->strip_height, FALSE);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Determine number of rows to emit. */
|
|
|
+ num_rows = post->strip_height - post->next_row; /* available in strip */
|
|
|
+ max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
|
|
|
+ if (num_rows > max_rows)
|
|
|
+ num_rows = max_rows;
|
|
|
+ /* We have to check bottom of image here, can't depend on upsampler. */
|
|
|
+ max_rows = cinfo->output_height - post->starting_row;
|
|
|
+ if (num_rows > max_rows)
|
|
|
+ num_rows = max_rows;
|
|
|
+
|
|
|
+ /* Quantize and emit data. */
|
|
|
+- (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + post->next_row,
|
|
|
+- output_buf + *out_row_ctr,
|
|
|
+- (int)num_rows);
|
|
|
++ (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer + post->next_row,
|
|
|
++ output_buf + *out_row_ctr,
|
|
|
++ (int)num_rows);
|
|
|
+ *out_row_ctr += num_rows;
|
|
|
+
|
|
|
+ /* Advance if we filled the strip. */
|
|
|
+ post->next_row += num_rows;
|
|
|
+ if (post->next_row >= post->strip_height) {
|
|
|
+ post->starting_row += post->strip_height;
|
|
|
+ post->next_row = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+-#endif /* QUANT_2PASS_SUPPORTED */
|
|
|
++#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize postprocessing controller.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
++_jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
|
|
|
+ {
|
|
|
+ my_post_ptr post;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ post = (my_post_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_post_controller));
|
|
|
+ cinfo->post = (struct jpeg_d_post_controller *)post;
|
|
|
+ post->pub.start_pass = start_pass_dpost;
|
|
|
+ post->whole_image = NULL; /* flag for no virtual arrays */
|
|
|
+ post->buffer = NULL; /* flag for no strip buffer */
|
|
|
+
|
|
|
+ /* Create the quantization buffer, if needed */
|
|
|
+ if (cinfo->quantize_colors) {
|
|
|
++#if BITS_IN_JSAMPLE != 16
|
|
|
+ /* The buffer strip height is max_v_samp_factor, which is typically
|
|
|
+ * an efficient number of rows for upsampling to return.
|
|
|
+ * (In the presence of output rescaling, we might want to be smarter?)
|
|
|
+ */
|
|
|
+ post->strip_height = (JDIMENSION)cinfo->max_v_samp_factor;
|
|
|
+ if (need_full_buffer) {
|
|
|
+ /* Two-pass color quantization: need full-image storage. */
|
|
|
+ /* We round up the number of rows to a multiple of the strip height. */
|
|
|
+@@ -280,15 +296,20 @@ jinit_d_post_controller(j_decompress_ptr
|
|
|
+ (JDIMENSION)jround_up((long)cinfo->output_height,
|
|
|
+ (long)post->strip_height),
|
|
|
+ post->strip_height);
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
|
|
|
+ #endif /* QUANT_2PASS_SUPPORTED */
|
|
|
+ } else {
|
|
|
+ /* One-pass color quantization: just make a strip buffer. */
|
|
|
+- post->buffer = (*cinfo->mem->alloc_sarray)
|
|
|
++ post->buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ cinfo->output_width * cinfo->out_color_components,
|
|
|
+ post->strip_height);
|
|
|
+ }
|
|
|
++#else
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++#endif
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jdsample.c b/media/libjpeg/jdsample.c
|
|
|
+--- a/media/libjpeg/jdsample.c
|
|
|
++++ b/media/libjpeg/jdsample.c
|
|
|
+@@ -1,16 +1,16 @@
|
|
|
+ /*
|
|
|
+ * jdsample.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+- * Copyright (C) 2010, 2015-2016, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2015-2016, 2022, D. R. Commander.
|
|
|
+ * Copyright (C) 2014, MIPS Technologies, Inc., California.
|
|
|
+ * Copyright (C) 2015, Google, Inc.
|
|
|
+ * Copyright (C) 2019-2020, Arm Limited.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains upsampling routines.
|
|
|
+ *
|
|
|
+@@ -23,20 +23,22 @@
|
|
|
+ * An excellent reference for image resampling is
|
|
|
+ * Digital Image Warping, George Wolberg, 1990.
|
|
|
+ * Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
|
|
|
+ */
|
|
|
+
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jdsample.h"
|
|
|
+ #include "jsimd.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
+ /*
|
|
|
+ * Initialize for an upsampling pass.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ start_pass_upsample(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
|
|
|
+@@ -52,19 +54,19 @@ start_pass_upsample(j_decompress_ptr cin
|
|
|
+ * Control routine to do upsampling (and color conversion).
|
|
|
+ *
|
|
|
+ * In this version we upsample each component independently.
|
|
|
+ * We upsample one row group into the conversion buffer, then apply
|
|
|
+ * color conversion a row at a time.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-sep_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
++sep_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail,
|
|
|
+- JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
|
|
|
++ _JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail)
|
|
|
+ {
|
|
|
+ my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ JDIMENSION num_rows;
|
|
|
+
|
|
|
+ /* Fill the conversion buffer, if it's empty */
|
|
|
+@@ -90,19 +92,20 @@ sep_upsample(j_decompress_ptr cinfo, JSA
|
|
|
+ */
|
|
|
+ if (num_rows > upsample->rows_to_go)
|
|
|
+ num_rows = upsample->rows_to_go;
|
|
|
+ /* And not more than what the client can accept: */
|
|
|
+ out_rows_avail -= *out_row_ctr;
|
|
|
+ if (num_rows > out_rows_avail)
|
|
|
+ num_rows = out_rows_avail;
|
|
|
+
|
|
|
+- (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
|
|
|
+- (JDIMENSION)upsample->next_row_out,
|
|
|
+- output_buf + *out_row_ctr, (int)num_rows);
|
|
|
++ (*cinfo->cconvert->_color_convert) (cinfo, upsample->color_buf,
|
|
|
++ (JDIMENSION)upsample->next_row_out,
|
|
|
++ output_buf + *out_row_ctr,
|
|
|
++ (int)num_rows);
|
|
|
+
|
|
|
+ /* Adjust counts */
|
|
|
+ *out_row_ctr += num_rows;
|
|
|
+ upsample->rows_to_go -= num_rows;
|
|
|
+ upsample->next_row_out += num_rows;
|
|
|
+ /* When the buffer is emptied, declare this input row group consumed */
|
|
|
+ if (upsample->next_row_out >= cinfo->max_v_samp_factor)
|
|
|
+ (*in_row_group_ctr)++;
|
|
|
+@@ -119,30 +122,30 @@ sep_upsample(j_decompress_ptr cinfo, JSA
|
|
|
+ * For full-size components, we just make color_buf[ci] point at the
|
|
|
+ * input buffer, and thus avoid copying any data. Note that this is
|
|
|
+ * safe only because sep_upsample doesn't declare the input row group
|
|
|
+ * "consumed" until we are done color converting and emitting it.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ fullsize_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+ *output_data_ptr = input_data;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * This is a no-op version used for "uninteresting" components.
|
|
|
+ * These components will not be referenced by color conversion.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ noop_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+ *output_data_ptr = NULL; /* safety check */
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * This version handles any integral sampling ratios.
|
|
|
+ * This is not used for typical JPEG files, so it need not be fast.
|
|
|
+@@ -151,24 +154,24 @@ noop_upsample(j_decompress_ptr cinfo, jp
|
|
|
+ * pixels. The hi-falutin sampling literature refers to this as a
|
|
|
+ * "box filter". A box filter tends to introduce visible artifacts,
|
|
|
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
|
|
|
+ * you would be well advised to improve this code.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+ my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
|
|
|
+- JSAMPARRAY output_data = *output_data_ptr;
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
+- register JSAMPLE invalue;
|
|
|
++ _JSAMPARRAY output_data = *output_data_ptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPLE invalue;
|
|
|
+ register int h;
|
|
|
+- JSAMPROW outend;
|
|
|
++ _JSAMPROW outend;
|
|
|
+ int h_expand, v_expand;
|
|
|
+ int inrow, outrow;
|
|
|
+
|
|
|
+ h_expand = upsample->h_expand[compptr->component_index];
|
|
|
+ v_expand = upsample->v_expand[compptr->component_index];
|
|
|
+
|
|
|
+ inrow = outrow = 0;
|
|
|
+ while (outrow < cinfo->max_v_samp_factor) {
|
|
|
+@@ -179,38 +182,38 @@ int_upsample(j_decompress_ptr cinfo, jpe
|
|
|
+ while (outptr < outend) {
|
|
|
+ invalue = *inptr++;
|
|
|
+ for (h = h_expand; h > 0; h--) {
|
|
|
+ *outptr++ = invalue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* Generate any additional output rows by duplicating the first one */
|
|
|
+ if (v_expand > 1) {
|
|
|
+- jcopy_sample_rows(output_data, outrow, output_data, outrow + 1,
|
|
|
+- v_expand - 1, cinfo->output_width);
|
|
|
++ _jcopy_sample_rows(output_data, outrow, output_data, outrow + 1,
|
|
|
++ v_expand - 1, cinfo->output_width);
|
|
|
+ }
|
|
|
+ inrow++;
|
|
|
+ outrow += v_expand;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ * It's still a box filter.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+- JSAMPARRAY output_data = *output_data_ptr;
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
+- register JSAMPLE invalue;
|
|
|
+- JSAMPROW outend;
|
|
|
++ _JSAMPARRAY output_data = *output_data_ptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPLE invalue;
|
|
|
++ _JSAMPROW outend;
|
|
|
+ int inrow;
|
|
|
+
|
|
|
+ for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
|
|
|
+ inptr = input_data[inrow];
|
|
|
+ outptr = output_data[inrow];
|
|
|
+ outend = outptr + cinfo->output_width;
|
|
|
+ while (outptr < outend) {
|
|
|
+ invalue = *inptr++;
|
|
|
+@@ -223,36 +226,36 @@ h2v1_upsample(j_decompress_ptr cinfo, jp
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ * It's still a box filter.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+- JSAMPARRAY output_data = *output_data_ptr;
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
+- register JSAMPLE invalue;
|
|
|
+- JSAMPROW outend;
|
|
|
++ _JSAMPARRAY output_data = *output_data_ptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPLE invalue;
|
|
|
++ _JSAMPROW outend;
|
|
|
+ int inrow, outrow;
|
|
|
+
|
|
|
+ inrow = outrow = 0;
|
|
|
+ while (outrow < cinfo->max_v_samp_factor) {
|
|
|
+ inptr = input_data[inrow];
|
|
|
+ outptr = output_data[outrow];
|
|
|
+ outend = outptr + cinfo->output_width;
|
|
|
+ while (outptr < outend) {
|
|
|
+ invalue = *inptr++;
|
|
|
+ *outptr++ = invalue;
|
|
|
+ *outptr++ = invalue;
|
|
|
+ }
|
|
|
+- jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, 1,
|
|
|
+- cinfo->output_width);
|
|
|
++ _jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, 1,
|
|
|
++ cinfo->output_width);
|
|
|
+ inrow++;
|
|
|
+ outrow += 2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+@@ -266,60 +269,60 @@ h2v2_upsample(j_decompress_ptr cinfo, jp
|
|
|
+ * integer, we do not want to always round 0.5 up to the next integer.
|
|
|
+ * If we did that, we'd introduce a noticeable bias towards larger values.
|
|
|
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
|
|
|
+ * alternate pixel locations (a simple ordered dither pattern).
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+- JSAMPARRAY output_data = *output_data_ptr;
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
++ _JSAMPARRAY output_data = *output_data_ptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
+ register int invalue;
|
|
|
+ register JDIMENSION colctr;
|
|
|
+ int inrow;
|
|
|
+
|
|
|
+ for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
|
|
|
+ inptr = input_data[inrow];
|
|
|
+ outptr = output_data[inrow];
|
|
|
+ /* Special case for first column */
|
|
|
+ invalue = *inptr++;
|
|
|
+- *outptr++ = (JSAMPLE)invalue;
|
|
|
+- *outptr++ = (JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
|
|
|
++ *outptr++ = (_JSAMPLE)invalue;
|
|
|
++ *outptr++ = (_JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
|
|
|
+
|
|
|
+ for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
|
|
|
+ /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
|
|
|
+ invalue = (*inptr++) * 3;
|
|
|
+- *outptr++ = (JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
|
|
|
+- *outptr++ = (JSAMPLE)((invalue + inptr[0] + 2) >> 2);
|
|
|
++ *outptr++ = (_JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
|
|
|
++ *outptr++ = (_JSAMPLE)((invalue + inptr[0] + 2) >> 2);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Special case for last column */
|
|
|
+ invalue = *inptr;
|
|
|
+- *outptr++ = (JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
|
|
|
+- *outptr++ = (JSAMPLE)invalue;
|
|
|
++ *outptr++ = (_JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
|
|
|
++ *outptr++ = (_JSAMPLE)invalue;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Fancy processing for 1:1 horizontal and 2:1 vertical (4:4:0 subsampling).
|
|
|
+ *
|
|
|
+ * This is a less common case, but it can be encountered when losslessly
|
|
|
+ * rotating/transposing a JPEG file that uses 4:2:2 chroma subsampling.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+- JSAMPARRAY output_data = *output_data_ptr;
|
|
|
+- JSAMPROW inptr0, inptr1, outptr;
|
|
|
++ _JSAMPARRAY output_data = *output_data_ptr;
|
|
|
++ _JSAMPROW inptr0, inptr1, outptr;
|
|
|
+ #if BITS_IN_JSAMPLE == 8
|
|
|
+ int thiscolsum, bias;
|
|
|
+ #else
|
|
|
+ JLONG thiscolsum, bias;
|
|
|
+ #endif
|
|
|
+ JDIMENSION colctr;
|
|
|
+ int inrow, outrow, v;
|
|
|
+
|
|
|
+@@ -334,17 +337,17 @@ h1v2_fancy_upsample(j_decompress_ptr cin
|
|
|
+ } else { /* next nearest is row below */
|
|
|
+ inptr1 = input_data[inrow + 1];
|
|
|
+ bias = 2;
|
|
|
+ }
|
|
|
+ outptr = output_data[outrow++];
|
|
|
+
|
|
|
+ for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
|
|
|
+ thiscolsum = (*inptr0++) * 3 + (*inptr1++);
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum + bias) >> 2);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum + bias) >> 2);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ inrow++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+@@ -352,20 +355,20 @@ h1v2_fancy_upsample(j_decompress_ptr cin
|
|
|
+ * Again a triangle filter; see comments for h2v1 case, above.
|
|
|
+ *
|
|
|
+ * It is OK for us to reference the adjacent input rows because we demanded
|
|
|
+ * context from the main buffer controller (see initialization code).
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
++ _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
|
|
|
+ {
|
|
|
+- JSAMPARRAY output_data = *output_data_ptr;
|
|
|
+- register JSAMPROW inptr0, inptr1, outptr;
|
|
|
++ _JSAMPARRAY output_data = *output_data_ptr;
|
|
|
++ register _JSAMPROW inptr0, inptr1, outptr;
|
|
|
+ #if BITS_IN_JSAMPLE == 8
|
|
|
+ register int thiscolsum, lastcolsum, nextcolsum;
|
|
|
+ #else
|
|
|
+ register JLONG thiscolsum, lastcolsum, nextcolsum;
|
|
|
+ #endif
|
|
|
+ register JDIMENSION colctr;
|
|
|
+ int inrow, outrow, v;
|
|
|
+
|
|
|
+@@ -378,58 +381,61 @@ h2v2_fancy_upsample(j_decompress_ptr cin
|
|
|
+ inptr1 = input_data[inrow - 1];
|
|
|
+ else /* next nearest is row below */
|
|
|
+ inptr1 = input_data[inrow + 1];
|
|
|
+ outptr = output_data[outrow++];
|
|
|
+
|
|
|
+ /* Special case for first column */
|
|
|
+ thiscolsum = (*inptr0++) * 3 + (*inptr1++);
|
|
|
+ nextcolsum = (*inptr0++) * 3 + (*inptr1++);
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum * 4 + 8) >> 4);
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum * 4 + 8) >> 4);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
|
|
|
+ lastcolsum = thiscolsum; thiscolsum = nextcolsum;
|
|
|
+
|
|
|
+ for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
|
|
|
+ /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
|
|
|
+ /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
|
|
|
+ nextcolsum = (*inptr0++) * 3 + (*inptr1++);
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
|
|
|
+ lastcolsum = thiscolsum; thiscolsum = nextcolsum;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Special case for last column */
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
|
|
|
+- *outptr++ = (JSAMPLE)((thiscolsum * 4 + 7) >> 4);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
|
|
|
++ *outptr++ = (_JSAMPLE)((thiscolsum * 4 + 7) >> 4);
|
|
|
+ }
|
|
|
+ inrow++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Module initialization routine for upsampling.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_upsampler(j_decompress_ptr cinfo)
|
|
|
++_jinit_upsampler(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_upsample_ptr upsample;
|
|
|
+ int ci;
|
|
|
+ jpeg_component_info *compptr;
|
|
|
+ boolean need_buffer, do_fancy;
|
|
|
+ int h_in_group, v_in_group, h_out_group, v_out_group;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ if (!cinfo->master->jinit_upsampler_no_alloc) {
|
|
|
+ upsample = (my_upsample_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_upsampler));
|
|
|
+ cinfo->upsample = (struct jpeg_upsampler *)upsample;
|
|
|
+ upsample->pub.start_pass = start_pass_upsample;
|
|
|
+- upsample->pub.upsample = sep_upsample;
|
|
|
++ upsample->pub._upsample = sep_upsample;
|
|
|
+ upsample->pub.need_context_rows = FALSE; /* until we find out differently */
|
|
|
+ } else
|
|
|
+ upsample = (my_upsample_ptr)cinfo->upsample;
|
|
|
+
|
|
|
+ if (cinfo->CCIR601_sampling) /* this isn't supported */
|
|
|
+ ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
|
|
|
+
|
|
|
+ /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
|
|
|
+@@ -459,66 +465,76 @@ jinit_upsampler(j_decompress_ptr cinfo)
|
|
|
+ need_buffer = FALSE;
|
|
|
+ } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
|
|
|
+ /* Fullsize components can be processed without any work. */
|
|
|
+ upsample->methods[ci] = fullsize_upsample;
|
|
|
+ need_buffer = FALSE;
|
|
|
+ } else if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
|
|
|
+ /* Special cases for 2h1v upsampling */
|
|
|
+ if (do_fancy && compptr->downsampled_width > 2) {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v1_fancy_upsample())
|
|
|
+ upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ upsample->methods[ci] = h2v1_fancy_upsample;
|
|
|
+ } else {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v1_upsample())
|
|
|
+ upsample->methods[ci] = jsimd_h2v1_upsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ upsample->methods[ci] = h2v1_upsample;
|
|
|
+ }
|
|
|
+ } else if (h_in_group == h_out_group &&
|
|
|
+ v_in_group * 2 == v_out_group && do_fancy) {
|
|
|
+ /* Non-fancy upsampling is handled by the generic method */
|
|
|
+-#if defined(__arm__) || defined(__aarch64__) || \
|
|
|
+- defined(_M_ARM) || defined(_M_ARM64)
|
|
|
++#if defined(WITH_SIMD) && (defined(__arm__) || defined(__aarch64__) || \
|
|
|
++ defined(_M_ARM) || defined(_M_ARM64))
|
|
|
+ if (jsimd_can_h1v2_fancy_upsample())
|
|
|
+ upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+ upsample->methods[ci] = h1v2_fancy_upsample;
|
|
|
+ upsample->pub.need_context_rows = TRUE;
|
|
|
+ } else if (h_in_group * 2 == h_out_group &&
|
|
|
+ v_in_group * 2 == v_out_group) {
|
|
|
+ /* Special cases for 2h2v upsampling */
|
|
|
+ if (do_fancy && compptr->downsampled_width > 2) {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v2_fancy_upsample())
|
|
|
+ upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ upsample->methods[ci] = h2v2_fancy_upsample;
|
|
|
+ upsample->pub.need_context_rows = TRUE;
|
|
|
+ } else {
|
|
|
++#ifdef WITH_SIMD
|
|
|
+ if (jsimd_can_h2v2_upsample())
|
|
|
+ upsample->methods[ci] = jsimd_h2v2_upsample;
|
|
|
+ else
|
|
|
++#endif
|
|
|
+ upsample->methods[ci] = h2v2_upsample;
|
|
|
+ }
|
|
|
+ } else if ((h_out_group % h_in_group) == 0 &&
|
|
|
+ (v_out_group % v_in_group) == 0) {
|
|
|
+ /* Generic integral-factors upsampling method */
|
|
|
+-#if defined(__mips__)
|
|
|
++#if defined(WITH_SIMD) && defined(__mips__)
|
|
|
+ if (jsimd_can_int_upsample())
|
|
|
+ upsample->methods[ci] = jsimd_int_upsample;
|
|
|
+ else
|
|
|
+ #endif
|
|
|
+ upsample->methods[ci] = int_upsample;
|
|
|
+ upsample->h_expand[ci] = (UINT8)(h_out_group / h_in_group);
|
|
|
+ upsample->v_expand[ci] = (UINT8)(v_out_group / v_in_group);
|
|
|
+ } else
|
|
|
+ ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
|
|
|
+ if (need_buffer && !cinfo->master->jinit_upsampler_no_alloc) {
|
|
|
+- upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
|
|
|
++ upsample->color_buf[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (JDIMENSION)jround_up((long)cinfo->output_width,
|
|
|
+ (long)cinfo->max_h_samp_factor),
|
|
|
+ (JDIMENSION)cinfo->max_v_samp_factor);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
+diff --git a/media/libjpeg/jdsample.h b/media/libjpeg/jdsample.h
|
|
|
+--- a/media/libjpeg/jdsample.h
|
|
|
++++ b/media/libjpeg/jdsample.h
|
|
|
+@@ -1,40 +1,43 @@
|
|
|
+ /*
|
|
|
+ * jdsample.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1996, Thomas G. Lane.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Pointer to routine to upsample a single component */
|
|
|
+ typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
|
|
|
+ jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data,
|
|
|
+- JSAMPARRAY *output_data_ptr);
|
|
|
++ _JSAMPARRAY input_data,
|
|
|
++ _JSAMPARRAY *output_data_ptr);
|
|
|
+
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_upsampler pub; /* public fields */
|
|
|
+
|
|
|
+ /* Color conversion buffer. When using separate upsampling and color
|
|
|
+ * conversion steps, this buffer holds one upsampled row group until it
|
|
|
+ * has been color converted and output.
|
|
|
+ * Note: we do not allocate any storage for component(s) which are full-size,
|
|
|
+ * ie do not need rescaling. The corresponding entry of color_buf[] is
|
|
|
+ * simply set to point to the input data array, thereby avoiding copying.
|
|
|
+ */
|
|
|
+- JSAMPARRAY color_buf[MAX_COMPONENTS];
|
|
|
++ _JSAMPARRAY color_buf[MAX_COMPONENTS];
|
|
|
+
|
|
|
+ /* Per-component upsampling method pointers */
|
|
|
+ upsample1_ptr methods[MAX_COMPONENTS];
|
|
|
+
|
|
|
+ int next_row_out; /* counts rows emitted from color_buf */
|
|
|
+ JDIMENSION rows_to_go; /* counts rows remaining in image */
|
|
|
+
|
|
|
+ /* Height of an input row group for each component. */
|
|
|
+diff --git a/media/libjpeg/jdtrans.c b/media/libjpeg/jdtrans.c
|
|
|
+--- a/media/libjpeg/jdtrans.c
|
|
|
++++ b/media/libjpeg/jdtrans.c
|
|
|
+@@ -1,27 +1,27 @@
|
|
|
+ /*
|
|
|
+ * jdtrans.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1995-1997, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains library routines for transcoding decompression,
|
|
|
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
|
|
|
+ * The routines in jdapimin.c will also be needed by a transcoder.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
+-#include "jpegcomp.h"
|
|
|
++#include "jpegapicomp.h"
|
|
|
+
|
|
|
+
|
|
|
+ /* Forward declarations */
|
|
|
+ LOCAL(void) transdecode_master_selection(j_decompress_ptr cinfo);
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Read the coefficient arrays from a JPEG file.
|
|
|
+@@ -43,16 +43,19 @@ LOCAL(void) transdecode_master_selection
|
|
|
+ *
|
|
|
+ * Returns NULL if suspended. This case need be checked only if
|
|
|
+ * a suspending data source is used.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(jvirt_barray_ptr *)
|
|
|
+ jpeg_read_coefficients(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ if (cinfo->global_state == DSTATE_READY) {
|
|
|
+ /* First call: initialize active modules */
|
|
|
+ transdecode_master_selection(cinfo);
|
|
|
+ cinfo->global_state = DSTATE_RDCOEFS;
|
|
|
+ }
|
|
|
+ if (cinfo->global_state == DSTATE_RDCOEFS) {
|
|
|
+ /* Absorb whole file into the coef buffer */
|
|
|
+ for (;;) {
|
|
|
+@@ -122,17 +125,20 @@ transdecode_master_selection(j_decompres
|
|
|
+ #else
|
|
|
+ ERREXIT(cinfo, JERR_NOT_COMPILED);
|
|
|
+ #endif
|
|
|
+ } else
|
|
|
+ jinit_huff_decoder(cinfo);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Always get a full-image coefficient buffer. */
|
|
|
+- jinit_d_coef_controller(cinfo, TRUE);
|
|
|
++ if (cinfo->data_precision == 12)
|
|
|
++ j12init_d_coef_controller(cinfo, TRUE);
|
|
|
++ else
|
|
|
++ jinit_d_coef_controller(cinfo, TRUE);
|
|
|
+
|
|
|
+ /* We can now tell the memory manager to allocate virtual arrays. */
|
|
|
+ (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
|
|
|
+
|
|
|
+ /* Initialize input side of decompressor to consume first scan. */
|
|
|
+ (*cinfo->inputctl->start_input_pass) (cinfo);
|
|
|
+
|
|
|
+ /* Initialize progress monitoring. */
|
|
|
+diff --git a/media/libjpeg/jerror.c b/media/libjpeg/jerror.c
|
|
|
+--- a/media/libjpeg/jerror.c
|
|
|
++++ b/media/libjpeg/jerror.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jerror.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1998, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2022, 2024, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains simple error-reporting and trace-message routines.
|
|
|
+ * These are suitable for Unix-like systems and others where writing to
|
|
|
+ * stderr is the right thing to do. Many applications will want to replace
|
|
|
+ * some or all of these routines.
|
|
|
+ *
|
|
|
+@@ -41,17 +41,17 @@
|
|
|
+ * We do this from the master message list in jerror.h by re-reading
|
|
|
+ * jerror.h with a suitable definition for macro JMESSAGE.
|
|
|
+ * The message table is made an external symbol just in case any applications
|
|
|
+ * want to refer to it directly.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JMESSAGE(code, string) string,
|
|
|
+
|
|
|
+-const char * const jpeg_std_message_table[] = {
|
|
|
++static const char * const jpeg_std_message_table[] = {
|
|
|
+ #include "jerror.h"
|
|
|
+ NULL
|
|
|
+ };
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Error exit handler: must not return to caller.
|
|
|
+ *
|
|
|
+@@ -224,28 +224,22 @@ reset_error_mgr(j_common_ptr cinfo)
|
|
|
+ *
|
|
|
+ * cinfo.err = jpeg_std_error(&err);
|
|
|
+ * after which the application may override some of the methods.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(struct jpeg_error_mgr *)
|
|
|
+ jpeg_std_error(struct jpeg_error_mgr *err)
|
|
|
+ {
|
|
|
++ memset(err, 0, sizeof(struct jpeg_error_mgr));
|
|
|
++
|
|
|
+ err->error_exit = error_exit;
|
|
|
+ err->emit_message = emit_message;
|
|
|
+ err->output_message = output_message;
|
|
|
+ err->format_message = format_message;
|
|
|
+ err->reset_error_mgr = reset_error_mgr;
|
|
|
+
|
|
|
+- err->trace_level = 0; /* default = no tracing */
|
|
|
+- err->num_warnings = 0; /* no warnings emitted yet */
|
|
|
+- err->msg_code = 0; /* may be useful as a flag for "no error" */
|
|
|
+-
|
|
|
+ /* Initialize message table pointers */
|
|
|
+ err->jpeg_message_table = jpeg_std_message_table;
|
|
|
+ err->last_jpeg_message = (int)JMSG_LASTMSGCODE - 1;
|
|
|
+
|
|
|
+- err->addon_message_table = NULL;
|
|
|
+- err->first_addon_message = 0; /* for safety */
|
|
|
+- err->last_addon_message = 0;
|
|
|
+-
|
|
|
+ return err;
|
|
|
+ }
|
|
|
+diff --git a/media/libjpeg/jerror.h b/media/libjpeg/jerror.h
|
|
|
+--- a/media/libjpeg/jerror.h
|
|
|
++++ b/media/libjpeg/jerror.h
|
|
|
+@@ -1,16 +1,18 @@
|
|
|
+ /*
|
|
|
+ * jerror.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1997, Thomas G. Lane.
|
|
|
+ * Modified 1997-2009 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2014, 2017, 2021-2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2014, 2017, 2021-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file defines the error and message codes for the JPEG library.
|
|
|
+ * Edit this file to add new codes, or to translate the message strings to
|
|
|
+ * some other language.
|
|
|
+ * A set of error-reporting macros are defined too. Some applications using
|
|
|
+ * the JPEG library may wish to include this file to get the error codes
|
|
|
+@@ -48,35 +50,36 @@ JMESSAGE(JERR_ARITH_NOTIMPL, "Sorry, ari
|
|
|
+ #endif
|
|
|
+ JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
|
|
|
+ JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
|
|
|
+ JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
|
|
|
+ JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
|
|
|
+ #endif
|
|
|
+-JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
|
|
|
++JMESSAGE(JERR_BAD_DCT_COEF,
|
|
|
++ "DCT coefficient (lossy) or spatial difference (lossless) out of range")
|
|
|
+ JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ JMESSAGE(JERR_BAD_DROP_SAMPLING,
|
|
|
+ "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
|
|
|
+ #endif
|
|
|
+ JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
|
|
|
+ JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
|
|
|
+ JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
|
|
|
+ JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
|
|
|
+ JMESSAGE(JERR_BAD_LIB_VERSION,
|
|
|
+ "Wrong JPEG library version: library is %d, caller expects %d")
|
|
|
+ JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
|
|
|
+ JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
|
|
|
+ JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
|
|
|
+ JMESSAGE(JERR_BAD_PROGRESSION,
|
|
|
+- "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
|
|
|
++ "Invalid progressive/lossless parameters Ss=%d Se=%d Ah=%d Al=%d")
|
|
|
+ JMESSAGE(JERR_BAD_PROG_SCRIPT,
|
|
|
+- "Invalid progressive parameters at scan script entry %d")
|
|
|
++ "Invalid progressive/lossless parameters at scan script entry %d")
|
|
|
+ JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
|
|
|
+ JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
|
|
|
+ JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
|
|
|
+ JMESSAGE(JERR_BAD_STRUCT_SIZE,
|
|
|
+ "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
|
|
|
+ JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
|
|
|
+ JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
|
|
|
+ JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
|
|
|
+@@ -103,17 +106,17 @@ JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
|
|
|
+ "Cannot transcode due to multiple use of quantization table %d")
|
|
|
+ JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
|
|
|
+ JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
|
|
|
+ JMESSAGE(JERR_NOTIMPL, "Requested features are incompatible")
|
|
|
+ JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
|
|
|
+ #endif
|
|
|
+-JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
|
|
|
++JMESSAGE(JERR_NO_BACKING_STORE, "Memory limit exceeded")
|
|
|
+ JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
|
|
|
+ JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
|
|
|
+ JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
|
|
|
+ JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
|
|
|
+ JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
|
|
|
+ JMESSAGE(JERR_QUANT_COMPONENTS,
|
|
|
+ "Cannot quantize more than %d color components")
|
|
|
+ JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
|
|
|
+@@ -175,17 +178,17 @@ JMESSAGE(JTRC_TFILE_CLOSE, "Closed tempo
|
|
|
+ JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
|
|
|
+ JMESSAGE(JTRC_THUMB_JPEG,
|
|
|
+ "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
|
|
|
+ JMESSAGE(JTRC_THUMB_PALETTE,
|
|
|
+ "JFIF extension marker: palette thumbnail image, length %u")
|
|
|
+ JMESSAGE(JTRC_THUMB_RGB,
|
|
|
+ "JFIF extension marker: RGB thumbnail image, length %u")
|
|
|
+ JMESSAGE(JTRC_UNKNOWN_IDS,
|
|
|
+- "Unrecognized component IDs %d %d %d, assuming YCbCr")
|
|
|
++ "Unrecognized component IDs %d %d %d, assuming YCbCr (lossy) or RGB (lossless)")
|
|
|
+ JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
|
|
|
+ JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
|
|
|
+ JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
|
|
|
+ #endif
|
|
|
+ JMESSAGE(JWRN_BOGUS_PROGRESSION,
|
|
|
+ "Inconsistent progression sequence for component %d coefficient %d")
|
|
|
+@@ -206,16 +209,18 @@ JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmeti
|
|
|
+ JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
|
|
|
+ #endif
|
|
|
+ #endif
|
|
|
+ JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
|
|
|
+ #if JPEG_LIB_VERSION < 70
|
|
|
+ JMESSAGE(JERR_BAD_DROP_SAMPLING,
|
|
|
+ "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
|
|
|
+ #endif
|
|
|
++JMESSAGE(JERR_BAD_RESTART,
|
|
|
++ "Invalid restart interval %d; must be an integer multiple of the number of MCUs in an MCU row (%d)")
|
|
|
+
|
|
|
+ #ifdef JMAKE_ENUM_LIST
|
|
|
+
|
|
|
+ JMSG_LASTMSGCODE
|
|
|
+ } J_MESSAGE_CODE;
|
|
|
+
|
|
|
+ #undef JMAKE_ENUM_LIST
|
|
|
+ #endif /* JMAKE_ENUM_LIST */
|
|
|
+diff --git a/media/libjpeg/jfdctfst.c b/media/libjpeg/jfdctfst.c
|
|
|
+--- a/media/libjpeg/jfdctfst.c
|
|
|
++++ b/media/libjpeg/jfdctfst.c
|
|
|
+@@ -109,17 +109,17 @@
|
|
|
+ #define MULTIPLY(var, const) ((DCTELEM)DESCALE((var) * (const), CONST_BITS))
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform the forward DCT on one block of samples.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_fdct_ifast(DCTELEM *data)
|
|
|
++_jpeg_fdct_ifast(DCTELEM *data)
|
|
|
+ {
|
|
|
+ DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
|
|
+ DCTELEM tmp10, tmp11, tmp12, tmp13;
|
|
|
+ DCTELEM z1, z2, z3, z4, z5, z11, z13;
|
|
|
+ DCTELEM *dataptr;
|
|
|
+ int ctr;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jfdctint.c b/media/libjpeg/jfdctint.c
|
|
|
+--- a/media/libjpeg/jfdctint.c
|
|
|
++++ b/media/libjpeg/jfdctint.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jfdctint.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2015, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2015, 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains a slower but more accurate integer implementation of the
|
|
|
+ * forward DCT (Discrete Cosine Transform).
|
|
|
+ *
|
|
|
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
|
|
|
+ * on each column. Direct algorithms are also available, but they are
|
|
|
+@@ -135,17 +135,17 @@
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform the forward DCT on one block of samples.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_fdct_islow(DCTELEM *data)
|
|
|
++_jpeg_fdct_islow(DCTELEM *data)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13;
|
|
|
+ JLONG z1, z2, z3, z4, z5;
|
|
|
+ DCTELEM *dataptr;
|
|
|
+ int ctr;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jidctflt.c b/media/libjpeg/jidctflt.c
|
|
|
+--- a/media/libjpeg/jidctflt.c
|
|
|
++++ b/media/libjpeg/jidctflt.c
|
|
|
+@@ -1,16 +1,16 @@
|
|
|
+ /*
|
|
|
+ * jidctflt.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1998, Thomas G. Lane.
|
|
|
+ * Modified 2010 by Guido Vollbeding.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2014, D. R. Commander.
|
|
|
++ * Copyright (C) 2014, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains a floating-point implementation of the
|
|
|
+ * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
|
|
|
+ * must also perform dequantization of the input coefficients.
|
|
|
+ *
|
|
|
+ * This implementation should be more accurate than either of the integer
|
|
|
+@@ -64,28 +64,28 @@
|
|
|
+ #define DEQUANTIZE(coef, quantval) (((FAST_FLOAT)(coef)) * (quantval))
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
|
|
+ FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
|
|
|
+ FAST_FLOAT z5, z10, z11, z12, z13;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ FLOAT_MULT_TYPE *quantptr;
|
|
|
+ FAST_FLOAT *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int ctr;
|
|
|
+ FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
|
|
|
+ #define _0_125 ((FLOAT_MULT_TYPE)0.125)
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (FLOAT_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -187,17 +187,17 @@ jpeg_idct_float(j_decompress_ptr cinfo,
|
|
|
+ * However, the column calculation has created many nonzero AC terms, so
|
|
|
+ * the simplification applies less often (typically 5% to 10% of the time).
|
|
|
+ * And testing floats for zero is relatively expensive, so we don't bother.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Even part */
|
|
|
+
|
|
|
+ /* Apply signed->unsigned and prepare float->int conversion */
|
|
|
+- z5 = wsptr[0] + ((FAST_FLOAT)CENTERJSAMPLE + (FAST_FLOAT)0.5);
|
|
|
++ z5 = wsptr[0] + ((FAST_FLOAT)_CENTERJSAMPLE + (FAST_FLOAT)0.5);
|
|
|
+ tmp10 = z5 + wsptr[4];
|
|
|
+ tmp11 = z5 - wsptr[4];
|
|
|
+
|
|
|
+ tmp13 = wsptr[2] + wsptr[6];
|
|
|
+ tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT)1.414213562) - tmp13;
|
|
|
+
|
|
|
+ tmp0 = tmp10 + tmp13;
|
|
|
+ tmp3 = tmp10 - tmp13;
|
|
|
+diff --git a/media/libjpeg/jidctfst.c b/media/libjpeg/jidctfst.c
|
|
|
+--- a/media/libjpeg/jidctfst.c
|
|
|
++++ b/media/libjpeg/jidctfst.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jidctfst.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1998, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2015, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains a fast, not so accurate integer implementation of the
|
|
|
+ * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
|
|
|
+ * must also perform dequantization of the input coefficients.
|
|
|
+ *
|
|
|
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
|
|
|
+@@ -59,20 +59,20 @@
|
|
|
+ * This compromises accuracy slightly, but it lets us save a few shifts.
|
|
|
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
|
|
|
+ * everywhere except in the multiplications proper; this saves a good deal
|
|
|
+ * of work on 16-bit-int machines.
|
|
|
+ *
|
|
|
+ * The dequantized coefficients are not integers because the AA&N scaling
|
|
|
+ * factors have been incorporated. We represent them scaled up by PASS1_BITS,
|
|
|
+ * so that the first and second IDCT rounds have the same input scaling.
|
|
|
+- * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
|
|
|
++ * For 8-bit samples, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
|
|
|
+ * avoid a descaling shift; this compromises accuracy rather drastically
|
|
|
+ * for small quantization table entries, but it saves a lot of shifts.
|
|
|
+- * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
|
|
|
++ * For 12-bit samples, there's no hope of using 16x16 multiplies anyway,
|
|
|
+ * so we use a much larger scaling factor to preserve accuracy.
|
|
|
+ *
|
|
|
+ * A final compromise is to represent the multiplicative constants to only
|
|
|
+ * 8 fractional bits, rather than 13. This saves some shifting work on some
|
|
|
+ * machines, and may also reduce the cost of multiplication (since there
|
|
|
+ * are fewer one-bits in the constants).
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -163,28 +163,28 @@
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
|
|
+ DCTELEM tmp10, tmp11, tmp12, tmp13;
|
|
|
+ DCTELEM z5, z10, z11, z12, z13;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ IFAST_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[DCTSIZE2]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS /* for DESCALE */
|
|
|
+ ISHIFT_TEMPS /* for IDESCALE */
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+@@ -291,17 +291,17 @@ jpeg_idct_ifast(j_decompress_ptr cinfo,
|
|
|
+ * test takes more time than it's worth. In that case this section
|
|
|
+ * may be commented out.
|
|
|
+ */
|
|
|
+
|
|
|
+ #ifndef NO_ZERO_ROW_TEST
|
|
|
+ if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
|
|
|
+ wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
|
|
|
+ /* AC terms all zero */
|
|
|
+- JSAMPLE dcval =
|
|
|
++ _JSAMPLE dcval =
|
|
|
+ range_limit[IDESCALE(wsptr[0], PASS1_BITS + 3) & RANGE_MASK];
|
|
|
+
|
|
|
+ outptr[0] = dcval;
|
|
|
+ outptr[1] = dcval;
|
|
|
+ outptr[2] = dcval;
|
|
|
+ outptr[3] = dcval;
|
|
|
+ outptr[4] = dcval;
|
|
|
+ outptr[5] = dcval;
|
|
|
+diff --git a/media/libjpeg/jidctint.c b/media/libjpeg/jidctint.c
|
|
|
+--- a/media/libjpeg/jidctint.c
|
|
|
++++ b/media/libjpeg/jidctint.c
|
|
|
+@@ -1,16 +1,16 @@
|
|
|
+ /*
|
|
|
+ * jidctint.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1998, Thomas G. Lane.
|
|
|
+ * Modification developed 2002-2018 by Guido Vollbeding.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2015, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2015, 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains a slower but more accurate integer implementation of the
|
|
|
+ * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
|
|
|
+ * must also perform dequantization of the input coefficients.
|
|
|
+ *
|
|
|
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
|
|
|
+@@ -165,28 +165,28 @@
|
|
|
+ #define DEQUANTIZE(coef, quantval) (((ISLOW_MULT_TYPE)(coef)) * (quantval))
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp2, tmp3;
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13;
|
|
|
+ JLONG z1, z2, z3, z4, z5;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[DCTSIZE2]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
|
|
|
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
|
|
|
+
|
|
|
+@@ -309,18 +309,18 @@ jpeg_idct_islow(j_decompress_ptr cinfo,
|
|
|
+ * test takes more time than it's worth. In that case this section
|
|
|
+ * may be commented out.
|
|
|
+ */
|
|
|
+
|
|
|
+ #ifndef NO_ZERO_ROW_TEST
|
|
|
+ if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
|
|
|
+ wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
|
|
|
+ /* AC terms all zero */
|
|
|
+- JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
|
|
|
+- PASS1_BITS + 3) & RANGE_MASK];
|
|
|
++ _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
|
|
|
++ PASS1_BITS + 3) & RANGE_MASK];
|
|
|
+
|
|
|
+ outptr[0] = dcval;
|
|
|
+ outptr[1] = dcval;
|
|
|
+ outptr[2] = dcval;
|
|
|
+ outptr[3] = dcval;
|
|
|
+ outptr[4] = dcval;
|
|
|
+ outptr[5] = dcval;
|
|
|
+ outptr[6] = dcval;
|
|
|
+@@ -419,27 +419,27 @@ jpeg_idct_islow(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 7x7 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/14).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
|
|
|
+ JLONG z1, z2, z3;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[7 * 7]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -568,27 +568,27 @@ jpeg_idct_7x7(j_decompress_ptr cinfo, jp
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 6x6 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/12).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
|
|
|
+ JLONG z1, z2, z3;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[6 * 6]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -689,27 +689,27 @@ jpeg_idct_6x6(j_decompress_ptr cinfo, jp
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 5x5 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/10).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp10, tmp11, tmp12;
|
|
|
+ JLONG z1, z2, z3;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[5 * 5]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -804,26 +804,26 @@ jpeg_idct_5x5(j_decompress_ptr cinfo, jp
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 3x3 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/6).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp2, tmp10, tmp12;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[3 * 3]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -894,27 +894,27 @@ jpeg_idct_3x3(j_decompress_ptr cinfo, jp
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 9x9 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/18).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 9]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -1065,28 +1065,28 @@ jpeg_idct_9x9(j_decompress_ptr cinfo, jp
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 10x10 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/20).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24;
|
|
|
+ JLONG z1, z2, z3, z4, z5;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 10]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -1260,28 +1260,28 @@ jpeg_idct_10x10(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing an 11x11 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/22).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 11]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -1454,28 +1454,28 @@ jpeg_idct_11x11(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 12x12 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/24).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 12]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -1670,28 +1670,28 @@ jpeg_idct_12x12(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 13x13 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/26).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 13]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -1898,28 +1898,28 @@ jpeg_idct_13x13(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 14x14 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/28).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 14]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -2124,28 +2124,28 @@ jpeg_idct_14x14(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 15x15 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/30).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 15]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -2366,28 +2366,28 @@ jpeg_idct_15x15(j_decompress_ptr cinfo,
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a 16x16 output block.
|
|
|
+ *
|
|
|
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
|
|
|
+ * cK represents sqrt(2) * cos(K*pi/32).
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
|
|
|
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[8 * 16]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+diff --git a/media/libjpeg/jidctred.c b/media/libjpeg/jidctred.c
|
|
|
+--- a/media/libjpeg/jidctred.c
|
|
|
++++ b/media/libjpeg/jidctred.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jidctred.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1994-1998, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2015, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains inverse-DCT routines that produce reduced-size output:
|
|
|
+ * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
|
|
|
+ *
|
|
|
+ * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
|
|
|
+ * algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step
|
|
|
+@@ -113,27 +113,27 @@
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 4x4 output block.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp2, tmp10, tmp12;
|
|
|
+ JLONG z1, z2, z3, z4;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[DCTSIZE * 4]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -205,18 +205,18 @@ jpeg_idct_4x4(j_decompress_ptr cinfo, jp
|
|
|
+ for (ctr = 0; ctr < 4; ctr++) {
|
|
|
+ outptr = output_buf[ctr] + output_col;
|
|
|
+ /* It's not clear whether a zero row test is worthwhile here ... */
|
|
|
+
|
|
|
+ #ifndef NO_ZERO_ROW_TEST
|
|
|
+ if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
|
|
|
+ wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
|
|
|
+ /* AC terms all zero */
|
|
|
+- JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
|
|
|
+- PASS1_BITS + 3) & RANGE_MASK];
|
|
|
++ _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
|
|
|
++ PASS1_BITS + 3) & RANGE_MASK];
|
|
|
+
|
|
|
+ outptr[0] = dcval;
|
|
|
+ outptr[1] = dcval;
|
|
|
+ outptr[2] = dcval;
|
|
|
+ outptr[3] = dcval;
|
|
|
+
|
|
|
+ wsptr += DCTSIZE; /* advance pointer to next row */
|
|
|
+ continue;
|
|
|
+@@ -271,26 +271,26 @@ jpeg_idct_4x4(j_decompress_ptr cinfo, jp
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 2x2 output block.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ JLONG tmp0, tmp10, z1;
|
|
|
+ JCOEFPTR inptr;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+ int *wsptr;
|
|
|
+- JSAMPROW outptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPROW outptr;
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ int ctr;
|
|
|
+ int workspace[DCTSIZE * 2]; /* buffers data between passes */
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* Pass 1: process columns from input, store into work array. */
|
|
|
+
|
|
|
+ inptr = coef_block;
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+@@ -340,18 +340,18 @@ jpeg_idct_2x2(j_decompress_ptr cinfo, jp
|
|
|
+ wsptr = workspace;
|
|
|
+ for (ctr = 0; ctr < 2; ctr++) {
|
|
|
+ outptr = output_buf[ctr] + output_col;
|
|
|
+ /* It's not clear whether a zero row test is worthwhile here ... */
|
|
|
+
|
|
|
+ #ifndef NO_ZERO_ROW_TEST
|
|
|
+ if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
|
|
|
+ /* AC terms all zero */
|
|
|
+- JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
|
|
|
+- PASS1_BITS + 3) & RANGE_MASK];
|
|
|
++ _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
|
|
|
++ PASS1_BITS + 3) & RANGE_MASK];
|
|
|
+
|
|
|
+ outptr[0] = dcval;
|
|
|
+ outptr[1] = dcval;
|
|
|
+
|
|
|
+ wsptr += DCTSIZE; /* advance pointer to next row */
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ #endif
|
|
|
+@@ -382,23 +382,23 @@ jpeg_idct_2x2(j_decompress_ptr cinfo, jp
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ * producing a reduced-size 1x1 output block.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
++_jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block, _JSAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col)
|
|
|
+ {
|
|
|
+ int dcval;
|
|
|
+ ISLOW_MULT_TYPE *quantptr;
|
|
|
+- JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
++ _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ /* We hardly need an inverse DCT routine for this: just take the
|
|
|
+ * average pixel value, which is one-eighth of the DC coefficient.
|
|
|
+ */
|
|
|
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
|
|
|
+ dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
|
|
|
+ dcval = (int)DESCALE((JLONG)dcval, 3);
|
|
|
+diff --git a/media/libjpeg/jinclude.h b/media/libjpeg/jinclude.h
|
|
|
+--- a/media/libjpeg/jinclude.h
|
|
|
++++ b/media/libjpeg/jinclude.h
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jinclude.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1994, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * Copyright (C) 2022-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file exists to provide a single place to fix any problems with
|
|
|
+ * including the wrong system include files. (Common problems are taken
|
|
|
+ * care of by the standard jconfig symbols, but on really weird systems
|
|
|
+ * you may have to edit this file.)
|
|
|
+ *
|
|
|
+@@ -118,16 +118,18 @@ static INLINE int GETENV_S(char *buffer,
|
|
|
+ #ifndef NO_PUTENV
|
|
|
+
|
|
|
+ #ifdef _WIN32
|
|
|
+
|
|
|
+ #define PUTENV_S(name, value) _putenv_s(name, value)
|
|
|
+
|
|
|
+ #else
|
|
|
+
|
|
|
++#include <errno.h>
|
|
|
++
|
|
|
+ /* This provides a similar interface to the Microsoft _putenv_s() function, but
|
|
|
+ * other than parameter validation, it has no advantages over setenv().
|
|
|
+ */
|
|
|
+
|
|
|
+ static INLINE int PUTENV_S(const char *name, const char *value)
|
|
|
+ {
|
|
|
+ if (!name || !value)
|
|
|
+ return (errno = EINVAL);
|
|
|
+diff --git a/media/libjpeg/jlossls.h b/media/libjpeg/jlossls.h
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jlossls.h
|
|
|
+@@ -0,0 +1,101 @@
|
|
|
++/*
|
|
|
++ * jlossls.h
|
|
|
++ *
|
|
|
++ * This file was part of the Independent JPEG Group's software:
|
|
|
++ * Copyright (C) 1998, Thomas G. Lane.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
++ * libjpeg-turbo Modifications:
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This include file contains common declarations for the lossless JPEG
|
|
|
++ * codec modules.
|
|
|
++ */
|
|
|
++
|
|
|
++#ifndef JLOSSLS_H
|
|
|
++#define JLOSSLS_H
|
|
|
++
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++
|
|
|
++#define JPEG_INTERNALS
|
|
|
++#include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
++
|
|
|
++
|
|
|
++#define ALLOC_DARRAY(pool_id, diffsperrow, numrows) \
|
|
|
++ (JDIFFARRAY)(*cinfo->mem->alloc_sarray) \
|
|
|
++ ((j_common_ptr)cinfo, pool_id, \
|
|
|
++ (diffsperrow) * sizeof(JDIFF) / sizeof(_JSAMPLE), numrows)
|
|
|
++
|
|
|
++
|
|
|
++/*
|
|
|
++ * Table H.1: Predictors for lossless coding.
|
|
|
++ */
|
|
|
++
|
|
|
++#define PREDICTOR1 Ra
|
|
|
++#define PREDICTOR2 Rb
|
|
|
++#define PREDICTOR3 Rc
|
|
|
++#define PREDICTOR4 (int)((JLONG)Ra + (JLONG)Rb - (JLONG)Rc)
|
|
|
++#define PREDICTOR5 (int)((JLONG)Ra + RIGHT_SHIFT((JLONG)Rb - (JLONG)Rc, 1))
|
|
|
++#define PREDICTOR6 (int)((JLONG)Rb + RIGHT_SHIFT((JLONG)Ra - (JLONG)Rc, 1))
|
|
|
++#define PREDICTOR7 (int)RIGHT_SHIFT((JLONG)Ra + (JLONG)Rb, 1)
|
|
|
++
|
|
|
++#endif
|
|
|
++
|
|
|
++
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++typedef void (*predict_difference_method_ptr) (j_compress_ptr cinfo, int ci,
|
|
|
++ _JSAMPROW input_buf,
|
|
|
++ _JSAMPROW prev_row,
|
|
|
++ JDIFFROW diff_buf,
|
|
|
++ JDIMENSION width);
|
|
|
++
|
|
|
++/* Lossless compressor */
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_forward_dct pub; /* public fields */
|
|
|
++
|
|
|
++ /* It is useful to allow each component to have a separate diff method. */
|
|
|
++ predict_difference_method_ptr predict_difference[MAX_COMPONENTS];
|
|
|
++
|
|
|
++ /* MCU rows left in the restart interval for each component */
|
|
|
++ unsigned int restart_rows_to_go[MAX_COMPONENTS];
|
|
|
++
|
|
|
++ /* Sample scaling */
|
|
|
++ void (*scaler_scale) (j_compress_ptr cinfo, _JSAMPROW input_buf,
|
|
|
++ _JSAMPROW output_buf, JDIMENSION width);
|
|
|
++} jpeg_lossless_compressor;
|
|
|
++
|
|
|
++typedef jpeg_lossless_compressor *lossless_comp_ptr;
|
|
|
++
|
|
|
++#endif /* C_LOSSLESS_SUPPORTED */
|
|
|
++
|
|
|
++
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++
|
|
|
++typedef void (*predict_undifference_method_ptr) (j_decompress_ptr cinfo,
|
|
|
++ int comp_index,
|
|
|
++ JDIFFROW diff_buf,
|
|
|
++ JDIFFROW prev_row,
|
|
|
++ JDIFFROW undiff_buf,
|
|
|
++ JDIMENSION width);
|
|
|
++
|
|
|
++/* Lossless decompressor */
|
|
|
++typedef struct {
|
|
|
++ struct jpeg_inverse_dct pub; /* public fields */
|
|
|
++
|
|
|
++ /* It is useful to allow each component to have a separate undiff method. */
|
|
|
++ predict_undifference_method_ptr predict_undifference[MAX_COMPONENTS];
|
|
|
++
|
|
|
++ /* Sample scaling */
|
|
|
++ void (*scaler_scale) (j_decompress_ptr cinfo, JDIFFROW diff_buf,
|
|
|
++ _JSAMPROW output_buf, JDIMENSION width);
|
|
|
++} jpeg_lossless_decompressor;
|
|
|
++
|
|
|
++typedef jpeg_lossless_decompressor *lossless_decomp_ptr;
|
|
|
++
|
|
|
++#endif /* D_LOSSLESS_SUPPORTED */
|
|
|
++
|
|
|
++#endif /* JLOSSLS_H */
|
|
|
+diff --git a/media/libjpeg/jmemmgr.c b/media/libjpeg/jmemmgr.c
|
|
|
+--- a/media/libjpeg/jmemmgr.c
|
|
|
++++ b/media/libjpeg/jmemmgr.c
|
|
|
+@@ -150,17 +150,19 @@ typedef my_memory_mgr *my_mem_ptr;
|
|
|
+ /*
|
|
|
+ * The control blocks for virtual arrays.
|
|
|
+ * Note that these blocks are allocated in the "small" pool area.
|
|
|
+ * System-dependent info for the associated backing store (if any) is hidden
|
|
|
+ * inside the backing_store_info struct.
|
|
|
+ */
|
|
|
+
|
|
|
+ struct jvirt_sarray_control {
|
|
|
+- JSAMPARRAY mem_buffer; /* => the in-memory buffer */
|
|
|
++ JSAMPARRAY mem_buffer; /* => the in-memory buffer (if
|
|
|
++ cinfo->data_precision is 12, then this is
|
|
|
++ actually a J12SAMPARRAY) */
|
|
|
+ JDIMENSION rows_in_array; /* total virtual array height */
|
|
|
+ JDIMENSION samplesperrow; /* width of array (and of memory buffer) */
|
|
|
+ JDIMENSION maxaccess; /* max rows accessed by access_virt_sarray */
|
|
|
+ JDIMENSION rows_in_mem; /* height of memory buffer */
|
|
|
+ JDIMENSION rowsperchunk; /* allocation chunk size in mem_buffer */
|
|
|
+ JDIMENSION cur_start_row; /* first logical row # in the buffer */
|
|
|
+ JDIMENSION first_undef_row; /* row # of first uninitialized row */
|
|
|
+ boolean pre_zero; /* pre-zero mode requested? */
|
|
|
+@@ -346,19 +348,20 @@ alloc_small(j_common_ptr cinfo, int pool
|
|
|
+ /*
|
|
|
+ * Allocation of "large" objects.
|
|
|
+ *
|
|
|
+ * The external semantics of these are the same as "small" objects. However,
|
|
|
+ * the pool management heuristics are quite different. We assume that each
|
|
|
+ * request is large enough that it may as well be passed directly to
|
|
|
+ * jpeg_get_large; the pool management just links everything together
|
|
|
+ * so that we can free it all on demand.
|
|
|
+- * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
|
|
|
+- * structures. The routines that create these structures (see below)
|
|
|
+- * deliberately bunch rows together to ensure a large request size.
|
|
|
++ * Note: the major use of "large" objects is in
|
|
|
++ * JSAMPARRAY/J12SAMPARRAY/J16SAMPARRAY and JBLOCKARRAY structures. The
|
|
|
++ * routines that create these structures (see below) deliberately bunch rows
|
|
|
++ * together to ensure a large request size.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void *)
|
|
|
+ alloc_large(j_common_ptr cinfo, int pool_id, size_t sizeofobject)
|
|
|
+ /* Allocate a "large" object */
|
|
|
+ {
|
|
|
+ my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
|
|
|
+ large_pool_ptr hdr_ptr;
|
|
|
+@@ -432,58 +435,115 @@ alloc_sarray(j_common_ptr cinfo, int poo
|
|
|
+ JDIMENSION numrows)
|
|
|
+ /* Allocate a 2-D sample array */
|
|
|
+ {
|
|
|
+ my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
|
|
|
+ JSAMPARRAY result;
|
|
|
+ JSAMPROW workspace;
|
|
|
+ JDIMENSION rowsperchunk, currow, i;
|
|
|
+ long ltemp;
|
|
|
++ J12SAMPARRAY result12;
|
|
|
++ J12SAMPROW workspace12;
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++ J16SAMPARRAY result16;
|
|
|
++ J16SAMPROW workspace16;
|
|
|
++#endif
|
|
|
++ int data_precision = cinfo->is_decompressor ?
|
|
|
++ ((j_decompress_ptr)cinfo)->data_precision :
|
|
|
++ ((j_compress_ptr)cinfo)->data_precision;
|
|
|
++ size_t sample_size = data_precision == 16 ?
|
|
|
++ sizeof(J16SAMPLE) : (data_precision == 12 ?
|
|
|
++ sizeof(J12SAMPLE) :
|
|
|
++ sizeof(JSAMPLE));
|
|
|
+
|
|
|
+ /* Make sure each row is properly aligned */
|
|
|
+- if ((ALIGN_SIZE % sizeof(JSAMPLE)) != 0)
|
|
|
++ if ((ALIGN_SIZE % sample_size) != 0)
|
|
|
+ out_of_memory(cinfo, 5); /* safety check */
|
|
|
+
|
|
|
+ if (samplesperrow > MAX_ALLOC_CHUNK) {
|
|
|
+ /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject
|
|
|
+ is close to SIZE_MAX. */
|
|
|
+ out_of_memory(cinfo, 9);
|
|
|
+ }
|
|
|
+ samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) /
|
|
|
+- sizeof(JSAMPLE));
|
|
|
++ sample_size);
|
|
|
+
|
|
|
+ /* Calculate max # of rows allowed in one allocation chunk */
|
|
|
+ ltemp = (MAX_ALLOC_CHUNK - sizeof(large_pool_hdr)) /
|
|
|
+- ((long)samplesperrow * sizeof(JSAMPLE));
|
|
|
++ ((long)samplesperrow * (long)sample_size);
|
|
|
+ if (ltemp <= 0)
|
|
|
+ ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
|
|
|
+ if (ltemp < (long)numrows)
|
|
|
+ rowsperchunk = (JDIMENSION)ltemp;
|
|
|
+ else
|
|
|
+ rowsperchunk = numrows;
|
|
|
+ mem->last_rowsperchunk = rowsperchunk;
|
|
|
+
|
|
|
+- /* Get space for row pointers (small object) */
|
|
|
+- result = (JSAMPARRAY)alloc_small(cinfo, pool_id,
|
|
|
+- (size_t)(numrows * sizeof(JSAMPROW)));
|
|
|
++ if (data_precision == 16) {
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++ /* Get space for row pointers (small object) */
|
|
|
++ result16 = (J16SAMPARRAY)alloc_small(cinfo, pool_id,
|
|
|
++ (size_t)(numrows *
|
|
|
++ sizeof(J16SAMPROW)));
|
|
|
++
|
|
|
++ /* Get the rows themselves (large objects) */
|
|
|
++ currow = 0;
|
|
|
++ while (currow < numrows) {
|
|
|
++ rowsperchunk = MIN(rowsperchunk, numrows - currow);
|
|
|
++ workspace16 = (J16SAMPROW)alloc_large(cinfo, pool_id,
|
|
|
++ (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
|
|
|
++ for (i = rowsperchunk; i > 0; i--) {
|
|
|
++ result16[currow++] = workspace16;
|
|
|
++ workspace16 += samplesperrow;
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ return (JSAMPARRAY)result16;
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, data_precision);
|
|
|
++ return NULL;
|
|
|
++#endif
|
|
|
++ } else if (data_precision == 12) {
|
|
|
++ /* Get space for row pointers (small object) */
|
|
|
++ result12 = (J12SAMPARRAY)alloc_small(cinfo, pool_id,
|
|
|
++ (size_t)(numrows *
|
|
|
++ sizeof(J12SAMPROW)));
|
|
|
+
|
|
|
+- /* Get the rows themselves (large objects) */
|
|
|
+- currow = 0;
|
|
|
+- while (currow < numrows) {
|
|
|
+- rowsperchunk = MIN(rowsperchunk, numrows - currow);
|
|
|
+- workspace = (JSAMPROW)alloc_large(cinfo, pool_id,
|
|
|
+- (size_t)((size_t)rowsperchunk * (size_t)samplesperrow *
|
|
|
+- sizeof(JSAMPLE)));
|
|
|
+- for (i = rowsperchunk; i > 0; i--) {
|
|
|
+- result[currow++] = workspace;
|
|
|
+- workspace += samplesperrow;
|
|
|
++ /* Get the rows themselves (large objects) */
|
|
|
++ currow = 0;
|
|
|
++ while (currow < numrows) {
|
|
|
++ rowsperchunk = MIN(rowsperchunk, numrows - currow);
|
|
|
++ workspace12 = (J12SAMPROW)alloc_large(cinfo, pool_id,
|
|
|
++ (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
|
|
|
++ for (i = rowsperchunk; i > 0; i--) {
|
|
|
++ result12[currow++] = workspace12;
|
|
|
++ workspace12 += samplesperrow;
|
|
|
++ }
|
|
|
+ }
|
|
|
+- }
|
|
|
++
|
|
|
++ return (JSAMPARRAY)result12;
|
|
|
++ } else {
|
|
|
++ /* Get space for row pointers (small object) */
|
|
|
++ result = (JSAMPARRAY)alloc_small(cinfo, pool_id,
|
|
|
++ (size_t)(numrows * sizeof(JSAMPROW)));
|
|
|
+
|
|
|
+- return result;
|
|
|
++ /* Get the rows themselves (large objects) */
|
|
|
++ currow = 0;
|
|
|
++ while (currow < numrows) {
|
|
|
++ rowsperchunk = MIN(rowsperchunk, numrows - currow);
|
|
|
++ workspace = (JSAMPROW)alloc_large(cinfo, pool_id,
|
|
|
++ (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
|
|
|
++ for (i = rowsperchunk; i > 0; i--) {
|
|
|
++ result[currow++] = workspace;
|
|
|
++ workspace += samplesperrow;
|
|
|
++ }
|
|
|
++ }
|
|
|
++
|
|
|
++ return result;
|
|
|
++ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Creation of 2-D coefficient-block arrays.
|
|
|
+ * This is essentially the same as the code for sample arrays, above.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -635,30 +695,37 @@ METHODDEF(void)
|
|
|
+ realize_virt_arrays(j_common_ptr cinfo)
|
|
|
+ /* Allocate the in-memory buffers for any unrealized virtual arrays */
|
|
|
+ {
|
|
|
+ my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
|
|
|
+ size_t space_per_minheight, maximum_space, avail_mem;
|
|
|
+ size_t minheights, max_minheights;
|
|
|
+ jvirt_sarray_ptr sptr;
|
|
|
+ jvirt_barray_ptr bptr;
|
|
|
++ int data_precision = cinfo->is_decompressor ?
|
|
|
++ ((j_decompress_ptr)cinfo)->data_precision :
|
|
|
++ ((j_compress_ptr)cinfo)->data_precision;
|
|
|
++ size_t sample_size = data_precision == 16 ?
|
|
|
++ sizeof(J16SAMPLE) : (data_precision == 12 ?
|
|
|
++ sizeof(J12SAMPLE) :
|
|
|
++ sizeof(JSAMPLE));
|
|
|
+
|
|
|
+ /* Compute the minimum space needed (maxaccess rows in each buffer)
|
|
|
+ * and the maximum space needed (full image height in each buffer).
|
|
|
+ * These may be of use to the system-dependent jpeg_mem_available routine.
|
|
|
+ */
|
|
|
+ space_per_minheight = 0;
|
|
|
+ maximum_space = 0;
|
|
|
+ for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
|
|
|
+ if (sptr->mem_buffer == NULL) { /* if not realized yet */
|
|
|
+ size_t new_space = (long)sptr->rows_in_array *
|
|
|
+- (long)sptr->samplesperrow * sizeof(JSAMPLE);
|
|
|
++ (long)sptr->samplesperrow * sample_size;
|
|
|
+
|
|
|
+ space_per_minheight += (long)sptr->maxaccess *
|
|
|
+- (long)sptr->samplesperrow * sizeof(JSAMPLE);
|
|
|
++ (long)sptr->samplesperrow * sample_size;
|
|
|
+ if (SIZE_MAX - maximum_space < new_space)
|
|
|
+ out_of_memory(cinfo, 10);
|
|
|
+ maximum_space += new_space;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
|
|
|
+ if (bptr->mem_buffer == NULL) { /* if not realized yet */
|
|
|
+ size_t new_space = (long)bptr->rows_in_array *
|
|
|
+@@ -703,17 +770,17 @@ realize_virt_arrays(j_common_ptr cinfo)
|
|
|
+ /* This buffer fits in memory */
|
|
|
+ sptr->rows_in_mem = sptr->rows_in_array;
|
|
|
+ } else {
|
|
|
+ /* It doesn't fit in memory, create backing store. */
|
|
|
+ sptr->rows_in_mem = (JDIMENSION)(max_minheights * sptr->maxaccess);
|
|
|
+ jpeg_open_backing_store(cinfo, &sptr->b_s_info,
|
|
|
+ (long)sptr->rows_in_array *
|
|
|
+ (long)sptr->samplesperrow *
|
|
|
+- (long)sizeof(JSAMPLE));
|
|
|
++ (long)sample_size);
|
|
|
+ sptr->b_s_open = TRUE;
|
|
|
+ }
|
|
|
+ sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
|
|
|
+ sptr->samplesperrow, sptr->rows_in_mem);
|
|
|
+ sptr->rowsperchunk = mem->last_rowsperchunk;
|
|
|
+ sptr->cur_start_row = 0;
|
|
|
+ sptr->first_undef_row = 0;
|
|
|
+ sptr->dirty = FALSE;
|
|
|
+@@ -746,39 +813,74 @@ realize_virt_arrays(j_common_ptr cinfo)
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ do_sarray_io(j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
|
|
|
+ /* Do backing store read or write of a virtual sample array */
|
|
|
+ {
|
|
|
+ long bytesperrow, file_offset, byte_count, rows, thisrow, i;
|
|
|
++ int data_precision = cinfo->is_decompressor ?
|
|
|
++ ((j_decompress_ptr)cinfo)->data_precision :
|
|
|
++ ((j_compress_ptr)cinfo)->data_precision;
|
|
|
++ size_t sample_size = data_precision == 16 ?
|
|
|
++ sizeof(J16SAMPLE) : (data_precision == 12 ?
|
|
|
++ sizeof(J12SAMPLE) :
|
|
|
++ sizeof(JSAMPLE));
|
|
|
+
|
|
|
+- bytesperrow = (long)ptr->samplesperrow * sizeof(JSAMPLE);
|
|
|
++ bytesperrow = (long)ptr->samplesperrow * (long)sample_size;
|
|
|
+ file_offset = ptr->cur_start_row * bytesperrow;
|
|
|
+ /* Loop to read or write each allocation chunk in mem_buffer */
|
|
|
+ for (i = 0; i < (long)ptr->rows_in_mem; i += ptr->rowsperchunk) {
|
|
|
+ /* One chunk, but check for short chunk at end of buffer */
|
|
|
+ rows = MIN((long)ptr->rowsperchunk, (long)ptr->rows_in_mem - i);
|
|
|
+ /* Transfer no more than is currently defined */
|
|
|
+ thisrow = (long)ptr->cur_start_row + i;
|
|
|
+ rows = MIN(rows, (long)ptr->first_undef_row - thisrow);
|
|
|
+ /* Transfer no more than fits in file */
|
|
|
+ rows = MIN(rows, (long)ptr->rows_in_array - thisrow);
|
|
|
+ if (rows <= 0) /* this chunk might be past end of file! */
|
|
|
+ break;
|
|
|
+ byte_count = rows * bytesperrow;
|
|
|
+- if (writing)
|
|
|
+- (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
+- (void *)ptr->mem_buffer[i],
|
|
|
+- file_offset, byte_count);
|
|
|
+- else
|
|
|
+- (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
+- (void *)ptr->mem_buffer[i],
|
|
|
+- file_offset, byte_count);
|
|
|
++ if (data_precision == 16) {
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++ J16SAMPARRAY mem_buffer16 = (J16SAMPARRAY)ptr->mem_buffer;
|
|
|
++
|
|
|
++ if (writing)
|
|
|
++ (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
++ (void *)mem_buffer16[i],
|
|
|
++ file_offset, byte_count);
|
|
|
++ else
|
|
|
++ (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
++ (void *)mem_buffer16[i],
|
|
|
++ file_offset, byte_count);
|
|
|
++#else
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, data_precision);
|
|
|
++#endif
|
|
|
++ } else if (data_precision == 12) {
|
|
|
++ J12SAMPARRAY mem_buffer12 = (J12SAMPARRAY)ptr->mem_buffer;
|
|
|
++
|
|
|
++ if (writing)
|
|
|
++ (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
++ (void *)mem_buffer12[i],
|
|
|
++ file_offset, byte_count);
|
|
|
++ else
|
|
|
++ (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
++ (void *)mem_buffer12[i],
|
|
|
++ file_offset, byte_count);
|
|
|
++ } else {
|
|
|
++ if (writing)
|
|
|
++ (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
++ (void *)ptr->mem_buffer[i],
|
|
|
++ file_offset, byte_count);
|
|
|
++ else
|
|
|
++ (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
|
|
|
++ (void *)ptr->mem_buffer[i],
|
|
|
++ file_offset, byte_count);
|
|
|
++ }
|
|
|
+ file_offset += byte_count;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ do_barray_io(j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
|
|
|
+ /* Do backing store read or write of a virtual coefficient-block array */
|
|
|
+@@ -816,16 +918,23 @@ METHODDEF(JSAMPARRAY)
|
|
|
+ access_virt_sarray(j_common_ptr cinfo, jvirt_sarray_ptr ptr,
|
|
|
+ JDIMENSION start_row, JDIMENSION num_rows, boolean writable)
|
|
|
+ /* Access the part of a virtual sample array starting at start_row */
|
|
|
+ /* and extending for num_rows rows. writable is true if */
|
|
|
+ /* caller intends to modify the accessed area. */
|
|
|
+ {
|
|
|
+ JDIMENSION end_row = start_row + num_rows;
|
|
|
+ JDIMENSION undef_row;
|
|
|
++ int data_precision = cinfo->is_decompressor ?
|
|
|
++ ((j_decompress_ptr)cinfo)->data_precision :
|
|
|
++ ((j_compress_ptr)cinfo)->data_precision;
|
|
|
++ size_t sample_size = data_precision == 16 ?
|
|
|
++ sizeof(J16SAMPLE) : (data_precision == 12 ?
|
|
|
++ sizeof(J12SAMPLE) :
|
|
|
++ sizeof(JSAMPLE));
|
|
|
+
|
|
|
+ /* debugging check */
|
|
|
+ if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
|
|
|
+ ptr->mem_buffer == NULL)
|
|
|
+ ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
|
|
|
+
|
|
|
+ /* Make the desired part of the virtual array accessible */
|
|
|
+ if (start_row < ptr->cur_start_row ||
|
|
|
+@@ -871,17 +980,17 @@ access_virt_sarray(j_common_ptr cinfo, j
|
|
|
+ ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
|
|
|
+ undef_row = start_row; /* but reader is allowed to read ahead */
|
|
|
+ } else {
|
|
|
+ undef_row = ptr->first_undef_row;
|
|
|
+ }
|
|
|
+ if (writable)
|
|
|
+ ptr->first_undef_row = end_row;
|
|
|
+ if (ptr->pre_zero) {
|
|
|
+- size_t bytesperrow = (size_t)ptr->samplesperrow * sizeof(JSAMPLE);
|
|
|
++ size_t bytesperrow = (size_t)ptr->samplesperrow * sample_size;
|
|
|
+ undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
|
|
|
+ end_row -= ptr->cur_start_row;
|
|
|
+ while (undef_row < end_row) {
|
|
|
+ jzero_far((void *)ptr->mem_buffer[undef_row], bytesperrow);
|
|
|
+ undef_row++;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (!writable) /* reader looking at undefined data */
|
|
|
+diff --git a/media/libjpeg/jmemsys.h b/media/libjpeg/jmemsys.h
|
|
|
+--- a/media/libjpeg/jmemsys.h
|
|
|
++++ b/media/libjpeg/jmemsys.h
|
|
|
+@@ -94,63 +94,32 @@ EXTERN(size_t) jpeg_mem_available(j_comm
|
|
|
+ * backing-store object. The read/write/close method pointers are called
|
|
|
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
|
|
|
+ * are private to the system-dependent backing store routines.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define TEMP_NAME_LENGTH 64 /* max length of a temporary file's name */
|
|
|
+
|
|
|
+
|
|
|
+-#ifdef USE_MSDOS_MEMMGR /* DOS-specific junk */
|
|
|
+-
|
|
|
+-typedef unsigned short XMSH; /* type of extended-memory handles */
|
|
|
+-typedef unsigned short EMSH; /* type of expanded-memory handles */
|
|
|
+-
|
|
|
+-typedef union {
|
|
|
+- short file_handle; /* DOS file handle if it's a temp file */
|
|
|
+- XMSH xms_handle; /* handle if it's a chunk of XMS */
|
|
|
+- EMSH ems_handle; /* handle if it's a chunk of EMS */
|
|
|
+-} handle_union;
|
|
|
+-
|
|
|
+-#endif /* USE_MSDOS_MEMMGR */
|
|
|
+-
|
|
|
+-#ifdef USE_MAC_MEMMGR /* Mac-specific junk */
|
|
|
+-#include <Files.h>
|
|
|
+-#endif /* USE_MAC_MEMMGR */
|
|
|
+-
|
|
|
+-
|
|
|
+ typedef struct backing_store_struct *backing_store_ptr;
|
|
|
+
|
|
|
+ typedef struct backing_store_struct {
|
|
|
+ /* Methods for reading/writing/closing this backing-store object */
|
|
|
+ void (*read_backing_store) (j_common_ptr cinfo, backing_store_ptr info,
|
|
|
+ void *buffer_address, long file_offset,
|
|
|
+ long byte_count);
|
|
|
+ void (*write_backing_store) (j_common_ptr cinfo, backing_store_ptr info,
|
|
|
+ void *buffer_address, long file_offset,
|
|
|
+ long byte_count);
|
|
|
+ void (*close_backing_store) (j_common_ptr cinfo, backing_store_ptr info);
|
|
|
+
|
|
|
+ /* Private fields for system-dependent backing-store management */
|
|
|
+-#ifdef USE_MSDOS_MEMMGR
|
|
|
+- /* For the MS-DOS manager (jmemdos.c), we need: */
|
|
|
+- handle_union handle; /* reference to backing-store storage object */
|
|
|
+- char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
|
|
|
+-#else
|
|
|
+-#ifdef USE_MAC_MEMMGR
|
|
|
+- /* For the Mac manager (jmemmac.c), we need: */
|
|
|
+- short temp_file; /* file reference number to temp file */
|
|
|
+- FSSpec tempSpec; /* the FSSpec for the temp file */
|
|
|
+- char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
|
|
|
+-#else
|
|
|
+ /* For a typical implementation with temp files, we need: */
|
|
|
+ FILE *temp_file; /* stdio reference to temp file */
|
|
|
+ char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
|
|
|
+-#endif
|
|
|
+-#endif
|
|
|
+ } backing_store_info;
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initial opening of a backing-store object. This must fill in the
|
|
|
+ * read/write/close pointers in the object. The read/write routines
|
|
|
+ * may take an error exit if the specified maximum file size is exceeded.
|
|
|
+ * (If jpeg_mem_available always returns a large value, this routine can
|
|
|
+diff --git a/media/libjpeg/jmorecfg.h b/media/libjpeg/jmorecfg.h
|
|
|
+--- a/media/libjpeg/jmorecfg.h
|
|
|
++++ b/media/libjpeg/jmorecfg.h
|
|
|
+@@ -1,16 +1,18 @@
|
|
|
+ /*
|
|
|
+ * jmorecfg.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * Modified 1997-2009 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, 2022, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains additional configuration options that customize the
|
|
|
+ * JPEG software for special applications or support machine-dependent
|
|
|
+ * optimizations. Most users will not need to touch this file.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -37,41 +39,39 @@
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Representation of a single sample (pixel element value).
|
|
|
+ * We frequently allocate large arrays of these, so it's important to keep
|
|
|
+ * them small. But if you have memory to burn and access to char or short
|
|
|
+ * arrays is very slow on your hardware, you might want to change these.
|
|
|
+ */
|
|
|
+
|
|
|
+-#if BITS_IN_JSAMPLE == 8
|
|
|
+-/* JSAMPLE should be the smallest type that will hold the values 0..255.
|
|
|
+- */
|
|
|
++/* JSAMPLE should be the smallest type that will hold the values 0..255. */
|
|
|
+
|
|
|
+ typedef unsigned char JSAMPLE;
|
|
|
+ #define GETJSAMPLE(value) ((int)(value))
|
|
|
+
|
|
|
+-#define MAXJSAMPLE 255
|
|
|
+-#define CENTERJSAMPLE 128
|
|
|
+-
|
|
|
+-#endif /* BITS_IN_JSAMPLE == 8 */
|
|
|
++#define MAXJSAMPLE 255
|
|
|
++#define CENTERJSAMPLE 128
|
|
|
+
|
|
|
+
|
|
|
+-#if BITS_IN_JSAMPLE == 12
|
|
|
+-/* JSAMPLE should be the smallest type that will hold the values 0..4095.
|
|
|
+- * On nearly all machines "short" will do nicely.
|
|
|
+- */
|
|
|
++/* J12SAMPLE should be the smallest type that will hold the values 0..4095. */
|
|
|
++
|
|
|
++typedef short J12SAMPLE;
|
|
|
++
|
|
|
++#define MAXJ12SAMPLE 4095
|
|
|
++#define CENTERJ12SAMPLE 2048
|
|
|
+
|
|
|
+-typedef short JSAMPLE;
|
|
|
+-#define GETJSAMPLE(value) ((int)(value))
|
|
|
++
|
|
|
++/* J16SAMPLE should be the smallest type that will hold the values 0..65535. */
|
|
|
+
|
|
|
+-#define MAXJSAMPLE 4095
|
|
|
+-#define CENTERJSAMPLE 2048
|
|
|
++typedef unsigned short J16SAMPLE;
|
|
|
+
|
|
|
+-#endif /* BITS_IN_JSAMPLE == 12 */
|
|
|
++#define MAXJ16SAMPLE 65535
|
|
|
++#define CENTERJ16SAMPLE 32768
|
|
|
+
|
|
|
+
|
|
|
+ /* Representation of a DCT frequency coefficient.
|
|
|
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
|
|
|
+ * Again, we allocate large arrays of these, but you can change to int
|
|
|
+ * if you have memory to burn and "short" is really slow.
|
|
|
+ */
|
|
|
+
|
|
|
+@@ -228,31 +228,34 @@ typedef int boolean;
|
|
|
+ #define DCT_ISLOW_SUPPORTED /* accurate integer method */
|
|
|
+ #define DCT_IFAST_SUPPORTED /* less accurate int method [legacy feature] */
|
|
|
+ #define DCT_FLOAT_SUPPORTED /* floating-point method [legacy feature] */
|
|
|
+
|
|
|
+ /* Encoder capability options: */
|
|
|
+
|
|
|
+ #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
|
|
|
+ #define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
|
|
|
++#define C_LOSSLESS_SUPPORTED /* Lossless JPEG? */
|
|
|
+ #define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */
|
|
|
+ /* Note: if you selected 12-bit data precision, it is dangerous to turn off
|
|
|
+ * ENTROPY_OPT_SUPPORTED. The standard Huffman tables are only good for 8-bit
|
|
|
+ * precision, so jchuff.c normally uses entropy optimization to compute
|
|
|
+ * usable tables for higher precision. If you don't want to do optimization,
|
|
|
+ * you'll have to supply different default Huffman tables.
|
|
|
+- * The exact same statements apply for progressive JPEG: the default tables
|
|
|
+- * don't work for progressive mode. (This may get fixed, however.)
|
|
|
++ * The exact same statements apply for progressive and lossless JPEG:
|
|
|
++ * the default tables don't work for progressive mode or lossless mode.
|
|
|
++ * (This may get fixed, however.)
|
|
|
+ */
|
|
|
+ #define INPUT_SMOOTHING_SUPPORTED /* Input image smoothing option? */
|
|
|
+
|
|
|
+ /* Decoder capability options: */
|
|
|
+
|
|
|
+ #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
|
|
|
+ #define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/
|
|
|
++#define D_LOSSLESS_SUPPORTED /* Lossless JPEG? */
|
|
|
+ #define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */
|
|
|
+ #define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */
|
|
|
+ #define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */
|
|
|
+ #undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */
|
|
|
+ #define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */
|
|
|
+ #define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */
|
|
|
+ #define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/jpeg12/moz.build b/media/libjpeg/jpeg12/moz.build
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jpeg12/moz.build
|
|
|
+@@ -0,0 +1,48 @@
|
|
|
++# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
|
|
++# vim: set filetype=python:
|
|
|
++# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
++# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
++# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
++
|
|
|
++with Files("**"):
|
|
|
++ BUG_COMPONENT = ("Core", "Graphics: ImageLib")
|
|
|
++
|
|
|
++DEFINES['BITS_IN_JSAMPLE'] = 12
|
|
|
++
|
|
|
++# From JPEG12_SOURCES in the upstream CMakeLists.txt
|
|
|
++SOURCES += [
|
|
|
++ '../jcapistd.c',
|
|
|
++ '../jccoefct.c',
|
|
|
++ '../jccolor.c',
|
|
|
++ '../jcdctmgr.c',
|
|
|
++ '../jcdiffct.c',
|
|
|
++ '../jclossls.c',
|
|
|
++ '../jcmainct.c',
|
|
|
++ '../jcprepct.c',
|
|
|
++ '../jcsample.c',
|
|
|
++ '../jdapistd.c',
|
|
|
++ '../jdcoefct.c',
|
|
|
++ '../jdcolor.c',
|
|
|
++ '../jddctmgr.c',
|
|
|
++ '../jddiffct.c',
|
|
|
++ '../jdlossls.c',
|
|
|
++ '../jdmainct.c',
|
|
|
++ '../jdmerge.c',
|
|
|
++ '../jdpostct.c',
|
|
|
++ '../jdsample.c',
|
|
|
++ '../jfdctfst.c',
|
|
|
++ '../jfdctint.c',
|
|
|
++ '../jidctflt.c',
|
|
|
++ '../jidctfst.c',
|
|
|
++ '../jidctint.c',
|
|
|
++ '../jidctred.c',
|
|
|
++ '../jquant1.c',
|
|
|
++ '../jquant2.c',
|
|
|
++ '../jutils.c',
|
|
|
++]
|
|
|
++
|
|
|
++# We allow warnings for third-party code that can be updated from upstream.
|
|
|
++AllowCompilerWarnings()
|
|
|
++
|
|
|
++FINAL_LIBRARY = 'gkmedias'
|
|
|
++
|
|
|
+diff --git a/media/libjpeg/jpeg16/moz.build b/media/libjpeg/jpeg16/moz.build
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jpeg16/moz.build
|
|
|
+@@ -0,0 +1,35 @@
|
|
|
++# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
|
|
|
++# vim: set filetype=python:
|
|
|
++# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
++# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
++# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
++
|
|
|
++with Files("**"):
|
|
|
++ BUG_COMPONENT = ("Core", "Graphics: ImageLib")
|
|
|
++
|
|
|
++DEFINES['BITS_IN_JSAMPLE'] = 16
|
|
|
++
|
|
|
++# From JPEG16_SOURCES in the upstream CMakeLists.txt
|
|
|
++SOURCES += [
|
|
|
++ '../jcapistd.c',
|
|
|
++ '../jccolor.c',
|
|
|
++ '../jcdiffct.c',
|
|
|
++ '../jclossls.c',
|
|
|
++ '../jcmainct.c',
|
|
|
++ '../jcprepct.c',
|
|
|
++ '../jcsample.c',
|
|
|
++ '../jdapistd.c',
|
|
|
++ '../jdcolor.c',
|
|
|
++ '../jddiffct.c',
|
|
|
++ '../jdlossls.c',
|
|
|
++ '../jdmainct.c',
|
|
|
++ '../jdpostct.c',
|
|
|
++ '../jdsample.c',
|
|
|
++ '../jutils.c',
|
|
|
++]
|
|
|
++
|
|
|
++# We allow warnings for third-party code that can be updated from upstream.
|
|
|
++AllowCompilerWarnings()
|
|
|
++
|
|
|
++FINAL_LIBRARY = 'gkmedias'
|
|
|
++
|
|
|
+diff --git a/media/libjpeg/jpeg_nbits_table.h b/media/libjpeg/jpeg_nbits.c
|
|
|
+rename from media/libjpeg/jpeg_nbits_table.h
|
|
|
+rename to media/libjpeg/jpeg_nbits.c
|
|
|
+--- a/media/libjpeg/jpeg_nbits_table.h
|
|
|
++++ b/media/libjpeg/jpeg_nbits.c
|
|
|
+@@ -1,9 +1,37 @@
|
|
|
+-static const unsigned char jpeg_nbits_table[65536] = {
|
|
|
++/*
|
|
|
++ * Copyright (C) 2024, D. R. Commander.
|
|
|
++ *
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ */
|
|
|
++
|
|
|
++#include "jpeg_nbits.h"
|
|
|
++#include "jconfigint.h"
|
|
|
++
|
|
|
++
|
|
|
++#ifndef USE_CLZ_INTRINSIC
|
|
|
++
|
|
|
++#define INCLUDE_JPEG_NBITS_TABLE
|
|
|
++
|
|
|
++/* When building for x86[-64] with the SIMD extensions enabled, the C Huffman
|
|
|
++ * encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder.
|
|
|
++ */
|
|
|
++#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
|
|
|
++ defined(_M_X64)) && defined(WITH_SIMD)
|
|
|
++#undef INCLUDE_JPEG_NBITS_TABLE
|
|
|
++#endif
|
|
|
++
|
|
|
++#endif
|
|
|
++
|
|
|
++
|
|
|
++#ifdef INCLUDE_JPEG_NBITS_TABLE
|
|
|
++
|
|
|
++const unsigned char HIDDEN jpeg_nbits_table[65536] = {
|
|
|
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
|
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
|
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
|
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
|
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
|
|
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
|
|
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
|
|
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
|
|
+@@ -4091,8 +4119,16 @@ static const unsigned char jpeg_nbits_ta
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
|
|
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
|
|
|
+ };
|
|
|
++
|
|
|
++#else
|
|
|
++
|
|
|
++/* Suppress compiler warnings about empty translation unit. */
|
|
|
++
|
|
|
++typedef int dummy_jpeg_nbits_table;
|
|
|
++
|
|
|
++#endif
|
|
|
+diff --git a/media/libjpeg/jpeg_nbits.h b/media/libjpeg/jpeg_nbits.h
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jpeg_nbits.h
|
|
|
+@@ -0,0 +1,43 @@
|
|
|
++/*
|
|
|
++ * Copyright (C) 2014, 2021, 2024, D. R. Commander.
|
|
|
++ * Copyright (C) 2014, Olle Liljenzin.
|
|
|
++ * Copyright (C) 2020, Arm Limited.
|
|
|
++ *
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ */
|
|
|
++
|
|
|
++/*
|
|
|
++ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
|
|
|
++ * used for bit counting rather than the lookup table. This will reduce the
|
|
|
++ * memory footprint by 64k, which is important for some mobile applications
|
|
|
++ * that create many isolated instances of libjpeg-turbo (web browsers, for
|
|
|
++ * instance.) This may improve performance on some mobile platforms as well.
|
|
|
++ * This feature is enabled by default only on Arm processors, because some x86
|
|
|
++ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
|
|
|
++ * shown to have a significant performance impact even on the x86 chips that
|
|
|
++ * have a fast implementation of it. When building for Armv6, you can
|
|
|
++ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
|
|
|
++ * flags (this defines __thumb__).
|
|
|
++ */
|
|
|
++
|
|
|
++/* NOTE: Both GCC and Clang define __GNUC__ */
|
|
|
++#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
|
|
|
++ defined(_M_ARM) || defined(_M_ARM64)
|
|
|
++#if !defined(__thumb__) || defined(__thumb2__)
|
|
|
++#define USE_CLZ_INTRINSIC
|
|
|
++#endif
|
|
|
++#endif
|
|
|
++
|
|
|
++#ifdef USE_CLZ_INTRINSIC
|
|
|
++#if defined(_MSC_VER) && !defined(__clang__)
|
|
|
++#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x))
|
|
|
++#else
|
|
|
++#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
|
|
|
++#endif
|
|
|
++#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
|
|
|
++#else
|
|
|
++extern const unsigned char jpeg_nbits_table[65536];
|
|
|
++#define JPEG_NBITS(x) (jpeg_nbits_table[x])
|
|
|
++#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
|
|
|
++#endif
|
|
|
+diff --git a/media/libjpeg/jpegcomp.h b/media/libjpeg/jpegapicomp.h
|
|
|
+rename from media/libjpeg/jpegcomp.h
|
|
|
+rename to media/libjpeg/jpegapicomp.h
|
|
|
+--- a/media/libjpeg/jpegcomp.h
|
|
|
++++ b/media/libjpeg/jpegapicomp.h
|
|
|
+@@ -1,10 +1,10 @@
|
|
|
+ /*
|
|
|
+- * jpegcomp.h
|
|
|
++ * jpegapicomp.h
|
|
|
+ *
|
|
|
+ * Copyright (C) 2010, 2020, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * JPEG compatibility macros
|
|
|
+ * These declarations are considered internal to the JPEG library; most
|
|
|
+ * applications using the library shouldn't need to include this file.
|
|
|
+diff --git a/media/libjpeg/jpegint.h b/media/libjpeg/jpegint.h
|
|
|
+--- a/media/libjpeg/jpegint.h
|
|
|
++++ b/media/libjpeg/jpegint.h
|
|
|
+@@ -1,27 +1,40 @@
|
|
|
+ /*
|
|
|
+ * jpegint.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1997, Thomas G. Lane.
|
|
|
+ * Modified 1997-2009 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2015-2016, 2019, 2021, D. R. Commander.
|
|
|
++ * Copyright (C) 2015-2017, 2019, 2021-2022, D. R. Commander.
|
|
|
+ * Copyright (C) 2015, Google, Inc.
|
|
|
+ * Copyright (C) 2021, Alex Richardson.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file provides common declarations for the various JPEG modules.
|
|
|
+ * These declarations are considered internal to the JPEG library; most
|
|
|
+ * applications using the library shouldn't need to include this file.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
++/* Representation of a spatial difference value.
|
|
|
++ * This should be a signed value of at least 16 bits; int is usually OK.
|
|
|
++ */
|
|
|
++
|
|
|
++typedef int JDIFF;
|
|
|
++
|
|
|
++typedef JDIFF FAR *JDIFFROW; /* pointer to one row of difference values */
|
|
|
++typedef JDIFFROW *JDIFFARRAY; /* ptr to some rows (a 2-D diff array) */
|
|
|
++typedef JDIFFARRAY *JDIFFIMAGE; /* a 3-D diff array: top index is color */
|
|
|
++
|
|
|
++
|
|
|
+ /* Declarations for both compression & decompression */
|
|
|
+
|
|
|
+ typedef enum { /* Operating modes for buffer controllers */
|
|
|
+ JBUF_PASS_THRU, /* Plain stripwise operation */
|
|
|
+ /* Remaining modes require a full-image buffer to have been created */
|
|
|
+ JBUF_SAVE_SOURCE, /* Run source subobject only, save output */
|
|
|
+ JBUF_CRANK_DEST, /* Run dest subobject only, using saved data */
|
|
|
+ JBUF_SAVE_AND_PASS /* Run both subobjects, save output */
|
|
|
+@@ -56,16 +69,19 @@ typedef long JLONG;
|
|
|
+ * sizeof(void *) != sizeof(size_t). The only other options would require C99
|
|
|
+ * or Clang-specific builtins.
|
|
|
+ */
|
|
|
+ typedef __UINTPTR_TYPE__ JUINTPTR;
|
|
|
+ #else
|
|
|
+ typedef size_t JUINTPTR;
|
|
|
+ #endif
|
|
|
+
|
|
|
++#define IsExtRGB(cs) \
|
|
|
++ (cs == JCS_RGB || (cs >= JCS_EXT_RGB && cs <= JCS_EXT_ARGB))
|
|
|
++
|
|
|
+ /*
|
|
|
+ * Left shift macro that handles a negative operand without causing any
|
|
|
+ * sanitizer warnings
|
|
|
+ */
|
|
|
+
|
|
|
+ #define LEFT_SHIFT(a, b) ((JLONG)((unsigned long)(a) << (b)))
|
|
|
+
|
|
|
+
|
|
|
+@@ -75,73 +91,131 @@ typedef size_t JUINTPTR;
|
|
|
+ struct jpeg_comp_master {
|
|
|
+ void (*prepare_for_pass) (j_compress_ptr cinfo);
|
|
|
+ void (*pass_startup) (j_compress_ptr cinfo);
|
|
|
+ void (*finish_pass) (j_compress_ptr cinfo);
|
|
|
+
|
|
|
+ /* State variables made visible to other modules */
|
|
|
+ boolean call_pass_startup; /* True if pass_startup must be called */
|
|
|
+ boolean is_last_pass; /* True during last pass */
|
|
|
++ boolean lossless; /* True if lossless mode is enabled */
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Main buffer control (downsampled-data buffer) */
|
|
|
+ struct jpeg_c_main_controller {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
|
|
|
+ void (*process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
|
|
|
++ void (*process_data_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
|
|
|
++ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ void (*process_data_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
|
|
|
++ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Compression preprocessing (downsampling input buffer control) */
|
|
|
+ struct jpeg_c_prep_controller {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
|
|
|
+ void (*pre_process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+ JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
|
|
|
+ JSAMPIMAGE output_buf,
|
|
|
+ JDIMENSION *out_row_group_ctr,
|
|
|
+ JDIMENSION out_row_groups_avail);
|
|
|
++ void (*pre_process_data_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
|
|
|
++ JDIMENSION *in_row_ctr,
|
|
|
++ JDIMENSION in_rows_avail,
|
|
|
++ J12SAMPIMAGE output_buf,
|
|
|
++ JDIMENSION *out_row_group_ctr,
|
|
|
++ JDIMENSION out_row_groups_avail);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ void (*pre_process_data_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
|
|
|
++ JDIMENSION *in_row_ctr,
|
|
|
++ JDIMENSION in_rows_avail,
|
|
|
++ J16SAMPIMAGE output_buf,
|
|
|
++ JDIMENSION *out_row_group_ctr,
|
|
|
++ JDIMENSION out_row_groups_avail);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+-/* Coefficient buffer control */
|
|
|
++/* Lossy mode: Coefficient buffer control
|
|
|
++ * Lossless mode: Difference buffer control
|
|
|
++ */
|
|
|
+ struct jpeg_c_coef_controller {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
|
|
|
+ boolean (*compress_data) (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
|
|
|
++ boolean (*compress_data_12) (j_compress_ptr cinfo, J12SAMPIMAGE input_buf);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ boolean (*compress_data_16) (j_compress_ptr cinfo, J16SAMPIMAGE input_buf);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Colorspace conversion */
|
|
|
+ struct jpeg_color_converter {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo);
|
|
|
+ void (*color_convert) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+ JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+ int num_rows);
|
|
|
++ void (*color_convert_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
|
|
|
++ J12SAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
++ int num_rows);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ void (*color_convert_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
|
|
|
++ J16SAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
++ int num_rows);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Downsampling */
|
|
|
+ struct jpeg_downsampler {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo);
|
|
|
+ void (*downsample) (j_compress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION in_row_index, JSAMPIMAGE output_buf,
|
|
|
+ JDIMENSION out_row_group_index);
|
|
|
++ void (*downsample_12) (j_compress_ptr cinfo, J12SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_index, J12SAMPIMAGE output_buf,
|
|
|
++ JDIMENSION out_row_group_index);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++ void (*downsample_16) (j_compress_ptr cinfo, J16SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION in_row_index, J16SAMPIMAGE output_buf,
|
|
|
++ JDIMENSION out_row_group_index);
|
|
|
++#endif
|
|
|
+
|
|
|
+ boolean need_context_rows; /* TRUE if need rows above & below */
|
|
|
+ };
|
|
|
+
|
|
|
+-/* Forward DCT (also controls coefficient quantization) */
|
|
|
++/* Lossy mode: Forward DCT (also controls coefficient quantization)
|
|
|
++ * Lossless mode: Prediction, sample differencing, and point transform
|
|
|
++ */
|
|
|
+ struct jpeg_forward_dct {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo);
|
|
|
++
|
|
|
++ /* Lossy mode */
|
|
|
+ /* perhaps this should be an array??? */
|
|
|
+ void (*forward_DCT) (j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+ JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
|
+ JDIMENSION start_row, JDIMENSION start_col,
|
|
|
+ JDIMENSION num_blocks);
|
|
|
++ void (*forward_DCT_12) (j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
++ J12SAMPARRAY sample_data, JBLOCKROW coef_blocks,
|
|
|
++ JDIMENSION start_row, JDIMENSION start_col,
|
|
|
++ JDIMENSION num_blocks);
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Entropy encoding */
|
|
|
+ struct jpeg_entropy_encoder {
|
|
|
+ void (*start_pass) (j_compress_ptr cinfo, boolean gather_statistics);
|
|
|
++
|
|
|
++ /* Lossy mode */
|
|
|
+ boolean (*encode_mcu) (j_compress_ptr cinfo, JBLOCKROW *MCU_data);
|
|
|
++ /* Lossless mode */
|
|
|
++ JDIMENSION (*encode_mcus) (j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
|
|
|
++ JDIMENSION nMCU);
|
|
|
++
|
|
|
+ void (*finish_pass) (j_compress_ptr cinfo);
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Marker writing */
|
|
|
+ struct jpeg_marker_writer {
|
|
|
+ void (*write_file_header) (j_compress_ptr cinfo);
|
|
|
+ void (*write_frame_header) (j_compress_ptr cinfo);
|
|
|
+ void (*write_scan_header) (j_compress_ptr cinfo);
|
|
|
+@@ -159,16 +233,17 @@ struct jpeg_marker_writer {
|
|
|
+
|
|
|
+ /* Master control module */
|
|
|
+ struct jpeg_decomp_master {
|
|
|
+ void (*prepare_for_output_pass) (j_decompress_ptr cinfo);
|
|
|
+ void (*finish_output_pass) (j_decompress_ptr cinfo);
|
|
|
+
|
|
|
+ /* State variables made visible to other modules */
|
|
|
+ boolean is_dummy_pass; /* True during 1st pass for 2-pass quant */
|
|
|
++ boolean lossless; /* True if decompressing a lossless image */
|
|
|
+
|
|
|
+ /* Partial decompression variables */
|
|
|
+ JDIMENSION first_iMCU_col;
|
|
|
+ JDIMENSION last_iMCU_col;
|
|
|
+ JDIMENSION first_MCU_col[MAX_COMPONENTS];
|
|
|
+ JDIMENSION last_MCU_col[MAX_COMPONENTS];
|
|
|
+ boolean jinit_upsampler_no_alloc;
|
|
|
+
|
|
|
+@@ -188,36 +263,72 @@ struct jpeg_input_controller {
|
|
|
+ boolean eoi_reached; /* True when EOI has been consumed */
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Main buffer control (downsampled-data buffer) */
|
|
|
+ struct jpeg_d_main_controller {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
|
|
|
+ void (*process_data) (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
|
|
|
++ void (*process_data_12) (j_decompress_ptr cinfo, J12SAMPARRAY output_buf,
|
|
|
++ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ void (*process_data_16) (j_decompress_ptr cinfo, J16SAMPARRAY output_buf,
|
|
|
++ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+-/* Coefficient buffer control */
|
|
|
++/* Lossy mode: Coefficient buffer control
|
|
|
++ * Lossless mode: Difference buffer control
|
|
|
++ */
|
|
|
+ struct jpeg_d_coef_controller {
|
|
|
+ void (*start_input_pass) (j_decompress_ptr cinfo);
|
|
|
+ int (*consume_data) (j_decompress_ptr cinfo);
|
|
|
+ void (*start_output_pass) (j_decompress_ptr cinfo);
|
|
|
+ int (*decompress_data) (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
|
|
|
++ int (*decompress_data_12) (j_decompress_ptr cinfo, J12SAMPIMAGE output_buf);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ int (*decompress_data_16) (j_decompress_ptr cinfo, J16SAMPIMAGE output_buf);
|
|
|
++#endif
|
|
|
++
|
|
|
++ /* These variables keep track of the current location of the input side. */
|
|
|
++ /* cinfo->input_iMCU_row is also used for this. */
|
|
|
++ JDIMENSION MCU_ctr; /* counts MCUs processed in current row */
|
|
|
++ int MCU_vert_offset; /* counts MCU rows within iMCU row */
|
|
|
++ int MCU_rows_per_iMCU_row; /* number of such rows needed */
|
|
|
++
|
|
|
++ /* The output side's location is represented by cinfo->output_iMCU_row. */
|
|
|
++
|
|
|
++ /* Lossy mode */
|
|
|
+ /* Pointer to array of coefficient virtual arrays, or NULL if none */
|
|
|
+ jvirt_barray_ptr *coef_arrays;
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Decompression postprocessing (color quantization buffer control) */
|
|
|
+ struct jpeg_d_post_controller {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
|
|
|
+ void (*post_process_data) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+ JDIMENSION in_row_groups_avail,
|
|
|
+ JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
|
|
|
+ JDIMENSION out_rows_avail);
|
|
|
++ void (*post_process_data_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION *in_row_group_ctr,
|
|
|
++ JDIMENSION in_row_groups_avail,
|
|
|
++ J12SAMPARRAY output_buf,
|
|
|
++ JDIMENSION *out_row_ctr,
|
|
|
++ JDIMENSION out_rows_avail);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ void (*post_process_data_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION *in_row_group_ctr,
|
|
|
++ JDIMENSION in_row_groups_avail,
|
|
|
++ J16SAMPARRAY output_buf,
|
|
|
++ JDIMENSION *out_row_ctr,
|
|
|
++ JDIMENSION out_rows_avail);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Marker reading & parsing */
|
|
|
+ struct jpeg_marker_reader {
|
|
|
+ void (*reset_marker_reader) (j_decompress_ptr cinfo);
|
|
|
+ /* Read markers until SOS or EOI.
|
|
|
+ * Returns same codes as are defined for jpeg_consume_input:
|
|
|
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
|
|
|
+@@ -233,60 +344,98 @@ struct jpeg_marker_reader {
|
|
|
+ boolean saw_SOF; /* found SOF? */
|
|
|
+ int next_restart_num; /* next restart number expected (0-7) */
|
|
|
+ unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Entropy decoding */
|
|
|
+ struct jpeg_entropy_decoder {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo);
|
|
|
++
|
|
|
++ /* Lossy mode */
|
|
|
+ boolean (*decode_mcu) (j_decompress_ptr cinfo, JBLOCKROW *MCU_data);
|
|
|
++ /* Lossless mode */
|
|
|
++ JDIMENSION (*decode_mcus) (j_decompress_ptr cinfo, JDIFFIMAGE diff_buf,
|
|
|
++ JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
|
|
|
++ JDIMENSION nMCU);
|
|
|
++ boolean (*process_restart) (j_decompress_ptr cinfo);
|
|
|
+
|
|
|
+ /* This is here to share code between baseline and progressive decoders; */
|
|
|
+ /* other modules probably should not use it */
|
|
|
+ boolean insufficient_data; /* set TRUE after emitting warning */
|
|
|
+ };
|
|
|
+
|
|
|
+-/* Inverse DCT (also performs dequantization) */
|
|
|
++/* Lossy mode: Inverse DCT (also performs dequantization)
|
|
|
++ * Lossless mode: Prediction, sample undifferencing, point transform, and
|
|
|
++ * sample size scaling
|
|
|
++ */
|
|
|
+ typedef void (*inverse_DCT_method_ptr) (j_decompress_ptr cinfo,
|
|
|
+ jpeg_component_info *compptr,
|
|
|
+ JCOEFPTR coef_block,
|
|
|
+ JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION output_col);
|
|
|
++typedef void (*inverse_DCT_12_method_ptr) (j_decompress_ptr cinfo,
|
|
|
++ jpeg_component_info *compptr,
|
|
|
++ JCOEFPTR coef_block,
|
|
|
++ J12SAMPARRAY output_buf,
|
|
|
++ JDIMENSION output_col);
|
|
|
+
|
|
|
+ struct jpeg_inverse_dct {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo);
|
|
|
++
|
|
|
++ /* Lossy mode */
|
|
|
+ /* It is useful to allow each component to have a separate IDCT method. */
|
|
|
+ inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
|
|
|
++ inverse_DCT_12_method_ptr inverse_DCT_12[MAX_COMPONENTS];
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Upsampling (note that upsampler must also call color converter) */
|
|
|
+ struct jpeg_upsampler {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo);
|
|
|
+ void (*upsample) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION *in_row_group_ctr,
|
|
|
+ JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
|
|
|
+ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
|
|
|
++ void (*upsample_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION *in_row_group_ctr,
|
|
|
++ JDIMENSION in_row_groups_avail, J12SAMPARRAY output_buf,
|
|
|
++ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ void (*upsample_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION *in_row_group_ctr,
|
|
|
++ JDIMENSION in_row_groups_avail, J16SAMPARRAY output_buf,
|
|
|
++ JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
|
|
|
++#endif
|
|
|
+
|
|
|
+ boolean need_context_rows; /* TRUE if need rows above & below */
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Colorspace conversion */
|
|
|
+ struct jpeg_color_deconverter {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo);
|
|
|
+ void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+ JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
+ int num_rows);
|
|
|
++ void (*color_convert_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, J12SAMPARRAY output_buf,
|
|
|
++ int num_rows);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++ void (*color_convert_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
|
|
|
++ JDIMENSION input_row, J16SAMPARRAY output_buf,
|
|
|
++ int num_rows);
|
|
|
++#endif
|
|
|
+ };
|
|
|
+
|
|
|
+ /* Color quantization or color precision reduction */
|
|
|
+ struct jpeg_color_quantizer {
|
|
|
+ void (*start_pass) (j_decompress_ptr cinfo, boolean is_pre_scan);
|
|
|
+ void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+ JSAMPARRAY output_buf, int num_rows);
|
|
|
++ void (*color_quantize_12) (j_decompress_ptr cinfo, J12SAMPARRAY input_buf,
|
|
|
++ J12SAMPARRAY output_buf, int num_rows);
|
|
|
+ void (*finish_pass) (j_decompress_ptr cinfo);
|
|
|
+ void (*new_color_map) (j_decompress_ptr cinfo);
|
|
|
+ };
|
|
|
+
|
|
|
+
|
|
|
+ /* Miscellaneous useful macros */
|
|
|
+
|
|
|
+ #undef MAX
|
|
|
+@@ -318,55 +467,122 @@ struct jpeg_color_quantizer {
|
|
|
+
|
|
|
+
|
|
|
+ /* Compression module initialization routines */
|
|
|
+ EXTERN(void) jinit_compress_master(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_c_master_control(j_compress_ptr cinfo,
|
|
|
+ boolean transcode_only);
|
|
|
+ EXTERN(void) jinit_c_main_controller(j_compress_ptr cinfo,
|
|
|
+ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_c_main_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
+ EXTERN(void) jinit_c_prep_controller(j_compress_ptr cinfo,
|
|
|
+ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_c_prep_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
+ EXTERN(void) jinit_c_coef_controller(j_compress_ptr cinfo,
|
|
|
+ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_c_coef_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
+ EXTERN(void) jinit_color_converter(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_color_converter(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_downsampler(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_downsampler(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_forward_dct(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_forward_dct(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_huff_encoder(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_phuff_encoder(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_arith_encoder(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_marker_writer(j_compress_ptr cinfo);
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++EXTERN(void) j16init_c_main_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j16init_c_prep_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j16init_color_converter(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) j16init_downsampler(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) jinit_c_diff_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_c_diff_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j16init_c_diff_controller(j_compress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) jinit_lhuff_encoder(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) jinit_lossless_compressor(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_lossless_compressor(j_compress_ptr cinfo);
|
|
|
++EXTERN(void) j16init_lossless_compressor(j_compress_ptr cinfo);
|
|
|
++#endif
|
|
|
++
|
|
|
+ /* Decompression module initialization routines */
|
|
|
+ EXTERN(void) jinit_master_decompress(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_d_main_controller(j_decompress_ptr cinfo,
|
|
|
+ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_d_main_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
+ EXTERN(void) jinit_d_coef_controller(j_decompress_ptr cinfo,
|
|
|
+ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_d_coef_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
+ EXTERN(void) jinit_d_post_controller(j_decompress_ptr cinfo,
|
|
|
+ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_d_post_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
+ EXTERN(void) jinit_input_controller(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_marker_reader(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_huff_decoder(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_phuff_decoder(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_arith_decoder(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_inverse_dct(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_inverse_dct(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_upsampler(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_upsampler(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_color_deconverter(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_color_deconverter(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_1pass_quantizer(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_1pass_quantizer(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_2pass_quantizer(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_2pass_quantizer(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jinit_merged_upsampler(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_merged_upsampler(j_decompress_ptr cinfo);
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++EXTERN(void) j16init_d_main_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j16init_d_post_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j16init_upsampler(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j16init_color_deconverter(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) jinit_d_diff_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j12init_d_diff_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) j16init_d_diff_controller(j_decompress_ptr cinfo,
|
|
|
++ boolean need_full_buffer);
|
|
|
++EXTERN(void) jinit_lhuff_decoder(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) jinit_lossless_decompressor(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j12init_lossless_decompressor(j_decompress_ptr cinfo);
|
|
|
++EXTERN(void) j16init_lossless_decompressor(j_decompress_ptr cinfo);
|
|
|
++#endif
|
|
|
++
|
|
|
+ /* Memory manager initialization */
|
|
|
+ EXTERN(void) jinit_memory_mgr(j_common_ptr cinfo);
|
|
|
+
|
|
|
+ /* Utility routines in jutils.c */
|
|
|
+ EXTERN(long) jdiv_round_up(long a, long b);
|
|
|
+ EXTERN(long) jround_up(long a, long b);
|
|
|
+ EXTERN(void) jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
|
|
|
+ JSAMPARRAY output_array, int dest_row,
|
|
|
+ int num_rows, JDIMENSION num_cols);
|
|
|
++EXTERN(void) j12copy_sample_rows(J12SAMPARRAY input_array, int source_row,
|
|
|
++ J12SAMPARRAY output_array, int dest_row,
|
|
|
++ int num_rows, JDIMENSION num_cols);
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++EXTERN(void) j16copy_sample_rows(J16SAMPARRAY input_array, int source_row,
|
|
|
++ J16SAMPARRAY output_array, int dest_row,
|
|
|
++ int num_rows, JDIMENSION num_cols);
|
|
|
++#endif
|
|
|
+ EXTERN(void) jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
|
|
|
+ JDIMENSION num_blocks);
|
|
|
+ EXTERN(void) jzero_far(void *target, size_t bytestozero);
|
|
|
+ /* Constant tables in jutils.c */
|
|
|
+ #if 0 /* This table is not actually needed in v6a */
|
|
|
+ extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
|
|
|
+ #endif
|
|
|
+ extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
|
|
|
+diff --git a/media/libjpeg/jpeglib.h b/media/libjpeg/jpeglib.h
|
|
|
+--- a/media/libjpeg/jpeglib.h
|
|
|
++++ b/media/libjpeg/jpeglib.h
|
|
|
+@@ -1,16 +1,19 @@
|
|
|
+ /*
|
|
|
+ * jpeglib.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1998, Thomas G. Lane.
|
|
|
+ * Modified 2002-2009 by Guido Vollbeding.
|
|
|
++ * Lossless JPEG Modifications:
|
|
|
++ * Copyright (C) 1999, Ken Murchison.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, 2022-2023,
|
|
|
++ D. R. Commander.
|
|
|
+ * Copyright (C) 2015, Google, Inc.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file defines the application interface for the JPEG library.
|
|
|
+ * Most applications using the library need only include this file,
|
|
|
+ * and perhaps jerror.h if they want to know the exact error codes.
|
|
|
+ */
|
|
|
+@@ -38,43 +41,64 @@ extern "C" {
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /* Various constants determining the sizes of things.
|
|
|
+ * All of these are specified by the JPEG standard, so don't change them
|
|
|
+ * if you want to be compatible.
|
|
|
+ */
|
|
|
+
|
|
|
++/* NOTE: In lossless mode, an MCU contains one or more samples rather than one
|
|
|
++ * or more 8x8 DCT blocks, so the term "data unit" is used to generically
|
|
|
++ * describe a sample in lossless mode or an 8x8 DCT block in lossy mode. To
|
|
|
++ * preserve backward API/ABI compatibility, the field and macro names retain
|
|
|
++ * the "block" terminology.
|
|
|
++ */
|
|
|
++
|
|
|
+ #define DCTSIZE 8 /* The basic DCT block is 8x8 samples */
|
|
|
+ #define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */
|
|
|
+ #define NUM_QUANT_TBLS 4 /* Quantization tables are numbered 0..3 */
|
|
|
+ #define NUM_HUFF_TBLS 4 /* Huffman tables are numbered 0..3 */
|
|
|
+ #define NUM_ARITH_TBLS 16 /* Arith-coding tables are numbered 0..15 */
|
|
|
+ #define MAX_COMPS_IN_SCAN 4 /* JPEG limit on # of components in one scan */
|
|
|
+ #define MAX_SAMP_FACTOR 4 /* JPEG limit on sampling factors */
|
|
|
+ /* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
|
|
|
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
|
|
|
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
|
|
|
+ * to handle it. We even let you do this from the jconfig.h file. However,
|
|
|
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
|
|
|
+ * sometimes emits noncompliant files doesn't mean you should too.
|
|
|
+ */
|
|
|
+-#define C_MAX_BLOCKS_IN_MCU 10 /* compressor's limit on blocks per MCU */
|
|
|
++#define C_MAX_BLOCKS_IN_MCU 10 /* compressor's limit on data units/MCU */
|
|
|
+ #ifndef D_MAX_BLOCKS_IN_MCU
|
|
|
+-#define D_MAX_BLOCKS_IN_MCU 10 /* decompressor's limit on blocks per MCU */
|
|
|
++#define D_MAX_BLOCKS_IN_MCU 10 /* decompressor's limit on data units/MCU */
|
|
|
+ #endif
|
|
|
+
|
|
|
+
|
|
|
+ /* Data structures for images (arrays of samples and of DCT coefficients).
|
|
|
+ */
|
|
|
+
|
|
|
+ typedef JSAMPLE *JSAMPROW; /* ptr to one image row of pixel samples. */
|
|
|
+ typedef JSAMPROW *JSAMPARRAY; /* ptr to some rows (a 2-D sample array) */
|
|
|
+ typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */
|
|
|
+
|
|
|
++typedef J12SAMPLE *J12SAMPROW; /* ptr to one image row of 12-bit pixel
|
|
|
++ samples. */
|
|
|
++typedef J12SAMPROW *J12SAMPARRAY; /* ptr to some 12-bit sample rows (a 2-D
|
|
|
++ 12-bit sample array) */
|
|
|
++typedef J12SAMPARRAY *J12SAMPIMAGE; /* a 3-D 12-bit sample array: top index is
|
|
|
++ color */
|
|
|
++
|
|
|
++typedef J16SAMPLE *J16SAMPROW; /* ptr to one image row of 16-bit pixel
|
|
|
++ samples. */
|
|
|
++typedef J16SAMPROW *J16SAMPARRAY; /* ptr to some 16-bit sample rows (a 2-D
|
|
|
++ 16-bit sample array) */
|
|
|
++typedef J16SAMPARRAY *J16SAMPIMAGE; /* a 3-D 16-bit sample array: top index is
|
|
|
++ color */
|
|
|
++
|
|
|
+ typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */
|
|
|
+ typedef JBLOCK *JBLOCKROW; /* pointer to one row of coefficient blocks */
|
|
|
+ typedef JBLOCKROW *JBLOCKARRAY; /* a 2-D array of coefficient blocks */
|
|
|
+ typedef JBLOCKARRAY *JBLOCKIMAGE; /* a 3-D array of coefficient blocks */
|
|
|
+
|
|
|
+ typedef JCOEF *JCOEFPTR; /* useful in a couple of places */
|
|
|
+
|
|
|
+
|
|
|
+@@ -130,56 +154,61 @@ typedef struct {
|
|
|
+ /* for decompression, they are read from the SOS marker. */
|
|
|
+ /* The decompressor output side may not use these variables. */
|
|
|
+ int dc_tbl_no; /* DC entropy table selector (0..3) */
|
|
|
+ int ac_tbl_no; /* AC entropy table selector (0..3) */
|
|
|
+
|
|
|
+ /* Remaining fields should be treated as private by applications. */
|
|
|
+
|
|
|
+ /* These values are computed during compression or decompression startup: */
|
|
|
+- /* Component's size in DCT blocks.
|
|
|
+- * Any dummy blocks added to complete an MCU are not counted; therefore
|
|
|
+- * these values do not depend on whether a scan is interleaved or not.
|
|
|
++ /* Component's size in data units.
|
|
|
++ * In lossy mode, any dummy blocks added to complete an MCU are not counted;
|
|
|
++ * therefore these values do not depend on whether a scan is interleaved or
|
|
|
++ * not. In lossless mode, these are always equal to the image width and
|
|
|
++ * height.
|
|
|
+ */
|
|
|
+ JDIMENSION width_in_blocks;
|
|
|
+ JDIMENSION height_in_blocks;
|
|
|
+- /* Size of a DCT block in samples. Always DCTSIZE for compression.
|
|
|
+- * For decompression this is the size of the output from one DCT block,
|
|
|
++ /* Size of a data unit in samples. Always DCTSIZE for lossy compression.
|
|
|
++ * For lossy decompression this is the size of the output from one DCT block,
|
|
|
+ * reflecting any scaling we choose to apply during the IDCT step.
|
|
|
+- * Values from 1 to 16 are supported.
|
|
|
+- * Note that different components may receive different IDCT scalings.
|
|
|
++ * Values from 1 to 16 are supported. Note that different components may
|
|
|
++ * receive different IDCT scalings. In lossless mode, this is always equal
|
|
|
++ * to 1.
|
|
|
+ */
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ int DCT_h_scaled_size;
|
|
|
+ int DCT_v_scaled_size;
|
|
|
+ #else
|
|
|
+ int DCT_scaled_size;
|
|
|
+ #endif
|
|
|
+ /* The downsampled dimensions are the component's actual, unpadded number
|
|
|
+ * of samples at the main buffer (preprocessing/compression interface), thus
|
|
|
+ * downsampled_width = ceil(image_width * Hi/Hmax)
|
|
|
+- * and similarly for height. For decompression, IDCT scaling is included, so
|
|
|
++ * and similarly for height. For lossy decompression, IDCT scaling is
|
|
|
++ * included, so
|
|
|
+ * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
|
|
|
++ * In lossless mode, these are always equal to the image width and height.
|
|
|
+ */
|
|
|
+ JDIMENSION downsampled_width; /* actual width in samples */
|
|
|
+ JDIMENSION downsampled_height; /* actual height in samples */
|
|
|
+ /* This flag is used only for decompression. In cases where some of the
|
|
|
+ * components will be ignored (eg grayscale output from YCbCr image),
|
|
|
+ * we can skip most computations for the unused components.
|
|
|
+ */
|
|
|
+ boolean component_needed; /* do we need the value of this component? */
|
|
|
+
|
|
|
+ /* These values are computed before starting a scan of the component. */
|
|
|
+ /* The decompressor output side may not use these variables. */
|
|
|
+- int MCU_width; /* number of blocks per MCU, horizontally */
|
|
|
+- int MCU_height; /* number of blocks per MCU, vertically */
|
|
|
++ int MCU_width; /* number of data units per MCU, horizontally */
|
|
|
++ int MCU_height; /* number of data units per MCU, vertically */
|
|
|
+ int MCU_blocks; /* MCU_width * MCU_height */
|
|
|
+ int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
|
|
|
+- int last_col_width; /* # of non-dummy blocks across in last MCU */
|
|
|
+- int last_row_height; /* # of non-dummy blocks down in last MCU */
|
|
|
++ int last_col_width; /* # of non-dummy data units across in last MCU */
|
|
|
++ int last_row_height; /* # of non-dummy data units down in last MCU */
|
|
|
+
|
|
|
+ /* Saved quantization table for component; NULL if none yet saved.
|
|
|
+ * See jdinput.c comments about the need for this information.
|
|
|
+ * This field is currently used only for decompression.
|
|
|
+ */
|
|
|
+ JQUANT_TBL *quant_table;
|
|
|
+
|
|
|
+ /* Private per-component storage for DCT or IDCT subsystem. */
|
|
|
+@@ -187,18 +216,22 @@ typedef struct {
|
|
|
+ } jpeg_component_info;
|
|
|
+
|
|
|
+
|
|
|
+ /* The script for encoding a multiple-scan file is an array of these: */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ int comps_in_scan; /* number of components encoded in this scan */
|
|
|
+ int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
|
|
|
+- int Ss, Se; /* progressive JPEG spectral selection parms */
|
|
|
+- int Ah, Al; /* progressive JPEG successive approx. parms */
|
|
|
++ int Ss, Se; /* progressive JPEG spectral selection parms
|
|
|
++ (Ss is the predictor selection value in
|
|
|
++ lossless mode) */
|
|
|
++ int Ah, Al; /* progressive JPEG successive approx. parms
|
|
|
++ (Al is the point transform value in lossless
|
|
|
++ mode) */
|
|
|
+ } jpeg_scan_info;
|
|
|
+
|
|
|
+ /* The decompressor can save APPn and COM markers in a list of these: */
|
|
|
+
|
|
|
+ typedef struct jpeg_marker_struct *jpeg_saved_marker_ptr;
|
|
|
+
|
|
|
+ struct jpeg_marker_struct {
|
|
|
+ jpeg_saved_marker_ptr next; /* next in list, or NULL */
|
|
|
+@@ -233,17 +266,18 @@ typedef enum {
|
|
|
+ order to ensure the best performance, libjpeg-turbo can set that byte to
|
|
|
+ whatever value it wishes. Use the following colorspace constants to
|
|
|
+ ensure that the X byte is set to 0xFF, so that it can be interpreted as an
|
|
|
+ opaque alpha channel. */
|
|
|
+ JCS_EXT_RGBA, /* red/green/blue/alpha */
|
|
|
+ JCS_EXT_BGRA, /* blue/green/red/alpha */
|
|
|
+ JCS_EXT_ABGR, /* alpha/blue/green/red */
|
|
|
+ JCS_EXT_ARGB, /* alpha/red/green/blue */
|
|
|
+- JCS_RGB565 /* 5-bit red/6-bit green/5-bit blue */
|
|
|
++ JCS_RGB565 /* 5-bit red/6-bit green/5-bit blue
|
|
|
++ [decompression only] */
|
|
|
+ } J_COLOR_SPACE;
|
|
|
+
|
|
|
+ /* DCT/IDCT algorithm options. */
|
|
|
+
|
|
|
+ typedef enum {
|
|
|
+ JDCT_ISLOW, /* accurate integer method */
|
|
|
+ JDCT_IFAST, /* less accurate integer method [legacy feature] */
|
|
|
+ JDCT_FLOAT /* floating-point method [legacy feature] */
|
|
|
+@@ -414,40 +448,43 @@ struct jpeg_compress_struct {
|
|
|
+ int max_h_samp_factor; /* largest h_samp_factor */
|
|
|
+ int max_v_samp_factor; /* largest v_samp_factor */
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
|
|
|
+ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
|
|
|
+ #endif
|
|
|
+
|
|
|
+- JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */
|
|
|
+- /* The coefficient controller receives data in units of MCU rows as defined
|
|
|
+- * for fully interleaved scans (whether the JPEG file is interleaved or not).
|
|
|
+- * There are v_samp_factor * DCTSIZE sample rows of each component in an
|
|
|
+- * "iMCU" (interleaved MCU) row.
|
|
|
++ JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coefficient or
|
|
|
++ difference controller */
|
|
|
++ /* The coefficient or difference controller receives data in units of MCU
|
|
|
++ * rows as defined for fully interleaved scans (whether the JPEG file is
|
|
|
++ * interleaved or not). In lossy mode, there are v_samp_factor * DCTSIZE
|
|
|
++ * sample rows of each component in an "iMCU" (interleaved MCU) row. In
|
|
|
++ * lossless mode, total_iMCU_rows is always equal to the image height.
|
|
|
+ */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * These fields are valid during any one scan.
|
|
|
+ * They describe the components and MCUs actually appearing in the scan.
|
|
|
+ */
|
|
|
+ int comps_in_scan; /* # of JPEG components in this scan */
|
|
|
+ jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN];
|
|
|
+ /* *cur_comp_info[i] describes component that appears i'th in SOS */
|
|
|
+
|
|
|
+ JDIMENSION MCUs_per_row; /* # of MCUs across the image */
|
|
|
+ JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */
|
|
|
+
|
|
|
+- int blocks_in_MCU; /* # of DCT blocks per MCU */
|
|
|
++ int blocks_in_MCU; /* # of data units per MCU */
|
|
|
+ int MCU_membership[C_MAX_BLOCKS_IN_MCU];
|
|
|
+ /* MCU_membership[i] is index in cur_comp_info of component owning */
|
|
|
+- /* i'th block in an MCU */
|
|
|
++ /* i'th data unit in an MCU */
|
|
|
+
|
|
|
+- int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */
|
|
|
++ int Ss, Se, Ah, Al; /* progressive/lossless JPEG parameters for
|
|
|
++ scan */
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 80
|
|
|
+ int block_size; /* the basic DCT block size: 1..16 */
|
|
|
+ const int *natural_order; /* natural-order position array */
|
|
|
+ int lim_Se; /* min( Se, DCTSIZE2-1 ) */
|
|
|
+ #endif
|
|
|
+
|
|
|
+ /*
|
|
|
+@@ -532,17 +569,22 @@ struct jpeg_decompress_struct {
|
|
|
+
|
|
|
+ /* When quantizing colors, the output colormap is described by these fields.
|
|
|
+ * The application can supply a colormap by setting colormap non-NULL before
|
|
|
+ * calling jpeg_start_decompress; otherwise a colormap is created during
|
|
|
+ * jpeg_start_decompress or jpeg_start_output.
|
|
|
+ * The map has out_color_components rows and actual_number_of_colors columns.
|
|
|
+ */
|
|
|
+ int actual_number_of_colors; /* number of entries in use */
|
|
|
+- JSAMPARRAY colormap; /* The color map as a 2-D pixel array */
|
|
|
++ JSAMPARRAY colormap; /* The color map as a 2-D pixel array
|
|
|
++ If data_precision is 12 or 16, then this is
|
|
|
++ actually a J12SAMPARRAY or a J16SAMPARRAY,
|
|
|
++ so callers must type-cast it in order to
|
|
|
++ read/write 12-bit or 16-bit samples from/to
|
|
|
++ the array. */
|
|
|
+
|
|
|
+ /* State variables: these variables indicate the progress of decompression.
|
|
|
+ * The application may examine these but must not modify them.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Row index of next scanline to be read from jpeg_read_scanlines().
|
|
|
+ * Application may use this to control its processing loop, e.g.,
|
|
|
+ * "while (output_scanline < output_height)".
|
|
|
+@@ -642,44 +684,51 @@ struct jpeg_decompress_struct {
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */
|
|
|
+ int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */
|
|
|
+ #else
|
|
|
+ int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */
|
|
|
+ #endif
|
|
|
+
|
|
|
+ JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */
|
|
|
+- /* The coefficient controller's input and output progress is measured in
|
|
|
+- * units of "iMCU" (interleaved MCU) rows. These are the same as MCU rows
|
|
|
+- * in fully interleaved JPEG scans, but are used whether the scan is
|
|
|
+- * interleaved or not. We define an iMCU row as v_samp_factor DCT block
|
|
|
+- * rows of each component. Therefore, the IDCT output contains
|
|
|
++ /* The coefficient or difference controller's input and output progress is
|
|
|
++ * measured in units of "iMCU" (interleaved MCU) rows. These are the same as
|
|
|
++ * MCU rows in fully interleaved JPEG scans, but are used whether the scan is
|
|
|
++ * interleaved or not. In lossy mode, we define an iMCU row as v_samp_factor
|
|
|
++ * DCT block rows of each component. Therefore, the IDCT output contains
|
|
|
+ * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row.
|
|
|
++ * In lossless mode, total_iMCU_rows is always equal to the image height.
|
|
|
+ */
|
|
|
+
|
|
|
+- JSAMPLE *sample_range_limit; /* table for fast range-limiting */
|
|
|
++ JSAMPLE *sample_range_limit; /* table for fast range-limiting
|
|
|
++ If data_precision is 12 or 16, then this is
|
|
|
++ actually a J12SAMPLE pointer or a J16SAMPLE
|
|
|
++ pointer, so callers must type-cast it in
|
|
|
++ order to read 12-bit or 16-bit samples from
|
|
|
++ the array. */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * These fields are valid during any one scan.
|
|
|
+ * They describe the components and MCUs actually appearing in the scan.
|
|
|
+ * Note that the decompressor output side must not use these fields.
|
|
|
+ */
|
|
|
+ int comps_in_scan; /* # of JPEG components in this scan */
|
|
|
+ jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN];
|
|
|
+ /* *cur_comp_info[i] describes component that appears i'th in SOS */
|
|
|
+
|
|
|
+ JDIMENSION MCUs_per_row; /* # of MCUs across the image */
|
|
|
+ JDIMENSION MCU_rows_in_scan; /* # of MCU rows in the image */
|
|
|
+
|
|
|
+- int blocks_in_MCU; /* # of DCT blocks per MCU */
|
|
|
++ int blocks_in_MCU; /* # of data units per MCU */
|
|
|
+ int MCU_membership[D_MAX_BLOCKS_IN_MCU];
|
|
|
+ /* MCU_membership[i] is index in cur_comp_info of component owning */
|
|
|
+- /* i'th block in an MCU */
|
|
|
++ /* i'th data unit in an MCU */
|
|
|
+
|
|
|
+- int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */
|
|
|
++ int Ss, Se, Ah, Al; /* progressive/lossless JPEG parameters for
|
|
|
++ scan */
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 80
|
|
|
+ /* These fields are derived from Se of first SOS marker.
|
|
|
+ */
|
|
|
+ int block_size; /* the basic DCT block size: 1..16 */
|
|
|
+ const int *natural_order; /* natural-order position array for entropy decode */
|
|
|
+ int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */
|
|
|
+ #endif
|
|
|
+@@ -830,16 +879,21 @@ typedef struct jvirt_sarray_control *jvi
|
|
|
+ typedef struct jvirt_barray_control *jvirt_barray_ptr;
|
|
|
+
|
|
|
+
|
|
|
+ struct jpeg_memory_mgr {
|
|
|
+ /* Method pointers */
|
|
|
+ void *(*alloc_small) (j_common_ptr cinfo, int pool_id, size_t sizeofobject);
|
|
|
+ void *(*alloc_large) (j_common_ptr cinfo, int pool_id,
|
|
|
+ size_t sizeofobject);
|
|
|
++ /* If cinfo->data_precision is 12 or 16, then this method and the
|
|
|
++ * access_virt_sarray method actually return a J12SAMPARRAY or a
|
|
|
++ * J16SAMPARRAY, so callers must type-cast the return value in order to
|
|
|
++ * read/write 12-bit or 16-bit samples from/to the array.
|
|
|
++ */
|
|
|
+ JSAMPARRAY (*alloc_sarray) (j_common_ptr cinfo, int pool_id,
|
|
|
+ JDIMENSION samplesperrow, JDIMENSION numrows);
|
|
|
+ JBLOCKARRAY (*alloc_barray) (j_common_ptr cinfo, int pool_id,
|
|
|
+ JDIMENSION blocksperrow, JDIMENSION numrows);
|
|
|
+ jvirt_sarray_ptr (*request_virt_sarray) (j_common_ptr cinfo, int pool_id,
|
|
|
+ boolean pre_zero,
|
|
|
+ JDIMENSION samplesperrow,
|
|
|
+ JDIMENSION numrows,
|
|
|
+@@ -911,23 +965,21 @@ EXTERN(void) jpeg_CreateDecompress(j_dec
|
|
|
+ EXTERN(void) jpeg_destroy_compress(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jpeg_destroy_decompress(j_decompress_ptr cinfo);
|
|
|
+
|
|
|
+ /* Standard data source and destination managers: stdio streams. */
|
|
|
+ /* Caller is responsible for opening the file before and closing after. */
|
|
|
+ EXTERN(void) jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile);
|
|
|
+ EXTERN(void) jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile);
|
|
|
+
|
|
|
+-#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
|
|
|
+ /* Data source and destination managers: memory buffers. */
|
|
|
+ EXTERN(void) jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
|
|
|
+ unsigned long *outsize);
|
|
|
+ EXTERN(void) jpeg_mem_src(j_decompress_ptr cinfo,
|
|
|
+ const unsigned char *inbuffer, unsigned long insize);
|
|
|
+-#endif
|
|
|
+
|
|
|
+ /* Default parameter setup for compression */
|
|
|
+ EXTERN(void) jpeg_set_defaults(j_compress_ptr cinfo);
|
|
|
+ /* Compression parameter setup aids */
|
|
|
+ EXTERN(void) jpeg_set_colorspace(j_compress_ptr cinfo,
|
|
|
+ J_COLOR_SPACE colorspace);
|
|
|
+ EXTERN(void) jpeg_default_colorspace(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jpeg_set_quality(j_compress_ptr cinfo, int quality,
|
|
|
+@@ -937,37 +989,49 @@ EXTERN(void) jpeg_set_linear_quality(j_c
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ EXTERN(void) jpeg_default_qtables(j_compress_ptr cinfo,
|
|
|
+ boolean force_baseline);
|
|
|
+ #endif
|
|
|
+ EXTERN(void) jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
|
|
|
+ const unsigned int *basic_table,
|
|
|
+ int scale_factor, boolean force_baseline);
|
|
|
+ EXTERN(int) jpeg_quality_scaling(int quality);
|
|
|
++EXTERN(void) jpeg_enable_lossless(j_compress_ptr cinfo,
|
|
|
++ int predictor_selection_value,
|
|
|
++ int point_transform);
|
|
|
+ EXTERN(void) jpeg_simple_progression(j_compress_ptr cinfo);
|
|
|
+ EXTERN(void) jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress);
|
|
|
+ EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table(j_common_ptr cinfo);
|
|
|
+ EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table(j_common_ptr cinfo);
|
|
|
+
|
|
|
+ /* Main entry points for compression */
|
|
|
+ EXTERN(void) jpeg_start_compress(j_compress_ptr cinfo,
|
|
|
+ boolean write_all_tables);
|
|
|
+ EXTERN(JDIMENSION) jpeg_write_scanlines(j_compress_ptr cinfo,
|
|
|
+ JSAMPARRAY scanlines,
|
|
|
+ JDIMENSION num_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg12_write_scanlines(j_compress_ptr cinfo,
|
|
|
++ J12SAMPARRAY scanlines,
|
|
|
++ JDIMENSION num_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg16_write_scanlines(j_compress_ptr cinfo,
|
|
|
++ J16SAMPARRAY scanlines,
|
|
|
++ JDIMENSION num_lines);
|
|
|
+ EXTERN(void) jpeg_finish_compress(j_compress_ptr cinfo);
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 70
|
|
|
+ /* Precalculate JPEG dimensions for current compression parameters. */
|
|
|
+ EXTERN(void) jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo);
|
|
|
+ #endif
|
|
|
+
|
|
|
+ /* Replaces jpeg_write_scanlines when writing raw downsampled data. */
|
|
|
+ EXTERN(JDIMENSION) jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
|
|
|
+ JDIMENSION num_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg12_write_raw_data(j_compress_ptr cinfo,
|
|
|
++ J12SAMPIMAGE data,
|
|
|
++ JDIMENSION num_lines);
|
|
|
+
|
|
|
+ /* Write a special marker. See libjpeg.txt concerning safe usage. */
|
|
|
+ EXTERN(void) jpeg_write_marker(j_compress_ptr cinfo, int marker,
|
|
|
+ const JOCTET *dataptr, unsigned int datalen);
|
|
|
+ /* Same, but piecemeal. */
|
|
|
+ EXTERN(void) jpeg_write_m_header(j_compress_ptr cinfo, int marker,
|
|
|
+ unsigned int datalen);
|
|
|
+ EXTERN(void) jpeg_write_m_byte(j_compress_ptr cinfo, int val);
|
|
|
+@@ -993,25 +1057,38 @@ EXTERN(int) jpeg_read_header(j_decompres
|
|
|
+ * give a suspension return (the stdio source module doesn't).
|
|
|
+ */
|
|
|
+
|
|
|
+ /* Main entry points for decompression */
|
|
|
+ EXTERN(boolean) jpeg_start_decompress(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(JDIMENSION) jpeg_read_scanlines(j_decompress_ptr cinfo,
|
|
|
+ JSAMPARRAY scanlines,
|
|
|
+ JDIMENSION max_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg12_read_scanlines(j_decompress_ptr cinfo,
|
|
|
++ J12SAMPARRAY scanlines,
|
|
|
++ JDIMENSION max_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg16_read_scanlines(j_decompress_ptr cinfo,
|
|
|
++ J16SAMPARRAY scanlines,
|
|
|
++ JDIMENSION max_lines);
|
|
|
+ EXTERN(JDIMENSION) jpeg_skip_scanlines(j_decompress_ptr cinfo,
|
|
|
+ JDIMENSION num_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg12_skip_scanlines(j_decompress_ptr cinfo,
|
|
|
++ JDIMENSION num_lines);
|
|
|
+ EXTERN(void) jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
|
|
|
+ JDIMENSION *width);
|
|
|
++EXTERN(void) jpeg12_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
|
|
|
++ JDIMENSION *width);
|
|
|
+ EXTERN(boolean) jpeg_finish_decompress(j_decompress_ptr cinfo);
|
|
|
+
|
|
|
+ /* Replaces jpeg_read_scanlines when reading raw downsampled data. */
|
|
|
+ EXTERN(JDIMENSION) jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
|
|
|
+ JDIMENSION max_lines);
|
|
|
++EXTERN(JDIMENSION) jpeg12_read_raw_data(j_decompress_ptr cinfo,
|
|
|
++ J12SAMPIMAGE data,
|
|
|
++ JDIMENSION max_lines);
|
|
|
+
|
|
|
+ /* Additional entry points for buffered-image mode. */
|
|
|
+ EXTERN(boolean) jpeg_has_multiple_scans(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(boolean) jpeg_start_output(j_decompress_ptr cinfo, int scan_number);
|
|
|
+ EXTERN(boolean) jpeg_finish_output(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(boolean) jpeg_input_complete(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(void) jpeg_new_colormap(j_decompress_ptr cinfo);
|
|
|
+ EXTERN(int) jpeg_consume_input(j_decompress_ptr cinfo);
|
|
|
+diff --git a/media/libjpeg/jquant1.c b/media/libjpeg/jquant1.c
|
|
|
+--- a/media/libjpeg/jquant1.c
|
|
|
++++ b/media/libjpeg/jquant1.c
|
|
|
+@@ -1,28 +1,29 @@
|
|
|
+ /*
|
|
|
+ * jquant1.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009, 2015, D. R. Commander.
|
|
|
++ * Copyright (C) 2009, 2015, 2022-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains 1-pass color quantization (color mapping) routines.
|
|
|
+ * These routines provide mapping to a fixed color map using equally spaced
|
|
|
+ * color values. Optional Floyd-Steinberg or ordered dithering is available.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+-#ifdef QUANT_1PASS_SUPPORTED
|
|
|
++#if defined(QUANT_1PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
|
|
|
+ * high quality, colormapped output capability. A 2-pass quantizer usually
|
|
|
+ * gives better visual quality; however, for quantized grayscale output this
|
|
|
+ * quantizer is perfectly adequate. Dithering is highly recommended with this
|
|
|
+ * quantizer, though you can turn it off if you really want to.
|
|
|
+@@ -61,17 +62,17 @@
|
|
|
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
|
|
|
+ * "dither" value to the input pixel and then round the result to the nearest
|
|
|
+ * output value. The dither value is equivalent to (0.5 - threshold) times
|
|
|
+ * the distance between output values. For ordered dithering, we assume that
|
|
|
+ * the output colors are equally spaced; if not, results will probably be
|
|
|
+ * worse, since the dither may be too much or too little at a given point.
|
|
|
+ *
|
|
|
+ * The normal calculation would be to form pixel value + dither, range-limit
|
|
|
+- * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
|
|
|
++ * this to 0.._MAXJSAMPLE, and then index into the colorindex table as usual.
|
|
|
+ * We can skip the separate range-limiting step by extending the colorindex
|
|
|
+ * table in both directions.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define ODITHER_SIZE 16 /* dimension of dither matrix */
|
|
|
+ /* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
|
|
|
+ #define ODITHER_CELLS (ODITHER_SIZE * ODITHER_SIZE) /* # cells in matrix */
|
|
|
+ #define ODITHER_MASK (ODITHER_SIZE - 1) /* mask for wrapping around
|
|
|
+@@ -139,23 +140,23 @@ typedef FSERROR *FSERRPTR; /* point
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ #define MAX_Q_COMPS 4 /* max components I can handle */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_color_quantizer pub; /* public fields */
|
|
|
+
|
|
|
+ /* Initially allocated colormap is saved here */
|
|
|
+- JSAMPARRAY sv_colormap; /* The color map as a 2-D pixel array */
|
|
|
++ _JSAMPARRAY sv_colormap; /* The color map as a 2-D pixel array */
|
|
|
+ int sv_actual; /* number of entries in use */
|
|
|
+
|
|
|
+- JSAMPARRAY colorindex; /* Precomputed mapping for speed */
|
|
|
++ _JSAMPARRAY colorindex; /* Precomputed mapping for speed */
|
|
|
+ /* colorindex[i][j] = index of color closest to pixel value j in component i,
|
|
|
+ * premultiplied as described above. Since colormap indexes must fit into
|
|
|
+- * JSAMPLEs, the entries of this array will too.
|
|
|
++ * _JSAMPLEs, the entries of this array will too.
|
|
|
+ */
|
|
|
+ boolean is_padded; /* is the colorindex padded for odither? */
|
|
|
+
|
|
|
+ int Ncolors[MAX_Q_COMPS]; /* # of values allocated to each component */
|
|
|
+
|
|
|
+ /* Variables for ordered dithering */
|
|
|
+ int row_index; /* cur row's vertical index in dither matrix */
|
|
|
+ ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
|
|
|
+@@ -243,46 +244,46 @@ select_ncolors(j_decompress_ptr cinfo, i
|
|
|
+
|
|
|
+ return total_colors;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(int)
|
|
|
+ output_value(j_decompress_ptr cinfo, int ci, int j, int maxj)
|
|
|
+ /* Return j'th output value, where j will range from 0 to maxj */
|
|
|
+-/* The output values must fall in 0..MAXJSAMPLE in increasing order */
|
|
|
++/* The output values must fall in 0.._MAXJSAMPLE in increasing order */
|
|
|
+ {
|
|
|
+- /* We always provide values 0 and MAXJSAMPLE for each component;
|
|
|
++ /* We always provide values 0 and _MAXJSAMPLE for each component;
|
|
|
+ * any additional values are equally spaced between these limits.
|
|
|
+ * (Forcing the upper and lower values to the limits ensures that
|
|
|
+ * dithering can't produce a color outside the selected gamut.)
|
|
|
+ */
|
|
|
+- return (int)(((JLONG)j * MAXJSAMPLE + maxj / 2) / maxj);
|
|
|
++ return (int)(((JLONG)j * _MAXJSAMPLE + maxj / 2) / maxj);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(int)
|
|
|
+ largest_input_value(j_decompress_ptr cinfo, int ci, int j, int maxj)
|
|
|
+ /* Return largest input value that should map to j'th output value */
|
|
|
+-/* Must have largest(j=0) >= 0, and largest(j=maxj) >= MAXJSAMPLE */
|
|
|
++/* Must have largest(j=0) >= 0, and largest(j=maxj) >= _MAXJSAMPLE */
|
|
|
+ {
|
|
|
+ /* Breakpoints are halfway between values returned by output_value */
|
|
|
+- return (int)(((JLONG)(2 * j + 1) * MAXJSAMPLE + maxj) / (2 * maxj));
|
|
|
++ return (int)(((JLONG)(2 * j + 1) * _MAXJSAMPLE + maxj) / (2 * maxj));
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Create the colormap.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ create_colormap(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+- JSAMPARRAY colormap; /* Created colormap */
|
|
|
++ _JSAMPARRAY colormap; /* Created colormap */
|
|
|
+ int total_colors; /* Number of distinct output colors */
|
|
|
+ int i, j, k, nci, blksize, blkdist, ptr, val;
|
|
|
+
|
|
|
+ /* Select number of colors for each component */
|
|
|
+ total_colors = select_ncolors(cinfo, cquantize->Ncolors);
|
|
|
+
|
|
|
+ /* Report selected color counts */
|
|
|
+ if (cinfo->out_color_components == 3)
|
|
|
+@@ -291,17 +292,17 @@ create_colormap(j_decompress_ptr cinfo)
|
|
|
+ cquantize->Ncolors[2]);
|
|
|
+ else
|
|
|
+ TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
|
|
|
+
|
|
|
+ /* Allocate and fill in the colormap. */
|
|
|
+ /* The colors are ordered in the map in standard row-major order, */
|
|
|
+ /* i.e. rightmost (highest-indexed) color changes most rapidly. */
|
|
|
+
|
|
|
+- colormap = (*cinfo->mem->alloc_sarray)
|
|
|
++ colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (JDIMENSION)total_colors, (JDIMENSION)cinfo->out_color_components);
|
|
|
+
|
|
|
+ /* blksize is number of adjacent repeated entries for a component */
|
|
|
+ /* blkdist is distance between groups of identical entries for a component */
|
|
|
+ blkdist = total_colors;
|
|
|
+
|
|
|
+ for (i = 0; i < cinfo->out_color_components; i++) {
|
|
|
+@@ -310,17 +311,17 @@ create_colormap(j_decompress_ptr cinfo)
|
|
|
+ blksize = blkdist / nci;
|
|
|
+ for (j = 0; j < nci; j++) {
|
|
|
+ /* Compute j'th output value (out of nci) for component */
|
|
|
+ val = output_value(cinfo, i, j, nci - 1);
|
|
|
+ /* Fill in all colormap entries that have this value of this component */
|
|
|
+ for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
|
|
|
+ /* fill in blksize entries beginning at ptr */
|
|
|
+ for (k = 0; k < blksize; k++)
|
|
|
+- colormap[i][ptr + k] = (JSAMPLE)val;
|
|
|
++ colormap[i][ptr + k] = (_JSAMPLE)val;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ blkdist = blksize; /* blksize of this color is blkdist of next */
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Save the colormap in private storage,
|
|
|
+ * where it will survive color quantization mode changes.
|
|
|
+ */
|
|
|
+@@ -332,65 +333,65 @@ create_colormap(j_decompress_ptr cinfo)
|
|
|
+ /*
|
|
|
+ * Create the color index table.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ create_colorindex(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+- JSAMPROW indexptr;
|
|
|
++ _JSAMPROW indexptr;
|
|
|
+ int i, j, k, nci, blksize, val, pad;
|
|
|
+
|
|
|
+- /* For ordered dither, we pad the color index tables by MAXJSAMPLE in
|
|
|
+- * each direction (input index values can be -MAXJSAMPLE .. 2*MAXJSAMPLE).
|
|
|
++ /* For ordered dither, we pad the color index tables by _MAXJSAMPLE in
|
|
|
++ * each direction (input index values can be -_MAXJSAMPLE .. 2*_MAXJSAMPLE).
|
|
|
+ * This is not necessary in the other dithering modes. However, we
|
|
|
+ * flag whether it was done in case user changes dithering mode.
|
|
|
+ */
|
|
|
+ if (cinfo->dither_mode == JDITHER_ORDERED) {
|
|
|
+- pad = MAXJSAMPLE * 2;
|
|
|
++ pad = _MAXJSAMPLE * 2;
|
|
|
+ cquantize->is_padded = TRUE;
|
|
|
+ } else {
|
|
|
+ pad = 0;
|
|
|
+ cquantize->is_padded = FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+- cquantize->colorindex = (*cinfo->mem->alloc_sarray)
|
|
|
++ cquantize->colorindex = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+- (JDIMENSION)(MAXJSAMPLE + 1 + pad),
|
|
|
++ (JDIMENSION)(_MAXJSAMPLE + 1 + pad),
|
|
|
+ (JDIMENSION)cinfo->out_color_components);
|
|
|
+
|
|
|
+ /* blksize is number of adjacent repeated entries for a component */
|
|
|
+ blksize = cquantize->sv_actual;
|
|
|
+
|
|
|
+ for (i = 0; i < cinfo->out_color_components; i++) {
|
|
|
+ /* fill in colorindex entries for i'th color component */
|
|
|
+ nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
|
|
|
+ blksize = blksize / nci;
|
|
|
+
|
|
|
+ /* adjust colorindex pointers to provide padding at negative indexes. */
|
|
|
+ if (pad)
|
|
|
+- cquantize->colorindex[i] += MAXJSAMPLE;
|
|
|
++ cquantize->colorindex[i] += _MAXJSAMPLE;
|
|
|
+
|
|
|
+ /* in loop, val = index of current output value, */
|
|
|
+ /* and k = largest j that maps to current val */
|
|
|
+ indexptr = cquantize->colorindex[i];
|
|
|
+ val = 0;
|
|
|
+ k = largest_input_value(cinfo, i, 0, nci - 1);
|
|
|
+- for (j = 0; j <= MAXJSAMPLE; j++) {
|
|
|
++ for (j = 0; j <= _MAXJSAMPLE; j++) {
|
|
|
+ while (j > k) /* advance val if past boundary */
|
|
|
+ k = largest_input_value(cinfo, i, ++val, nci - 1);
|
|
|
+ /* premultiply so that no multiplication needed in main processing */
|
|
|
+- indexptr[j] = (JSAMPLE)(val * blksize);
|
|
|
++ indexptr[j] = (_JSAMPLE)(val * blksize);
|
|
|
+ }
|
|
|
+ /* Pad at both ends if necessary */
|
|
|
+ if (pad)
|
|
|
+- for (j = 1; j <= MAXJSAMPLE; j++) {
|
|
|
++ for (j = 1; j <= _MAXJSAMPLE; j++) {
|
|
|
+ indexptr[-j] = indexptr[0];
|
|
|
+- indexptr[MAXJSAMPLE + j] = indexptr[MAXJSAMPLE];
|
|
|
++ indexptr[_MAXJSAMPLE + j] = indexptr[_MAXJSAMPLE];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Create an ordered-dither array for a component having ncolors
|
|
|
+ * distinct output values.
|
|
|
+@@ -401,26 +402,26 @@ make_odither_array(j_decompress_ptr cinf
|
|
|
+ {
|
|
|
+ ODITHER_MATRIX_PTR odither;
|
|
|
+ int j, k;
|
|
|
+ JLONG num, den;
|
|
|
+
|
|
|
+ odither = (ODITHER_MATRIX_PTR)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(ODITHER_MATRIX));
|
|
|
+- /* The inter-value distance for this color is MAXJSAMPLE/(ncolors-1).
|
|
|
++ /* The inter-value distance for this color is _MAXJSAMPLE/(ncolors-1).
|
|
|
+ * Hence the dither value for the matrix cell with fill order f
|
|
|
+- * (f=0..N-1) should be (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1).
|
|
|
++ * (f=0..N-1) should be (N-1-2*f)/(2*N) * _MAXJSAMPLE/(ncolors-1).
|
|
|
+ * On 16-bit-int machine, be careful to avoid overflow.
|
|
|
+ */
|
|
|
+ den = 2 * ODITHER_CELLS * ((JLONG)(ncolors - 1));
|
|
|
+ for (j = 0; j < ODITHER_SIZE; j++) {
|
|
|
+ for (k = 0; k < ODITHER_SIZE; k++) {
|
|
|
+ num = ((JLONG)(ODITHER_CELLS - 1 -
|
|
|
+- 2 * ((int)base_dither_matrix[j][k]))) * MAXJSAMPLE;
|
|
|
++ 2 * ((int)base_dither_matrix[j][k]))) * _MAXJSAMPLE;
|
|
|
+ /* Ensure round towards zero despite C's lack of consistency
|
|
|
+ * about rounding negative values in integer division...
|
|
|
+ */
|
|
|
+ odither[j][k] = (int)(num < 0 ? -((-num) / den) : num / den);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return odither;
|
|
|
+ }
|
|
|
+@@ -455,105 +456,105 @@ create_odither_tables(j_decompress_ptr c
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Map some rows of pixels to the output colormapped representation.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-color_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++color_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* General case, no dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+- JSAMPARRAY colorindex = cquantize->colorindex;
|
|
|
++ _JSAMPARRAY colorindex = cquantize->colorindex;
|
|
|
+ register int pixcode, ci;
|
|
|
+- register JSAMPROW ptrin, ptrout;
|
|
|
++ register _JSAMPROW ptrin, ptrout;
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+ register int nc = cinfo->out_color_components;
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ ptrin = input_buf[row];
|
|
|
+ ptrout = output_buf[row];
|
|
|
+ for (col = width; col > 0; col--) {
|
|
|
+ pixcode = 0;
|
|
|
+ for (ci = 0; ci < nc; ci++) {
|
|
|
+ pixcode += colorindex[ci][*ptrin++];
|
|
|
+ }
|
|
|
+- *ptrout++ = (JSAMPLE)pixcode;
|
|
|
++ *ptrout++ = (_JSAMPLE)pixcode;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-color_quantize3(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++color_quantize3(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* Fast path for out_color_components==3, no dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ register int pixcode;
|
|
|
+- register JSAMPROW ptrin, ptrout;
|
|
|
+- JSAMPROW colorindex0 = cquantize->colorindex[0];
|
|
|
+- JSAMPROW colorindex1 = cquantize->colorindex[1];
|
|
|
+- JSAMPROW colorindex2 = cquantize->colorindex[2];
|
|
|
++ register _JSAMPROW ptrin, ptrout;
|
|
|
++ _JSAMPROW colorindex0 = cquantize->colorindex[0];
|
|
|
++ _JSAMPROW colorindex1 = cquantize->colorindex[1];
|
|
|
++ _JSAMPROW colorindex2 = cquantize->colorindex[2];
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ ptrin = input_buf[row];
|
|
|
+ ptrout = output_buf[row];
|
|
|
+ for (col = width; col > 0; col--) {
|
|
|
+ pixcode = colorindex0[*ptrin++];
|
|
|
+ pixcode += colorindex1[*ptrin++];
|
|
|
+ pixcode += colorindex2[*ptrin++];
|
|
|
+- *ptrout++ = (JSAMPLE)pixcode;
|
|
|
++ *ptrout++ = (_JSAMPLE)pixcode;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-quantize_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++quantize_ord_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* General case, with ordered dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+- register JSAMPROW input_ptr;
|
|
|
+- register JSAMPROW output_ptr;
|
|
|
+- JSAMPROW colorindex_ci;
|
|
|
++ register _JSAMPROW input_ptr;
|
|
|
++ register _JSAMPROW output_ptr;
|
|
|
++ _JSAMPROW colorindex_ci;
|
|
|
+ int *dither; /* points to active row of dither matrix */
|
|
|
+ int row_index, col_index; /* current indexes into dither matrix */
|
|
|
+ int nc = cinfo->out_color_components;
|
|
|
+ int ci;
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ /* Initialize output values to 0 so can process components separately */
|
|
|
+- jzero_far((void *)output_buf[row], (size_t)(width * sizeof(JSAMPLE)));
|
|
|
++ jzero_far((void *)output_buf[row], (size_t)(width * sizeof(_JSAMPLE)));
|
|
|
+ row_index = cquantize->row_index;
|
|
|
+ for (ci = 0; ci < nc; ci++) {
|
|
|
+ input_ptr = input_buf[row] + ci;
|
|
|
+ output_ptr = output_buf[row];
|
|
|
+ colorindex_ci = cquantize->colorindex[ci];
|
|
|
+ dither = cquantize->odither[ci][row_index];
|
|
|
+ col_index = 0;
|
|
|
+
|
|
|
+ for (col = width; col > 0; col--) {
|
|
|
+- /* Form pixel value + dither, range-limit to 0..MAXJSAMPLE,
|
|
|
++ /* Form pixel value + dither, range-limit to 0.._MAXJSAMPLE,
|
|
|
+ * select output value, accumulate into output code for this pixel.
|
|
|
+ * Range-limiting need not be done explicitly, as we have extended
|
|
|
+ * the colorindex table to produce the right answers for out-of-range
|
|
|
+- * inputs. The maximum dither is +- MAXJSAMPLE; this sets the
|
|
|
++ * inputs. The maximum dither is +- _MAXJSAMPLE; this sets the
|
|
|
+ * required amount of padding.
|
|
|
+ */
|
|
|
+ *output_ptr +=
|
|
|
+ colorindex_ci[*input_ptr + dither[col_index]];
|
|
|
+ input_ptr += nc;
|
|
|
+ output_ptr++;
|
|
|
+ col_index = (col_index + 1) & ODITHER_MASK;
|
|
|
+ }
|
|
|
+@@ -561,27 +562,27 @@ quantize_ord_dither(j_decompress_ptr cin
|
|
|
+ /* Advance row index for next row */
|
|
|
+ row_index = (row_index + 1) & ODITHER_MASK;
|
|
|
+ cquantize->row_index = row_index;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-quantize3_ord_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++quantize3_ord_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* Fast path for out_color_components==3, with ordered dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ register int pixcode;
|
|
|
+- register JSAMPROW input_ptr;
|
|
|
+- register JSAMPROW output_ptr;
|
|
|
+- JSAMPROW colorindex0 = cquantize->colorindex[0];
|
|
|
+- JSAMPROW colorindex1 = cquantize->colorindex[1];
|
|
|
+- JSAMPROW colorindex2 = cquantize->colorindex[2];
|
|
|
++ register _JSAMPROW input_ptr;
|
|
|
++ register _JSAMPROW output_ptr;
|
|
|
++ _JSAMPROW colorindex0 = cquantize->colorindex[0];
|
|
|
++ _JSAMPROW colorindex1 = cquantize->colorindex[1];
|
|
|
++ _JSAMPROW colorindex2 = cquantize->colorindex[2];
|
|
|
+ int *dither0; /* points to active row of dither matrix */
|
|
|
+ int *dither1;
|
|
|
+ int *dither2;
|
|
|
+ int row_index, col_index; /* current indexes into dither matrix */
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+
|
|
|
+@@ -593,55 +594,55 @@ quantize3_ord_dither(j_decompress_ptr ci
|
|
|
+ dither1 = cquantize->odither[1][row_index];
|
|
|
+ dither2 = cquantize->odither[2][row_index];
|
|
|
+ col_index = 0;
|
|
|
+
|
|
|
+ for (col = width; col > 0; col--) {
|
|
|
+ pixcode = colorindex0[(*input_ptr++) + dither0[col_index]];
|
|
|
+ pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
|
|
|
+ pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
|
|
|
+- *output_ptr++ = (JSAMPLE)pixcode;
|
|
|
++ *output_ptr++ = (_JSAMPLE)pixcode;
|
|
|
+ col_index = (col_index + 1) & ODITHER_MASK;
|
|
|
+ }
|
|
|
+ row_index = (row_index + 1) & ODITHER_MASK;
|
|
|
+ cquantize->row_index = row_index;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-quantize_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++quantize_fs_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* General case, with Floyd-Steinberg dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ register LOCFSERROR cur; /* current error or pixel value */
|
|
|
+ LOCFSERROR belowerr; /* error for pixel below cur */
|
|
|
+ LOCFSERROR bpreverr; /* error for below/prev col */
|
|
|
+ LOCFSERROR bnexterr; /* error for below/next col */
|
|
|
+ LOCFSERROR delta;
|
|
|
+ register FSERRPTR errorptr; /* => fserrors[] at column before current */
|
|
|
+- register JSAMPROW input_ptr;
|
|
|
+- register JSAMPROW output_ptr;
|
|
|
+- JSAMPROW colorindex_ci;
|
|
|
+- JSAMPROW colormap_ci;
|
|
|
++ register _JSAMPROW input_ptr;
|
|
|
++ register _JSAMPROW output_ptr;
|
|
|
++ _JSAMPROW colorindex_ci;
|
|
|
++ _JSAMPROW colormap_ci;
|
|
|
+ int pixcode;
|
|
|
+ int nc = cinfo->out_color_components;
|
|
|
+ int dir; /* 1 for left-to-right, -1 for right-to-left */
|
|
|
+ int dirnc; /* dir * nc */
|
|
|
+ int ci;
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+- JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ /* Initialize output values to 0 so can process components separately */
|
|
|
+- jzero_far((void *)output_buf[row], (size_t)(width * sizeof(JSAMPLE)));
|
|
|
++ jzero_far((void *)output_buf[row], (size_t)(width * sizeof(_JSAMPLE)));
|
|
|
+ for (ci = 0; ci < nc; ci++) {
|
|
|
+ input_ptr = input_buf[row] + ci;
|
|
|
+ output_ptr = output_buf[row];
|
|
|
+ if (cquantize->on_odd_row) {
|
|
|
+ /* work right to left in this row */
|
|
|
+ input_ptr += (width - 1) * nc; /* so point to rightmost pixel */
|
|
|
+ output_ptr += width - 1;
|
|
|
+ dir = -1;
|
|
|
+@@ -665,25 +666,25 @@ quantize_fs_dither(j_decompress_ptr cinf
|
|
|
+ * current line. Add the error propagated from the previous line
|
|
|
+ * to form the complete error correction term for this pixel, and
|
|
|
+ * round the error term (which is expressed * 16) to an integer.
|
|
|
+ * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
|
|
|
+ * for either sign of the error value.
|
|
|
+ * Note: errorptr points to *previous* column's array entry.
|
|
|
+ */
|
|
|
+ cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
|
|
|
+- /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
|
|
|
+- * The maximum error is +- MAXJSAMPLE; this sets the required size
|
|
|
++ /* Form pixel value + error, and range-limit to 0.._MAXJSAMPLE.
|
|
|
++ * The maximum error is +- _MAXJSAMPLE; this sets the required size
|
|
|
+ * of the range_limit array.
|
|
|
+ */
|
|
|
+ cur += *input_ptr;
|
|
|
+ cur = range_limit[cur];
|
|
|
+ /* Select output value, accumulate into output code for this pixel */
|
|
|
+ pixcode = colorindex_ci[cur];
|
|
|
+- *output_ptr += (JSAMPLE)pixcode;
|
|
|
++ *output_ptr += (_JSAMPLE)pixcode;
|
|
|
+ /* Compute actual representation error at this pixel */
|
|
|
+ /* Note: we can do this even though we don't have the final */
|
|
|
+ /* pixel code, because the colormap is orthogonal. */
|
|
|
+ cur -= colormap_ci[pixcode];
|
|
|
+ /* Compute error fractions to be propagated to adjacent pixels.
|
|
|
+ * Add these into the running sums, and simultaneously shift the
|
|
|
+ * next-line error sums left by 1 column.
|
|
|
+ */
|
|
|
+@@ -740,45 +741,45 @@ alloc_fs_workspace(j_decompress_ptr cinf
|
|
|
+ METHODDEF(void)
|
|
|
+ start_pass_1_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ size_t arraysize;
|
|
|
+ int i;
|
|
|
+
|
|
|
+ /* Install my colormap. */
|
|
|
+- cinfo->colormap = cquantize->sv_colormap;
|
|
|
++ cinfo->colormap = (JSAMPARRAY)cquantize->sv_colormap;
|
|
|
+ cinfo->actual_number_of_colors = cquantize->sv_actual;
|
|
|
+
|
|
|
+ /* Initialize for desired dithering mode. */
|
|
|
+ switch (cinfo->dither_mode) {
|
|
|
+ case JDITHER_NONE:
|
|
|
+ if (cinfo->out_color_components == 3)
|
|
|
+- cquantize->pub.color_quantize = color_quantize3;
|
|
|
++ cquantize->pub._color_quantize = color_quantize3;
|
|
|
+ else
|
|
|
+- cquantize->pub.color_quantize = color_quantize;
|
|
|
++ cquantize->pub._color_quantize = color_quantize;
|
|
|
+ break;
|
|
|
+ case JDITHER_ORDERED:
|
|
|
+ if (cinfo->out_color_components == 3)
|
|
|
+- cquantize->pub.color_quantize = quantize3_ord_dither;
|
|
|
++ cquantize->pub._color_quantize = quantize3_ord_dither;
|
|
|
+ else
|
|
|
+- cquantize->pub.color_quantize = quantize_ord_dither;
|
|
|
++ cquantize->pub._color_quantize = quantize_ord_dither;
|
|
|
+ cquantize->row_index = 0; /* initialize state for ordered dither */
|
|
|
+ /* If user changed to ordered dither from another mode,
|
|
|
+ * we must recreate the color index table with padding.
|
|
|
+ * This will cost extra space, but probably isn't very likely.
|
|
|
+ */
|
|
|
+ if (!cquantize->is_padded)
|
|
|
+ create_colorindex(cinfo);
|
|
|
+ /* Create ordered-dither tables if we didn't already. */
|
|
|
+ if (cquantize->odither[0] == NULL)
|
|
|
+ create_odither_tables(cinfo);
|
|
|
+ break;
|
|
|
+ case JDITHER_FS:
|
|
|
+- cquantize->pub.color_quantize = quantize_fs_dither;
|
|
|
++ cquantize->pub._color_quantize = quantize_fs_dither;
|
|
|
+ cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
|
|
|
+ /* Allocate Floyd-Steinberg workspace if didn't already. */
|
|
|
+ if (cquantize->fserrors[0] == NULL)
|
|
|
+ alloc_fs_workspace(cinfo);
|
|
|
+ /* Initialize the propagated errors to zero. */
|
|
|
+ arraysize = (size_t)((cinfo->output_width + 2) * sizeof(FSERROR));
|
|
|
+ for (i = 0; i < cinfo->out_color_components; i++)
|
|
|
+ jzero_far((void *)cquantize->fserrors[i], arraysize);
|
|
|
+@@ -813,44 +814,51 @@ new_color_map_1_quant(j_decompress_ptr c
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Module initialization routine for 1-pass color quantization.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_1pass_quantizer(j_decompress_ptr cinfo)
|
|
|
++_jinit_1pass_quantizer(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
++ /* Color quantization is not supported with lossless JPEG images */
|
|
|
++ if (cinfo->master->lossless)
|
|
|
++ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
++
|
|
|
+ cquantize = (my_cquantize_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_cquantizer));
|
|
|
+ cinfo->cquantize = (struct jpeg_color_quantizer *)cquantize;
|
|
|
+ cquantize->pub.start_pass = start_pass_1_quant;
|
|
|
+ cquantize->pub.finish_pass = finish_pass_1_quant;
|
|
|
+ cquantize->pub.new_color_map = new_color_map_1_quant;
|
|
|
+ cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
|
|
|
+ cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */
|
|
|
+
|
|
|
+ /* Make sure my internal arrays won't overflow */
|
|
|
+ if (cinfo->out_color_components > MAX_Q_COMPS)
|
|
|
+ ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
|
|
|
+- /* Make sure colormap indexes can be represented by JSAMPLEs */
|
|
|
+- if (cinfo->desired_number_of_colors > (MAXJSAMPLE + 1))
|
|
|
+- ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE + 1);
|
|
|
++ /* Make sure colormap indexes can be represented by _JSAMPLEs */
|
|
|
++ if (cinfo->desired_number_of_colors > (_MAXJSAMPLE + 1))
|
|
|
++ ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, _MAXJSAMPLE + 1);
|
|
|
+
|
|
|
+ /* Create the colormap and color index table. */
|
|
|
+ create_colormap(cinfo);
|
|
|
+ create_colorindex(cinfo);
|
|
|
+
|
|
|
+ /* Allocate Floyd-Steinberg workspace now if requested.
|
|
|
+ * We do this now since it may affect the memory manager's space
|
|
|
+ * calculations. If the user changes to FS dither mode in a later pass, we
|
|
|
+ * will allocate the space then, and will possibly overrun the
|
|
|
+ * max_memory_to_use setting.
|
|
|
+ */
|
|
|
+ if (cinfo->dither_mode == JDITHER_FS)
|
|
|
+ alloc_fs_workspace(cinfo);
|
|
|
+ }
|
|
|
+
|
|
|
+-#endif /* QUANT_1PASS_SUPPORTED */
|
|
|
++#endif /* defined(QUANT_1PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
|
|
|
+diff --git a/media/libjpeg/jquant2.c b/media/libjpeg/jquant2.c
|
|
|
+--- a/media/libjpeg/jquant2.c
|
|
|
++++ b/media/libjpeg/jquant2.c
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jquant2.c
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-1996, Thomas G. Lane.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2009, 2014-2015, 2020, D. R. Commander.
|
|
|
++ * Copyright (C) 2009, 2014-2015, 2020, 2022-2023, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains 2-pass color quantization (color mapping) routines.
|
|
|
+ * These routines provide selection of a custom color map for an image,
|
|
|
+ * followed by mapping of the image to that color map, with optional
|
|
|
+ * Floyd-Steinberg dithering.
|
|
|
+ * It is also possible to use just the second pass to map to an arbitrary
|
|
|
+@@ -18,18 +18,19 @@
|
|
|
+ * Note: ordered dithering is not supported, since there isn't any fast
|
|
|
+ * way to compute intercolor distances; it's unclear that ordered dither's
|
|
|
+ * fundamental assumptions even hold with an irregularly spaced color map.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+-#ifdef QUANT_2PASS_SUPPORTED
|
|
|
++#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * This module implements the well-known Heckbert paradigm for color
|
|
|
+ * quantization. Most of the ideas used here can be traced back to
|
|
|
+ * Heckbert's seminal paper
|
|
|
+ * Heckbert, Paul. "Color Image Quantization for Frame Buffer Display",
|
|
|
+ * Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
|
|
|
+@@ -101,17 +102,17 @@ static const int c_scales[3] = { R_SCALE
|
|
|
+ * desired colors. 16 bits/cell is plenty for that too.)
|
|
|
+ * Since the JPEG code is intended to run in small memory model on 80x86
|
|
|
+ * machines, we can't just allocate the histogram in one chunk. Instead
|
|
|
+ * of a true 3-D array, we use a row of pointers to 2-D arrays. Each
|
|
|
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
|
|
|
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.
|
|
|
+ */
|
|
|
+
|
|
|
+-#define MAXNUMCOLORS (MAXJSAMPLE + 1) /* maximum size of colormap */
|
|
|
++#define MAXNUMCOLORS (_MAXJSAMPLE + 1) /* maximum size of colormap */
|
|
|
+
|
|
|
+ /* These will do the right thing for either R,G,B or B,G,R color order,
|
|
|
+ * but you may not like the results for other color orders.
|
|
|
+ */
|
|
|
+ #define HIST_C0_BITS 5 /* bits of precision in R/B histogram */
|
|
|
+ #define HIST_C1_BITS 6 /* bits of precision in G histogram */
|
|
|
+ #define HIST_C2_BITS 5 /* bits of precision in B/R histogram */
|
|
|
+
|
|
|
+@@ -168,17 +169,17 @@ typedef FSERROR *FSERRPTR; /* point
|
|
|
+
|
|
|
+
|
|
|
+ /* Private subobject */
|
|
|
+
|
|
|
+ typedef struct {
|
|
|
+ struct jpeg_color_quantizer pub; /* public fields */
|
|
|
+
|
|
|
+ /* Space for the eventually created colormap is stashed here */
|
|
|
+- JSAMPARRAY sv_colormap; /* colormap allocated at init time */
|
|
|
++ _JSAMPARRAY sv_colormap; /* colormap allocated at init time */
|
|
|
+ int desired; /* desired # of colors = size of colormap */
|
|
|
+
|
|
|
+ /* Variables for accumulating image statistics */
|
|
|
+ hist3d histogram; /* pointer to the histogram */
|
|
|
+
|
|
|
+ boolean needs_zeroed; /* TRUE if next pass must zero histogram */
|
|
|
+
|
|
|
+ /* Variables for Floyd-Steinberg dithering */
|
|
|
+@@ -195,21 +196,21 @@ typedef my_cquantizer *my_cquantize_ptr;
|
|
|
+ * In this module the prescan simply updates the histogram, which has been
|
|
|
+ * initialized to zeroes by start_pass.
|
|
|
+ * An output_buf parameter is required by the method signature, but no data
|
|
|
+ * is actually output (in fact the buffer controller is probably passing a
|
|
|
+ * NULL pointer).
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-prescan_quantize(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++prescan_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+- register JSAMPROW ptr;
|
|
|
++ register _JSAMPROW ptr;
|
|
|
+ register histptr histp;
|
|
|
+ register hist3d histogram = cquantize->histogram;
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ ptr = input_buf[row];
|
|
|
+@@ -372,17 +373,17 @@ have_c2min:
|
|
|
+ }
|
|
|
+ have_c2max:
|
|
|
+
|
|
|
+ /* Update box volume.
|
|
|
+ * We use 2-norm rather than real volume here; this biases the method
|
|
|
+ * against making long narrow boxes, and it has the side benefit that
|
|
|
+ * a box is splittable iff norm > 0.
|
|
|
+ * Since the differences are expressed in histogram-cell units,
|
|
|
+- * we have to shift back to JSAMPLE units to get consistent distances;
|
|
|
++ * we have to shift back to _JSAMPLE units to get consistent distances;
|
|
|
+ * after which, we scale according to the selected distance scale factors.
|
|
|
+ */
|
|
|
+ dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
|
|
|
+ dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
|
|
|
+ dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
|
|
|
+ boxp->volume = dist0 * dist0 + dist1 * dist1 + dist2 * dist2;
|
|
|
+
|
|
|
+ /* Now scan remaining volume of box and compute population */
|
|
|
+@@ -503,19 +504,22 @@ compute_color(j_decompress_ptr cinfo, bo
|
|
|
+ total += count;
|
|
|
+ c0total += ((c0 << C0_SHIFT) + ((1 << C0_SHIFT) >> 1)) * count;
|
|
|
+ c1total += ((c1 << C1_SHIFT) + ((1 << C1_SHIFT) >> 1)) * count;
|
|
|
+ c2total += ((c2 << C2_SHIFT) + ((1 << C2_SHIFT) >> 1)) * count;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+- cinfo->colormap[0][icolor] = (JSAMPLE)((c0total + (total >> 1)) / total);
|
|
|
+- cinfo->colormap[1][icolor] = (JSAMPLE)((c1total + (total >> 1)) / total);
|
|
|
+- cinfo->colormap[2][icolor] = (JSAMPLE)((c2total + (total >> 1)) / total);
|
|
|
++ ((_JSAMPARRAY)cinfo->colormap)[0][icolor] =
|
|
|
++ (_JSAMPLE)((c0total + (total >> 1)) / total);
|
|
|
++ ((_JSAMPARRAY)cinfo->colormap)[1][icolor] =
|
|
|
++ (_JSAMPLE)((c1total + (total >> 1)) / total);
|
|
|
++ ((_JSAMPARRAY)cinfo->colormap)[2][icolor] =
|
|
|
++ (_JSAMPLE)((c2total + (total >> 1)) / total);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ select_colors(j_decompress_ptr cinfo, int desired_colors)
|
|
|
+ /* Master routine for color selection */
|
|
|
+ {
|
|
|
+ boxptr boxlist;
|
|
|
+@@ -523,21 +527,21 @@ select_colors(j_decompress_ptr cinfo, in
|
|
|
+ int i;
|
|
|
+
|
|
|
+ /* Allocate workspace for box list */
|
|
|
+ boxlist = (boxptr)(*cinfo->mem->alloc_small)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE, desired_colors * sizeof(box));
|
|
|
+ /* Initialize one box containing whole space */
|
|
|
+ numboxes = 1;
|
|
|
+ boxlist[0].c0min = 0;
|
|
|
+- boxlist[0].c0max = MAXJSAMPLE >> C0_SHIFT;
|
|
|
++ boxlist[0].c0max = _MAXJSAMPLE >> C0_SHIFT;
|
|
|
+ boxlist[0].c1min = 0;
|
|
|
+- boxlist[0].c1max = MAXJSAMPLE >> C1_SHIFT;
|
|
|
++ boxlist[0].c1max = _MAXJSAMPLE >> C1_SHIFT;
|
|
|
+ boxlist[0].c2min = 0;
|
|
|
+- boxlist[0].c2max = MAXJSAMPLE >> C2_SHIFT;
|
|
|
++ boxlist[0].c2max = _MAXJSAMPLE >> C2_SHIFT;
|
|
|
+ /* Shrink it to actually-used volume and set its statistics */
|
|
|
+ update_box(cinfo, &boxlist[0]);
|
|
|
+ /* Perform median-cut to produce final box list */
|
|
|
+ numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
|
|
|
+ /* Compute the representative color for each box, fill colormap */
|
|
|
+ for (i = 0; i < numboxes; i++)
|
|
|
+ compute_color(cinfo, &boxlist[i], i);
|
|
|
+ cinfo->actual_number_of_colors = numboxes;
|
|
|
+@@ -618,17 +622,17 @@ select_colors(j_decompress_ptr cinfo, in
|
|
|
+ * all be folded into one big routine, but splitting them up this way saves
|
|
|
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
|
|
|
+ * and may allow some compilers to produce better code by registerizing more
|
|
|
+ * inner-loop variables.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(int)
|
|
|
+ find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
|
|
|
+- JSAMPLE colorlist[])
|
|
|
++ _JSAMPLE colorlist[])
|
|
|
+ /* Locate the colormap entries close enough to an update box to be candidates
|
|
|
+ * for the nearest entry to some cell(s) in the update box. The update box
|
|
|
+ * is specified by the center coordinates of its first cell. The number of
|
|
|
+ * candidate colormap entries is returned, and their colormap indexes are
|
|
|
+ * placed in colorlist[].
|
|
|
+ * This routine uses Heckbert's "locally sorted search" criterion to select
|
|
|
+ * the colors that need further consideration.
|
|
|
+ */
|
|
|
+@@ -660,17 +664,17 @@ find_nearby_colors(j_decompress_ptr cinf
|
|
|
+ * Both of these can be found by considering only the corners of the box.
|
|
|
+ * We save the minimum distance for each color in mindist[];
|
|
|
+ * only the smallest maximum distance is of interest.
|
|
|
+ */
|
|
|
+ minmaxdist = 0x7FFFFFFFL;
|
|
|
+
|
|
|
+ for (i = 0; i < numcolors; i++) {
|
|
|
+ /* We compute the squared-c0-distance term, then add in the other two. */
|
|
|
+- x = cinfo->colormap[0][i];
|
|
|
++ x = ((_JSAMPARRAY)cinfo->colormap)[0][i];
|
|
|
+ if (x < minc0) {
|
|
|
+ tdist = (x - minc0) * C0_SCALE;
|
|
|
+ min_dist = tdist * tdist;
|
|
|
+ tdist = (x - maxc0) * C0_SCALE;
|
|
|
+ max_dist = tdist * tdist;
|
|
|
+ } else if (x > maxc0) {
|
|
|
+ tdist = (x - maxc0) * C0_SCALE;
|
|
|
+ min_dist = tdist * tdist;
|
|
|
+@@ -683,17 +687,17 @@ find_nearby_colors(j_decompress_ptr cinf
|
|
|
+ tdist = (x - maxc0) * C0_SCALE;
|
|
|
+ max_dist = tdist * tdist;
|
|
|
+ } else {
|
|
|
+ tdist = (x - minc0) * C0_SCALE;
|
|
|
+ max_dist = tdist * tdist;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+- x = cinfo->colormap[1][i];
|
|
|
++ x = ((_JSAMPARRAY)cinfo->colormap)[1][i];
|
|
|
+ if (x < minc1) {
|
|
|
+ tdist = (x - minc1) * C1_SCALE;
|
|
|
+ min_dist += tdist * tdist;
|
|
|
+ tdist = (x - maxc1) * C1_SCALE;
|
|
|
+ max_dist += tdist * tdist;
|
|
|
+ } else if (x > maxc1) {
|
|
|
+ tdist = (x - maxc1) * C1_SCALE;
|
|
|
+ min_dist += tdist * tdist;
|
|
|
+@@ -705,17 +709,17 @@ find_nearby_colors(j_decompress_ptr cinf
|
|
|
+ tdist = (x - maxc1) * C1_SCALE;
|
|
|
+ max_dist += tdist * tdist;
|
|
|
+ } else {
|
|
|
+ tdist = (x - minc1) * C1_SCALE;
|
|
|
+ max_dist += tdist * tdist;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+- x = cinfo->colormap[2][i];
|
|
|
++ x = ((_JSAMPARRAY)cinfo->colormap)[2][i];
|
|
|
+ if (x < minc2) {
|
|
|
+ tdist = (x - minc2) * C2_SCALE;
|
|
|
+ min_dist += tdist * tdist;
|
|
|
+ tdist = (x - maxc2) * C2_SCALE;
|
|
|
+ max_dist += tdist * tdist;
|
|
|
+ } else if (x > maxc2) {
|
|
|
+ tdist = (x - maxc2) * C2_SCALE;
|
|
|
+ min_dist += tdist * tdist;
|
|
|
+@@ -739,36 +743,36 @@ find_nearby_colors(j_decompress_ptr cinf
|
|
|
+
|
|
|
+ /* Now we know that no cell in the update box is more than minmaxdist
|
|
|
+ * away from some colormap entry. Therefore, only colors that are
|
|
|
+ * within minmaxdist of some part of the box need be considered.
|
|
|
+ */
|
|
|
+ ncolors = 0;
|
|
|
+ for (i = 0; i < numcolors; i++) {
|
|
|
+ if (mindist[i] <= minmaxdist)
|
|
|
+- colorlist[ncolors++] = (JSAMPLE)i;
|
|
|
++ colorlist[ncolors++] = (_JSAMPLE)i;
|
|
|
+ }
|
|
|
+ return ncolors;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
|
|
|
+- int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
|
|
|
++ int numcolors, _JSAMPLE colorlist[], _JSAMPLE bestcolor[])
|
|
|
+ /* Find the closest colormap entry for each cell in the update box,
|
|
|
+ * given the list of candidate colors prepared by find_nearby_colors.
|
|
|
+ * Return the indexes of the closest entries in the bestcolor[] array.
|
|
|
+ * This routine uses Thomas' incremental distance calculation method to
|
|
|
+ * find the distance from a colormap entry to successive cells in the box.
|
|
|
+ */
|
|
|
+ {
|
|
|
+ int ic0, ic1, ic2;
|
|
|
+ int i, icolor;
|
|
|
+ register JLONG *bptr; /* pointer into bestdist[] array */
|
|
|
+- JSAMPLE *cptr; /* pointer into bestcolor[] array */
|
|
|
++ _JSAMPLE *cptr; /* pointer into bestcolor[] array */
|
|
|
+ JLONG dist0, dist1; /* initial distance values */
|
|
|
+ register JLONG dist2; /* current distance in inner loop */
|
|
|
+ JLONG xx0, xx1; /* distance increments */
|
|
|
+ register JLONG xx2;
|
|
|
+ JLONG inc0, inc1, inc2; /* initial values for increments */
|
|
|
+ /* This array holds the distance to the nearest-so-far color for each cell */
|
|
|
+ JLONG bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
|
|
|
+
|
|
|
+@@ -785,21 +789,21 @@ find_best_colors(j_decompress_ptr cinfo,
|
|
|
+ /* Nominal steps between cell centers ("x" in Thomas article) */
|
|
|
+ #define STEP_C0 ((1 << C0_SHIFT) * C0_SCALE)
|
|
|
+ #define STEP_C1 ((1 << C1_SHIFT) * C1_SCALE)
|
|
|
+ #define STEP_C2 ((1 << C2_SHIFT) * C2_SCALE)
|
|
|
+
|
|
|
+ for (i = 0; i < numcolors; i++) {
|
|
|
+ icolor = colorlist[i];
|
|
|
+ /* Compute (square of) distance from minc0/c1/c2 to this color */
|
|
|
+- inc0 = (minc0 - cinfo->colormap[0][icolor]) * C0_SCALE;
|
|
|
++ inc0 = (minc0 - ((_JSAMPARRAY)cinfo->colormap)[0][icolor]) * C0_SCALE;
|
|
|
+ dist0 = inc0 * inc0;
|
|
|
+- inc1 = (minc1 - cinfo->colormap[1][icolor]) * C1_SCALE;
|
|
|
++ inc1 = (minc1 - ((_JSAMPARRAY)cinfo->colormap)[1][icolor]) * C1_SCALE;
|
|
|
+ dist0 += inc1 * inc1;
|
|
|
+- inc2 = (minc2 - cinfo->colormap[2][icolor]) * C2_SCALE;
|
|
|
++ inc2 = (minc2 - ((_JSAMPARRAY)cinfo->colormap)[2][icolor]) * C2_SCALE;
|
|
|
+ dist0 += inc2 * inc2;
|
|
|
+ /* Form the initial difference increments */
|
|
|
+ inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
|
|
|
+ inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
|
|
|
+ inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
|
|
|
+ /* Now loop over all cells in box, updating distance per Thomas method */
|
|
|
+ bptr = bestdist;
|
|
|
+ cptr = bestcolor;
|
|
|
+@@ -808,17 +812,17 @@ find_best_colors(j_decompress_ptr cinfo,
|
|
|
+ dist1 = dist0;
|
|
|
+ xx1 = inc1;
|
|
|
+ for (ic1 = BOX_C1_ELEMS - 1; ic1 >= 0; ic1--) {
|
|
|
+ dist2 = dist1;
|
|
|
+ xx2 = inc2;
|
|
|
+ for (ic2 = BOX_C2_ELEMS - 1; ic2 >= 0; ic2--) {
|
|
|
+ if (dist2 < *bptr) {
|
|
|
+ *bptr = dist2;
|
|
|
+- *cptr = (JSAMPLE)icolor;
|
|
|
++ *cptr = (_JSAMPLE)icolor;
|
|
|
+ }
|
|
|
+ dist2 += xx2;
|
|
|
+ xx2 += 2 * STEP_C2 * STEP_C2;
|
|
|
+ bptr++;
|
|
|
+ cptr++;
|
|
|
+ }
|
|
|
+ dist1 += xx1;
|
|
|
+ xx1 += 2 * STEP_C1 * STEP_C1;
|
|
|
+@@ -835,23 +839,23 @@ fill_inverse_cmap(j_decompress_ptr cinfo
|
|
|
+ /* Fill the inverse-colormap entries in the update box that contains */
|
|
|
+ /* histogram cell c0/c1/c2. (Only that one cell MUST be filled, but */
|
|
|
+ /* we can fill as many others as we wish.) */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ hist3d histogram = cquantize->histogram;
|
|
|
+ int minc0, minc1, minc2; /* lower left corner of update box */
|
|
|
+ int ic0, ic1, ic2;
|
|
|
+- register JSAMPLE *cptr; /* pointer into bestcolor[] array */
|
|
|
++ register _JSAMPLE *cptr; /* pointer into bestcolor[] array */
|
|
|
+ register histptr cachep; /* pointer into main cache array */
|
|
|
+ /* This array lists the candidate colormap indexes. */
|
|
|
+- JSAMPLE colorlist[MAXNUMCOLORS];
|
|
|
++ _JSAMPLE colorlist[MAXNUMCOLORS];
|
|
|
+ int numcolors; /* number of candidate colors */
|
|
|
+ /* This array holds the actually closest colormap index for each cell. */
|
|
|
+- JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
|
|
|
++ _JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
|
|
|
+
|
|
|
+ /* Convert cell coordinates to update box ID */
|
|
|
+ c0 >>= BOX_C0_LOG;
|
|
|
+ c1 >>= BOX_C1_LOG;
|
|
|
+ c2 >>= BOX_C2_LOG;
|
|
|
+
|
|
|
+ /* Compute true coordinates of update box's origin corner.
|
|
|
+ * Actually we compute the coordinates of the center of the corner
|
|
|
+@@ -886,23 +890,23 @@ fill_inverse_cmap(j_decompress_ptr cinfo
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Map some rows of pixels to the output colormapped representation.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-pass2_no_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++pass2_no_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* This version performs no dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ hist3d histogram = cquantize->histogram;
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
+ register histptr cachep;
|
|
|
+ register int c0, c1, c2;
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ inptr = input_buf[row];
|
|
|
+@@ -913,46 +917,46 @@ pass2_no_dither(j_decompress_ptr cinfo,
|
|
|
+ c1 = (*inptr++) >> C1_SHIFT;
|
|
|
+ c2 = (*inptr++) >> C2_SHIFT;
|
|
|
+ cachep = &histogram[c0][c1][c2];
|
|
|
+ /* If we have not seen this color before, find nearest colormap entry */
|
|
|
+ /* and update the cache */
|
|
|
+ if (*cachep == 0)
|
|
|
+ fill_inverse_cmap(cinfo, c0, c1, c2);
|
|
|
+ /* Now emit the colormap index for this cell */
|
|
|
+- *outptr++ = (JSAMPLE)(*cachep - 1);
|
|
|
++ *outptr++ = (_JSAMPLE)(*cachep - 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+-pass2_fs_dither(j_decompress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPARRAY output_buf, int num_rows)
|
|
|
++pass2_fs_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
|
|
|
++ _JSAMPARRAY output_buf, int num_rows)
|
|
|
+ /* This version performs Floyd-Steinberg dithering */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ hist3d histogram = cquantize->histogram;
|
|
|
+ register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */
|
|
|
+ LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
|
|
|
+ LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
|
|
|
+ register FSERRPTR errorptr; /* => fserrors[] at column before current */
|
|
|
+- JSAMPROW inptr; /* => current input pixel */
|
|
|
+- JSAMPROW outptr; /* => current output pixel */
|
|
|
++ _JSAMPROW inptr; /* => current input pixel */
|
|
|
++ _JSAMPROW outptr; /* => current output pixel */
|
|
|
+ histptr cachep;
|
|
|
+ int dir; /* +1 or -1 depending on direction */
|
|
|
+ int dir3; /* 3*dir, for advancing inptr & errorptr */
|
|
|
+ int row;
|
|
|
+ JDIMENSION col;
|
|
|
+ JDIMENSION width = cinfo->output_width;
|
|
|
+- JSAMPLE *range_limit = cinfo->sample_range_limit;
|
|
|
++ _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
|
|
|
+ int *error_limit = cquantize->error_limiter;
|
|
|
+- JSAMPROW colormap0 = cinfo->colormap[0];
|
|
|
+- JSAMPROW colormap1 = cinfo->colormap[1];
|
|
|
+- JSAMPROW colormap2 = cinfo->colormap[2];
|
|
|
++ _JSAMPROW colormap0 = ((_JSAMPARRAY)cinfo->colormap)[0];
|
|
|
++ _JSAMPROW colormap1 = ((_JSAMPARRAY)cinfo->colormap)[1];
|
|
|
++ _JSAMPROW colormap2 = ((_JSAMPARRAY)cinfo->colormap)[2];
|
|
|
+ SHIFT_TEMPS
|
|
|
+
|
|
|
+ for (row = 0; row < num_rows; row++) {
|
|
|
+ inptr = input_buf[row];
|
|
|
+ outptr = output_buf[row];
|
|
|
+ if (cquantize->on_odd_row) {
|
|
|
+ /* work right to left in this row */
|
|
|
+ inptr += (width - 1) * 3; /* so point to rightmost pixel */
|
|
|
+@@ -987,18 +991,18 @@ pass2_fs_dither(j_decompress_ptr cinfo,
|
|
|
+ cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3 + 1] + 8, 4);
|
|
|
+ cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3 + 2] + 8, 4);
|
|
|
+ /* Limit the error using transfer function set by init_error_limit.
|
|
|
+ * See comments with init_error_limit for rationale.
|
|
|
+ */
|
|
|
+ cur0 = error_limit[cur0];
|
|
|
+ cur1 = error_limit[cur1];
|
|
|
+ cur2 = error_limit[cur2];
|
|
|
+- /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
|
|
|
+- * The maximum error is +- MAXJSAMPLE (or less with error limiting);
|
|
|
++ /* Form pixel value + error, and range-limit to 0.._MAXJSAMPLE.
|
|
|
++ * The maximum error is +- _MAXJSAMPLE (or less with error limiting);
|
|
|
+ * this sets the required size of the range_limit array.
|
|
|
+ */
|
|
|
+ cur0 += inptr[0];
|
|
|
+ cur1 += inptr[1];
|
|
|
+ cur2 += inptr[2];
|
|
|
+ cur0 = range_limit[cur0];
|
|
|
+ cur1 = range_limit[cur1];
|
|
|
+ cur2 = range_limit[cur2];
|
|
|
+@@ -1008,17 +1012,17 @@ pass2_fs_dither(j_decompress_ptr cinfo,
|
|
|
+ /* If we have not seen this color before, find nearest colormap */
|
|
|
+ /* entry and update the cache */
|
|
|
+ if (*cachep == 0)
|
|
|
+ fill_inverse_cmap(cinfo, cur0 >> C0_SHIFT, cur1 >> C1_SHIFT,
|
|
|
+ cur2 >> C2_SHIFT);
|
|
|
+ /* Now emit the colormap index for this cell */
|
|
|
+ {
|
|
|
+ register int pixcode = *cachep - 1;
|
|
|
+- *outptr = (JSAMPLE)pixcode;
|
|
|
++ *outptr = (_JSAMPLE)pixcode;
|
|
|
+ /* Compute representation error for this pixel */
|
|
|
+ cur0 -= colormap0[pixcode];
|
|
|
+ cur1 -= colormap1[pixcode];
|
|
|
+ cur2 -= colormap2[pixcode];
|
|
|
+ }
|
|
|
+ /* Compute error fractions to be propagated to adjacent pixels.
|
|
|
+ * Add these into the running sums, and simultaneously shift the
|
|
|
+ * next-line error sums left by 1 column.
|
|
|
+@@ -1059,72 +1063,72 @@ pass2_fs_dither(j_decompress_ptr cinfo,
|
|
|
+ errorptr[2] = (FSERROR)bpreverr2;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Initialize the error-limiting transfer function (lookup table).
|
|
|
+ * The raw F-S error computation can potentially compute error values of up to
|
|
|
+- * +- MAXJSAMPLE. But we want the maximum correction applied to a pixel to be
|
|
|
++ * +- _MAXJSAMPLE. But we want the maximum correction applied to a pixel to be
|
|
|
+ * much less, otherwise obviously wrong pixels will be created. (Typical
|
|
|
+ * effects include weird fringes at color-area boundaries, isolated bright
|
|
|
+ * pixels in a dark area, etc.) The standard advice for avoiding this problem
|
|
|
+ * is to ensure that the "corners" of the color cube are allocated as output
|
|
|
+ * colors; then repeated errors in the same direction cannot cause cascading
|
|
|
+ * error buildup. However, that only prevents the error from getting
|
|
|
+ * completely out of hand; Aaron Giles reports that error limiting improves
|
|
|
+ * the results even with corner colors allocated.
|
|
|
+- * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
|
|
|
++ * A simple clamping of the error values to about +- _MAXJSAMPLE/8 works pretty
|
|
|
+ * well, but the smoother transfer function used below is even better. Thanks
|
|
|
+ * to Aaron Giles for this idea.
|
|
|
+ */
|
|
|
+
|
|
|
+ LOCAL(void)
|
|
|
+ init_error_limit(j_decompress_ptr cinfo)
|
|
|
+ /* Allocate and fill in the error_limiter table */
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+ int *table;
|
|
|
+ int in, out;
|
|
|
+
|
|
|
+ table = (int *)(*cinfo->mem->alloc_small)
|
|
|
+- ((j_common_ptr)cinfo, JPOOL_IMAGE, (MAXJSAMPLE * 2 + 1) * sizeof(int));
|
|
|
+- table += MAXJSAMPLE; /* so can index -MAXJSAMPLE .. +MAXJSAMPLE */
|
|
|
++ ((j_common_ptr)cinfo, JPOOL_IMAGE, (_MAXJSAMPLE * 2 + 1) * sizeof(int));
|
|
|
++ table += _MAXJSAMPLE; /* so can index -_MAXJSAMPLE .. +_MAXJSAMPLE */
|
|
|
+ cquantize->error_limiter = table;
|
|
|
+
|
|
|
+-#define STEPSIZE ((MAXJSAMPLE + 1) / 16)
|
|
|
+- /* Map errors 1:1 up to +- MAXJSAMPLE/16 */
|
|
|
++#define STEPSIZE ((_MAXJSAMPLE + 1) / 16)
|
|
|
++ /* Map errors 1:1 up to +- _MAXJSAMPLE/16 */
|
|
|
+ out = 0;
|
|
|
+ for (in = 0; in < STEPSIZE; in++, out++) {
|
|
|
+ table[in] = out; table[-in] = -out;
|
|
|
+ }
|
|
|
+- /* Map errors 1:2 up to +- 3*MAXJSAMPLE/16 */
|
|
|
++ /* Map errors 1:2 up to +- 3*_MAXJSAMPLE/16 */
|
|
|
+ for (; in < STEPSIZE * 3; in++, out += (in & 1) ? 0 : 1) {
|
|
|
+ table[in] = out; table[-in] = -out;
|
|
|
+ }
|
|
|
+- /* Clamp the rest to final out value (which is (MAXJSAMPLE+1)/8) */
|
|
|
+- for (; in <= MAXJSAMPLE; in++) {
|
|
|
++ /* Clamp the rest to final out value (which is (_MAXJSAMPLE+1)/8) */
|
|
|
++ for (; in <= _MAXJSAMPLE; in++) {
|
|
|
+ table[in] = out; table[-in] = -out;
|
|
|
+ }
|
|
|
+ #undef STEPSIZE
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Finish up at the end of each pass.
|
|
|
+ */
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ finish_pass1(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
|
|
|
+
|
|
|
+ /* Select the representative colors and fill in cinfo->colormap */
|
|
|
+- cinfo->colormap = cquantize->sv_colormap;
|
|
|
++ cinfo->colormap = (JSAMPARRAY)cquantize->sv_colormap;
|
|
|
+ select_colors(cinfo, cquantize->desired);
|
|
|
+ /* Force next pass to zero the color index table */
|
|
|
+ cquantize->needs_zeroed = TRUE;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ METHODDEF(void)
|
|
|
+ finish_pass2(j_decompress_ptr cinfo)
|
|
|
+@@ -1146,25 +1150,25 @@ start_pass_2_quant(j_decompress_ptr cinf
|
|
|
+
|
|
|
+ /* Only F-S dithering or no dithering is supported. */
|
|
|
+ /* If user asks for ordered dither, give them F-S. */
|
|
|
+ if (cinfo->dither_mode != JDITHER_NONE)
|
|
|
+ cinfo->dither_mode = JDITHER_FS;
|
|
|
+
|
|
|
+ if (is_pre_scan) {
|
|
|
+ /* Set up method pointers */
|
|
|
+- cquantize->pub.color_quantize = prescan_quantize;
|
|
|
++ cquantize->pub._color_quantize = prescan_quantize;
|
|
|
+ cquantize->pub.finish_pass = finish_pass1;
|
|
|
+ cquantize->needs_zeroed = TRUE; /* Always zero histogram */
|
|
|
+ } else {
|
|
|
+ /* Set up method pointers */
|
|
|
+ if (cinfo->dither_mode == JDITHER_FS)
|
|
|
+- cquantize->pub.color_quantize = pass2_fs_dither;
|
|
|
++ cquantize->pub._color_quantize = pass2_fs_dither;
|
|
|
+ else
|
|
|
+- cquantize->pub.color_quantize = pass2_no_dither;
|
|
|
++ cquantize->pub._color_quantize = pass2_no_dither;
|
|
|
+ cquantize->pub.finish_pass = finish_pass2;
|
|
|
+
|
|
|
+ /* Make sure color count is acceptable */
|
|
|
+ i = cinfo->actual_number_of_colors;
|
|
|
+ if (i < 1)
|
|
|
+ ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
|
|
|
+ if (i > MAXNUMCOLORS)
|
|
|
+ ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
|
|
|
+@@ -1210,32 +1214,36 @@ new_color_map_2_quant(j_decompress_ptr c
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Module initialization routine for 2-pass color quantization.
|
|
|
+ */
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jinit_2pass_quantizer(j_decompress_ptr cinfo)
|
|
|
++_jinit_2pass_quantizer(j_decompress_ptr cinfo)
|
|
|
+ {
|
|
|
+ my_cquantize_ptr cquantize;
|
|
|
+ int i;
|
|
|
+
|
|
|
++ if (cinfo->data_precision != BITS_IN_JSAMPLE)
|
|
|
++ ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
|
|
|
++
|
|
|
+ cquantize = (my_cquantize_ptr)
|
|
|
+ (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ sizeof(my_cquantizer));
|
|
|
+ cinfo->cquantize = (struct jpeg_color_quantizer *)cquantize;
|
|
|
+ cquantize->pub.start_pass = start_pass_2_quant;
|
|
|
+ cquantize->pub.new_color_map = new_color_map_2_quant;
|
|
|
+ cquantize->fserrors = NULL; /* flag optional arrays not allocated */
|
|
|
+ cquantize->error_limiter = NULL;
|
|
|
+
|
|
|
+ /* Make sure jdmaster didn't give me a case I can't handle */
|
|
|
+- if (cinfo->out_color_components != 3)
|
|
|
++ if (cinfo->out_color_components != 3 ||
|
|
|
++ cinfo->out_color_space == JCS_RGB565 || cinfo->master->lossless)
|
|
|
+ ERREXIT(cinfo, JERR_NOTIMPL);
|
|
|
+
|
|
|
+ /* Allocate the histogram/inverse colormap storage */
|
|
|
+ cquantize->histogram = (hist3d)(*cinfo->mem->alloc_small)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d));
|
|
|
+ for (i = 0; i < HIST_C0_ELEMS; i++) {
|
|
|
+ cquantize->histogram[i] = (hist2d)(*cinfo->mem->alloc_large)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+@@ -1248,20 +1256,20 @@ jinit_2pass_quantizer(j_decompress_ptr c
|
|
|
+ * calculations.
|
|
|
+ */
|
|
|
+ if (cinfo->enable_2pass_quant) {
|
|
|
+ /* Make sure color count is acceptable */
|
|
|
+ int desired = cinfo->desired_number_of_colors;
|
|
|
+ /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
|
|
|
+ if (desired < 8)
|
|
|
+ ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
|
|
|
+- /* Make sure colormap indexes can be represented by JSAMPLEs */
|
|
|
++ /* Make sure colormap indexes can be represented by _JSAMPLEs */
|
|
|
+ if (desired > MAXNUMCOLORS)
|
|
|
+ ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
|
|
|
+- cquantize->sv_colormap = (*cinfo->mem->alloc_sarray)
|
|
|
++ cquantize->sv_colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)desired, (JDIMENSION)3);
|
|
|
+ cquantize->desired = desired;
|
|
|
+ } else
|
|
|
+ cquantize->sv_colormap = NULL;
|
|
|
+
|
|
|
+ /* Only F-S dithering or no dithering is supported. */
|
|
|
+ /* If user asks for ordered dither, give them F-S. */
|
|
|
+ if (cinfo->dither_mode != JDITHER_NONE)
|
|
|
+@@ -1277,9 +1285,9 @@ jinit_2pass_quantizer(j_decompress_ptr c
|
|
|
+ cquantize->fserrors = (FSERRPTR)(*cinfo->mem->alloc_large)
|
|
|
+ ((j_common_ptr)cinfo, JPOOL_IMAGE,
|
|
|
+ (size_t)((cinfo->output_width + 2) * (3 * sizeof(FSERROR))));
|
|
|
+ /* Might as well create the error-limiting table too. */
|
|
|
+ init_error_limit(cinfo);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+-#endif /* QUANT_2PASS_SUPPORTED */
|
|
|
++#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
|
|
|
+diff --git a/media/libjpeg/jsamplecomp.h b/media/libjpeg/jsamplecomp.h
|
|
|
+new file mode 100644
|
|
|
+--- /dev/null
|
|
|
++++ b/media/libjpeg/jsamplecomp.h
|
|
|
+@@ -0,0 +1,336 @@
|
|
|
++/*
|
|
|
++ * jsamplecomp.h
|
|
|
++ *
|
|
|
++ * Copyright (C) 2022, D. R. Commander.
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ */
|
|
|
++
|
|
|
++/* In source files that must be compiled for multiple data precisions, we
|
|
|
++ * prefix all precision-dependent data types, macros, methods, fields, and
|
|
|
++ * function names with an underscore. Including this file replaces those
|
|
|
++ * precision-independent tokens with their precision-dependent equivalents,
|
|
|
++ * based on the value of BITS_IN_JSAMPLE.
|
|
|
++ */
|
|
|
++
|
|
|
++#ifndef JSAMPLECOMP_H
|
|
|
++#define JSAMPLECOMP_H
|
|
|
++
|
|
|
++#if BITS_IN_JSAMPLE == 16
|
|
|
++
|
|
|
++/* Sample data types and macros (jmorecfg.h) */
|
|
|
++#define _JSAMPLE J16SAMPLE
|
|
|
++
|
|
|
++#define _MAXJSAMPLE MAXJ16SAMPLE
|
|
|
++#define _CENTERJSAMPLE CENTERJ16SAMPLE
|
|
|
++
|
|
|
++#define _JSAMPROW J16SAMPROW
|
|
|
++#define _JSAMPARRAY J16SAMPARRAY
|
|
|
++#define _JSAMPIMAGE J16SAMPIMAGE
|
|
|
++
|
|
|
++/* External functions (jpeglib.h) */
|
|
|
++#define _jpeg_write_scanlines jpeg16_write_scanlines
|
|
|
++#define _jpeg_read_scanlines jpeg16_read_scanlines
|
|
|
++
|
|
|
++/* Internal methods (jpegint.h) */
|
|
|
++
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++/* Use the 16-bit method in the jpeg_c_main_controller structure. */
|
|
|
++#define _process_data process_data_16
|
|
|
++/* Use the 16-bit method in the jpeg_c_prep_controller structure. */
|
|
|
++#define _pre_process_data pre_process_data_16
|
|
|
++/* Use the 16-bit method in the jpeg_c_coef_controller structure. */
|
|
|
++#define _compress_data compress_data_16
|
|
|
++/* Use the 16-bit method in the jpeg_color_converter structure. */
|
|
|
++#define _color_convert color_convert_16
|
|
|
++/* Use the 16-bit method in the jpeg_downsampler structure. */
|
|
|
++#define _downsample downsample_16
|
|
|
++#endif
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++/* Use the 16-bit method in the jpeg_d_main_controller structure. */
|
|
|
++#define _process_data process_data_16
|
|
|
++/* Use the 16-bit method in the jpeg_d_coef_controller structure. */
|
|
|
++#define _decompress_data decompress_data_16
|
|
|
++/* Use the 16-bit method in the jpeg_d_post_controller structure. */
|
|
|
++#define _post_process_data post_process_data_16
|
|
|
++/* Use the 16-bit method in the jpeg_upsampler structure. */
|
|
|
++#define _upsample upsample_16
|
|
|
++/* Use the 16-bit method in the jpeg_color_converter structure. */
|
|
|
++#define _color_convert color_convert_16
|
|
|
++#endif
|
|
|
++
|
|
|
++/* Global internal functions (jpegint.h) */
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_c_main_controller j16init_c_main_controller
|
|
|
++#define _jinit_c_prep_controller j16init_c_prep_controller
|
|
|
++#define _jinit_color_converter j16init_color_converter
|
|
|
++#define _jinit_downsampler j16init_downsampler
|
|
|
++#define _jinit_c_diff_controller j16init_c_diff_controller
|
|
|
++#define _jinit_lossless_compressor j16init_lossless_compressor
|
|
|
++#endif
|
|
|
++
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_d_main_controller j16init_d_main_controller
|
|
|
++#define _jinit_d_post_controller j16init_d_post_controller
|
|
|
++#define _jinit_upsampler j16init_upsampler
|
|
|
++#define _jinit_color_deconverter j16init_color_deconverter
|
|
|
++#define _jinit_merged_upsampler j16init_merged_upsampler
|
|
|
++#define _jinit_d_diff_controller j16init_d_diff_controller
|
|
|
++#define _jinit_lossless_decompressor j16init_lossless_decompressor
|
|
|
++#endif
|
|
|
++
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++#define _jcopy_sample_rows j16copy_sample_rows
|
|
|
++#endif
|
|
|
++
|
|
|
++/* Internal fields (cdjpeg.h) */
|
|
|
++
|
|
|
++#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
++/* Use the 16-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
|
|
|
++ structures. */
|
|
|
++#define _buffer buffer16
|
|
|
++#endif
|
|
|
++
|
|
|
++/* Image I/O functions (cdjpeg.h) */
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_read_gif j16init_read_gif
|
|
|
++#define _jinit_read_ppm j16init_read_ppm
|
|
|
++#endif
|
|
|
++
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_write_ppm j16init_write_ppm
|
|
|
++#endif
|
|
|
++
|
|
|
++#elif BITS_IN_JSAMPLE == 12
|
|
|
++
|
|
|
++/* Sample data types and macros (jmorecfg.h) */
|
|
|
++#define _JSAMPLE J12SAMPLE
|
|
|
++
|
|
|
++#define _MAXJSAMPLE MAXJ12SAMPLE
|
|
|
++#define _CENTERJSAMPLE CENTERJ12SAMPLE
|
|
|
++
|
|
|
++#define _JSAMPROW J12SAMPROW
|
|
|
++#define _JSAMPARRAY J12SAMPARRAY
|
|
|
++#define _JSAMPIMAGE J12SAMPIMAGE
|
|
|
++
|
|
|
++/* External functions (jpeglib.h) */
|
|
|
++#define _jpeg_write_scanlines jpeg12_write_scanlines
|
|
|
++#define _jpeg_write_raw_data jpeg12_write_raw_data
|
|
|
++#define _jpeg_read_scanlines jpeg12_read_scanlines
|
|
|
++#define _jpeg_skip_scanlines jpeg12_skip_scanlines
|
|
|
++#define _jpeg_crop_scanline jpeg12_crop_scanline
|
|
|
++#define _jpeg_read_raw_data jpeg12_read_raw_data
|
|
|
++
|
|
|
++/* Internal methods (jpegint.h) */
|
|
|
++
|
|
|
++/* Use the 12-bit method in the jpeg_c_main_controller structure. */
|
|
|
++#define _process_data process_data_12
|
|
|
++/* Use the 12-bit method in the jpeg_c_prep_controller structure. */
|
|
|
++#define _pre_process_data pre_process_data_12
|
|
|
++/* Use the 12-bit method in the jpeg_c_coef_controller structure. */
|
|
|
++#define _compress_data compress_data_12
|
|
|
++/* Use the 12-bit method in the jpeg_color_converter structure. */
|
|
|
++#define _color_convert color_convert_12
|
|
|
++/* Use the 12-bit method in the jpeg_downsampler structure. */
|
|
|
++#define _downsample downsample_12
|
|
|
++/* Use the 12-bit method in the jpeg_forward_dct structure. */
|
|
|
++#define _forward_DCT forward_DCT_12
|
|
|
++/* Use the 12-bit method in the jpeg_d_main_controller structure. */
|
|
|
++#define _process_data process_data_12
|
|
|
++/* Use the 12-bit method in the jpeg_d_coef_controller structure. */
|
|
|
++#define _decompress_data decompress_data_12
|
|
|
++/* Use the 12-bit method in the jpeg_d_post_controller structure. */
|
|
|
++#define _post_process_data post_process_data_12
|
|
|
++/* Use the 12-bit method in the jpeg_inverse_dct structure. */
|
|
|
++#define _inverse_DCT_method_ptr inverse_DCT_12_method_ptr
|
|
|
++#define _inverse_DCT inverse_DCT_12
|
|
|
++/* Use the 12-bit method in the jpeg_upsampler structure. */
|
|
|
++#define _upsample upsample_12
|
|
|
++/* Use the 12-bit method in the jpeg_color_converter structure. */
|
|
|
++#define _color_convert color_convert_12
|
|
|
++/* Use the 12-bit method in the jpeg_color_quantizer structure. */
|
|
|
++#define _color_quantize color_quantize_12
|
|
|
++
|
|
|
++/* Global internal functions (jpegint.h) */
|
|
|
++#define _jinit_c_main_controller j12init_c_main_controller
|
|
|
++#define _jinit_c_prep_controller j12init_c_prep_controller
|
|
|
++#define _jinit_c_coef_controller j12init_c_coef_controller
|
|
|
++#define _jinit_color_converter j12init_color_converter
|
|
|
++#define _jinit_downsampler j12init_downsampler
|
|
|
++#define _jinit_forward_dct j12init_forward_dct
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_c_diff_controller j12init_c_diff_controller
|
|
|
++#define _jinit_lossless_compressor j12init_lossless_compressor
|
|
|
++#endif
|
|
|
++
|
|
|
++#define _jinit_d_main_controller j12init_d_main_controller
|
|
|
++#define _jinit_d_coef_controller j12init_d_coef_controller
|
|
|
++#define _jinit_d_post_controller j12init_d_post_controller
|
|
|
++#define _jinit_inverse_dct j12init_inverse_dct
|
|
|
++#define _jinit_upsampler j12init_upsampler
|
|
|
++#define _jinit_color_deconverter j12init_color_deconverter
|
|
|
++#define _jinit_1pass_quantizer j12init_1pass_quantizer
|
|
|
++#define _jinit_2pass_quantizer j12init_2pass_quantizer
|
|
|
++#define _jinit_merged_upsampler j12init_merged_upsampler
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_d_diff_controller j12init_d_diff_controller
|
|
|
++#define _jinit_lossless_decompressor j12init_lossless_decompressor
|
|
|
++#endif
|
|
|
++
|
|
|
++#define _jcopy_sample_rows j12copy_sample_rows
|
|
|
++
|
|
|
++/* Global internal functions (jdct.h) */
|
|
|
++#define _jpeg_fdct_islow jpeg12_fdct_islow
|
|
|
++#define _jpeg_fdct_ifast jpeg12_fdct_ifast
|
|
|
++
|
|
|
++#define _jpeg_idct_islow jpeg12_idct_islow
|
|
|
++#define _jpeg_idct_ifast jpeg12_idct_ifast
|
|
|
++#define _jpeg_idct_float jpeg12_idct_float
|
|
|
++#define _jpeg_idct_7x7 jpeg12_idct_7x7
|
|
|
++#define _jpeg_idct_6x6 jpeg12_idct_6x6
|
|
|
++#define _jpeg_idct_5x5 jpeg12_idct_5x5
|
|
|
++#define _jpeg_idct_4x4 jpeg12_idct_4x4
|
|
|
++#define _jpeg_idct_3x3 jpeg12_idct_3x3
|
|
|
++#define _jpeg_idct_2x2 jpeg12_idct_2x2
|
|
|
++#define _jpeg_idct_1x1 jpeg12_idct_1x1
|
|
|
++#define _jpeg_idct_9x9 jpeg12_idct_9x9
|
|
|
++#define _jpeg_idct_10x10 jpeg12_idct_10x10
|
|
|
++#define _jpeg_idct_11x11 jpeg12_idct_11x11
|
|
|
++#define _jpeg_idct_12x12 jpeg12_idct_12x12
|
|
|
++#define _jpeg_idct_13x13 jpeg12_idct_13x13
|
|
|
++#define _jpeg_idct_14x14 jpeg12_idct_14x14
|
|
|
++#define _jpeg_idct_15x15 jpeg12_idct_15x15
|
|
|
++#define _jpeg_idct_16x16 jpeg12_idct_16x16
|
|
|
++
|
|
|
++/* Internal fields (cdjpeg.h) */
|
|
|
++
|
|
|
++/* Use the 12-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
|
|
|
++ structures. */
|
|
|
++#define _buffer buffer12
|
|
|
++
|
|
|
++/* Image I/O functions (cdjpeg.h) */
|
|
|
++#define _jinit_read_gif j12init_read_gif
|
|
|
++#define _jinit_write_gif j12init_write_gif
|
|
|
++#define _jinit_read_ppm j12init_read_ppm
|
|
|
++#define _jinit_write_ppm j12init_write_ppm
|
|
|
++
|
|
|
++#define _read_color_map read_color_map_12
|
|
|
++
|
|
|
++#else /* BITS_IN_JSAMPLE */
|
|
|
++
|
|
|
++/* Sample data types and macros (jmorecfg.h) */
|
|
|
++#define _JSAMPLE JSAMPLE
|
|
|
++
|
|
|
++#define _MAXJSAMPLE MAXJSAMPLE
|
|
|
++#define _CENTERJSAMPLE CENTERJSAMPLE
|
|
|
++
|
|
|
++#define _JSAMPROW JSAMPROW
|
|
|
++#define _JSAMPARRAY JSAMPARRAY
|
|
|
++#define _JSAMPIMAGE JSAMPIMAGE
|
|
|
++
|
|
|
++/* External functions (jpeglib.h) */
|
|
|
++#define _jpeg_write_scanlines jpeg_write_scanlines
|
|
|
++#define _jpeg_write_raw_data jpeg_write_raw_data
|
|
|
++#define _jpeg_read_scanlines jpeg_read_scanlines
|
|
|
++#define _jpeg_skip_scanlines jpeg_skip_scanlines
|
|
|
++#define _jpeg_crop_scanline jpeg_crop_scanline
|
|
|
++#define _jpeg_read_raw_data jpeg_read_raw_data
|
|
|
++
|
|
|
++/* Internal methods (jpegint.h) */
|
|
|
++
|
|
|
++/* Use the 8-bit method in the jpeg_c_main_controller structure. */
|
|
|
++#define _process_data process_data
|
|
|
++/* Use the 8-bit method in the jpeg_c_prep_controller structure. */
|
|
|
++#define _pre_process_data pre_process_data
|
|
|
++/* Use the 8-bit method in the jpeg_c_coef_controller structure. */
|
|
|
++#define _compress_data compress_data
|
|
|
++/* Use the 8-bit method in the jpeg_color_converter structure. */
|
|
|
++#define _color_convert color_convert
|
|
|
++/* Use the 8-bit method in the jpeg_downsampler structure. */
|
|
|
++#define _downsample downsample
|
|
|
++/* Use the 8-bit method in the jpeg_forward_dct structure. */
|
|
|
++#define _forward_DCT forward_DCT
|
|
|
++/* Use the 8-bit method in the jpeg_d_main_controller structure. */
|
|
|
++#define _process_data process_data
|
|
|
++/* Use the 8-bit method in the jpeg_d_coef_controller structure. */
|
|
|
++#define _decompress_data decompress_data
|
|
|
++/* Use the 8-bit method in the jpeg_d_post_controller structure. */
|
|
|
++#define _post_process_data post_process_data
|
|
|
++/* Use the 8-bit method in the jpeg_inverse_dct structure. */
|
|
|
++#define _inverse_DCT_method_ptr inverse_DCT_method_ptr
|
|
|
++#define _inverse_DCT inverse_DCT
|
|
|
++/* Use the 8-bit method in the jpeg_upsampler structure. */
|
|
|
++#define _upsample upsample
|
|
|
++/* Use the 8-bit method in the jpeg_color_converter structure. */
|
|
|
++#define _color_convert color_convert
|
|
|
++/* Use the 8-bit method in the jpeg_color_quantizer structure. */
|
|
|
++#define _color_quantize color_quantize
|
|
|
++
|
|
|
++/* Global internal functions (jpegint.h) */
|
|
|
++#define _jinit_c_main_controller jinit_c_main_controller
|
|
|
++#define _jinit_c_prep_controller jinit_c_prep_controller
|
|
|
++#define _jinit_c_coef_controller jinit_c_coef_controller
|
|
|
++#define _jinit_color_converter jinit_color_converter
|
|
|
++#define _jinit_downsampler jinit_downsampler
|
|
|
++#define _jinit_forward_dct jinit_forward_dct
|
|
|
++#ifdef C_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_c_diff_controller jinit_c_diff_controller
|
|
|
++#define _jinit_lossless_compressor jinit_lossless_compressor
|
|
|
++#endif
|
|
|
++
|
|
|
++#define _jinit_d_main_controller jinit_d_main_controller
|
|
|
++#define _jinit_d_coef_controller jinit_d_coef_controller
|
|
|
++#define _jinit_d_post_controller jinit_d_post_controller
|
|
|
++#define _jinit_inverse_dct jinit_inverse_dct
|
|
|
++#define _jinit_upsampler jinit_upsampler
|
|
|
++#define _jinit_color_deconverter jinit_color_deconverter
|
|
|
++#define _jinit_1pass_quantizer jinit_1pass_quantizer
|
|
|
++#define _jinit_2pass_quantizer jinit_2pass_quantizer
|
|
|
++#define _jinit_merged_upsampler jinit_merged_upsampler
|
|
|
++#ifdef D_LOSSLESS_SUPPORTED
|
|
|
++#define _jinit_d_diff_controller jinit_d_diff_controller
|
|
|
++#define _jinit_lossless_decompressor jinit_lossless_decompressor
|
|
|
++#endif
|
|
|
++
|
|
|
++#define _jcopy_sample_rows jcopy_sample_rows
|
|
|
++
|
|
|
++/* Global internal functions (jdct.h) */
|
|
|
++#define _jpeg_fdct_islow jpeg_fdct_islow
|
|
|
++#define _jpeg_fdct_ifast jpeg_fdct_ifast
|
|
|
++
|
|
|
++#define _jpeg_idct_islow jpeg_idct_islow
|
|
|
++#define _jpeg_idct_ifast jpeg_idct_ifast
|
|
|
++#define _jpeg_idct_float jpeg_idct_float
|
|
|
++#define _jpeg_idct_7x7 jpeg_idct_7x7
|
|
|
++#define _jpeg_idct_6x6 jpeg_idct_6x6
|
|
|
++#define _jpeg_idct_5x5 jpeg_idct_5x5
|
|
|
++#define _jpeg_idct_4x4 jpeg_idct_4x4
|
|
|
++#define _jpeg_idct_3x3 jpeg_idct_3x3
|
|
|
++#define _jpeg_idct_2x2 jpeg_idct_2x2
|
|
|
++#define _jpeg_idct_1x1 jpeg_idct_1x1
|
|
|
++#define _jpeg_idct_9x9 jpeg_idct_9x9
|
|
|
++#define _jpeg_idct_10x10 jpeg_idct_10x10
|
|
|
++#define _jpeg_idct_11x11 jpeg_idct_11x11
|
|
|
++#define _jpeg_idct_12x12 jpeg_idct_12x12
|
|
|
++#define _jpeg_idct_13x13 jpeg_idct_13x13
|
|
|
++#define _jpeg_idct_14x14 jpeg_idct_14x14
|
|
|
++#define _jpeg_idct_15x15 jpeg_idct_15x15
|
|
|
++#define _jpeg_idct_16x16 jpeg_idct_16x16
|
|
|
++
|
|
|
++/* Internal fields (cdjpeg.h) */
|
|
|
++
|
|
|
++/* Use the 8-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
|
|
|
++ structures. */
|
|
|
++#define _buffer buffer
|
|
|
++
|
|
|
++/* Image I/O functions (cdjpeg.h) */
|
|
|
++#define _jinit_read_gif jinit_read_gif
|
|
|
++#define _jinit_write_gif jinit_write_gif
|
|
|
++#define _jinit_read_ppm jinit_read_ppm
|
|
|
++#define _jinit_write_ppm jinit_write_ppm
|
|
|
++
|
|
|
++#define _read_color_map read_color_map
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE */
|
|
|
++
|
|
|
++#endif /* JSAMPLECOMP_H */
|
|
|
+diff --git a/media/libjpeg/jsimd.h b/media/libjpeg/jsimd.h
|
|
|
+--- a/media/libjpeg/jsimd.h
|
|
|
++++ b/media/libjpeg/jsimd.h
|
|
|
+@@ -7,16 +7,18 @@
|
|
|
+ * Copyright (C) 2020, Arm Limited.
|
|
|
+ *
|
|
|
+ * Based on the x86 SIMD extension for IJG JPEG library,
|
|
|
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ *
|
|
|
+ */
|
|
|
+
|
|
|
++#ifdef WITH_SIMD
|
|
|
++
|
|
|
+ #include "jchuff.h" /* Declarations shared with jcphuff.c */
|
|
|
+
|
|
|
+ EXTERN(int) jsimd_can_rgb_ycc(void);
|
|
|
+ EXTERN(int) jsimd_can_rgb_gray(void);
|
|
|
+ EXTERN(int) jsimd_can_ycc_rgb(void);
|
|
|
+ EXTERN(int) jsimd_can_ycc_rgb565(void);
|
|
|
+ EXTERN(int) jsimd_c_can_null_convert(void);
|
|
|
+
|
|
|
+@@ -116,8 +118,10 @@ EXTERN(void) jsimd_encode_mcu_AC_first_p
|
|
|
+ (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
|
|
|
+ UJCOEF *values, size_t *zerobits);
|
|
|
+
|
|
|
+ EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
|
|
|
+
|
|
|
+ EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
|
|
|
+ (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
|
|
|
+ UJCOEF *absvalues, size_t *bits);
|
|
|
++
|
|
|
++#endif /* WITH_SIMD */
|
|
|
+diff --git a/media/libjpeg/jsimd_none.c b/media/libjpeg/jsimd_none.c
|
|
|
+deleted file mode 100644
|
|
|
+--- a/media/libjpeg/jsimd_none.c
|
|
|
++++ /dev/null
|
|
|
+@@ -1,431 +0,0 @@
|
|
|
+-/*
|
|
|
+- * jsimd_none.c
|
|
|
+- *
|
|
|
+- * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+- * Copyright (C) 2009-2011, 2014, 2022, D. R. Commander.
|
|
|
+- * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
|
|
|
+- * Copyright (C) 2020, Arm Limited.
|
|
|
+- *
|
|
|
+- * Based on the x86 SIMD extension for IJG JPEG library,
|
|
|
+- * Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+- * For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+- *
|
|
|
+- * This file contains stubs for when there is no SIMD support available.
|
|
|
+- */
|
|
|
+-
|
|
|
+-#define JPEG_INTERNALS
|
|
|
+-#include "jinclude.h"
|
|
|
+-#include "jpeglib.h"
|
|
|
+-#include "jsimd.h"
|
|
|
+-#include "jdct.h"
|
|
|
+-#include "jsimddct.h"
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_rgb_ycc(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_rgb_gray(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_ycc_rgb(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_ycc_rgb565(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_c_can_null_convert(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+- int num_rows)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+- int num_rows)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
+- int num_rows)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION input_row, JSAMPARRAY output_buf,
|
|
|
+- int num_rows)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
|
+- JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
|
+- int num_rows)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v2_downsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v1_downsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v2_smooth_downsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
|
|
|
+- jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY output_data)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v2_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v1_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_int_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v2_fancy_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v1_fancy_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h1v2_fancy_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v2_merged_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_h2v1_merged_upsample(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
|
+- JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_convsamp(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_convsamp_float(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
|
|
|
+- DCTELEM *workspace)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
|
|
|
+- FAST_FLOAT *workspace)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_fdct_islow(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_fdct_ifast(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_fdct_float(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_fdct_islow(DCTELEM *data)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_fdct_ifast(DCTELEM *data)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_fdct_float(FAST_FLOAT *data)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_quantize(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_quantize_float(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
|
|
|
+- FAST_FLOAT *workspace)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_2x2(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_4x4(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_6x6(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_12x12(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_islow(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_ifast(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_idct_float(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
|
|
|
+- JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
|
|
+- JDIMENSION output_col)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_huff_encode_one_block(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(JOCTET *)
|
|
|
+-jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
|
|
|
+- int last_dc_val, c_derived_tbl *dctbl,
|
|
|
+- c_derived_tbl *actbl)
|
|
|
+-{
|
|
|
+- return NULL;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_encode_mcu_AC_first_prepare(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(void)
|
|
|
+-jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
|
|
|
+- const int *jpeg_natural_order_start, int Sl,
|
|
|
+- int Al, UJCOEF *values, size_t *zerobits)
|
|
|
+-{
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_can_encode_mcu_AC_refine_prepare(void)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+-
|
|
|
+-GLOBAL(int)
|
|
|
+-jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
|
|
|
+- const int *jpeg_natural_order_start, int Sl,
|
|
|
+- int Al, UJCOEF *absvalues, size_t *bits)
|
|
|
+-{
|
|
|
+- return 0;
|
|
|
+-}
|
|
|
+diff --git a/media/libjpeg/jutils.c b/media/libjpeg/jutils.c
|
|
|
+--- a/media/libjpeg/jutils.c
|
|
|
++++ b/media/libjpeg/jutils.c
|
|
|
+@@ -12,18 +12,21 @@
|
|
|
+ * for both compression and decompression.
|
|
|
+ * Note we prefix all global names with "j" to minimize conflicts with
|
|
|
+ * a surrounding application.
|
|
|
+ */
|
|
|
+
|
|
|
+ #define JPEG_INTERNALS
|
|
|
+ #include "jinclude.h"
|
|
|
+ #include "jpeglib.h"
|
|
|
++#include "jsamplecomp.h"
|
|
|
+
|
|
|
+
|
|
|
++#if BITS_IN_JSAMPLE == 8
|
|
|
++
|
|
|
+ /*
|
|
|
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
|
|
|
+ * of a DCT block read in natural order (left to right, top to bottom).
|
|
|
+ */
|
|
|
+
|
|
|
+ #if 0 /* This table is not actually needed in v6a */
|
|
|
+
|
|
|
+ const int jpeg_zigzag_order[DCTSIZE2] = {
|
|
|
+@@ -84,41 +87,51 @@ GLOBAL(long)
|
|
|
+ jround_up(long a, long b)
|
|
|
+ /* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
|
|
|
+ /* Assumes a >= 0, b > 0 */
|
|
|
+ {
|
|
|
+ a += b - 1L;
|
|
|
+ return a - (a % b);
|
|
|
+ }
|
|
|
+
|
|
|
++#endif /* BITS_IN_JSAMPLE == 8 */
|
|
|
++
|
|
|
++
|
|
|
++#if BITS_IN_JSAMPLE != 16 || \
|
|
|
++ defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+-jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
|
|
|
+- JSAMPARRAY output_array, int dest_row, int num_rows,
|
|
|
+- JDIMENSION num_cols)
|
|
|
++_jcopy_sample_rows(_JSAMPARRAY input_array, int source_row,
|
|
|
++ _JSAMPARRAY output_array, int dest_row, int num_rows,
|
|
|
++ JDIMENSION num_cols)
|
|
|
+ /* Copy some rows of samples from one place to another.
|
|
|
+ * num_rows rows are copied from input_array[source_row++]
|
|
|
+ * to output_array[dest_row++]; these areas may overlap for duplication.
|
|
|
+ * The source and destination arrays must be at least as wide as num_cols.
|
|
|
+ */
|
|
|
+ {
|
|
|
+- register JSAMPROW inptr, outptr;
|
|
|
+- register size_t count = (size_t)(num_cols * sizeof(JSAMPLE));
|
|
|
++ register _JSAMPROW inptr, outptr;
|
|
|
++ register size_t count = (size_t)(num_cols * sizeof(_JSAMPLE));
|
|
|
+ register int row;
|
|
|
+
|
|
|
+ input_array += source_row;
|
|
|
+ output_array += dest_row;
|
|
|
+
|
|
|
+ for (row = num_rows; row > 0; row--) {
|
|
|
+ inptr = *input_array++;
|
|
|
+ outptr = *output_array++;
|
|
|
+ memcpy(outptr, inptr, count);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
++#endif /* BITS_IN_JSAMPLE != 16 ||
|
|
|
++ defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED) */
|
|
|
++
|
|
|
++
|
|
|
++#if BITS_IN_JSAMPLE == 8
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
|
|
|
+ JDIMENSION num_blocks)
|
|
|
+ /* Copy a row of coefficient blocks from one place to another. */
|
|
|
+ {
|
|
|
+ memcpy(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
|
|
|
+ }
|
|
|
+@@ -126,8 +139,10 @@ jcopy_block_row(JBLOCKROW input_row, JBL
|
|
|
+
|
|
|
+ GLOBAL(void)
|
|
|
+ jzero_far(void *target, size_t bytestozero)
|
|
|
+ /* Zero out a chunk of memory. */
|
|
|
+ /* This might be sample-array data, block-array data, or alloc_large data. */
|
|
|
+ {
|
|
|
+ memset(target, 0, bytestozero);
|
|
|
+ }
|
|
|
++
|
|
|
++#endif /* BITS_IN_JSAMPLE == 8 */
|
|
|
+diff --git a/media/libjpeg/jversion.h b/media/libjpeg/jversion.h
|
|
|
+--- a/media/libjpeg/jversion.h
|
|
|
++++ b/media/libjpeg/jversion.h
|
|
|
+@@ -1,15 +1,15 @@
|
|
|
+ /*
|
|
|
+ * jversion.h
|
|
|
+ *
|
|
|
+ * This file was part of the Independent JPEG Group's software:
|
|
|
+ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
|
|
|
+ * libjpeg-turbo Modifications:
|
|
|
+- * Copyright (C) 2010, 2012-2023, D. R. Commander.
|
|
|
++ * Copyright (C) 2010, 2012-2024, D. R. Commander.
|
|
|
+ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
+ * file.
|
|
|
+ *
|
|
|
+ * This file contains software version identification.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ #if JPEG_LIB_VERSION >= 80
|
|
|
+@@ -31,24 +31,26 @@
|
|
|
+ * - libjpeg-turbo authors (2009-) in descending order of the date of their
|
|
|
+ * most recent contribution to the project, then in ascending order of the
|
|
|
+ * date of their first contribution to the project, then in alphabetical
|
|
|
+ * order
|
|
|
+ * - Upstream authors in descending order of the date of the first inclusion of
|
|
|
+ * their code
|
|
|
+ */
|
|
|
+
|
|
|
+-#define JCOPYRIGHT \
|
|
|
+- "Copyright (C) 2009-2023 D. R. Commander\n" \
|
|
|
++#define JCOPYRIGHT1 \
|
|
|
++ "Copyright (C) 2009-2024 D. R. Commander\n" \
|
|
|
+ "Copyright (C) 2015, 2020 Google, Inc.\n" \
|
|
|
+ "Copyright (C) 2019-2020 Arm Limited\n" \
|
|
|
+ "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
|
|
|
+ "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
|
|
|
+- "Copyright (C) 2015 Intel Corporation\n" \
|
|
|
++ "Copyright (C) 2015 Intel Corporation\n"
|
|
|
++#define JCOPYRIGHT2 \
|
|
|
+ "Copyright (C) 2013-2014 Linaro Limited\n" \
|
|
|
+ "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
|
|
|
+ "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
|
|
|
+ "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
|
|
|
+ "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
|
|
|
+- "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
|
|
|
++ "Copyright (C) 1999 Ken Murchison\n" \
|
|
|
++ "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding\n"
|
|
|
+
|
|
|
+ #define JCOPYRIGHT_SHORT \
|
|
|
+- "Copyright (C) 1991-2023 The libjpeg-turbo Project and many others"
|
|
|
++ "Copyright (C) 1991-2024 The libjpeg-turbo Project and many others"
|
|
|
+diff --git a/media/libjpeg/moz.build b/media/libjpeg/moz.build
|
|
|
+--- a/media/libjpeg/moz.build
|
|
|
++++ b/media/libjpeg/moz.build
|
|
|
+@@ -2,16 +2,32 @@
|
|
|
+ # vim: set filetype=python:
|
|
|
+ # This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
+
|
|
|
+ with Files("**"):
|
|
|
+ BUG_COMPONENT = ("Core", "ImageLib")
|
|
|
+
|
|
|
++# Not technically necessary, but set for consistency with the 12/16-bit ones.
|
|
|
++DEFINES['BITS_IN_JSAMPLE'] = 8
|
|
|
++
|
|
|
++# libjpeg-turbo 3 requires rebuilding some files multiple times in order to
|
|
|
++# pick up 12-bit and 16-bit support. Failure to do so results in missing
|
|
|
++# symbols during linking.
|
|
|
++#
|
|
|
++# Since we can't build the same files multiple times with different
|
|
|
++# BITS_IN_JSAMPLE values defined, we instead create separate moz.build files
|
|
|
++# for each configuration. The files must be kept in sync with JPEG12_SOURCES
|
|
|
++# and JPEG16_SOURCES from the upstream CMakeLists.txt.
|
|
|
++DIRS += [
|
|
|
++ "jpeg12",
|
|
|
++ "jpeg16",
|
|
|
++]
|
|
|
++
|
|
|
+ EXPORTS += [
|
|
|
+ 'jconfig.h',
|
|
|
+ 'jerror.h',
|
|
|
+ 'jinclude.h',
|
|
|
+ 'jmorecfg.h',
|
|
|
+ 'jpegint.h',
|
|
|
+ 'jpeglib.h',
|
|
|
+ ]
|
|
|
+@@ -20,19 +36,22 @@ SOURCES += [
|
|
|
+ 'jcomapi.c',
|
|
|
+ 'jdapimin.c',
|
|
|
+ 'jdapistd.c',
|
|
|
+ 'jdatadst.c',
|
|
|
+ 'jdatasrc.c',
|
|
|
+ 'jdcoefct.c',
|
|
|
+ 'jdcolor.c',
|
|
|
+ 'jddctmgr.c',
|
|
|
++ 'jddiffct.c',
|
|
|
+ 'jdhuff.c',
|
|
|
+ 'jdicc.c',
|
|
|
+ 'jdinput.c',
|
|
|
++ 'jdlhuff.c',
|
|
|
++ 'jdlossls.c',
|
|
|
+ 'jdmainct.c',
|
|
|
+ 'jdmarker.c',
|
|
|
+ 'jdmaster.c',
|
|
|
+ 'jdmerge.c',
|
|
|
+ 'jdphuff.c',
|
|
|
+ 'jdpostct.c',
|
|
|
+ 'jdsample.c',
|
|
|
+ 'jdtrans.c',
|
|
|
+@@ -41,31 +60,35 @@ SOURCES += [
|
|
|
+ 'jfdctfst.c',
|
|
|
+ 'jfdctint.c',
|
|
|
+ 'jidctflt.c',
|
|
|
+ 'jidctfst.c',
|
|
|
+ 'jidctint.c',
|
|
|
+ 'jidctred.c',
|
|
|
+ 'jmemmgr.c',
|
|
|
+ 'jmemnobs.c',
|
|
|
++ 'jpeg_nbits.c',
|
|
|
+ 'jquant1.c',
|
|
|
+ 'jquant2.c',
|
|
|
+ 'jutils.c',
|
|
|
+ ]
|
|
|
+
|
|
|
+ # These files enable support for writing JPEGs
|
|
|
+ SOURCES += [
|
|
|
+ 'jcapimin.c',
|
|
|
+ 'jcapistd.c',
|
|
|
+ 'jccoefct.c',
|
|
|
+ 'jccolor.c',
|
|
|
+ 'jcdctmgr.c',
|
|
|
++ 'jcdiffct.c',
|
|
|
+ 'jchuff.c',
|
|
|
+ 'jcicc.c',
|
|
|
+ 'jcinit.c',
|
|
|
++ 'jclhuff.c',
|
|
|
++ 'jclossls.c',
|
|
|
+ 'jcmainct.c',
|
|
|
+ 'jcmarker.c',
|
|
|
+ 'jcmaster.c',
|
|
|
+ 'jcparam.c',
|
|
|
+ 'jcphuff.c',
|
|
|
+ 'jcprepct.c',
|
|
|
+ 'jcsample.c',
|
|
|
+ 'jctrans.c',
|
|
|
+@@ -269,20 +292,16 @@ if CONFIG['LIBJPEG_TURBO_SIMD_FLAGS']:
|
|
|
+ 'simd/i386/jquant-mmx.asm',
|
|
|
+ 'simd/i386/jquant-sse.asm',
|
|
|
+ 'simd/i386/jquantf-sse2.asm',
|
|
|
+ 'simd/i386/jquanti-avx2.asm',
|
|
|
+ 'simd/i386/jquanti-sse2.asm',
|
|
|
+ 'simd/i386/jsimd.c',
|
|
|
+ 'simd/i386/jsimdcpu.asm',
|
|
|
+ ]
|
|
|
+-else: # No SIMD support?
|
|
|
+- SOURCES += [
|
|
|
+- 'jsimd_none.c',
|
|
|
+- ]
|
|
|
+
|
|
|
+ ASFLAGS += CONFIG['LIBJPEG_TURBO_SIMD_FLAGS']
|
|
|
+
|
|
|
+ # Make sure the x86 & x86-64 ASM files can see the necessary includes.
|
|
|
+ if CONFIG['CPU_ARCH'] == 'x86':
|
|
|
+ ASFLAGS += ['-I%s/media/libjpeg/simd/nasm/' % TOPSRCDIR]
|
|
|
+ ASFLAGS += ['-I%s/media/libjpeg/simd/i386/' % TOPSRCDIR]
|
|
|
+ if CONFIG['CPU_ARCH'] == 'x86_64':
|
|
|
+diff --git a/media/libjpeg/mozilla.diff b/media/libjpeg/mozilla.diff
|
|
|
+--- a/media/libjpeg/mozilla.diff
|
|
|
++++ b/media/libjpeg/mozilla.diff
|
|
|
+@@ -1,49 +1,65 @@
|
|
|
+-diff --git jmorecfg.h jmorecfg.h
|
|
|
+---- jmorecfg.h
|
|
|
+-+++ jmorecfg.h
|
|
|
+-@@ -13,8 +13,9 @@
|
|
|
++diff --git a/media/libjpeg/jmorecfg.h b/media/libjpeg/jmorecfg.h
|
|
|
++--- a/media/libjpeg/jmorecfg.h
|
|
|
+++++ b/media/libjpeg/jmorecfg.h
|
|
|
++@@ -9,16 +9,17 @@
|
|
|
++ * For conditions of distribution and use, see the accompanying README.ijg
|
|
|
++ * file.
|
|
|
++ *
|
|
|
++ * This file contains additional configuration options that customize the
|
|
|
+ * JPEG software for special applications or support machine-dependent
|
|
|
+ * optimizations. Most users will not need to touch this file.
|
|
|
+ */
|
|
|
+
|
|
|
+ +#include <stdint.h>
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Maximum number of components (color channels) allowed in JPEG image.
|
|
|
+ * To meet the letter of Rec. ITU-T T.81 | ISO/IEC 10918-1, set this to 255.
|
|
|
+-@@ -95,23 +96,17 @@ typedef unsigned char JOCTET;
|
|
|
++ * However, darn few applications need more than 4 channels (maybe 5 for CMYK +
|
|
|
++ * alpha mask). We recommend 10 as a reasonable compromise; use 4 if you are
|
|
|
++ * really short on memory. (Each allowed component costs a hundred or so
|
|
|
++ * bytes of storage, whether actually used in an image or not.)
|
|
|
++@@ -91,27 +92,25 @@ typedef unsigned char JOCTET;
|
|
|
++ * They must be at least as wide as specified; but making them too big
|
|
|
++ * won't cost a huge amount of memory, so we don't provide special
|
|
|
++ * extraction code like we did for JSAMPLE. (In other words, these
|
|
|
++ * typedefs live at a different point on the speed/space tradeoff curve.)
|
|
|
+ */
|
|
|
+
|
|
|
+ /* UINT8 must hold at least the values 0..255. */
|
|
|
+
|
|
|
+ -typedef unsigned char UINT8;
|
|
|
+ +typedef uint8_t UINT8;
|
|
|
+
|
|
|
+ /* UINT16 must hold at least the values 0..65535. */
|
|
|
+
|
|
|
+--#ifdef HAVE_UNSIGNED_SHORT
|
|
|
+ -typedef unsigned short UINT16;
|
|
|
+--#else /* not HAVE_UNSIGNED_SHORT */
|
|
|
+--typedef unsigned int UINT16;
|
|
|
+--#endif /* HAVE_UNSIGNED_SHORT */
|
|
|
+ +typedef uint16_t UINT16;
|
|
|
+
|
|
|
+ /* INT16 must hold at least the values -32768..32767. */
|
|
|
+
|
|
|
+ -#ifndef XMD_H /* X11/xmd.h correctly defines INT16 */
|
|
|
+ -typedef short INT16;
|
|
|
+ -#endif
|
|
|
+ +typedef int16_t INT16;
|
|
|
+
|
|
|
+ /* INT32 must hold at least signed 32-bit values.
|
|
|
+ *
|
|
|
+ * NOTE: The INT32 typedef dates back to libjpeg v5 (1994.) Integers were
|
|
|
+-@@ -136,17 +131,9 @@ typedef short INT16;
|
|
|
++ * sometimes 16-bit back then (MS-DOS), which is why INT32 is typedef'd to
|
|
|
++ * long. It also wasn't common (or at least as common) in 1994 for INT32 to be
|
|
|
++ * defined by platform headers. Since then, however, INT32 is defined in
|
|
|
++ * several other common places:
|
|
|
++@@ -128,25 +127,17 @@ typedef short INT16;
|
|
|
++ * This is a recipe for conflict, since "long" and "int" aren't always
|
|
|
++ * compatible types. Since the definition of INT32 has technically been part
|
|
|
++ * of the libjpeg API for more than 20 years, we can't remove it, but we do not
|
|
|
++ * use it internally any longer. We instead define a separate type (JLONG)
|
|
|
+ * for internal use, which ensures that internal behavior will always be the
|
|
|
+ * same regardless of any external headers that may be included.
|
|
|
+ */
|
|
|
+
|
|
|
+ -#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */
|
|
|
+ -#ifndef _BASETSD_H_ /* Microsoft defines it in basetsd.h */
|
|
|
+ -#ifndef _BASETSD_H /* MinGW is slightly different */
|
|
|
+ -#ifndef QGLOBAL_H /* Qt defines it in qglobal.h */
|
|
|
+@@ -52,8 +68,12 @@ diff --git jmorecfg.h jmorecfg.h
|
|
|
+ -#endif
|
|
|
+ -#endif
|
|
|
+ -#endif
|
|
|
+ +typedef int32_t INT32;
|
|
|
+
|
|
|
+ /* Datatype used for image dimensions. The JPEG standard only supports
|
|
|
+ * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore
|
|
|
+ * "unsigned int" is sufficient on all machines. However, if you need to
|
|
|
++ * handle larger images and you don't mind deviating from the spec, you
|
|
|
++ * can change this datatype. (Note that changing this datatype will
|
|
|
++ * potentially require modifying the SIMD code. The x86-64 SIMD extensions,
|
|
|
++ * in particular, assume a 32-bit JDIMENSION.)
|
|
|
+diff --git a/media/libjpeg/simd/i386/jccolext-avx2.asm b/media/libjpeg/simd/i386/jccolext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jccolext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jccolext-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jccolext.asm - colorspace conversion (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [img_width(eax)]
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPIMAGE [output_buf(eax)]
|
|
|
+@@ -75,34 +75,34 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
+ lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+- pushpic eax
|
|
|
++ PUSHPIC eax
|
|
|
+ push edx
|
|
|
+ push ebx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr0
|
|
|
+ mov ebx, JSAMPROW [ebx] ; outptr1
|
|
|
+ mov edx, JSAMPROW [edx] ; outptr2
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD
|
|
|
+ jae near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ %if RGB_PIXELSIZE == 3 ; ---------------
|
|
|
+
|
|
|
+ .column_ld1:
|
|
|
+ push eax
|
|
|
+ push edx
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
|
+ test cl, SIZEOF_BYTE
|
|
|
+@@ -149,17 +149,17 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
+ .column_ld64:
|
|
|
+ test cl, 2*SIZEOF_YMMWORD
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jz short .rgb_ycc_cnv
|
|
|
+ vmovdqa ymmB, ymmA
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ jmp short .rgb_ycc_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmB, YMMWORD [esi+2*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ .rgb_ycc_cnv:
|
|
|
+ ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
|
|
|
+@@ -273,17 +273,17 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
+ test cl, SIZEOF_XMMWORD
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jz short .rgb_ycc_cnv
|
|
|
+ vmovdqa ymmE, ymmA
|
|
|
+ vmovdqa ymmH, ymmF
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ jmp short .rgb_ycc_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmE, YMMWORD [esi+2*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmH, YMMWORD [esi+3*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ .rgb_ycc_cnv:
|
|
|
+@@ -547,17 +547,17 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
+ test ecx, ecx
|
|
|
+ jnz near .column_ld1
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+ pop ebx
|
|
|
+ pop edx
|
|
|
+- poppic eax
|
|
|
++ POPPIC eax
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add edi, byte SIZEOF_JSAMPROW
|
|
|
+ add ebx, byte SIZEOF_JSAMPROW
|
|
|
+ add edx, byte SIZEOF_JSAMPROW
|
|
|
+ dec eax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jccolext-mmx.asm b/media/libjpeg/simd/i386/jccolext-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jccolext-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jccolext-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jccolext.asm - colorspace conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [img_width(eax)] ; num_cols
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPIMAGE [output_buf(eax)]
|
|
|
+@@ -75,34 +75,34 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|
|
+ lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+- pushpic eax
|
|
|
++ PUSHPIC eax
|
|
|
+ push edx
|
|
|
+ push ebx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr0
|
|
|
+ mov ebx, JSAMPROW [ebx] ; outptr1
|
|
|
+ mov edx, JSAMPROW [edx] ; outptr2
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_MMWORD
|
|
|
+ jae short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ %if RGB_PIXELSIZE == 3 ; ---------------
|
|
|
+
|
|
|
+ .column_ld1:
|
|
|
+ push eax
|
|
|
+ push edx
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
|
+ test cl, SIZEOF_BYTE
|
|
|
+@@ -138,17 +138,17 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|
|
+ .column_ld16:
|
|
|
+ test cl, 2*SIZEOF_MMWORD
|
|
|
+ mov ecx, SIZEOF_MMWORD
|
|
|
+ jz short .rgb_ycc_cnv
|
|
|
+ movq mmF, mmA
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ jmp short .rgb_ycc_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ movq mmF, MMWORD [esi+2*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ .rgb_ycc_cnv:
|
|
|
+ ; mmA=(00 10 20 01 11 21 02 12)
|
|
|
+@@ -206,17 +206,17 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|
|
+ test cl, SIZEOF_MMWORD/2
|
|
|
+ mov ecx, SIZEOF_MMWORD
|
|
|
+ jz short .rgb_ycc_cnv
|
|
|
+ movq mmD, mmA
|
|
|
+ movq mmC, mmF
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ jmp short .rgb_ycc_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ movq mmD, MMWORD [esi+2*SIZEOF_MMWORD]
|
|
|
+ movq mmC, MMWORD [esi+3*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ .rgb_ycc_cnv:
|
|
|
+@@ -444,17 +444,17 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
|
|
|
+ test ecx, ecx
|
|
|
+ jnz near .column_ld1
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+ pop ebx
|
|
|
+ pop edx
|
|
|
+- poppic eax
|
|
|
++ POPPIC eax
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add edi, byte SIZEOF_JSAMPROW
|
|
|
+ add ebx, byte SIZEOF_JSAMPROW
|
|
|
+ add edx, byte SIZEOF_JSAMPROW
|
|
|
+ dec eax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jccolext-sse2.asm b/media/libjpeg/simd/i386/jccolext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jccolext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jccolext-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jccolext.asm - colorspace conversion (SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -43,25 +43,25 @@
|
|
|
+ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [img_width(eax)]
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPIMAGE [output_buf(eax)]
|
|
|
+@@ -74,34 +74,34 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
+ lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+- pushpic eax
|
|
|
++ PUSHPIC eax
|
|
|
+ push edx
|
|
|
+ push ebx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr0
|
|
|
+ mov ebx, JSAMPROW [ebx] ; outptr1
|
|
|
+ mov edx, JSAMPROW [edx] ; outptr2
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD
|
|
|
+ jae near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ %if RGB_PIXELSIZE == 3 ; ---------------
|
|
|
+
|
|
|
+ .column_ld1:
|
|
|
+ push eax
|
|
|
+ push edx
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
|
+ test cl, SIZEOF_BYTE
|
|
|
+@@ -142,17 +142,17 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
+ .column_ld32:
|
|
|
+ test cl, 2*SIZEOF_XMMWORD
|
|
|
+ mov ecx, SIZEOF_XMMWORD
|
|
|
+ jz short .rgb_ycc_cnv
|
|
|
+ movdqa xmmB, xmmA
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ jmp short .rgb_ycc_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ .rgb_ycc_cnv:
|
|
|
+ ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
|
|
|
+@@ -227,17 +227,17 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
+ test cl, SIZEOF_XMMWORD/2
|
|
|
+ mov ecx, SIZEOF_XMMWORD
|
|
|
+ jz short .rgb_ycc_cnv
|
|
|
+ movdqa xmmF, xmmA
|
|
|
+ movdqa xmmH, xmmE
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ jmp short .rgb_ycc_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ .rgb_ycc_cnv:
|
|
|
+@@ -473,17 +473,17 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
+ test ecx, ecx
|
|
|
+ jnz near .column_ld1
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+ pop ebx
|
|
|
+ pop edx
|
|
|
+- poppic eax
|
|
|
++ POPPIC eax
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add edi, byte SIZEOF_JSAMPROW
|
|
|
+ add ebx, byte SIZEOF_JSAMPROW
|
|
|
+ add edx, byte SIZEOF_JSAMPROW
|
|
|
+ dec eax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jccolor-avx2.asm b/media/libjpeg/simd/i386/jccolor-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jccolor-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jccolor-avx2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jccolor.asm - colorspace conversion (AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -28,30 +28,30 @@ F_0_299 equ 19595 ; FIX(0
|
|
|
+ F_0_331 equ 21709 ; FIX(0.33126)
|
|
|
+ F_0_418 equ 27439 ; FIX(0.41869)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_ycc_convert_avx2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 8 dw F_0_114, F_0_250
|
|
|
+ PW_MF016_MF033 times 8 dw -F_0_168, -F_0_331
|
|
|
+ PW_MF008_MF041 times 8 dw -F_0_081, -F_0_418
|
|
|
+ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
|
|
|
+ (CENTERJSAMPLE << SCALEBITS)
|
|
|
+ PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jccolext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jccolor-mmx.asm b/media/libjpeg/simd/i386/jccolor-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jccolor-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jccolor-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jccolor.asm - colorspace conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -28,30 +28,30 @@ F_0_299 equ 19595 ; FIX(0
|
|
|
+ F_0_331 equ 21709 ; FIX(0.33126)
|
|
|
+ F_0_418 equ 27439 ; FIX(0.41869)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_ycc_convert_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_ycc_convert_mmx):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 2 dw F_0_114, F_0_250
|
|
|
+ PW_MF016_MF033 times 2 dw -F_0_168, -F_0_331
|
|
|
+ PW_MF008_MF041 times 2 dw -F_0_081, -F_0_418
|
|
|
+ PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS - 1)) - 1 + \
|
|
|
+ (CENTERJSAMPLE << SCALEBITS)
|
|
|
+ PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jccolext-mmx.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jccolor-sse2.asm b/media/libjpeg/simd/i386/jccolor-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jccolor-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jccolor-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jccolor.asm - colorspace conversion (SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -27,30 +27,30 @@ F_0_299 equ 19595 ; FIX(0
|
|
|
+ F_0_331 equ 21709 ; FIX(0.33126)
|
|
|
+ F_0_418 equ 27439 ; FIX(0.41869)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_ycc_convert_sse2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 4 dw F_0_114, F_0_250
|
|
|
+ PW_MF016_MF033 times 4 dw -F_0_168, -F_0_331
|
|
|
+ PW_MF008_MF041 times 4 dw -F_0_081, -F_0_418
|
|
|
+ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
|
|
|
+ (CENTERJSAMPLE << SCALEBITS)
|
|
|
+ PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jccolext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcgray-avx2.asm b/media/libjpeg/simd/i386/jcgray-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcgray-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcgray-avx2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jcgray.asm - grayscale colorspace conversion (AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -24,26 +24,26 @@ F_0_114 equ 7471 ; FIX(0
|
|
|
+ F_0_250 equ 16384 ; FIX(0.25000)
|
|
|
+ F_0_299 equ 19595 ; FIX(0.29900)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_gray_convert_avx2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 8 dw F_0_114, F_0_250
|
|
|
+ PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jcgryext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcgray-mmx.asm b/media/libjpeg/simd/i386/jcgray-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcgray-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcgray-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jcgray.asm - grayscale colorspace conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -24,26 +24,26 @@ F_0_114 equ 7471 ; FIX(0
|
|
|
+ F_0_250 equ 16384 ; FIX(0.25000)
|
|
|
+ F_0_299 equ 19595 ; FIX(0.29900)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_gray_convert_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_gray_convert_mmx):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 2 dw F_0_114, F_0_250
|
|
|
+ PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jcgryext-mmx.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcgray-sse2.asm b/media/libjpeg/simd/i386/jcgray-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcgray-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcgray-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jcgray.asm - grayscale colorspace conversion (SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -23,26 +23,26 @@ F_0_114 equ 7471 ; FIX(0
|
|
|
+ F_0_250 equ 16384 ; FIX(0.25000)
|
|
|
+ F_0_299 equ 19595 ; FIX(0.29900)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_gray_convert_sse2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 4 dw F_0_114, F_0_250
|
|
|
+ PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jcgryext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcgryext-avx2.asm b/media/libjpeg/simd/i386/jcgryext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcgryext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcgryext-avx2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jcgryext.asm - grayscale colorspace conversion (AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [img_width(eax)]
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPIMAGE [output_buf(eax)]
|
|
|
+@@ -71,30 +71,30 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
+ lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+- pushpic eax
|
|
|
++ PUSHPIC eax
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr0
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD
|
|
|
+ jae near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ %if RGB_PIXELSIZE == 3 ; ---------------
|
|
|
+
|
|
|
+ .column_ld1:
|
|
|
+ push eax
|
|
|
+ push edx
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
|
+ test cl, SIZEOF_BYTE
|
|
|
+@@ -141,17 +141,17 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
+ .column_ld64:
|
|
|
+ test cl, 2*SIZEOF_YMMWORD
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jz short .rgb_gray_cnv
|
|
|
+ vmovdqa ymmB, ymmA
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ jmp short .rgb_gray_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmB, YMMWORD [esi+2*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ .rgb_gray_cnv:
|
|
|
+ ; ymmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05
|
|
|
+@@ -265,17 +265,17 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
+ test cl, SIZEOF_XMMWORD
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jz short .rgb_gray_cnv
|
|
|
+ vmovdqa ymmE, ymmA
|
|
|
+ vmovdqa ymmH, ymmF
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ jmp short .rgb_gray_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmE, YMMWORD [esi+2*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymmH, YMMWORD [esi+3*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ .rgb_gray_cnv:
|
|
|
+@@ -428,17 +428,17 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD
|
|
|
+ jae near .columnloop
|
|
|
+ test ecx, ecx
|
|
|
+ jnz near .column_ld1
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+- poppic eax
|
|
|
++ POPPIC eax
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add edi, byte SIZEOF_JSAMPROW
|
|
|
+ dec eax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ vzeroupper
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcgryext-mmx.asm b/media/libjpeg/simd/i386/jcgryext-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcgryext-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcgryext-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jcgryext.asm - grayscale colorspace conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_rgb_gray_convert_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [img_width(eax)] ; num_cols
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPIMAGE [output_buf(eax)]
|
|
|
+@@ -71,30 +71,30 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|
|
+ lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+- pushpic eax
|
|
|
++ PUSHPIC eax
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr0
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_MMWORD
|
|
|
+ jae short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ %if RGB_PIXELSIZE == 3 ; ---------------
|
|
|
+
|
|
|
+ .column_ld1:
|
|
|
+ push eax
|
|
|
+ push edx
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
|
+ test cl, SIZEOF_BYTE
|
|
|
+@@ -130,17 +130,17 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|
|
+ .column_ld16:
|
|
|
+ test cl, 2*SIZEOF_MMWORD
|
|
|
+ mov ecx, SIZEOF_MMWORD
|
|
|
+ jz short .rgb_gray_cnv
|
|
|
+ movq mmF, mmA
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ jmp short .rgb_gray_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ movq mmF, MMWORD [esi+2*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ .rgb_gray_cnv:
|
|
|
+ ; mmA=(00 10 20 01 11 21 02 12)
|
|
|
+@@ -198,17 +198,17 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|
|
+ test cl, SIZEOF_MMWORD/2
|
|
|
+ mov ecx, SIZEOF_MMWORD
|
|
|
+ jz short .rgb_gray_cnv
|
|
|
+ movq mmD, mmA
|
|
|
+ movq mmC, mmF
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ jmp short .rgb_gray_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ movq mmD, MMWORD [esi+2*SIZEOF_MMWORD]
|
|
|
+ movq mmC, MMWORD [esi+3*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ .rgb_gray_cnv:
|
|
|
+@@ -325,17 +325,17 @@ EXTN(jsimd_rgb_gray_convert_mmx):
|
|
|
+ cmp ecx, byte SIZEOF_MMWORD
|
|
|
+ jae near .columnloop
|
|
|
+ test ecx, ecx
|
|
|
+ jnz near .column_ld1
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+- poppic eax
|
|
|
++ POPPIC eax
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add edi, byte SIZEOF_JSAMPROW
|
|
|
+ dec eax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcgryext-sse2.asm b/media/libjpeg/simd/i386/jcgryext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcgryext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcgryext-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jcgryext.asm - grayscale colorspace conversion (SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -43,25 +43,25 @@
|
|
|
+ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [img_width(eax)]
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPIMAGE [output_buf(eax)]
|
|
|
+@@ -70,30 +70,30 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
+ lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+- pushpic eax
|
|
|
++ PUSHPIC eax
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr0
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD
|
|
|
+ jae near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ %if RGB_PIXELSIZE == 3 ; ---------------
|
|
|
+
|
|
|
+ .column_ld1:
|
|
|
+ push eax
|
|
|
+ push edx
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
|
+ test cl, SIZEOF_BYTE
|
|
|
+@@ -134,17 +134,17 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
+ .column_ld32:
|
|
|
+ test cl, 2*SIZEOF_XMMWORD
|
|
|
+ mov ecx, SIZEOF_XMMWORD
|
|
|
+ jz short .rgb_gray_cnv
|
|
|
+ movdqa xmmB, xmmA
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ jmp short .rgb_gray_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ .rgb_gray_cnv:
|
|
|
+ ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
|
|
|
+@@ -219,17 +219,17 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
+ test cl, SIZEOF_XMMWORD/2
|
|
|
+ mov ecx, SIZEOF_XMMWORD
|
|
|
+ jz short .rgb_gray_cnv
|
|
|
+ movdqa xmmF, xmmA
|
|
|
+ movdqa xmmH, xmmE
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ jmp short .rgb_gray_cnv
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
|
|
+ movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ .rgb_gray_cnv:
|
|
|
+@@ -354,17 +354,17 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD
|
|
|
+ jae near .columnloop
|
|
|
+ test ecx, ecx
|
|
|
+ jnz near .column_ld1
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+- poppic eax
|
|
|
++ POPPIC eax
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add edi, byte SIZEOF_JSAMPROW
|
|
|
+ dec eax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop edi
|
|
|
+diff --git a/media/libjpeg/simd/i386/jchuff-sse2.asm b/media/libjpeg/simd/i386/jchuff-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jchuff-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jchuff-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jchuff-sse2.asm - Huffman entropy encoding (SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009-2011, 2014-2017, 2019, D. R. Commander.
|
|
|
++; Copyright (C) 2009-2011, 2014-2017, 2019, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Matthieu Darbois.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+@@ -37,17 +37,17 @@ endstruc
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ GLOBAL_DATA(jconst_huff_encode_one_block)
|
|
|
+
|
|
|
+ EXTN(jconst_huff_encode_one_block):
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ jpeg_mask_bits dq 0x0000, 0x0001, 0x0003, 0x0007
|
|
|
+ dq 0x000f, 0x001f, 0x003f, 0x007f
|
|
|
+ dq 0x00ff, 0x01ff, 0x03ff, 0x07ff
|
|
|
+ dq 0x0fff, 0x1fff, 0x3fff, 0x7fff
|
|
|
+
|
|
|
+ times 1 << 14 db 15
|
|
|
+ times 1 << 13 db 14
|
|
|
+@@ -60,17 +60,18 @@ times 1 << 7 db 8
|
|
|
+ times 1 << 6 db 7
|
|
|
+ times 1 << 5 db 6
|
|
|
+ times 1 << 4 db 5
|
|
|
+ times 1 << 3 db 4
|
|
|
+ times 1 << 2 db 3
|
|
|
+ times 1 << 1 db 2
|
|
|
+ times 1 << 0 db 1
|
|
|
+ times 1 db 0
|
|
|
+-jpeg_nbits_table:
|
|
|
++GLOBAL_DATA(jpeg_nbits_table)
|
|
|
++EXTN(jpeg_nbits_table):
|
|
|
+ times 1 db 0
|
|
|
+ times 1 << 0 db 1
|
|
|
+ times 1 << 1 db 2
|
|
|
+ times 1 << 2 db 3
|
|
|
+ times 1 << 3 db 4
|
|
|
+ times 1 << 4 db 5
|
|
|
+ times 1 << 5 db 6
|
|
|
+ times 1 << 6 db 7
|
|
|
+@@ -78,24 +79,24 @@ times 1 << 7 db 8
|
|
|
+ times 1 << 8 db 9
|
|
|
+ times 1 << 9 db 10
|
|
|
+ times 1 << 10 db 11
|
|
|
+ times 1 << 11 db 12
|
|
|
+ times 1 << 12 db 13
|
|
|
+ times 1 << 13 db 14
|
|
|
+ times 1 << 14 db 15
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ %ifdef PIC
|
|
|
+ %define NBITS(x) nbits_base + x
|
|
|
+ %else
|
|
|
+-%define NBITS(x) jpeg_nbits_table + x
|
|
|
++%define NBITS(x) EXTN(jpeg_nbits_table) + x
|
|
|
+ %endif
|
|
|
+-%define MASK_BITS(x) NBITS((x) * 8) + (jpeg_mask_bits - jpeg_nbits_table)
|
|
|
++%define MASK_BITS(x) NBITS((x) * 8) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %define mm_put_buffer mm0
|
|
|
+ %define mm_all_0xff mm1
|
|
|
+ %define mm_temp mm2
|
|
|
+@@ -230,17 +231,17 @@ times 1 << 14 db 15
|
|
|
+
|
|
|
+ %macro POP 1
|
|
|
+ pop %1
|
|
|
+ %assign stack_offset stack_offset - 4
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; If PIC is defined, load the address of a symbol defined in this file into a
|
|
|
+ ; register. Equivalent to
|
|
|
+-; get_GOT %1
|
|
|
++; GET_GOT %1
|
|
|
+ ; lea %1, [GOTOFF(%1, %2)]
|
|
|
+ ; without using the GOT.
|
|
|
+ ;
|
|
|
+ ; Usage:
|
|
|
+ ; %1 - register into which to load the address of the symbol
|
|
|
+ ; %2 - symbol whose address should be loaded
|
|
|
+ ; %3 - optional multi-line macro to execute before the symbol address is loaded
|
|
|
+ ; %4 - optional multi-line macro to execute after the symbol address is loaded
|
|
|
+@@ -464,17 +465,17 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|
|
+ movaps XMMWORD [t + 56 * SIZEOF_WORD], xmm3 ;H: t[i+56] = w3[i];
|
|
|
+ movq xmm4, qword [block + 36 * SIZEOF_WORD] ;G: w4 = 36 37 38 39 -- -- -- --
|
|
|
+ pcmpeqw xmm3, xmm0 ;H: w3[i] = (w3[i] == 0 ? -1 : 0);
|
|
|
+ punpckldq xmm4, xmm1 ;G: w4 = 36 37 44 45 38 39 46 47
|
|
|
+ movdqa xmm1, xmm4 ;F: w1 = 36 37 44 45 38 39 46 47
|
|
|
+ pcmpeqw mm_all_0xff, mm_all_0xff ;Z: all_0xff[i] = 0xFF;
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+- GET_SYM nbits_base, jpeg_nbits_table, GET_SYM_BEFORE, GET_SYM_AFTER
|
|
|
++ GET_SYM nbits_base, EXTN(jpeg_nbits_table), GET_SYM_BEFORE, GET_SYM_AFTER
|
|
|
+
|
|
|
+ psrldq xmm4, 1 * SIZEOF_WORD ;G: w4 = 37 44 45 38 39 46 47 --
|
|
|
+ shufpd xmm1, xmm5, 10b ;F: w1 = 36 37 44 45 50 51 58 59
|
|
|
+ pshufhw xmm4, xmm4, 11010011b ;G: w4 = 37 44 45 38 -- 39 46 --
|
|
|
+ pslldq xmm1, 1 * SIZEOF_WORD ;F: w1 = -- 36 37 44 45 50 51 58
|
|
|
+ pinsrw xmm4, word [block + 59 * SIZEOF_WORD], 0 ;G: w4 = 59 44 45 38 -- 39 46 --
|
|
|
+ pshufd xmm1, xmm1, 11011000b ;F: w1 = -- 36 45 50 37 44 51 58
|
|
|
+ cmp code_temp, 1 << 31 ;Z: Set CF if code_temp < 0x80000000,
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcsample-avx2.asm b/media/libjpeg/simd/i386/jcsample-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcsample-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcsample-avx2.asm
|
|
|
+@@ -1,14 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jcsample.asm - downsampling (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -65,17 +65,17 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ mov eax, INT [max_v_samp(ebp)]
|
|
|
+ test eax, eax
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ cld
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .expandloop:
|
|
|
+ push eax
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPROW [esi]
|
|
|
+ add edi, edx
|
|
|
+ mov al, JSAMPLE [edi-1]
|
|
|
+
|
|
|
+@@ -101,28 +101,28 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
|
|
+ vmovd xmm7, edx
|
|
|
+ vpshufd xmm7, xmm7, 0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
|
|
|
+ vperm2i128 ymm7, ymm7, ymm7, 0 ; ymm7={xmm7, xmm7}
|
|
|
+ vpcmpeqw ymm6, ymm6, ymm6
|
|
|
+ vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD
|
|
|
+ jae short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_r24:
|
|
|
+ ; ecx can possibly be 8, 16, 24
|
|
|
+ cmp ecx, 24
|
|
|
+ jne .columnloop_r16
|
|
|
+ vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu xmm1, XMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+@@ -136,17 +136,17 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jmp short .downsample
|
|
|
+
|
|
|
+ .columnloop_r8:
|
|
|
+ vmovdqu xmm0, XMMWORD[esi+0*SIZEOF_YMMWORD]
|
|
|
+ vpxor ymm1, ymm1, ymm1
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jmp short .downsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymm1, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ .downsample:
|
|
|
+ vpsrlw ymm2, ymm0, BYTE_BIT
|
|
|
+ vpand ymm0, ymm0, ymm6
|
|
|
+@@ -238,17 +238,17 @@ EXTN(jsimd_h2v2_downsample_avx2):
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ mov eax, INT [max_v_samp(ebp)]
|
|
|
+ test eax, eax
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ cld
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .expandloop:
|
|
|
+ push eax
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPROW [esi]
|
|
|
+ add edi, edx
|
|
|
+ mov al, JSAMPLE [edi-1]
|
|
|
+
|
|
|
+@@ -274,29 +274,29 @@ EXTN(jsimd_h2v2_downsample_avx2):
|
|
|
+ vmovd xmm7, edx
|
|
|
+ vpcmpeqw ymm6, ymm6, ymm6
|
|
|
+ vpshufd xmm7, xmm7, 0x00 ; ymm7={1, 2, 1, 2, 1, 2, 1, 2}
|
|
|
+ vperm2i128 ymm7, ymm7, ymm7, 0
|
|
|
+ vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+ mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD
|
|
|
+ jae short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_r24:
|
|
|
+ cmp ecx, 24
|
|
|
+ jne .columnloop_r16
|
|
|
+ vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu xmm2, XMMWORD [edx+1*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu xmm3, XMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+@@ -315,17 +315,17 @@ EXTN(jsimd_h2v2_downsample_avx2):
|
|
|
+
|
|
|
+ .columnloop_r8:
|
|
|
+ vmovdqu xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
|
|
|
+ vmovdqu xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ vpxor ymm2, ymm2, ymm2
|
|
|
+ vpxor ymm3, ymm3, ymm3
|
|
|
+ mov ecx, SIZEOF_YMMWORD
|
|
|
+ jmp short .downsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymm2, YMMWORD [edx+1*SIZEOF_YMMWORD]
|
|
|
+ vmovdqu ymm3, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ .downsample:
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcsample-mmx.asm b/media/libjpeg/simd/i386/jcsample-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcsample-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcsample-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jcsample.asm - downsampling (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -64,17 +64,17 @@ EXTN(jsimd_h2v1_downsample_mmx):
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ mov eax, INT [max_v_samp(ebp)]
|
|
|
+ test eax, eax
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ cld
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .expandloop:
|
|
|
+ push eax
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPROW [esi]
|
|
|
+ add edi, edx
|
|
|
+ mov al, JSAMPLE [edi-1]
|
|
|
+
|
|
|
+@@ -99,25 +99,25 @@ EXTN(jsimd_h2v1_downsample_mmx):
|
|
|
+ mov edx, 0x00010000 ; bias pattern
|
|
|
+ movd mm7, edx
|
|
|
+ pcmpeqw mm6, mm6
|
|
|
+ punpckldq mm7, mm7 ; mm7={0, 1, 0, 1}
|
|
|
+ psrlw mm6, BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mm1, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ movq mm2, mm0
|
|
|
+ movq mm3, mm1
|
|
|
+
|
|
|
+ pand mm0, mm6
|
|
|
+@@ -207,17 +207,17 @@ EXTN(jsimd_h2v2_downsample_mmx):
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ mov eax, INT [max_v_samp(ebp)]
|
|
|
+ test eax, eax
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ cld
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .expandloop:
|
|
|
+ push eax
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPROW [esi]
|
|
|
+ add edi, edx
|
|
|
+ mov al, JSAMPLE [edi-1]
|
|
|
+
|
|
|
+@@ -242,26 +242,26 @@ EXTN(jsimd_h2v2_downsample_mmx):
|
|
|
+ mov edx, 0x00020001 ; bias pattern
|
|
|
+ movd mm7, edx
|
|
|
+ pcmpeqw mm6, mm6
|
|
|
+ punpckldq mm7, mm7 ; mm7={1, 2, 1, 2}
|
|
|
+ psrlw mm6, BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+ mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [edx+0*SIZEOF_MMWORD]
|
|
|
+ movq mm1, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mm2, MMWORD [edx+1*SIZEOF_MMWORD]
|
|
|
+ movq mm3, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ movq mm4, mm0
|
|
|
+diff --git a/media/libjpeg/simd/i386/jcsample-sse2.asm b/media/libjpeg/simd/i386/jcsample-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jcsample-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jcsample-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jcsample.asm - downsampling (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -64,17 +64,17 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ mov eax, INT [max_v_samp(ebp)]
|
|
|
+ test eax, eax
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ cld
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .expandloop:
|
|
|
+ push eax
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPROW [esi]
|
|
|
+ add edi, edx
|
|
|
+ mov al, JSAMPLE [edi-1]
|
|
|
+
|
|
|
+@@ -99,35 +99,35 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
|
|
+ mov edx, 0x00010000 ; bias pattern
|
|
|
+ movd xmm7, edx
|
|
|
+ pcmpeqw xmm6, xmm6
|
|
|
+ pshufd xmm7, xmm7, 0x00 ; xmm7={0, 1, 0, 1, 0, 1, 0, 1}
|
|
|
+ psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD
|
|
|
+ jae short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_r8:
|
|
|
+ movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ pxor xmm1, xmm1
|
|
|
+ mov ecx, SIZEOF_XMMWORD
|
|
|
+ jmp short .downsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqa xmm1, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ .downsample:
|
|
|
+ movdqa xmm2, xmm0
|
|
|
+ movdqa xmm3, xmm1
|
|
|
+@@ -220,17 +220,17 @@ EXTN(jsimd_h2v2_downsample_sse2):
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ mov eax, INT [max_v_samp(ebp)]
|
|
|
+ test eax, eax
|
|
|
+ jle short .expand_end
|
|
|
+
|
|
|
+ cld
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .expandloop:
|
|
|
+ push eax
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPROW [esi]
|
|
|
+ add edi, edx
|
|
|
+ mov al, JSAMPLE [edi-1]
|
|
|
+
|
|
|
+@@ -255,38 +255,38 @@ EXTN(jsimd_h2v2_downsample_sse2):
|
|
|
+ mov edx, 0x00020001 ; bias pattern
|
|
|
+ movd xmm7, edx
|
|
|
+ pcmpeqw xmm6, xmm6
|
|
|
+ pshufd xmm7, xmm7, 0x00 ; xmm7={1, 2, 1, 2, 1, 2, 1, 2}
|
|
|
+ psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+ mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD
|
|
|
+ jae short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_r8:
|
|
|
+ movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
|
|
|
+ movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ pxor xmm2, xmm2
|
|
|
+ pxor xmm3, xmm3
|
|
|
+ mov ecx, SIZEOF_XMMWORD
|
|
|
+ jmp short .downsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
|
|
|
+ movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqa xmm2, XMMWORD [edx+1*SIZEOF_XMMWORD]
|
|
|
+ movdqa xmm3, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ .downsample:
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdcolext-avx2.asm b/media/libjpeg/simd/i386/jdcolext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdcolext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdcolext-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolext.asm - colorspace conversion (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -45,25 +45,25 @@
|
|
|
+ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPIMAGE [input_buf(eax)]
|
|
|
+@@ -76,31 +76,31 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
+ lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax
|
|
|
+ push edi
|
|
|
+ push edx
|
|
|
+ push ebx
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr0
|
|
|
+ mov ebx, JSAMPROW [ebx] ; inptr1
|
|
|
+ mov edx, JSAMPROW [edx] ; inptr2
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+- alignx 16, 7
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ vmovdqu ymm5, YMMWORD [ebx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
|
|
|
+ vmovdqu ymm1, YMMWORD [edx] ; ymm1=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
|
|
|
+
|
|
|
+ vpcmpeqw ymm0, ymm0, ymm0
|
|
|
+ vpcmpeqw ymm7, ymm7, ymm7
|
|
|
+ vpsrlw ymm0, ymm0, BYTE_BIT ; ymm0={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+@@ -290,17 +290,17 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr
|
|
|
+ sub ecx, byte SIZEOF_YMMWORD
|
|
|
+ jz near .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_YMMWORD ; inptr0
|
|
|
+ add ebx, byte SIZEOF_YMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_YMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st64:
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
|
+ cmp ecx, byte 2*SIZEOF_YMMWORD
|
|
|
+ jb short .column_st32
|
|
|
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD
|
|
|
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
|
|
|
+@@ -431,17 +431,17 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
+ add edi, RGB_PIXELSIZE*SIZEOF_YMMWORD ; outptr
|
|
|
+ sub ecx, byte SIZEOF_YMMWORD
|
|
|
+ jz near .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_YMMWORD ; inptr0
|
|
|
+ add ebx, byte SIZEOF_YMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_YMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st64:
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD/2
|
|
|
+ jb short .column_st32
|
|
|
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD
|
|
|
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
|
|
|
+ vmovdqa ymmA, ymmC
|
|
|
+@@ -474,17 +474,17 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
+ ; Store one pixel (4 bytes) of ymmA to the output when it has enough
|
|
|
+ ; space.
|
|
|
+ test ecx, ecx
|
|
|
+ jz short .nextrow
|
|
|
+ vmovd XMM_DWORD [edi], xmmA
|
|
|
+
|
|
|
+ %endif ; RGB_PIXELSIZE ; ---------------
|
|
|
+
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop ecx
|
|
|
+ pop esi
|
|
|
+ pop ebx
|
|
|
+ pop edx
|
|
|
+ pop edi
|
|
|
+ pop eax
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdcolext-mmx.asm b/media/libjpeg/simd/i386/jdcolext-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdcolext-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdcolext-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolext.asm - colorspace conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPIMAGE [input_buf(eax)]
|
|
|
+@@ -75,31 +75,31 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|
|
+ lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax
|
|
|
+ push edi
|
|
|
+ push edx
|
|
|
+ push ebx
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr0
|
|
|
+ mov ebx, JSAMPROW [ebx] ; inptr1
|
|
|
+ mov edx, JSAMPROW [edx] ; inptr2
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+- alignx 16, 7
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
|
|
|
+ movq mm1, MMWORD [edx] ; mm1=Cr(01234567)
|
|
|
+
|
|
|
+ pcmpeqw mm4, mm4
|
|
|
+ pcmpeqw mm7, mm7
|
|
|
+ psrlw mm4, BYTE_BIT
|
|
|
+@@ -250,17 +250,17 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|
|
+ sub ecx, byte SIZEOF_MMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_MMWORD ; inptr0
|
|
|
+ add ebx, byte SIZEOF_MMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_MMWORD ; inptr2
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st16:
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
|
+ cmp ecx, byte 2*SIZEOF_MMWORD
|
|
|
+ jb short .column_st8
|
|
|
+ movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
|
+ movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
|
|
|
+ movq mmA, mmC
|
|
|
+@@ -339,17 +339,17 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|
|
+ sub ecx, byte SIZEOF_MMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_MMWORD ; inptr0
|
|
|
+ add ebx, byte SIZEOF_MMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_MMWORD ; inptr2
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st16:
|
|
|
+ cmp ecx, byte SIZEOF_MMWORD/2
|
|
|
+ jb short .column_st8
|
|
|
+ movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
|
+ movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
|
|
|
+ movq mmA, mmC
|
|
|
+ movq mmD, mmH
|
|
|
+@@ -364,17 +364,17 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
|
|
|
+ add edi, byte 1*SIZEOF_MMWORD
|
|
|
+ .column_st4:
|
|
|
+ cmp ecx, byte SIZEOF_MMWORD/8
|
|
|
+ jb short .nextrow
|
|
|
+ movd dword [edi+0*SIZEOF_DWORD], mmA
|
|
|
+
|
|
|
+ %endif ; RGB_PIXELSIZE ; ---------------
|
|
|
+
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop ecx
|
|
|
+ pop esi
|
|
|
+ pop ebx
|
|
|
+ pop edx
|
|
|
+ pop edi
|
|
|
+ pop eax
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdcolext-sse2.asm b/media/libjpeg/simd/i386/jdcolext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdcolext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdcolext-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolext.asm - colorspace conversion (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPIMAGE [input_buf(eax)]
|
|
|
+@@ -75,31 +75,31 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
+ lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
|
+
|
|
|
+ pop ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)]
|
|
|
+ mov eax, INT [num_rows(eax)]
|
|
|
+ test eax, eax
|
|
|
+ jle near .return
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax
|
|
|
+ push edi
|
|
|
+ push edx
|
|
|
+ push ebx
|
|
|
+ push esi
|
|
|
+ push ecx ; col
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr0
|
|
|
+ mov ebx, JSAMPROW [ebx] ; inptr1
|
|
|
+ mov edx, JSAMPROW [edx] ; inptr2
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+- alignx 16, 7
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
|
|
|
+ movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF)
|
|
|
+
|
|
|
+ pcmpeqw xmm4, xmm4
|
|
|
+ pcmpeqw xmm7, xmm7
|
|
|
+ psrlw xmm4, BYTE_BIT
|
|
|
+@@ -270,17 +270,17 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
|
|
+ sub ecx, byte SIZEOF_XMMWORD
|
|
|
+ jz near .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_XMMWORD ; inptr0
|
|
|
+ add ebx, byte SIZEOF_XMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_XMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st32:
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
|
+ cmp ecx, byte 2*SIZEOF_XMMWORD
|
|
|
+ jb short .column_st16
|
|
|
+ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
|
|
+ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
|
|
+ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
|
|
+@@ -382,17 +382,17 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
|
|
+ sub ecx, byte SIZEOF_XMMWORD
|
|
|
+ jz near .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_XMMWORD ; inptr0
|
|
|
+ add ebx, byte SIZEOF_XMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_XMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st32:
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD/2
|
|
|
+ jb short .column_st16
|
|
|
+ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
|
|
+ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
|
|
+ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
|
|
+ movdqa xmmA, xmmC
|
|
|
+@@ -418,17 +418,17 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
+ ; Store one pixel (4 bytes) of xmmA to the output when it has enough
|
|
|
+ ; space.
|
|
|
+ test ecx, ecx
|
|
|
+ jz short .nextrow
|
|
|
+ movd XMM_DWORD [edi], xmmA
|
|
|
+
|
|
|
+ %endif ; RGB_PIXELSIZE ; ---------------
|
|
|
+
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop ecx
|
|
|
+ pop esi
|
|
|
+ pop ebx
|
|
|
+ pop edx
|
|
|
+ pop edi
|
|
|
+ pop eax
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdcolor-avx2.asm b/media/libjpeg/simd/i386/jdcolor-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdcolor-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdcolor-avx2.asm
|
|
|
+@@ -1,14 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jdcolor.asm - colorspace conversion (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -27,28 +27,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_ycc_rgb_convert_avx2):
|
|
|
+
|
|
|
+ PW_F0402 times 16 dw F_0_402
|
|
|
+ PW_MF0228 times 16 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 16 dw 1
|
|
|
+ PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jdcolext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdcolor-mmx.asm b/media/libjpeg/simd/i386/jdcolor-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdcolor-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdcolor-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolor.asm - colorspace conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -26,28 +26,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_ycc_rgb_convert_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_ycc_rgb_convert_mmx):
|
|
|
+
|
|
|
+ PW_F0402 times 4 dw F_0_402
|
|
|
+ PW_MF0228 times 4 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 4 dw 1
|
|
|
+ PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jdcolext-mmx.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdcolor-sse2.asm b/media/libjpeg/simd/i386/jdcolor-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdcolor-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdcolor-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolor.asm - colorspace conversion (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -26,28 +26,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_ycc_rgb_convert_sse2):
|
|
|
+
|
|
|
+ PW_F0402 times 8 dw F_0_402
|
|
|
+ PW_MF0228 times 8 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 8 dw 1
|
|
|
+ PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jdcolext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdmerge-avx2.asm b/media/libjpeg/simd/i386/jdmerge-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdmerge-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdmerge-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmerge.asm - merged upsampling/color conversion (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -27,28 +27,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_merged_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_merged_upsample_avx2):
|
|
|
+
|
|
|
+ PW_F0402 times 16 dw F_0_402
|
|
|
+ PW_MF0228 times 16 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 16 dw 1
|
|
|
+ PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jdmrgext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdmerge-mmx.asm b/media/libjpeg/simd/i386/jdmerge-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdmerge-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdmerge-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmerge.asm - merged upsampling/color conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -26,28 +26,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_merged_upsample_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_merged_upsample_mmx):
|
|
|
+
|
|
|
+ PW_F0402 times 4 dw F_0_402
|
|
|
+ PW_MF0228 times 4 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 4 dw 1
|
|
|
+ PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jdmrgext-mmx.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdmerge-sse2.asm b/media/libjpeg/simd/i386/jdmerge-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdmerge-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdmerge-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmerge.asm - merged upsampling/color conversion (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -26,28 +26,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_merged_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_merged_upsample_sse2):
|
|
|
+
|
|
|
+ PW_F0402 times 8 dw F_0_402
|
|
|
+ PW_MF0228 times 8 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 8 dw 1
|
|
|
+ PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+
|
|
|
+ %include "jdmrgext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdmrgext-avx2.asm b/media/libjpeg/simd/i386/jdmrgext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdmrgext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdmrgext-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmrgext.asm - merged upsampling/color conversion (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -45,25 +45,25 @@
|
|
|
+ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [output_width(eax)] ; col
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPIMAGE [input_buf(eax)]
|
|
|
+@@ -74,19 +74,19 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)]
|
|
|
+ mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+ mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1
|
|
|
+ mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ vmovdqu ymm6, YMMWORD [ebx] ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
|
|
|
+ vmovdqu ymm7, YMMWORD [edx] ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
|
|
|
+
|
|
|
+ vpxor ymm1, ymm1, ymm1 ; ymm1=(all 0's)
|
|
|
+ vpcmpeqw ymm3, ymm3, ymm3
|
|
|
+ vpsllw ymm3, ymm3, 7 ; ymm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
|
|
|
+
|
|
|
+@@ -163,23 +163,23 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+ vpackssdw ymm2, ymm2, ymm7 ; ymm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
|
|
|
+ vpsubw ymm5, ymm5, ymm1 ; ymm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
|
|
|
+ vpsubw ymm2, ymm2, ymm3 ; ymm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
|
|
|
+
|
|
|
+ vmovdqa YMMWORD [wk(2)], ymm5 ; wk(2)=(G-Y)H
|
|
|
+
|
|
|
+ mov al, 2 ; Yctr
|
|
|
+ jmp short .Yloop_1st
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .Yloop_2nd:
|
|
|
+ vmovdqa ymm0, YMMWORD [wk(1)] ; ymm0=(R-Y)H
|
|
|
+ vmovdqa ymm2, YMMWORD [wk(2)] ; ymm2=(G-Y)H
|
|
|
+ vmovdqa ymm4, YMMWORD [wk(0)] ; ymm4=(B-Y)H
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .Yloop_1st:
|
|
|
+ vmovdqu ymm7, YMMWORD [esi] ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
|
|
|
+
|
|
|
+ vpcmpeqw ymm6, ymm6, ymm6
|
|
|
+ vpsrlw ymm6, ymm6, BYTE_BIT ; ymm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+ vpand ymm6, ymm6, ymm7 ; ymm6=Y(02468ACEGIKMOQSU)=YE
|
|
|
+ vpsrlw ymm7, ymm7, BYTE_BIT ; ymm7=Y(13579BDFHJLNPRTV)=YO
|
|
|
+@@ -296,17 +296,17 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_YMMWORD ; inptr0
|
|
|
+ dec al ; Yctr
|
|
|
+ jnz near .Yloop_2nd
|
|
|
+
|
|
|
+ add ebx, byte SIZEOF_YMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_YMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st64:
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
|
+ cmp ecx, byte 2*SIZEOF_YMMWORD
|
|
|
+ jb short .column_st32
|
|
|
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD
|
|
|
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
|
|
|
+@@ -440,17 +440,17 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_YMMWORD ; inptr0
|
|
|
+ dec al
|
|
|
+ jnz near .Yloop_2nd
|
|
|
+
|
|
|
+ add ebx, byte SIZEOF_YMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_YMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st64:
|
|
|
+ cmp ecx, byte SIZEOF_YMMWORD/2
|
|
|
+ jb short .column_st32
|
|
|
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymmA
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymmD
|
|
|
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
|
|
|
+ vmovdqa ymmA, ymmC
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdmrgext-mmx.asm b/media/libjpeg/simd/i386/jdmrgext-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdmrgext-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdmrgext-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmrgext.asm - merged upsampling/color conversion (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -42,25 +42,25 @@
|
|
|
+ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [output_width(eax)] ; col
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPIMAGE [input_buf(eax)]
|
|
|
+@@ -71,19 +71,19 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)]
|
|
|
+ mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+ mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1
|
|
|
+ mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ movq mm6, MMWORD [ebx] ; mm6=Cb(01234567)
|
|
|
+ movq mm7, MMWORD [edx] ; mm7=Cr(01234567)
|
|
|
+
|
|
|
+ pxor mm1, mm1 ; mm1=(all 0's)
|
|
|
+ pcmpeqw mm3, mm3
|
|
|
+ psllw mm3, 7 ; mm3={0xFF80 0xFF80 0xFF80 0xFF80}
|
|
|
+
|
|
|
+@@ -166,23 +166,23 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|
|
+ packssdw mm2, mm7 ; mm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
|
|
|
+ psubw mm5, mm1 ; mm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
|
|
|
+ psubw mm2, mm3 ; mm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
|
|
|
+
|
|
|
+ movq MMWORD [wk(2)], mm5 ; wk(2)=(G-Y)H
|
|
|
+
|
|
|
+ mov al, 2 ; Yctr
|
|
|
+ jmp short .Yloop_1st
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .Yloop_2nd:
|
|
|
+ movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H
|
|
|
+ movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H
|
|
|
+ movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .Yloop_1st:
|
|
|
+ movq mm7, MMWORD [esi] ; mm7=Y(01234567)
|
|
|
+
|
|
|
+ pcmpeqw mm6, mm6
|
|
|
+ psrlw mm6, BYTE_BIT ; mm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+ pand mm6, mm7 ; mm6=Y(0246)=YE
|
|
|
+ psrlw mm7, BYTE_BIT ; mm7=Y(1357)=YO
|
|
|
+@@ -253,17 +253,17 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
|
|
+ add esi, byte SIZEOF_MMWORD ; inptr0
|
|
|
+ dec al ; Yctr
|
|
|
+ jnz near .Yloop_2nd
|
|
|
+
|
|
|
+ add ebx, byte SIZEOF_MMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_MMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st16:
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
|
+ cmp ecx, byte 2*SIZEOF_MMWORD
|
|
|
+ jb short .column_st8
|
|
|
+ movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
|
+ movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
|
|
|
+ movq mmA, mmC
|
|
|
+@@ -345,17 +345,17 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
|
|
|
+ add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
|
|
+ add esi, byte SIZEOF_MMWORD ; inptr0
|
|
|
+ dec al ; Yctr
|
|
|
+ jnz near .Yloop_2nd
|
|
|
+
|
|
|
+ add ebx, byte SIZEOF_MMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_MMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st16:
|
|
|
+ cmp ecx, byte SIZEOF_MMWORD/2
|
|
|
+ jb short .column_st8
|
|
|
+ movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
|
+ movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
|
|
|
+ movq mmA, mmC
|
|
|
+ movq mmD, mmH
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdmrgext-sse2.asm b/media/libjpeg/simd/i386/jdmrgext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdmrgext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdmrgext-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmrgext.asm - merged upsampling/color conversion (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,25 +44,25 @@
|
|
|
+ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov ecx, JDIMENSION [output_width(eax)] ; col
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push ecx
|
|
|
+
|
|
|
+ mov edi, JSAMPIMAGE [input_buf(eax)]
|
|
|
+@@ -73,19 +73,19 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)]
|
|
|
+ mov esi, JSAMPROW [esi+ecx*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+ mov ebx, JSAMPROW [ebx+ecx*SIZEOF_JSAMPROW] ; inptr1
|
|
|
+ mov edx, JSAMPROW [edx+ecx*SIZEOF_JSAMPROW] ; inptr2
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+ pop ecx ; col
|
|
|
+
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+- movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
++ MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
|
|
|
+
|
|
|
+ movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF)
|
|
|
+ movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF)
|
|
|
+
|
|
|
+ pxor xmm1, xmm1 ; xmm1=(all 0's)
|
|
|
+ pcmpeqw xmm3, xmm3
|
|
|
+ psllw xmm3, 7 ; xmm3={0xFF80 0xFF80 0xFF80 0xFF80 ..}
|
|
|
+
|
|
|
+@@ -168,23 +168,23 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+ packssdw xmm2, xmm7 ; xmm2=CbL*-FIX(0.344)+CrL*FIX(0.285)
|
|
|
+ psubw xmm5, xmm1 ; xmm5=CbH*-FIX(0.344)+CrH*-FIX(0.714)=(G-Y)H
|
|
|
+ psubw xmm2, xmm3 ; xmm2=CbL*-FIX(0.344)+CrL*-FIX(0.714)=(G-Y)L
|
|
|
+
|
|
|
+ movdqa XMMWORD [wk(2)], xmm5 ; wk(2)=(G-Y)H
|
|
|
+
|
|
|
+ mov al, 2 ; Yctr
|
|
|
+ jmp short .Yloop_1st
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .Yloop_2nd:
|
|
|
+ movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H
|
|
|
+ movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H
|
|
|
+ movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .Yloop_1st:
|
|
|
+ movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF)
|
|
|
+
|
|
|
+ pcmpeqw xmm6, xmm6
|
|
|
+ psrlw xmm6, BYTE_BIT ; xmm6={0xFF 0x00 0xFF 0x00 ..}
|
|
|
+ pand xmm6, xmm7 ; xmm6=Y(02468ACE)=YE
|
|
|
+ psrlw xmm7, BYTE_BIT ; xmm7=Y(13579BDF)=YO
|
|
|
+@@ -275,17 +275,17 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_XMMWORD ; inptr0
|
|
|
+ dec al ; Yctr
|
|
|
+ jnz near .Yloop_2nd
|
|
|
+
|
|
|
+ add ebx, byte SIZEOF_XMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_XMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st32:
|
|
|
+ lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
|
+ cmp ecx, byte 2*SIZEOF_XMMWORD
|
|
|
+ jb short .column_st16
|
|
|
+ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
|
|
+ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
|
|
+ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
|
|
+@@ -390,17 +390,17 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_XMMWORD ; inptr0
|
|
|
+ dec al ; Yctr
|
|
|
+ jnz near .Yloop_2nd
|
|
|
+
|
|
|
+ add ebx, byte SIZEOF_XMMWORD ; inptr1
|
|
|
+ add edx, byte SIZEOF_XMMWORD ; inptr2
|
|
|
+ jmp near .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .column_st32:
|
|
|
+ cmp ecx, byte SIZEOF_XMMWORD/2
|
|
|
+ jb short .column_st16
|
|
|
+ movdqu XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
|
|
+ movdqu XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
|
|
+ add edi, byte 2*SIZEOF_XMMWORD ; outptr
|
|
|
+ movdqa xmmA, xmmC
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdsample-avx2.asm b/media/libjpeg/simd/i386/jdsample-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdsample-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdsample-avx2.asm
|
|
|
+@@ -1,42 +1,42 @@
|
|
|
+ ;
|
|
|
+ ; jdsample.asm - upsampling (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+ ; NASM is available from http://nasm.sourceforge.net/ or
|
|
|
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fancy_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_fancy_upsample_avx2):
|
|
|
+
|
|
|
+ PW_ONE times 16 dw 1
|
|
|
+ PW_TWO times 16 dw 2
|
|
|
+ PW_THREE times 16 dw 3
|
|
|
+ PW_SEVEN times 16 dw 7
|
|
|
+ PW_EIGHT times 16 dw 8
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ;
|
|
|
+ ; The upsampling algorithm is linear interpolation between pixel centers,
|
|
|
+@@ -57,36 +57,36 @@ PW_EIGHT times 16 dw 8
|
|
|
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
+ push ebp
|
|
|
+ mov ebp, esp
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
|
|
|
+ test eax, eax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax ; colctr
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+@@ -99,25 +99,25 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
+ vpcmpeqb xmm7, xmm7, xmm7
|
|
|
+ vpsrldq xmm7, xmm7, (SIZEOF_XMMWORD-1) ; (ff -- -- -- ... -- --) LSB is ff
|
|
|
+ vpand ymm7, ymm7, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ add eax, byte SIZEOF_YMMWORD-1
|
|
|
+ and eax, byte -SIZEOF_YMMWORD
|
|
|
+ cmp eax, byte SIZEOF_YMMWORD
|
|
|
+ ja short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_last:
|
|
|
+ vpcmpeqb xmm6, xmm6, xmm6
|
|
|
+ vpslldq xmm6, xmm6, (SIZEOF_XMMWORD-1)
|
|
|
+ vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... ---- ---- ff) MSB is ff
|
|
|
+ vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ jmp short .upsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
|
|
|
+ vperm2i128 ymm6, ymm0, ymm6, 0x20
|
|
|
+ vpslldq ymm6, ymm6, 15
|
|
|
+
|
|
|
+ .upsample:
|
|
|
+ vmovdqu ymm1, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm1=( 0 1 2 ... 29 30 31)
|
|
|
+@@ -191,17 +191,17 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ vzeroupper
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; Again a triangle filter; see comments for h2v1 case, above.
|
|
|
+ ;
|
|
|
+@@ -229,39 +229,39 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
+ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, eax ; edx = original ebp
|
|
|
+ mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
|
|
|
+ test eax, eax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(edx)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(edx)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(edx)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax ; colctr
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+@@ -281,18 +281,18 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
+ pop edx
|
|
|
+ .skip:
|
|
|
+ ; -- process the first column block
|
|
|
+
|
|
|
+ vmovdqu ymm0, YMMWORD [ebx+0*SIZEOF_YMMWORD] ; ymm0=row[ 0][0]
|
|
|
+ vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0]
|
|
|
+ vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0]
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
|
|
|
+
|
|
|
+ vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
|
|
|
+ vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
|
|
|
+ vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
|
|
|
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
|
|
|
+
|
|
|
+@@ -323,52 +323,52 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm6
|
|
|
+
|
|
|
+ vpand ymm1, ymm1, ymm7 ; ymm1=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
|
|
|
+ vpand ymm2, ymm2, ymm7 ; ymm2=( 0 -- -- -- -- -- -- -- -- -- -- -- -- -- -- --)
|
|
|
+
|
|
|
+ vmovdqa YMMWORD [wk(0)], ymm1
|
|
|
+ vmovdqa YMMWORD [wk(1)], ymm2
|
|
|
+
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+
|
|
|
+ add eax, byte SIZEOF_YMMWORD-1
|
|
|
+ and eax, byte -SIZEOF_YMMWORD
|
|
|
+ cmp eax, byte SIZEOF_YMMWORD
|
|
|
+ ja short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_last:
|
|
|
+ ; -- process the last column block
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ vpcmpeqb xmm1, xmm1, xmm1
|
|
|
+ vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2)
|
|
|
+ vperm2i128 ymm1, ymm1, ymm1, 1 ; (---- ---- ... ---- ---- ffff) MSB is ffff
|
|
|
+
|
|
|
+ vpand ymm2, ymm1, YMMWORD [edi+1*SIZEOF_YMMWORD]
|
|
|
+ vpand ymm1, ymm1, YMMWORD [edx+1*SIZEOF_YMMWORD]
|
|
|
+
|
|
|
+ vmovdqa YMMWORD [wk(2)], ymm1 ; ymm1=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
|
|
|
+ vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
|
|
|
+
|
|
|
+ jmp near .upsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ ; -- process the next column block
|
|
|
+
|
|
|
+ vmovdqu ymm0, YMMWORD [ebx+1*SIZEOF_YMMWORD] ; ymm0=row[ 0][1]
|
|
|
+ vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1]
|
|
|
+ vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1]
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
|
|
|
+
|
|
|
+ vpunpckhbw ymm4, ymm0, ymm3 ; ymm4=row[ 0]( 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31)
|
|
|
+ vpunpcklbw ymm5, ymm0, ymm3 ; ymm5=row[ 0]( 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23)
|
|
|
+ vperm2i128 ymm0, ymm5, ymm4, 0x20 ; ymm0=row[ 0]( 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
|
|
|
+ vperm2i128 ymm4, ymm5, ymm4, 0x31 ; ymm4=row[ 0](16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31)
|
|
|
+
|
|
|
+@@ -511,17 +511,17 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
+ vpsllw ymm7, ymm7, BYTE_BIT
|
|
|
+ vpsllw ymm5, ymm5, BYTE_BIT
|
|
|
+ vpor ymm1, ymm1, ymm7 ; ymm1=Out1L=( 0 1 2 ... 29 30 31)
|
|
|
+ vpor ymm0, ymm0, ymm5 ; ymm0=Out1H=(32 33 34 ... 61 62 63)
|
|
|
+
|
|
|
+ vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
|
|
|
+
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+
|
|
|
+ sub eax, byte SIZEOF_YMMWORD
|
|
|
+ add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above)
|
|
|
+ add ebx, byte 1*SIZEOF_YMMWORD ; inptr0
|
|
|
+ add esi, byte 1*SIZEOF_YMMWORD ; inptr1(below)
|
|
|
+ add edx, byte 2*SIZEOF_YMMWORD ; outptr0
|
|
|
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr1
|
|
|
+ cmp eax, byte SIZEOF_YMMWORD
|
|
|
+@@ -585,25 +585,25 @@ EXTN(jsimd_h2v1_upsample_avx2):
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz short .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+ mov eax, edx ; colctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ cmp eax, byte SIZEOF_YMMWORD
|
|
|
+ ja near .above_16
|
|
|
+
|
|
|
+ vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_YMMWORD]
|
|
|
+ vpunpckhbw xmm1, xmm0, xmm0
|
|
|
+ vpunpcklbw xmm0, xmm0, xmm0
|
|
|
+@@ -624,17 +624,17 @@ EXTN(jsimd_h2v1_upsample_avx2):
|
|
|
+ vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm1
|
|
|
+
|
|
|
+ sub eax, byte 2*SIZEOF_YMMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_YMMWORD ; inptr
|
|
|
+ add edi, byte 2*SIZEOF_YMMWORD ; outptr
|
|
|
+ jmp short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add edi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec ecx ; rowctr
|
|
|
+@@ -684,26 +684,26 @@ EXTN(jsimd_h2v2_upsample_avx2):
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
|
|
|
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
|
|
|
+ mov eax, edx ; colctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ cmp eax, byte SIZEOF_YMMWORD
|
|
|
+ ja short .above_16
|
|
|
+
|
|
|
+ vmovdqu xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ vpunpckhbw xmm1, xmm0, xmm0
|
|
|
+ vpunpcklbw xmm0, xmm0, xmm0
|
|
|
+@@ -729,17 +729,17 @@ EXTN(jsimd_h2v2_upsample_avx2):
|
|
|
+
|
|
|
+ sub eax, byte 2*SIZEOF_YMMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_YMMWORD ; inptr
|
|
|
+ add ebx, 2*SIZEOF_YMMWORD ; outptr0
|
|
|
+ add edi, 2*SIZEOF_YMMWORD ; outptr1
|
|
|
+ jmp short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+
|
|
|
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub ecx, byte 2 ; rowctr
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdsample-mmx.asm b/media/libjpeg/simd/i386/jdsample-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdsample-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdsample-mmx.asm
|
|
|
+@@ -1,41 +1,41 @@
|
|
|
+ ;
|
|
|
+ ; jdsample.asm - upsampling (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+ ; NASM is available from http://nasm.sourceforge.net/ or
|
|
|
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fancy_upsample_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_fancy_upsample_mmx):
|
|
|
+
|
|
|
+ PW_ONE times 4 dw 1
|
|
|
+ PW_TWO times 4 dw 2
|
|
|
+ PW_THREE times 4 dw 3
|
|
|
+ PW_SEVEN times 4 dw 7
|
|
|
+ PW_EIGHT times 4 dw 8
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ;
|
|
|
+ ; The upsampling algorithm is linear interpolation between pixel centers,
|
|
|
+@@ -56,36 +56,36 @@ PW_EIGHT times 4 dw 8
|
|
|
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_mmx)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_fancy_upsample_mmx):
|
|
|
+ push ebp
|
|
|
+ mov ebp, esp
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
|
|
|
+ test eax, eax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax ; colctr
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+@@ -98,24 +98,24 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
|
|
|
+ pcmpeqb mm7, mm7
|
|
|
+ psrlq mm7, (SIZEOF_MMWORD-1)*BYTE_BIT
|
|
|
+ pand mm7, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ add eax, byte SIZEOF_MMWORD-1
|
|
|
+ and eax, byte -SIZEOF_MMWORD
|
|
|
+ cmp eax, byte SIZEOF_MMWORD
|
|
|
+ ja short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_last:
|
|
|
+ pcmpeqb mm6, mm6
|
|
|
+ psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
|
|
|
+ pand mm6, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ jmp short .upsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movq mm6, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
|
+ psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
|
|
|
+
|
|
|
+ .upsample:
|
|
|
+ movq mm1, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+ movq mm2, mm1
|
|
|
+@@ -182,17 +182,17 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; Again a triangle filter; see comments for h2v1 case, above.
|
|
|
+ ;
|
|
|
+@@ -219,39 +219,39 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
|
|
|
+ EXTN(jsimd_h2v2_fancy_upsample_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, eax ; edx = original ebp
|
|
|
+ mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
|
|
|
+ test eax, eax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(edx)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(edx)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(edx)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax ; colctr
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+@@ -271,18 +271,18 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
|
|
|
+ pop edx
|
|
|
+ .skip:
|
|
|
+ ; -- process the first column block
|
|
|
+
|
|
|
+ movq mm0, MMWORD [ebx+0*SIZEOF_MMWORD] ; mm0=row[ 0][0]
|
|
|
+ movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0]
|
|
|
+ movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0]
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ pxor mm3, mm3 ; mm3=(all 0's)
|
|
|
+ movq mm4, mm0
|
|
|
+ punpcklbw mm0, mm3 ; mm0=row[ 0][0]( 0 1 2 3)
|
|
|
+ punpckhbw mm4, mm3 ; mm4=row[ 0][0]( 4 5 6 7)
|
|
|
+ movq mm5, mm1
|
|
|
+ punpcklbw mm1, mm3 ; mm1=row[-1][0]( 0 1 2 3)
|
|
|
+ punpckhbw mm5, mm3 ; mm5=row[-1][0]( 4 5 6 7)
|
|
|
+@@ -307,52 +307,52 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
|
|
|
+ movq MMWORD [edi+1*SIZEOF_MMWORD], mm6
|
|
|
+
|
|
|
+ pand mm1, mm7 ; mm1=( 0 - - -)
|
|
|
+ pand mm2, mm7 ; mm2=( 0 - - -)
|
|
|
+
|
|
|
+ movq MMWORD [wk(0)], mm1
|
|
|
+ movq MMWORD [wk(1)], mm2
|
|
|
+
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+
|
|
|
+ add eax, byte SIZEOF_MMWORD-1
|
|
|
+ and eax, byte -SIZEOF_MMWORD
|
|
|
+ cmp eax, byte SIZEOF_MMWORD
|
|
|
+ ja short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_last:
|
|
|
+ ; -- process the last column block
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ pcmpeqb mm1, mm1
|
|
|
+ psllq mm1, (SIZEOF_MMWORD-2)*BYTE_BIT
|
|
|
+ movq mm2, mm1
|
|
|
+
|
|
|
+ pand mm1, MMWORD [edx+1*SIZEOF_MMWORD] ; mm1=( - - - 7)
|
|
|
+ pand mm2, MMWORD [edi+1*SIZEOF_MMWORD] ; mm2=( - - - 7)
|
|
|
+
|
|
|
+ movq MMWORD [wk(2)], mm1
|
|
|
+ movq MMWORD [wk(3)], mm2
|
|
|
+
|
|
|
+ jmp short .upsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ ; -- process the next column block
|
|
|
+
|
|
|
+ movq mm0, MMWORD [ebx+1*SIZEOF_MMWORD] ; mm0=row[ 0][1]
|
|
|
+ movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1]
|
|
|
+ movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1]
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ pxor mm3, mm3 ; mm3=(all 0's)
|
|
|
+ movq mm4, mm0
|
|
|
+ punpcklbw mm0, mm3 ; mm0=row[ 0][1]( 0 1 2 3)
|
|
|
+ punpckhbw mm4, mm3 ; mm4=row[ 0][1]( 4 5 6 7)
|
|
|
+ movq mm5, mm1
|
|
|
+ punpcklbw mm1, mm3 ; mm1=row[-1][1]( 0 1 2 3)
|
|
|
+ punpckhbw mm5, mm3 ; mm5=row[-1][1]( 4 5 6 7)
|
|
|
+@@ -481,17 +481,17 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
|
|
|
+ psllw mm7, BYTE_BIT
|
|
|
+ psllw mm5, BYTE_BIT
|
|
|
+ por mm1, mm7 ; mm1=Out1L=( 0 1 2 3 4 5 6 7)
|
|
|
+ por mm0, mm5 ; mm0=Out1H=( 8 9 10 11 12 13 14 15)
|
|
|
+
|
|
|
+ movq MMWORD [edi+0*SIZEOF_MMWORD], mm1
|
|
|
+ movq MMWORD [edi+1*SIZEOF_MMWORD], mm0
|
|
|
+
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+
|
|
|
+ sub eax, byte SIZEOF_MMWORD
|
|
|
+ add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above)
|
|
|
+ add ebx, byte 1*SIZEOF_MMWORD ; inptr0
|
|
|
+ add esi, byte 1*SIZEOF_MMWORD ; inptr1(below)
|
|
|
+ add edx, byte 2*SIZEOF_MMWORD ; outptr0
|
|
|
+ add edi, byte 2*SIZEOF_MMWORD ; outptr1
|
|
|
+ cmp eax, byte SIZEOF_MMWORD
|
|
|
+@@ -556,25 +556,25 @@ EXTN(jsimd_h2v1_upsample_mmx):
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz short .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+ mov eax, edx ; colctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ movq mm1, mm0
|
|
|
+ punpcklbw mm0, mm0
|
|
|
+ punpckhbw mm1, mm1
|
|
|
+
|
|
|
+@@ -594,17 +594,17 @@ EXTN(jsimd_h2v1_upsample_mmx):
|
|
|
+ movq MMWORD [edi+3*SIZEOF_MMWORD], mm3
|
|
|
+
|
|
|
+ sub eax, byte 2*SIZEOF_MMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte 2*SIZEOF_MMWORD ; inptr
|
|
|
+ add edi, byte 4*SIZEOF_MMWORD ; outptr
|
|
|
+ jmp short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add edi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec ecx ; rowctr
|
|
|
+@@ -655,26 +655,26 @@ EXTN(jsimd_h2v2_upsample_mmx):
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz short .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
|
|
|
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
|
|
|
+ mov eax, edx ; colctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
|
+
|
|
|
+ movq mm1, mm0
|
|
|
+ punpcklbw mm0, mm0
|
|
|
+ punpckhbw mm1, mm1
|
|
|
+
|
|
|
+@@ -699,17 +699,17 @@ EXTN(jsimd_h2v2_upsample_mmx):
|
|
|
+
|
|
|
+ sub eax, byte 2*SIZEOF_MMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte 2*SIZEOF_MMWORD ; inptr
|
|
|
+ add ebx, byte 4*SIZEOF_MMWORD ; outptr0
|
|
|
+ add edi, byte 4*SIZEOF_MMWORD ; outptr1
|
|
|
+ jmp short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+
|
|
|
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub ecx, byte 2 ; rowctr
|
|
|
+diff --git a/media/libjpeg/simd/i386/jdsample-sse2.asm b/media/libjpeg/simd/i386/jdsample-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jdsample-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jdsample-sse2.asm
|
|
|
+@@ -1,41 +1,41 @@
|
|
|
+ ;
|
|
|
+ ; jdsample.asm - upsampling (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+ ; NASM is available from http://nasm.sourceforge.net/ or
|
|
|
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fancy_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_fancy_upsample_sse2):
|
|
|
+
|
|
|
+ PW_ONE times 8 dw 1
|
|
|
+ PW_TWO times 8 dw 2
|
|
|
+ PW_THREE times 8 dw 3
|
|
|
+ PW_SEVEN times 8 dw 7
|
|
|
+ PW_EIGHT times 8 dw 8
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ;
|
|
|
+ ; The upsampling algorithm is linear interpolation between pixel centers,
|
|
|
+@@ -56,36 +56,36 @@ PW_EIGHT times 8 dw 8
|
|
|
+ %define output_data_ptr(b) (b) + 20 ; JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
+ push ebp
|
|
|
+ mov ebp, esp
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
|
|
|
+ test eax, eax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax ; colctr
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+
|
|
|
+@@ -98,24 +98,24 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
+ pcmpeqb xmm7, xmm7
|
|
|
+ psrldq xmm7, (SIZEOF_XMMWORD-1)
|
|
|
+ pand xmm7, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ add eax, byte SIZEOF_XMMWORD-1
|
|
|
+ and eax, byte -SIZEOF_XMMWORD
|
|
|
+ cmp eax, byte SIZEOF_XMMWORD
|
|
|
+ ja short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_last:
|
|
|
+ pcmpeqb xmm6, xmm6
|
|
|
+ pslldq xmm6, (SIZEOF_XMMWORD-1)
|
|
|
+ pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ jmp short .upsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
|
+ pslldq xmm6, (SIZEOF_XMMWORD-1)
|
|
|
+
|
|
|
+ .upsample:
|
|
|
+ movdqa xmm1, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+ movdqa xmm2, xmm1
|
|
|
+@@ -180,17 +180,17 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
+ dec ecx ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; Again a triangle filter; see comments for h2v1 case, above.
|
|
|
+ ;
|
|
|
+@@ -218,39 +218,39 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
+ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic eax ; make a room for GOT address
|
|
|
++ PUSHPIC eax ; make a room for GOT address
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
+- movpic POINTER [gotptr], ebx ; save GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
++ MOVPIC POINTER [gotptr], ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, eax ; edx = original ebp
|
|
|
+ mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
|
|
|
+ test eax, eax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(edx)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(edx)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(edx)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push eax ; colctr
|
|
|
+ push ecx
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov ecx, JSAMPROW [esi-1*SIZEOF_JSAMPROW] ; inptr1(above)
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
|
|
|
+@@ -270,18 +270,18 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
+ pop edx
|
|
|
+ .skip:
|
|
|
+ ; -- process the first column block
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [ebx+0*SIZEOF_XMMWORD] ; xmm0=row[ 0][0]
|
|
|
+ movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0]
|
|
|
+ movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0]
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ pxor xmm3, xmm3 ; xmm3=(all 0's)
|
|
|
+ movdqa xmm4, xmm0
|
|
|
+ punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7)
|
|
|
+ punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15)
|
|
|
+ movdqa xmm5, xmm1
|
|
|
+ punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7)
|
|
|
+ punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15)
|
|
|
+@@ -306,52 +306,52 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
+ movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm6
|
|
|
+
|
|
|
+ pand xmm1, xmm7 ; xmm1=( 0 -- -- -- -- -- -- --)
|
|
|
+ pand xmm2, xmm7 ; xmm2=( 0 -- -- -- -- -- -- --)
|
|
|
+
|
|
|
+ movdqa XMMWORD [wk(0)], xmm1
|
|
|
+ movdqa XMMWORD [wk(1)], xmm2
|
|
|
+
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+
|
|
|
+ add eax, byte SIZEOF_XMMWORD-1
|
|
|
+ and eax, byte -SIZEOF_XMMWORD
|
|
|
+ cmp eax, byte SIZEOF_XMMWORD
|
|
|
+ ja short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop_last:
|
|
|
+ ; -- process the last column block
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ pcmpeqb xmm1, xmm1
|
|
|
+ pslldq xmm1, (SIZEOF_XMMWORD-2)
|
|
|
+ movdqa xmm2, xmm1
|
|
|
+
|
|
|
+ pand xmm1, XMMWORD [edx+1*SIZEOF_XMMWORD]
|
|
|
+ pand xmm2, XMMWORD [edi+1*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ movdqa XMMWORD [wk(2)], xmm1 ; xmm1=(-- -- -- -- -- -- -- 15)
|
|
|
+ movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15)
|
|
|
+
|
|
|
+ jmp near .upsample
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .columnloop:
|
|
|
+ ; -- process the next column block
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [ebx+1*SIZEOF_XMMWORD] ; xmm0=row[ 0][1]
|
|
|
+ movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1]
|
|
|
+ movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1]
|
|
|
+
|
|
|
+- pushpic ebx
|
|
|
+- movpic ebx, POINTER [gotptr] ; load GOT address
|
|
|
++ PUSHPIC ebx
|
|
|
++ MOVPIC ebx, POINTER [gotptr] ; load GOT address
|
|
|
+
|
|
|
+ pxor xmm3, xmm3 ; xmm3=(all 0's)
|
|
|
+ movdqa xmm4, xmm0
|
|
|
+ punpcklbw xmm0, xmm3 ; xmm0=row[ 0]( 0 1 2 3 4 5 6 7)
|
|
|
+ punpckhbw xmm4, xmm3 ; xmm4=row[ 0]( 8 9 10 11 12 13 14 15)
|
|
|
+ movdqa xmm5, xmm1
|
|
|
+ punpcklbw xmm1, xmm3 ; xmm1=row[-1]( 0 1 2 3 4 5 6 7)
|
|
|
+ punpckhbw xmm5, xmm3 ; xmm5=row[-1]( 8 9 10 11 12 13 14 15)
|
|
|
+@@ -480,17 +480,17 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
+ psllw xmm7, BYTE_BIT
|
|
|
+ psllw xmm5, BYTE_BIT
|
|
|
+ por xmm1, xmm7 ; xmm1=Out1L=( 0 1 2 ... 13 14 15)
|
|
|
+ por xmm0, xmm5 ; xmm0=Out1H=(16 17 18 ... 29 30 31)
|
|
|
+
|
|
|
+ movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
|
|
|
+ movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
|
|
|
+
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+
|
|
|
+ sub eax, byte SIZEOF_XMMWORD
|
|
|
+ add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above)
|
|
|
+ add ebx, byte 1*SIZEOF_XMMWORD ; inptr0
|
|
|
+ add esi, byte 1*SIZEOF_XMMWORD ; inptr1(below)
|
|
|
+ add edx, byte 2*SIZEOF_XMMWORD ; outptr0
|
|
|
+ add edi, byte 2*SIZEOF_XMMWORD ; outptr1
|
|
|
+ cmp eax, byte SIZEOF_XMMWORD
|
|
|
+@@ -553,25 +553,25 @@ EXTN(jsimd_h2v1_upsample_sse2):
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz short .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov edi, JSAMPROW [edi] ; outptr
|
|
|
+ mov eax, edx ; colctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ movdqa xmm1, xmm0
|
|
|
+ punpcklbw xmm0, xmm0
|
|
|
+ punpckhbw xmm1, xmm1
|
|
|
+
|
|
|
+@@ -591,17 +591,17 @@ EXTN(jsimd_h2v1_upsample_sse2):
|
|
|
+ movdqa XMMWORD [edi+3*SIZEOF_XMMWORD], xmm3
|
|
|
+
|
|
|
+ sub eax, byte 2*SIZEOF_XMMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte 2*SIZEOF_XMMWORD ; inptr
|
|
|
+ add edi, byte 4*SIZEOF_XMMWORD ; outptr
|
|
|
+ jmp short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+
|
|
|
+ add esi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add edi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec ecx ; rowctr
|
|
|
+@@ -650,26 +650,26 @@ EXTN(jsimd_h2v2_upsample_sse2):
|
|
|
+
|
|
|
+ mov ecx, INT [max_v_samp(ebp)] ; rowctr
|
|
|
+ test ecx, ecx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
|
|
|
+ mov edi, POINTER [output_data_ptr(ebp)]
|
|
|
+ mov edi, JSAMPARRAY [edi] ; output_data
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+ push edi
|
|
|
+ push esi
|
|
|
+
|
|
|
+ mov esi, JSAMPROW [esi] ; inptr
|
|
|
+ mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
|
|
|
+ mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
|
|
|
+ mov eax, edx ; colctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
|
+
|
|
|
+ movdqa xmm1, xmm0
|
|
|
+ punpcklbw xmm0, xmm0
|
|
|
+ punpckhbw xmm1, xmm1
|
|
|
+
|
|
|
+@@ -694,17 +694,17 @@ EXTN(jsimd_h2v2_upsample_sse2):
|
|
|
+
|
|
|
+ sub eax, byte 2*SIZEOF_XMMWORD
|
|
|
+ jz short .nextrow
|
|
|
+
|
|
|
+ add esi, byte 2*SIZEOF_XMMWORD ; inptr
|
|
|
+ add ebx, byte 4*SIZEOF_XMMWORD ; outptr0
|
|
|
+ add edi, byte 4*SIZEOF_XMMWORD ; outptr1
|
|
|
+ jmp short .columnloop
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+
|
|
|
+ .nextrow:
|
|
|
+ pop esi
|
|
|
+ pop edi
|
|
|
+
|
|
|
+ add esi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add edi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub ecx, byte 2 ; rowctr
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctflt-3dn.asm b/media/libjpeg/simd/i386/jfdctflt-3dn.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctflt-3dn.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctflt-3dn.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctflt.asm - floating-point FDCT (3DNow!)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -19,27 +19,27 @@
|
|
|
+ ; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+ %include "jdct.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_float_3dnow)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_float_3dnow):
|
|
|
+
|
|
|
+ PD_0_382 times 2 dd 0.382683432365089771728460
|
|
|
+ PD_0_707 times 2 dd 0.707106781186547524400844
|
|
|
+ PD_0_541 times 2 dd 0.541196100146196984399723
|
|
|
+ PD_1_306 times 2 dd 1.306562964876376527856643
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -58,29 +58,29 @@ PD_1_306 times 2 dd 1.306562964876376527
|
|
|
+ EXTN(jsimd_fdct_float_3dnow):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
|
|
|
+ mov ecx, DCTSIZE/2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(0,3,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(1,3,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+
|
|
|
+ ; mm0=(00 01), mm1=(10 11), mm2=(06 07), mm3=(16 17)
|
|
|
+@@ -185,17 +185,17 @@ EXTN(jsimd_fdct_float_3dnow):
|
|
|
+ add edx, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
|
|
|
+ dec ecx
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ ; ---- Pass 2: process columns.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
|
|
|
+ mov ecx, DCTSIZE/2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+
|
|
|
+ ; mm0=(00 10), mm1=(01 11), mm2=(60 70), mm3=(61 71)
|
|
|
+@@ -302,17 +302,17 @@ EXTN(jsimd_fdct_float_3dnow):
|
|
|
+ jnz near .columnloop
|
|
|
+
|
|
|
+ femms ; empty MMX/3DNow! state
|
|
|
+
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctflt-sse.asm b/media/libjpeg/simd/i386/jfdctflt-sse.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctflt-sse.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctflt-sse.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctflt.asm - floating-point FDCT (SSE)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -29,27 +29,27 @@
|
|
|
+
|
|
|
+ %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
+ shufps %1, %2, 0xEE
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_float_sse)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_float_sse):
|
|
|
+
|
|
|
+ PD_0_382 times 4 dd 0.382683432365089771728460
|
|
|
+ PD_0_707 times 4 dd 0.707106781186547524400844
|
|
|
+ PD_0_541 times 4 dd 0.541196100146196984399723
|
|
|
+ PD_1_306 times 4 dd 1.306562964876376527856643
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -69,29 +69,29 @@ PD_1_306 times 4 dd 1.306562964876376527
|
|
|
+ EXTN(jsimd_fdct_float_sse):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm2, XMMWORD [XMMBLOCK(2,1,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm3, XMMWORD [XMMBLOCK(3,1,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+
|
|
|
+ ; xmm0=(20 21 22 23), xmm2=(24 25 26 27)
|
|
|
+@@ -217,17 +217,17 @@ EXTN(jsimd_fdct_float_sse):
|
|
|
+ add edx, 4*DCTSIZE*SIZEOF_FAST_FLOAT
|
|
|
+ dec ecx
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ ; ---- Pass 2: process columns.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm2, XMMWORD [XMMBLOCK(6,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm3, XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+
|
|
|
+ ; xmm0=(02 12 22 32), xmm2=(42 52 62 72)
|
|
|
+@@ -353,17 +353,17 @@ EXTN(jsimd_fdct_float_sse):
|
|
|
+ add edx, byte 4*SIZEOF_FAST_FLOAT
|
|
|
+ dec ecx
|
|
|
+ jnz near .columnloop
|
|
|
+
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctfst-mmx.asm b/media/libjpeg/simd/i386/jfdctfst-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctfst-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctfst-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctfst.asm - fast integer FDCT (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,27 +44,27 @@ F_1_306 equ DESCALE(1402911301, 30 - CON
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
|
|
|
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
|
|
|
+
|
|
|
+ %define PRE_MULTIPLY_SCALE_BITS 2
|
|
|
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_ifast_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_ifast_mmx):
|
|
|
+
|
|
|
+ PW_F0707 times 4 dw F_0_707 << CONST_SHIFT
|
|
|
+ PW_F0382 times 4 dw F_0_382 << CONST_SHIFT
|
|
|
+ PW_F0541 times 4 dw F_0_541 << CONST_SHIFT
|
|
|
+ PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -83,29 +83,29 @@ PW_F1306 times 4 dw F_1_306 << CONST_SHI
|
|
|
+ EXTN(jsimd_fdct_ifast_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
|
|
|
+
|
|
|
+ ; mm0=(20 21 22 23), mm2=(24 25 26 27)
|
|
|
+@@ -236,17 +236,17 @@ EXTN(jsimd_fdct_ifast_mmx):
|
|
|
+ add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
|
|
|
+ dec ecx
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ ; ---- Pass 2: process columns.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
|
|
|
+
|
|
|
+ ; mm0=(02 12 22 32), mm2=(42 52 62 72)
|
|
|
+@@ -379,17 +379,17 @@ EXTN(jsimd_fdct_ifast_mmx):
|
|
|
+ jnz near .columnloop
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctfst-sse2.asm b/media/libjpeg/simd/i386/jfdctfst-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctfst-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctfst-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctfst.asm - fast integer FDCT (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,27 +44,27 @@ F_1_306 equ DESCALE(1402911301, 30 - CON
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
|
|
|
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
|
|
|
+
|
|
|
+ %define PRE_MULTIPLY_SCALE_BITS 2
|
|
|
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_ifast_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_ifast_sse2):
|
|
|
+
|
|
|
+ PW_F0707 times 8 dw F_0_707 << CONST_SHIFT
|
|
|
+ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
|
|
|
+ PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
|
|
|
+ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -84,23 +84,23 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHI
|
|
|
+ EXTN(jsimd_fdct_ifast_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (DCTELEM *)
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+@@ -387,17 +387,17 @@ EXTN(jsimd_fdct_ifast_sse2):
|
|
|
+ movdqa XMMWORD [XMMBLOCK(7,0,edx,SIZEOF_DCTELEM)], xmm3
|
|
|
+ movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm6
|
|
|
+ movdqa XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)], xmm2
|
|
|
+
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctint-avx2.asm b/media/libjpeg/simd/i386/jfdctint-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctint-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctint-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctint.asm - accurate integer FDCT (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -60,17 +60,17 @@ F_2_562 equ DESCALE(2751909506, 30 - CON
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit matrix transpose using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%8: Temp registers
|
|
|
+
|
|
|
+-%macro dotranspose 8
|
|
|
++%macro DOTRANSPOSE 8
|
|
|
+ ; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
|
|
|
+ ; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
|
|
|
+ ; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
|
|
|
+ ; %4=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
|
|
|
+
|
|
|
+ vpunpcklwd %5, %1, %2
|
|
|
+ vpunpckhwd %6, %1, %2
|
|
|
+ vpunpcklwd %7, %3, %4
|
|
|
+@@ -103,17 +103,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit accurate integer forward DCT using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%8: Temp registers
|
|
|
+ ; %9: Pass (1 or 2)
|
|
|
+
|
|
|
+-%macro dodct 9
|
|
|
++%macro DODCT 9
|
|
|
+ vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
|
|
|
+ vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
|
|
|
+ vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
|
|
|
+ vpsubw %8, %2, %3 ; %8=data3_2-data4_5=tmp4_5
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ vperm2i128 %6, %6, %6, 0x01 ; %6=tmp0_1
|
|
|
+@@ -218,17 +218,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ vpsrad %5, %5, DESCALE_P %+ %9
|
|
|
+
|
|
|
+ vpackssdw %2, %8, %5 ; %2=data3_1
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_islow_avx2):
|
|
|
+
|
|
|
+ PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ times 4 dw (F_0_541 - F_1_847), F_0_541
|
|
|
+ PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ times 4 dw (F_1_175 - F_0_390), F_1_175
|
|
|
+@@ -237,17 +237,17 @@ PW_MF060_MF089_MF050_MF256 times 4 dw
|
|
|
+ PW_F050_MF256_F060_MF089 times 4 dw (F_3_072 - F_2_562), -F_2_562
|
|
|
+ times 4 dw (F_1_501 - F_0_899), -F_0_899
|
|
|
+ PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
|
|
|
+ PW_1_NEG1 times 8 dw 1
|
|
|
+ times 8 dw -1
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -257,23 +257,23 @@ PW_1_NEG1 times 8 dw
|
|
|
+ %define data(b) (b) + 8 ; DCTELEM *data
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_fdct_islow_avx2):
|
|
|
+ push ebp
|
|
|
+ mov ebp, esp
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(ebp)] ; (DCTELEM *)
|
|
|
+
|
|
|
+ vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,edx,SIZEOF_DCTELEM)]
|
|
|
+@@ -287,29 +287,29 @@ EXTN(jsimd_fdct_islow_avx2):
|
|
|
+ vperm2i128 ymm1, ymm4, ymm6, 0x31
|
|
|
+ vperm2i128 ymm2, ymm5, ymm7, 0x20
|
|
|
+ vperm2i128 ymm3, ymm5, ymm7, 0x31
|
|
|
+ ; ymm0=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
|
|
|
+ ; ymm1=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
|
|
|
+ ; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
|
|
|
+ ; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
|
|
|
+ ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
|
|
|
+
|
|
|
+ ; ---- Pass 2: process columns.
|
|
|
+
|
|
|
+ vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
|
|
|
+ vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
|
|
|
+ ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
|
|
|
+
|
|
|
+ vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
|
|
|
+ vperm2i128 ymm5, ymm2, ymm1, 0x20 ; ymm5=data2_3
|
|
|
+ vperm2i128 ymm6, ymm0, ymm4, 0x31 ; ymm6=data4_5
|
|
|
+ vperm2i128 ymm7, ymm2, ymm4, 0x21 ; ymm7=data6_7
|
|
|
+
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(0,0,edx,SIZEOF_DCTELEM)], ymm3
|
|
|
+@@ -317,15 +317,15 @@ EXTN(jsimd_fdct_islow_avx2):
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(4,0,edx,SIZEOF_DCTELEM)], ymm6
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(6,0,edx,SIZEOF_DCTELEM)], ymm7
|
|
|
+
|
|
|
+ vzeroupper
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctint-mmx.asm b/media/libjpeg/simd/i386/jfdctint-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctint-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctint-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctint.asm - accurate integer FDCT (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -58,34 +58,34 @@ F_1_961 equ DESCALE(2106220350, 30 - CON
|
|
|
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_islow_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_islow_mmx):
|
|
|
+
|
|
|
+ PW_F130_F054 times 2 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ PW_F054_MF130 times 2 dw F_0_541, (F_0_541 - F_1_847)
|
|
|
+ PW_MF078_F117 times 2 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ PW_F117_F078 times 2 dw F_1_175, (F_1_175 - F_0_390)
|
|
|
+ PW_MF060_MF089 times 2 dw (F_0_298 - F_0_899), -F_0_899
|
|
|
+ PW_MF089_F060 times 2 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ PW_MF050_MF256 times 2 dw (F_2_053 - F_2_562), -F_2_562
|
|
|
+ PW_MF256_F050 times 2 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -104,29 +104,29 @@ PW_DESCALE_P2X times 4 dw 1 << (PASS1_B
|
|
|
+ EXTN(jsimd_fdct_islow_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(2,1,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(3,1,edx,SIZEOF_DCTELEM)]
|
|
|
+
|
|
|
+ ; mm0=(20 21 22 23), mm2=(24 25 26 27)
|
|
|
+@@ -358,17 +358,17 @@ EXTN(jsimd_fdct_islow_mmx):
|
|
|
+ add edx, byte 4*DCTSIZE*SIZEOF_DCTELEM
|
|
|
+ dec ecx
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ ; ---- Pass 2: process columns.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(3,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(6,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(7,0,edx,SIZEOF_DCTELEM)]
|
|
|
+
|
|
|
+ ; mm0=(02 12 22 32), mm2=(42 52 62 72)
|
|
|
+@@ -604,17 +604,17 @@ EXTN(jsimd_fdct_islow_mmx):
|
|
|
+ jnz near .columnloop
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jfdctint-sse2.asm b/media/libjpeg/simd/i386/jfdctint-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jfdctint-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jfdctint-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctint.asm - accurate integer FDCT (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -58,34 +58,34 @@ F_1_961 equ DESCALE(2106220350, 30 - CON
|
|
|
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_islow_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_islow_sse2):
|
|
|
+
|
|
|
+ PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847)
|
|
|
+ PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390)
|
|
|
+ PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899
|
|
|
+ PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562
|
|
|
+ PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -105,23 +105,23 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_B
|
|
|
+ EXTN(jsimd_fdct_islow_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ ; push esi ; unused
|
|
|
+ ; push edi ; unused
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov edx, POINTER [data(eax)] ; (DCTELEM *)
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm2, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
|
|
|
+@@ -617,17 +617,17 @@ EXTN(jsimd_fdct_islow_sse2):
|
|
|
+
|
|
|
+ movdqa XMMWORD [XMMBLOCK(5,0,edx,SIZEOF_DCTELEM)], xmm1
|
|
|
+ movdqa XMMWORD [XMMBLOCK(3,0,edx,SIZEOF_DCTELEM)], xmm3
|
|
|
+
|
|
|
+ ; pop edi ; unused
|
|
|
+ ; pop esi ; unused
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctflt-3dn.asm b/media/libjpeg/simd/i386/jidctflt-3dn.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctflt-3dn.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctflt-3dn.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctflt.asm - floating-point IDCT (3DNow! & MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -19,29 +19,29 @@
|
|
|
+ ; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+ %include "jdct.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_float_3dnow)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_float_3dnow):
|
|
|
+
|
|
|
+ PD_1_414 times 2 dd 1.414213562373095048801689
|
|
|
+ PD_1_847 times 2 dd 1.847759065022573512256366
|
|
|
+ PD_1_082 times 2 dd 1.082392200292393968799446
|
|
|
+ PD_2_613 times 2 dd 2.613125929752753055713286
|
|
|
+ PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3)
|
|
|
+ PB_CENTERJSAMP times 8 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -73,40 +73,40 @@ EXTN(jsimd_idct_float_3dnow):
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [workspace]
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+ lea edi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov ecx, DCTSIZE/2 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
|
|
|
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ jnz short .columnDCT
|
|
|
+
|
|
|
+- pushpic ebx ; save GOT address
|
|
|
++ PUSHPIC ebx ; save GOT address
|
|
|
+ mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
|
|
+ mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, ebx
|
|
|
+- poppic ebx ; restore GOT address
|
|
|
++ POPPIC ebx ; restore GOT address
|
|
|
+ jnz short .columnDCT
|
|
|
+
|
|
|
+ ; -- AC terms all zero
|
|
|
+
|
|
|
+ movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+
|
|
|
+ punpcklwd mm0, mm0
|
|
|
+ psrad mm0, (DWORD_BIT-WORD_BIT)
|
|
|
+@@ -122,17 +122,17 @@ EXTN(jsimd_idct_float_3dnow):
|
|
|
+ movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], mm0
|
|
|
+ movq MMWORD [MMBLOCK(0,2,edi,SIZEOF_FAST_FLOAT)], mm0
|
|
|
+ movq MMWORD [MMBLOCK(0,3,edi,SIZEOF_FAST_FLOAT)], mm0
|
|
|
+ movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
|
|
|
+ jmp near .nextcolumn
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -288,17 +288,17 @@ EXTN(jsimd_idct_float_3dnow):
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+ mov eax, [original_ebp]
|
|
|
+ lea esi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [output_col(eax)]
|
|
|
+ mov ecx, DCTSIZE/2 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+@@ -415,24 +415,24 @@ EXTN(jsimd_idct_float_3dnow):
|
|
|
+ movq mm4, mm6 ; transpose coefficients(phase 2)
|
|
|
+ punpcklwd mm6, mm1 ; mm6=(00 01 02 03 10 11 12 13)
|
|
|
+ punpckhwd mm4, mm1 ; mm4=(04 05 06 07 14 15 16 17)
|
|
|
+
|
|
|
+ movq mm7, mm6 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm6, mm4 ; mm6=(00 01 02 03 04 05 06 07)
|
|
|
+ punpckhdq mm7, mm4 ; mm7=(10 11 12 13 14 15 16 17)
|
|
|
+
|
|
|
+- pushpic ebx ; save GOT address
|
|
|
++ PUSHPIC ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
|
|
|
+
|
|
|
+- poppic ebx ; restore GOT address
|
|
|
++ POPPIC ebx ; restore GOT address
|
|
|
+
|
|
|
+ add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr
|
|
|
+ add edi, byte 2*SIZEOF_JSAMPROW
|
|
|
+ dec ecx ; ctr
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ femms ; empty MMX/3DNow! state
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctflt-sse.asm b/media/libjpeg/simd/i386/jidctflt-sse.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctflt-sse.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctflt-sse.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctflt.asm - floating-point IDCT (SSE & MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -18,40 +18,40 @@
|
|
|
+ ; (Discrete Cosine Transform). The following code is based directly on
|
|
|
+ ; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+ %include "jdct.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+
|
|
|
+-%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
|
|
|
++%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
|
|
|
+ shufps %1, %2, 0x44
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
++%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
+ shufps %1, %2, 0xEE
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_float_sse)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_float_sse):
|
|
|
+
|
|
|
+ PD_1_414 times 4 dd 1.414213562373095048801689
|
|
|
+ PD_1_847 times 4 dd 1.847759065022573512256366
|
|
|
+ PD_1_082 times 4 dd 1.082392200292393968799446
|
|
|
+ PD_M2_613 times 4 dd -2.613125929752753055713286
|
|
|
+ PD_0_125 times 4 dd 0.125 ; 1/8
|
|
|
+ PB_CENTERJSAMP times 8 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -83,26 +83,26 @@ EXTN(jsimd_idct_float_sse):
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [workspace]
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+ lea edi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
|
|
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ jnz near .columnDCT
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -144,17 +144,17 @@ EXTN(jsimd_idct_float_sse):
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ jmp near .nextcolumn
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -320,36 +320,36 @@ EXTN(jsimd_idct_float_sse):
|
|
|
+ movaps xmm2, xmm7 ; transpose coefficients(phase 1)
|
|
|
+ unpcklps xmm7, xmm3 ; xmm7=(20 30 21 31)
|
|
|
+ unpckhps xmm2, xmm3 ; xmm2=(22 32 23 33)
|
|
|
+ movaps xmm4, xmm5 ; transpose coefficients(phase 1)
|
|
|
+ unpcklps xmm5, xmm0 ; xmm5=(40 50 41 51)
|
|
|
+ unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
|
|
|
+
|
|
|
+ movaps xmm3, xmm6 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
|
|
|
+- unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
|
|
|
++ UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
|
|
|
++ UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
|
|
|
+ movaps xmm0, xmm1 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
|
|
|
+- unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
|
|
|
++ UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
|
|
|
++ UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
|
|
|
+
|
|
|
+ movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
|
|
|
+ movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
|
|
|
+
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
|
|
|
+
|
|
|
+ movaps xmm6, xmm5 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
|
|
|
+- unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
|
|
|
++ UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
|
|
|
++ UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
|
|
|
+ movaps xmm3, xmm4 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
|
|
|
+- unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
|
|
|
++ UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
|
|
|
++ UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
|
|
|
+
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+
|
|
|
+ .nextcolumn:
|
|
|
+ add esi, byte 4*SIZEOF_JCOEF ; coef_block
|
|
|
+@@ -367,17 +367,17 @@ EXTN(jsimd_idct_float_sse):
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+ mov eax, [original_ebp]
|
|
|
+ lea esi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [output_col(eax)]
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+@@ -531,28 +531,28 @@ EXTN(jsimd_idct_float_sse):
|
|
|
+
|
|
|
+ movq mm1, mm0 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm0, mm3 ; mm0=(00 01 02 03 04 05 06 07)
|
|
|
+ punpckhdq mm1, mm3 ; mm1=(10 11 12 13 14 15 16 17)
|
|
|
+ movq mm4, mm5 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm5, mm6 ; mm5=(20 21 22 23 24 25 26 27)
|
|
|
+ punpckhdq mm4, mm6 ; mm4=(30 31 32 33 34 35 36 37)
|
|
|
+
|
|
|
+- pushpic ebx ; save GOT address
|
|
|
++ PUSHPIC ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
|
|
|
+ mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
|
|
|
+
|
|
|
+- poppic ebx ; restore GOT address
|
|
|
++ POPPIC ebx ; restore GOT address
|
|
|
+
|
|
|
+ add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
|
|
|
+ add edi, byte 4*SIZEOF_JSAMPROW
|
|
|
+ dec ecx ; ctr
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctflt-sse2.asm b/media/libjpeg/simd/i386/jidctflt-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctflt-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctflt-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctflt.asm - floating-point IDCT (SSE & SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -18,40 +18,40 @@
|
|
|
+ ; (Discrete Cosine Transform). The following code is based directly on
|
|
|
+ ; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+ %include "jdct.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+
|
|
|
+-%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
|
|
|
++%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
|
|
|
+ shufps %1, %2, 0x44
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
++%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
+ shufps %1, %2, 0xEE
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_float_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_float_sse2):
|
|
|
+
|
|
|
+ PD_1_414 times 4 dd 1.414213562373095048801689
|
|
|
+ PD_1_847 times 4 dd 1.847759065022573512256366
|
|
|
+ PD_1_082 times 4 dd 1.082392200292393968799446
|
|
|
+ PD_M2_613 times 4 dd -2.613125929752753055713286
|
|
|
+ PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -83,26 +83,26 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [workspace]
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+ lea edi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
|
|
|
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ jnz near .columnDCT
|
|
|
+
|
|
|
+ movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq xmm2, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -145,17 +145,17 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm0
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm2
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm2
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ jmp near .nextcolumn
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq xmm0, XMM_MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq xmm1, XMM_MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq xmm2, XMM_MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -282,36 +282,36 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+ movaps xmm2, xmm7 ; transpose coefficients(phase 1)
|
|
|
+ unpcklps xmm7, xmm3 ; xmm7=(20 30 21 31)
|
|
|
+ unpckhps xmm2, xmm3 ; xmm2=(22 32 23 33)
|
|
|
+ movaps xmm4, xmm5 ; transpose coefficients(phase 1)
|
|
|
+ unpcklps xmm5, xmm0 ; xmm5=(40 50 41 51)
|
|
|
+ unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
|
|
|
+
|
|
|
+ movaps xmm3, xmm6 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
|
|
|
+- unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
|
|
|
++ UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
|
|
|
++ UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
|
|
|
+ movaps xmm0, xmm1 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
|
|
|
+- unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
|
|
|
++ UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
|
|
|
++ UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
|
|
|
+
|
|
|
+ movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
|
|
|
+ movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
|
|
|
+
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,0,edi,SIZEOF_FAST_FLOAT)], xmm6
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,0,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,0,edi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
|
|
|
+
|
|
|
+ movaps xmm6, xmm5 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
|
|
|
+- unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
|
|
|
++ UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
|
|
|
++ UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
|
|
|
+ movaps xmm3, xmm4 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
|
|
|
+- unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
|
|
|
++ UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
|
|
|
++ UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
|
|
|
+
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,1,edi,SIZEOF_FAST_FLOAT)], xmm4
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+
|
|
|
+ .nextcolumn:
|
|
|
+ add esi, byte 4*SIZEOF_JCOEF ; coef_block
|
|
|
+@@ -329,17 +329,17 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+ mov eax, [original_ebp]
|
|
|
+ lea esi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [output_col(eax)]
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm2, XMMWORD [XMMBLOCK(4,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm3, XMMWORD [XMMBLOCK(6,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+@@ -459,28 +459,28 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+
|
|
|
+ movdqa xmm7, xmm6 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq xmm6, xmm4 ; xmm6=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
|
|
|
+ punpckhdq xmm7, xmm4 ; xmm7=(20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37)
|
|
|
+
|
|
|
+ pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
|
|
|
+ pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
|
|
|
+
|
|
|
+- pushpic ebx ; save GOT address
|
|
|
++ PUSHPIC ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
|
|
|
+ movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm7
|
|
|
+ mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
|
|
|
+ movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
|
|
|
+
|
|
|
+- poppic ebx ; restore GOT address
|
|
|
++ POPPIC ebx ; restore GOT address
|
|
|
+
|
|
|
+ add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
|
|
|
+ add edi, byte 4*SIZEOF_JSAMPROW
|
|
|
+ dec ecx ; ctr
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctfst-mmx.asm b/media/libjpeg/simd/i386/jidctfst-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctfst-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctfst-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctfst.asm - fast integer IDCT (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -51,28 +51,28 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
|
|
|
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
|
|
|
+
|
|
|
+ %define PRE_MULTIPLY_SCALE_BITS 2
|
|
|
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_ifast_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_ifast_mmx):
|
|
|
+
|
|
|
+ PW_F1414 times 4 dw F_1_414 << CONST_SHIFT
|
|
|
+ PW_F1847 times 4 dw F_1_847 << CONST_SHIFT
|
|
|
+ PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT
|
|
|
+ PW_F1082 times 4 dw F_1_082 << CONST_SHIFT
|
|
|
+ PB_CENTERJSAMP times 8 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -104,26 +104,26 @@ EXTN(jsimd_idct_ifast_mmx):
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [workspace]
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+ lea edi, [workspace] ; JCOEF *wsptr
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
|
|
|
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ jnz short .columnDCT
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -158,17 +158,17 @@ EXTN(jsimd_idct_ifast_mmx):
|
|
|
+ movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
|
|
|
+ movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
|
|
|
+ movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
|
|
|
+ movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
|
|
|
+ movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
|
|
|
+ jmp near .nextcolumn
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
|
|
|
+@@ -321,17 +321,17 @@ EXTN(jsimd_idct_ifast_mmx):
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+ mov eax, [original_ebp]
|
|
|
+ lea esi, [workspace] ; JCOEF *wsptr
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [output_col(eax)]
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -459,28 +459,28 @@ EXTN(jsimd_idct_ifast_mmx):
|
|
|
+
|
|
|
+ movq mm7, mm6 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm6, mm0 ; mm6=(00 01 02 03 04 05 06 07)
|
|
|
+ punpckhdq mm7, mm0 ; mm7=(10 11 12 13 14 15 16 17)
|
|
|
+ movq mm1, mm5 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm5, mm4 ; mm5=(20 21 22 23 24 25 26 27)
|
|
|
+ punpckhdq mm1, mm4 ; mm1=(30 31 32 33 34 35 36 37)
|
|
|
+
|
|
|
+- pushpic ebx ; save GOT address
|
|
|
++ PUSHPIC ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
|
|
|
+ mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
|
|
|
+
|
|
|
+- poppic ebx ; restore GOT address
|
|
|
++ POPPIC ebx ; restore GOT address
|
|
|
+
|
|
|
+ add esi, byte 4*SIZEOF_JCOEF ; wsptr
|
|
|
+ add edi, byte 4*SIZEOF_JSAMPROW
|
|
|
+ dec ecx ; ctr
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctfst-sse2.asm b/media/libjpeg/simd/i386/jidctfst-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctfst-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctfst-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctfst.asm - fast integer IDCT (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -51,28 +51,28 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
|
|
|
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
|
|
|
+
|
|
|
+ %define PRE_MULTIPLY_SCALE_BITS 2
|
|
|
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_ifast_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_ifast_sse2):
|
|
|
+
|
|
|
+ PW_F1414 times 8 dw F_1_414 << CONST_SHIFT
|
|
|
+ PW_F1847 times 8 dw F_1_847 << CONST_SHIFT
|
|
|
+ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
|
|
|
+ PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -96,23 +96,23 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPL
|
|
|
+ EXTN(jsimd_idct_ifast_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
|
|
+@@ -150,17 +150,17 @@ EXTN(jsimd_idct_ifast_sse2):
|
|
|
+ pshufd xmm1, xmm7, 0x00 ; xmm1=col4=(04 04 04 04 04 04 04 04)
|
|
|
+ pshufd xmm4, xmm7, 0x55 ; xmm4=col5=(05 05 05 05 05 05 05 05)
|
|
|
+ pshufd xmm3, xmm7, 0xAA ; xmm3=col6=(06 06 06 06 06 06 06 06)
|
|
|
+ pshufd xmm7, xmm7, 0xFF ; xmm7=col7=(07 07 07 07 07 07 07 07)
|
|
|
+
|
|
|
+ movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1
|
|
|
+ movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3
|
|
|
+ jmp near .column_end
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_IFAST_MULT_TYPE)]
|
|
|
+@@ -485,17 +485,17 @@ EXTN(jsimd_idct_ifast_sse2):
|
|
|
+ mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
|
|
|
+ movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm2
|
|
|
+
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctint-avx2.asm b/media/libjpeg/simd/i386/jidctint-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctint-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctint-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctint.asm - accurate integer IDCT (AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -60,17 +60,17 @@ F_2_562 equ DESCALE(2751909506, 30 - CON
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit inverse matrix transpose using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%8: Temp registers
|
|
|
+
|
|
|
+-%macro dotranspose 8
|
|
|
++%macro DOTRANSPOSE 8
|
|
|
+ ; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
|
|
|
+ ; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
|
|
|
+ ; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
|
|
|
+ ; %8=(07 17 27 37 47 57 67 77 06 16 26 36 46 56 66 76)
|
|
|
+
|
|
|
+ vpermq %5, %1, 0xD8
|
|
|
+ vpermq %6, %2, 0x72
|
|
|
+ vpermq %7, %3, 0xD8
|
|
|
+@@ -113,17 +113,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit accurate integer inverse DCT using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%12: Temp registers
|
|
|
+ ; %9: Pass (1 or 2)
|
|
|
+
|
|
|
+-%macro dodct 13
|
|
|
++%macro DODCT 13
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ ; (Original)
|
|
|
+ ; z1 = (z2 + z3) * 0.541196100;
|
|
|
+ ; tmp2 = z1 + z3 * -1.847759065;
|
|
|
+ ; tmp3 = z1 + z2 * 0.765366865;
|
|
|
+ ;
|
|
|
+ ; (This implementation)
|
|
|
+@@ -245,17 +245,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ vpsrad %3, %3, DESCALE_P %+ %13
|
|
|
+ vpsrad %6, %6, DESCALE_P %+ %13
|
|
|
+ vpackssdw %3, %3, %6 ; %3=data4_5
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_islow_avx2):
|
|
|
+
|
|
|
+ PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ times 4 dw (F_0_541 - F_1_847), F_0_541
|
|
|
+ PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ times 4 dw (F_1_175 - F_0_390), F_1_175
|
|
|
+@@ -264,17 +264,17 @@ PW_MF060_MF089_MF050_MF256 times 4 dw
|
|
|
+ PW_MF089_F060_MF256_F050 times 4 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ times 4 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
|
|
|
+ PW_1_NEG1 times 8 dw 1
|
|
|
+ times 8 dw -1
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -298,23 +298,23 @@ PW_1_NEG1 times 8 dw
|
|
|
+ EXTN(jsimd_idct_islow_avx2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
|
|
+@@ -348,17 +348,17 @@ EXTN(jsimd_idct_islow_avx2):
|
|
|
+ vinserti128 ymm4, ymm4, xmm5, 1
|
|
|
+
|
|
|
+ vpshufd ymm0, ymm4, 0x00 ; ymm0=col0_4=(00 00 00 00 00 00 00 00 04 04 04 04 04 04 04 04)
|
|
|
+ vpshufd ymm1, ymm4, 0x55 ; ymm1=col1_5=(01 01 01 01 01 01 01 01 05 05 05 05 05 05 05 05)
|
|
|
+ vpshufd ymm2, ymm4, 0xAA ; ymm2=col2_6=(02 02 02 02 02 02 02 02 06 06 06 06 06 06 06 06)
|
|
|
+ vpshufd ymm3, ymm4, 0xFF ; ymm3=col3_7=(03 03 03 03 03 03 03 03 07 07 07 07 07 07 07 07)
|
|
|
+
|
|
|
+ jmp near .column_end
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,esi,SIZEOF_JCOEF)] ; ymm4=in0_1
|
|
|
+ vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,esi,SIZEOF_JCOEF)] ; ymm5=in2_3
|
|
|
+ vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,esi,SIZEOF_JCOEF)] ; ymm6=in4_5
|
|
|
+ vmovdqu ymm7, YMMWORD [YMMBLOCK(6,0,esi,SIZEOF_JCOEF)] ; ymm7=in6_7
|
|
|
+ vpmullw ymm4, ymm4, YMMWORD [YMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+@@ -366,20 +366,20 @@ EXTN(jsimd_idct_islow_avx2):
|
|
|
+ vpmullw ymm6, ymm6, YMMWORD [YMMBLOCK(4,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+ vpmullw ymm7, ymm7, YMMWORD [YMMBLOCK(6,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+
|
|
|
+ vperm2i128 ymm0, ymm4, ymm6, 0x20 ; ymm0=in0_4
|
|
|
+ vperm2i128 ymm1, ymm5, ymm4, 0x31 ; ymm1=in3_1
|
|
|
+ vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
|
|
|
+ vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
|
|
|
+ ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
+ ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
|
|
|
+
|
|
|
+ .column_end:
|
|
|
+
|
|
|
+ ; -- Prefetch the next coefficient block
|
|
|
+
|
|
|
+ prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
|
|
|
+ prefetchnta [esi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
|
|
|
+@@ -390,20 +390,20 @@ EXTN(jsimd_idct_islow_avx2):
|
|
|
+
|
|
|
+ mov eax, [original_ebp]
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [output_col(eax)]
|
|
|
+
|
|
|
+ vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm3=in7_5
|
|
|
+ vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
|
|
|
+ ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
+ ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
|
|
|
+
|
|
|
+ vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
|
|
|
+ vpacksswb ymm1, ymm2, ymm4 ; ymm1=data23_67
|
|
|
+ vpaddb ymm0, ymm0, [GOTOFF(ebx,PB_CENTERJSAMP)]
|
|
|
+ vpaddb ymm1, ymm1, [GOTOFF(ebx,PB_CENTERJSAMP)]
|
|
|
+
|
|
|
+ vextracti128 xmm6, ymm1, 1 ; xmm3=data67
|
|
|
+@@ -437,17 +437,17 @@ EXTN(jsimd_idct_islow_avx2):
|
|
|
+ mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm6
|
|
|
+ movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm7
|
|
|
+
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctint-mmx.asm b/media/libjpeg/simd/i386/jidctint-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctint-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctint-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctint.asm - accurate integer IDCT (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -58,34 +58,34 @@ F_1_961 equ DESCALE(2106220350, 30 - CON
|
|
|
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_islow_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_islow_mmx):
|
|
|
+
|
|
|
+ PW_F130_F054 times 2 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ PW_F054_MF130 times 2 dw F_0_541, (F_0_541 - F_1_847)
|
|
|
+ PW_MF078_F117 times 2 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ PW_F117_F078 times 2 dw F_1_175, (F_1_175 - F_0_390)
|
|
|
+ PW_MF060_MF089 times 2 dw (F_0_298 - F_0_899), -F_0_899
|
|
|
+ PW_MF089_F060 times 2 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ PW_MF050_MF256 times 2 dw (F_2_053 - F_2_562), -F_2_562
|
|
|
+ PW_MF256_F050 times 2 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PB_CENTERJSAMP times 8 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -117,26 +117,26 @@ EXTN(jsimd_idct_islow_mmx):
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [workspace]
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+ lea edi, [workspace] ; JCOEF *wsptr
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
|
|
|
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ jnz short .columnDCT
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -173,17 +173,17 @@ EXTN(jsimd_idct_islow_mmx):
|
|
|
+ movq MMWORD [MMBLOCK(0,1,edi,SIZEOF_JCOEF)], mm0
|
|
|
+ movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(1,1,edi,SIZEOF_JCOEF)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
|
|
|
+ movq MMWORD [MMBLOCK(2,1,edi,SIZEOF_JCOEF)], mm2
|
|
|
+ movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
|
|
|
+ movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
|
|
|
+ jmp near .nextcolumn
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ pmullw mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+@@ -508,17 +508,17 @@ EXTN(jsimd_idct_islow_mmx):
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+ mov eax, [original_ebp]
|
|
|
+ lea esi, [workspace] ; JCOEF *wsptr
|
|
|
+ mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [output_col(eax)]
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(4,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(6,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -811,28 +811,28 @@ EXTN(jsimd_idct_islow_mmx):
|
|
|
+
|
|
|
+ movq mm3, mm0 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm0, mm1 ; mm0=(00 01 02 03 04 05 06 07)
|
|
|
+ punpckhdq mm3, mm1 ; mm3=(10 11 12 13 14 15 16 17)
|
|
|
+ movq mm4, mm7 ; transpose coefficients(phase 3)
|
|
|
+ punpckldq mm7, mm5 ; mm7=(20 21 22 23 24 25 26 27)
|
|
|
+ punpckhdq mm4, mm5 ; mm4=(30 31 32 33 34 35 36 37)
|
|
|
+
|
|
|
+- pushpic ebx ; save GOT address
|
|
|
++ PUSHPIC ebx ; save GOT address
|
|
|
+
|
|
|
+ mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm0
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm3
|
|
|
+ mov edx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
|
|
|
+ mov ebx, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
|
|
+ movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
|
|
|
+ movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
|
|
|
+
|
|
|
+- poppic ebx ; restore GOT address
|
|
|
++ POPPIC ebx ; restore GOT address
|
|
|
+
|
|
|
+ add esi, byte 4*SIZEOF_JCOEF ; wsptr
|
|
|
+ add edi, byte 4*SIZEOF_JSAMPROW
|
|
|
+ dec ecx ; ctr
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctint-sse2.asm b/media/libjpeg/simd/i386/jidctint-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctint-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctint-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctint.asm - accurate integer IDCT (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -58,34 +58,34 @@ F_1_961 equ DESCALE(2106220350, 30 - CON
|
|
|
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_islow_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_islow_sse2):
|
|
|
+
|
|
|
+ PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847)
|
|
|
+ PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390)
|
|
|
+ PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899
|
|
|
+ PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562
|
|
|
+ PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -109,23 +109,23 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPL
|
|
|
+ EXTN(jsimd_idct_islow_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
|
|
+@@ -167,17 +167,17 @@ EXTN(jsimd_idct_islow_sse2):
|
|
|
+ pshufd xmm2, xmm4, 0xAA ; xmm2=col6=(06 06 06 06 06 06 06 06)
|
|
|
+ pshufd xmm4, xmm4, 0xFF ; xmm4=col7=(07 07 07 07 07 07 07 07)
|
|
|
+
|
|
|
+ movdqa XMMWORD [wk(8)], xmm6 ; wk(8)=col1
|
|
|
+ movdqa XMMWORD [wk(9)], xmm5 ; wk(9)=col3
|
|
|
+ movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5
|
|
|
+ movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7
|
|
|
+ jmp near .column_end
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ pmullw xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+@@ -842,17 +842,17 @@ EXTN(jsimd_idct_islow_sse2):
|
|
|
+ mov esi, JSAMPROW [edi+7*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm2
|
|
|
+ movq XMM_MMWORD [esi+eax*SIZEOF_JSAMPLE], xmm5
|
|
|
+
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctred-mmx.asm b/media/libjpeg/simd/i386/jidctred-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctred-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctred-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctred.asm - reduced-size IDCT (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -64,17 +64,17 @@ F_1_847 equ DESCALE(1984016188, 30 - CON
|
|
|
+ F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS) ; FIX(2.172734803)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_red_mmx)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_red_mmx):
|
|
|
+
|
|
|
+ PW_F184_MF076 times 2 dw F_1_847, -F_0_765
|
|
|
+ PW_F256_F089 times 2 dw F_2_562, F_0_899
|
|
|
+ PW_F106_MF217 times 2 dw F_1_061, -F_2_172
|
|
|
+ PW_MF060_MF050 times 2 dw -F_0_601, -F_0_509
|
|
|
+@@ -82,17 +82,17 @@ PW_F145_MF021 times 2 dw F_1_451, -F_
|
|
|
+ PW_F362_MF127 times 2 dw F_3_624, -F_1_272
|
|
|
+ PW_F085_MF072 times 2 dw F_0_850, -F_0_720
|
|
|
+ PD_DESCALE_P1_4 times 2 dd 1 << (DESCALE_P1_4 - 1)
|
|
|
+ PD_DESCALE_P2_4 times 2 dd 1 << (DESCALE_P2_4 - 1)
|
|
|
+ PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2 - 1)
|
|
|
+ PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2 - 1)
|
|
|
+ PB_CENTERJSAMP times 8 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ ; producing a reduced-size 4x4 output block.
|
|
|
+ ;
|
|
|
+@@ -119,32 +119,32 @@ PB_CENTERJSAMP times 8 db CENTERJSAMPL
|
|
|
+ EXTN(jsimd_idct_4x4_mmx):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [workspace]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+ lea edi, [workspace] ; JCOEF *wsptr
|
|
|
+ mov ecx, DCTSIZE/4 ; ctr
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .columnloop:
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
|
|
|
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+ jnz short .columnDCT
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(2,0,esi,SIZEOF_JCOEF)]
|
|
|
+@@ -176,17 +176,17 @@ EXTN(jsimd_idct_4x4_mmx):
|
|
|
+ punpckldq mm2, mm2 ; mm2=(02 02 02 02)
|
|
|
+ punpckhdq mm3, mm3 ; mm3=(03 03 03 03)
|
|
|
+
|
|
|
+ movq MMWORD [MMBLOCK(0,0,edi,SIZEOF_JCOEF)], mm0
|
|
|
+ movq MMWORD [MMBLOCK(1,0,edi,SIZEOF_JCOEF)], mm1
|
|
|
+ movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
|
|
|
+ movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
|
|
|
+ jmp near .nextcolumn
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Odd part
|
|
|
+
|
|
|
+ movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
|
|
+ pmullw mm0, MMWORD [MMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+@@ -474,17 +474,17 @@ EXTN(jsimd_idct_4x4_mmx):
|
|
|
+ movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
|
|
|
+
|
|
|
+ emms ; empty MMX state
|
|
|
+
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; need not be preserved
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+@@ -507,17 +507,17 @@ EXTN(jsimd_idct_2x2_mmx):
|
|
|
+ push ebp
|
|
|
+ mov ebp, esp
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ mov edx, POINTER [dct_table(ebp)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(ebp)] ; inptr
|
|
|
+
|
|
|
+ ; | input: | result: |
|
|
|
+ ; | 00 01 ** 03 ** 05 ** 07 | |
|
|
|
+diff --git a/media/libjpeg/simd/i386/jidctred-sse2.asm b/media/libjpeg/simd/i386/jidctred-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jidctred-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jidctred-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctred.asm - reduced-size IDCT (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -64,17 +64,17 @@ F_1_847 equ DESCALE(1984016188, 30 - CON
|
|
|
+ F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS) ; FIX(2.172734803)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_red_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_red_sse2):
|
|
|
+
|
|
|
+ PW_F184_MF076 times 4 dw F_1_847, -F_0_765
|
|
|
+ PW_F256_F089 times 4 dw F_2_562, F_0_899
|
|
|
+ PW_F106_MF217 times 4 dw F_1_061, -F_2_172
|
|
|
+ PW_MF060_MF050 times 4 dw -F_0_601, -F_0_509
|
|
|
+@@ -82,17 +82,17 @@ PW_F145_MF021 times 4 dw F_1_451, -F
|
|
|
+ PW_F362_MF127 times 4 dw F_3_624, -F_1_272
|
|
|
+ PW_F085_MF072 times 4 dw F_0_850, -F_0_720
|
|
|
+ PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4 - 1)
|
|
|
+ PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4 - 1)
|
|
|
+ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
|
|
|
+ PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 32
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ ; producing a reduced-size 4x4 output block.
|
|
|
+ ;
|
|
|
+@@ -117,23 +117,23 @@ PB_CENTERJSAMP times 16 db CENTERJSAMP
|
|
|
+ EXTN(jsimd_idct_4x4_sse2):
|
|
|
+ push ebp
|
|
|
+ mov eax, esp ; eax = original ebp
|
|
|
+ sub esp, byte 4
|
|
|
+ and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+ mov [esp], eax
|
|
|
+ mov ebp, esp ; ebp = aligned ebp
|
|
|
+ lea esp, [wk(0)]
|
|
|
+- pushpic ebx
|
|
|
++ PUSHPIC ebx
|
|
|
+ ; push ecx ; unused
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ ; mov eax, [original_ebp]
|
|
|
+ mov edx, POINTER [dct_table(eax)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(eax)] ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
|
|
+@@ -166,17 +166,17 @@ EXTN(jsimd_idct_4x4_sse2):
|
|
|
+ punpckhwd xmm3, xmm3 ; xmm3=(04 04 05 05 06 06 07 07)
|
|
|
+
|
|
|
+ pshufd xmm1, xmm0, 0x50 ; xmm1=[col0 col1]=(00 00 00 00 01 01 01 01)
|
|
|
+ pshufd xmm0, xmm0, 0xFA ; xmm0=[col2 col3]=(02 02 02 02 03 03 03 03)
|
|
|
+ pshufd xmm6, xmm3, 0x50 ; xmm6=[col4 col5]=(04 04 04 04 05 05 05 05)
|
|
|
+ pshufd xmm3, xmm3, 0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
|
|
|
+
|
|
|
+ jmp near .column_end
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ %endif
|
|
|
+ .columnDCT:
|
|
|
+
|
|
|
+ ; -- Odd part
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_JCOEF)]
|
|
|
+ pmullw xmm0, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+@@ -395,17 +395,17 @@ EXTN(jsimd_idct_4x4_sse2):
|
|
|
+ mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
|
|
|
+ movd XMM_DWORD [edx+eax*SIZEOF_JSAMPLE], xmm1
|
|
|
+ movd XMM_DWORD [esi+eax*SIZEOF_JSAMPLE], xmm3
|
|
|
+
|
|
|
+ pop edi
|
|
|
+ pop esi
|
|
|
+ ; pop edx ; need not be preserved
|
|
|
+ ; pop ecx ; unused
|
|
|
+- poppic ebx
|
|
|
++ POPPIC ebx
|
|
|
+ mov esp, ebp ; esp <- aligned ebp
|
|
|
+ pop esp ; esp <- original ebp
|
|
|
+ pop ebp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+@@ -428,17 +428,17 @@ EXTN(jsimd_idct_2x2_sse2):
|
|
|
+ push ebp
|
|
|
+ mov ebp, esp
|
|
|
+ push ebx
|
|
|
+ ; push ecx ; need not be preserved
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+- get_GOT ebx ; get GOT address
|
|
|
++ GET_GOT ebx ; get GOT address
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ mov edx, POINTER [dct_table(ebp)] ; quantptr
|
|
|
+ mov esi, JCOEFPTR [coef_block(ebp)] ; inptr
|
|
|
+
|
|
|
+ ; | input: | result: |
|
|
|
+ ; | 00 01 ** 03 ** 05 ** 07 | |
|
|
|
+diff --git a/media/libjpeg/simd/i386/jquant-3dn.asm b/media/libjpeg/simd/i386/jquant-3dn.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jquant-3dn.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jquant-3dn.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -47,17 +47,17 @@ EXTN(jsimd_convsamp_float_3dnow):
|
|
|
+ pcmpeqw mm7, mm7
|
|
|
+ psllw mm7, 7
|
|
|
+ packsswb mm7, mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..)
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [start_col]
|
|
|
+ mov edi, POINTER [workspace] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .convloop:
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+
|
|
|
+ movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
|
|
|
+ movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
|
|
|
+
|
|
|
+ psubb mm0, mm7 ; mm0=(01234567)
|
|
|
+@@ -149,17 +149,17 @@ EXTN(jsimd_quantize_float_3dnow):
|
|
|
+ mov eax, 0x4B400000 ; (float)0x00C00000 (rndint_magic)
|
|
|
+ movd mm7, eax
|
|
|
+ punpckldq mm7, mm7 ; mm7={12582912.0F 12582912.0F}
|
|
|
+
|
|
|
+ mov esi, POINTER [workspace]
|
|
|
+ mov edx, POINTER [divisors]
|
|
|
+ mov edi, JCOEFPTR [coef_block]
|
|
|
+ mov eax, DCTSIZE2/16
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .quantloop:
|
|
|
+ movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ pfmul mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ pfmul mm1, MMWORD [MMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm2, MMWORD [MMBLOCK(0,2,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(0,3,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ pfmul mm2, MMWORD [MMBLOCK(0,2,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+diff --git a/media/libjpeg/simd/i386/jquant-mmx.asm b/media/libjpeg/simd/i386/jquant-mmx.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jquant-mmx.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jquant-mmx.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquant.asm - sample data conversion and quantization (MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -47,17 +47,17 @@ EXTN(jsimd_convsamp_mmx):
|
|
|
+ pxor mm6, mm6 ; mm6=(all 0's)
|
|
|
+ pcmpeqw mm7, mm7
|
|
|
+ psllw mm7, 7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [start_col]
|
|
|
+ mov edi, POINTER [workspace] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .convloop:
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+
|
|
|
+ movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; mm0=(01234567)
|
|
|
+ movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE] ; mm1=(89ABCDEF)
|
|
|
+
|
|
|
+ mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+@@ -152,20 +152,20 @@ EXTN(jsimd_quantize_mmx):
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+ mov esi, POINTER [workspace]
|
|
|
+ mov edx, POINTER [divisors]
|
|
|
+ mov edi, JCOEFPTR [coef_block]
|
|
|
+ mov ah, 2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .quantloop1:
|
|
|
+ mov al, DCTSIZE2/8/2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .quantloop2:
|
|
|
+ movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
|
|
|
+ movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]
|
|
|
+
|
|
|
+ movq mm0, mm2
|
|
|
+ movq mm1, mm3
|
|
|
+
|
|
|
+ psraw mm2, (WORD_BIT-1) ; -1 if value < 0, 0 otherwise
|
|
|
+diff --git a/media/libjpeg/simd/i386/jquant-sse.asm b/media/libjpeg/simd/i386/jquant-sse.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jquant-sse.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jquant-sse.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquant.asm - sample data conversion and quantization (SSE & MMX)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -47,17 +47,17 @@ EXTN(jsimd_convsamp_float_sse):
|
|
|
+ pcmpeqw mm7, mm7
|
|
|
+ psllw mm7, 7
|
|
|
+ packsswb mm7, mm7 ; mm7 = PB_CENTERJSAMPLE (0x808080..)
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [start_col]
|
|
|
+ mov edi, POINTER [workspace] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .convloop:
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+
|
|
|
+ movq mm0, MMWORD [ebx+eax*SIZEOF_JSAMPLE]
|
|
|
+ movq mm1, MMWORD [edx+eax*SIZEOF_JSAMPLE]
|
|
|
+
|
|
|
+ psubb mm0, mm7 ; mm0=(01234567)
|
|
|
+@@ -145,17 +145,17 @@ EXTN(jsimd_quantize_float_sse):
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+ mov esi, POINTER [workspace]
|
|
|
+ mov edx, POINTER [divisors]
|
|
|
+ mov edi, JCOEFPTR [coef_block]
|
|
|
+ mov eax, DCTSIZE2/16
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .quantloop:
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+diff --git a/media/libjpeg/simd/i386/jquantf-sse2.asm b/media/libjpeg/simd/i386/jquantf-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jquantf-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jquantf-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -47,17 +47,17 @@ EXTN(jsimd_convsamp_float_sse2):
|
|
|
+ pcmpeqw xmm7, xmm7
|
|
|
+ psllw xmm7, 7
|
|
|
+ packsswb xmm7, xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [start_col]
|
|
|
+ mov edi, POINTER [workspace] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/2
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .convloop:
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+
|
|
|
+ movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE]
|
|
|
+ movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE]
|
|
|
+
|
|
|
+ psubb xmm0, xmm7 ; xmm0=(01234567)
|
|
|
+@@ -122,17 +122,17 @@ EXTN(jsimd_quantize_float_sse2):
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+ mov esi, POINTER [workspace]
|
|
|
+ mov edx, POINTER [divisors]
|
|
|
+ mov edi, JCOEFPTR [coef_block]
|
|
|
+ mov eax, DCTSIZE2/16
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .quantloop:
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ mulps xmm0, XMMWORD [XMMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ mulps xmm1, XMMWORD [XMMBLOCK(0,1,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm2, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm3, XMMWORD [XMMBLOCK(1,1,esi,SIZEOF_FAST_FLOAT)]
|
|
|
+ mulps xmm2, XMMWORD [XMMBLOCK(1,0,edx,SIZEOF_FAST_FLOAT)]
|
|
|
+diff --git a/media/libjpeg/simd/i386/jquanti-sse2.asm b/media/libjpeg/simd/i386/jquanti-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/i386/jquanti-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/i386/jquanti-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquanti.asm - sample data conversion and quantization (SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -47,17 +47,17 @@ EXTN(jsimd_convsamp_sse2):
|
|
|
+ pxor xmm6, xmm6 ; xmm6=(all 0's)
|
|
|
+ pcmpeqw xmm7, xmm7
|
|
|
+ psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
|
|
|
+
|
|
|
+ mov esi, JSAMPARRAY [sample_data] ; (JSAMPROW *)
|
|
|
+ mov eax, JDIMENSION [start_col]
|
|
|
+ mov edi, POINTER [workspace] ; (DCTELEM *)
|
|
|
+ mov ecx, DCTSIZE/4
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .convloop:
|
|
|
+ mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+
|
|
|
+ movq xmm0, XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE] ; xmm0=(01234567)
|
|
|
+ movq xmm1, XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE] ; xmm1=(89ABCDEF)
|
|
|
+
|
|
|
+ mov ebx, JSAMPROW [esi+2*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+@@ -128,17 +128,17 @@ EXTN(jsimd_quantize_sse2):
|
|
|
+ ; push edx ; need not be preserved
|
|
|
+ push esi
|
|
|
+ push edi
|
|
|
+
|
|
|
+ mov esi, POINTER [workspace]
|
|
|
+ mov edx, POINTER [divisors]
|
|
|
+ mov edi, JCOEFPTR [coef_block]
|
|
|
+ mov eax, DCTSIZE2/32
|
|
|
+- alignx 16, 7
|
|
|
++ ALIGNX 16, 7
|
|
|
+ .quantloop:
|
|
|
+ movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm6, XMMWORD [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm7, XMMWORD [XMMBLOCK(3,0,esi,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm0, xmm4
|
|
|
+ movdqa xmm1, xmm5
|
|
|
+ movdqa xmm2, xmm6
|
|
|
+diff --git a/media/libjpeg/simd/nasm/jsimdext.inc b/media/libjpeg/simd/nasm/jsimdext.inc
|
|
|
+--- a/media/libjpeg/simd/nasm/jsimdext.inc
|
|
|
++++ b/media/libjpeg/simd/nasm/jsimdext.inc
|
|
|
+@@ -1,15 +1,16 @@
|
|
|
+ ;
|
|
|
+ ; jsimdext.inc - common declarations
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2010, 2016, 2018-2019, D. R. Commander.
|
|
|
++; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthieu Darbois.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
|
|
|
+ ;
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ;
|
|
|
+ ; This software is provided 'as-is', without any express or implied
|
|
|
+ ; warranty. In no event will the authors be held liable for any damages
|
|
|
+ ; arising from the use of this software.
|
|
|
+@@ -70,16 +71,24 @@
|
|
|
+ %elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------
|
|
|
+ ; * Linux
|
|
|
+ ; * *BSD family Unix using elf format
|
|
|
+ ; * Unix System V, including Solaris x86, UnixWare and SCO Unix
|
|
|
+
|
|
|
+ ; mark stack as non-executable
|
|
|
+ section .note.GNU-stack noalloc noexec nowrite progbits
|
|
|
+
|
|
|
++%ifdef __CET__
|
|
|
++%ifdef __x86_64__
|
|
|
++section .note.gnu.property note alloc noexec align=8
|
|
|
++ dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
|
|
|
++ dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
|
|
|
++%endif
|
|
|
++%endif
|
|
|
++
|
|
|
+ ; -- segment definition --
|
|
|
+ ;
|
|
|
+ %ifdef __x86_64__
|
|
|
+ %define SEG_TEXT .text progbits align=32
|
|
|
+ %define SEG_CONST .rodata progbits align=32
|
|
|
+ %else
|
|
|
+ %define SEG_TEXT .text progbits alloc exec nowrite align=32
|
|
|
+ %define SEG_CONST .rodata progbits alloc noexec nowrite align=32
|
|
|
+@@ -266,17 +275,17 @@ section .note.GNU-stack noalloc noexec n
|
|
|
+ ; At present, nasm doesn't seem to support PIC generation for Mach-O.
|
|
|
+ ; The PIC support code below is a little tricky.
|
|
|
+
|
|
|
+ SECTION SEG_CONST
|
|
|
+ const_base:
|
|
|
+
|
|
|
+ %define GOTOFF(got, sym) (got) + (sym) - const_base
|
|
|
+
|
|
|
+-%imacro get_GOT 1
|
|
|
++%imacro GET_GOT 1
|
|
|
+ ; NOTE: this macro destroys ecx resister.
|
|
|
+ call %%geteip
|
|
|
+ add ecx, byte (%%ref - $)
|
|
|
+ jmp short %%adjust
|
|
|
+ %%geteip:
|
|
|
+ mov ecx, POINTER [esp]
|
|
|
+ ret
|
|
|
+ %%adjust:
|
|
|
+@@ -298,62 +307,62 @@ const_base:
|
|
|
+ %endif ; (%1 == ebx)
|
|
|
+ pop ebp
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %else ; GOT_SYMBOL != _MACHO_PIC_ ----------------
|
|
|
+
|
|
|
+ %define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff
|
|
|
+
|
|
|
+-%imacro get_GOT 1
|
|
|
++%imacro GET_GOT 1
|
|
|
+ extern GOT_SYMBOL
|
|
|
+ call %%geteip
|
|
|
+ add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
|
|
|
+ jmp short %%done
|
|
|
+ %%geteip:
|
|
|
+ mov %1, POINTER [esp]
|
|
|
+ ret
|
|
|
+ %%done:
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
|
|
|
+
|
|
|
+-%imacro pushpic 1.nolist
|
|
|
++%imacro PUSHPIC 1.nolist
|
|
|
+ push %1
|
|
|
+ %endmacro
|
|
|
+-%imacro poppic 1.nolist
|
|
|
++%imacro POPPIC 1.nolist
|
|
|
+ pop %1
|
|
|
+ %endmacro
|
|
|
+-%imacro movpic 2.nolist
|
|
|
++%imacro MOVPIC 2.nolist
|
|
|
+ mov %1, %2
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %else ; !PIC -----------------------------------------
|
|
|
+
|
|
|
+ %define GOTOFF(got, sym) (sym)
|
|
|
+
|
|
|
+-%imacro get_GOT 1.nolist
|
|
|
++%imacro GET_GOT 1.nolist
|
|
|
+ %endmacro
|
|
|
+-%imacro pushpic 1.nolist
|
|
|
++%imacro PUSHPIC 1.nolist
|
|
|
+ %endmacro
|
|
|
+-%imacro poppic 1.nolist
|
|
|
++%imacro POPPIC 1.nolist
|
|
|
+ %endmacro
|
|
|
+-%imacro movpic 2.nolist
|
|
|
++%imacro MOVPIC 2.nolist
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %endif ; PIC -----------------------------------------
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; Align the next instruction on {2,4,8,16,..}-byte boundary.
|
|
|
+ ; ".balign n,,m" in GNU as
|
|
|
+ ;
|
|
|
+ %define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
|
|
|
+ %define FILLB(b, n) (($$-(b)) & ((n)-1))
|
|
|
+
|
|
|
+-%imacro alignx 1-2.nolist 0xFFFF
|
|
|
++%imacro ALIGNX 1-2.nolist 0xFFFF
|
|
|
+ %%bs: \
|
|
|
+ times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
|
|
|
+ db 0x90 ; nop
|
|
|
+ times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 9 \
|
|
|
+ db 0x8D, 0x9C, 0x23, 0x00, 0x00, 0x00, 0x00 ; lea ebx,[ebx+0x00000000]
|
|
|
+ times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 7 \
|
|
|
+ db 0x8D, 0xAC, 0x25, 0x00, 0x00, 0x00, 0x00 ; lea ebp,[ebp+0x00000000]
|
|
|
+ times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 6 \
|
|
|
+@@ -365,25 +374,25 @@ const_base:
|
|
|
+ times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 2 \
|
|
|
+ db 0x8B, 0xED ; mov ebp,ebp
|
|
|
+ times MSKLE(FILLB(%%bs, %1), %2) & FILLB($, %1) / 1 \
|
|
|
+ db 0x90 ; nop
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; Align the next data on {2,4,8,16,..}-byte boundary.
|
|
|
+ ;
|
|
|
+-%imacro alignz 1.nolist
|
|
|
++%imacro ALIGNZ 1.nolist
|
|
|
+ align %1, db 0 ; filling zeros
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %ifdef __x86_64__
|
|
|
+
|
|
|
+ %ifdef WIN64
|
|
|
+
|
|
|
+-%imacro collect_args 1
|
|
|
++%imacro COLLECT_ARGS 1
|
|
|
+ sub rsp, SIZEOF_XMMWORD
|
|
|
+ movaps XMMWORD [rsp], xmm6
|
|
|
+ sub rsp, SIZEOF_XMMWORD
|
|
|
+ movaps XMMWORD [rsp], xmm7
|
|
|
+ mov r10, rcx
|
|
|
+ %if %1 > 1
|
|
|
+ mov r11, rdx
|
|
|
+ %endif
|
|
|
+@@ -392,27 +401,27 @@ const_base:
|
|
|
+ mov r12, r8
|
|
|
+ %endif
|
|
|
+ %if %1 > 3
|
|
|
+ push r13
|
|
|
+ mov r13, r9
|
|
|
+ %endif
|
|
|
+ %if %1 > 4
|
|
|
+ push r14
|
|
|
+- mov r14, [rax+48]
|
|
|
++ mov r14, [rbp+48]
|
|
|
+ %endif
|
|
|
+ %if %1 > 5
|
|
|
+ push r15
|
|
|
+- mov r15, [rax+56]
|
|
|
++ mov r15, [rbp+56]
|
|
|
+ %endif
|
|
|
+ push rsi
|
|
|
+ push rdi
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%imacro uncollect_args 1
|
|
|
++%imacro UNCOLLECT_ARGS 1
|
|
|
+ pop rdi
|
|
|
+ pop rsi
|
|
|
+ %if %1 > 5
|
|
|
+ pop r15
|
|
|
+ %endif
|
|
|
+ %if %1 > 4
|
|
|
+ pop r14
|
|
|
+ %endif
|
|
|
+@@ -423,47 +432,47 @@ const_base:
|
|
|
+ pop r12
|
|
|
+ %endif
|
|
|
+ movaps xmm7, XMMWORD [rsp]
|
|
|
+ add rsp, SIZEOF_XMMWORD
|
|
|
+ movaps xmm6, XMMWORD [rsp]
|
|
|
+ add rsp, SIZEOF_XMMWORD
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%imacro push_xmm 1
|
|
|
++%imacro PUSH_XMM 1
|
|
|
+ sub rsp, %1 * SIZEOF_XMMWORD
|
|
|
+ movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
|
|
|
+ %if %1 > 1
|
|
|
+ movaps XMMWORD [rsp+1*SIZEOF_XMMWORD], xmm9
|
|
|
+ %endif
|
|
|
+ %if %1 > 2
|
|
|
+ movaps XMMWORD [rsp+2*SIZEOF_XMMWORD], xmm10
|
|
|
+ %endif
|
|
|
+ %if %1 > 3
|
|
|
+ movaps XMMWORD [rsp+3*SIZEOF_XMMWORD], xmm11
|
|
|
+ %endif
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%imacro pop_xmm 1
|
|
|
++%imacro POP_XMM 1
|
|
|
+ movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
|
|
|
+ %if %1 > 1
|
|
|
+ movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
|
|
|
+ %endif
|
|
|
+ %if %1 > 2
|
|
|
+ movaps xmm10, XMMWORD [rsp+2*SIZEOF_XMMWORD]
|
|
|
+ %endif
|
|
|
+ %if %1 > 3
|
|
|
+ movaps xmm11, XMMWORD [rsp+3*SIZEOF_XMMWORD]
|
|
|
+ %endif
|
|
|
+ add rsp, %1 * SIZEOF_XMMWORD
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %else
|
|
|
+
|
|
|
+-%imacro collect_args 1
|
|
|
++%imacro COLLECT_ARGS 1
|
|
|
+ push r10
|
|
|
+ mov r10, rdi
|
|
|
+ %if %1 > 1
|
|
|
+ push r11
|
|
|
+ mov r11, rsi
|
|
|
+ %endif
|
|
|
+ %if %1 > 2
|
|
|
+ push r12
|
|
|
+@@ -478,17 +487,17 @@ const_base:
|
|
|
+ mov r14, r8
|
|
|
+ %endif
|
|
|
+ %if %1 > 5
|
|
|
+ push r15
|
|
|
+ mov r15, r9
|
|
|
+ %endif
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%imacro uncollect_args 1
|
|
|
++%imacro UNCOLLECT_ARGS 1
|
|
|
+ %if %1 > 5
|
|
|
+ pop r15
|
|
|
+ %endif
|
|
|
+ %if %1 > 4
|
|
|
+ pop r14
|
|
|
+ %endif
|
|
|
+ %if %1 > 3
|
|
|
+ pop r13
|
|
|
+@@ -497,24 +506,37 @@ const_base:
|
|
|
+ pop r12
|
|
|
+ %endif
|
|
|
+ %if %1 > 1
|
|
|
+ pop r11
|
|
|
+ %endif
|
|
|
+ pop r10
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%imacro push_xmm 1
|
|
|
++%imacro PUSH_XMM 1
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%imacro pop_xmm 1
|
|
|
++%imacro POP_XMM 1
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ %endif
|
|
|
+
|
|
|
+ %endif
|
|
|
+
|
|
|
++%ifdef __CET__
|
|
|
++
|
|
|
++%imacro ENDBR64 0
|
|
|
++ dd 0xfa1e0ff3
|
|
|
++%endmacro
|
|
|
++
|
|
|
++%else
|
|
|
++
|
|
|
++%imacro ENDBR64 0
|
|
|
++%endmacro
|
|
|
++
|
|
|
++%endif
|
|
|
++
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; Defines picked up from the C headers
|
|
|
+ ;
|
|
|
+ %include "jsimdcfg.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jccolext-avx2.asm b/media/libjpeg/simd/x86_64/jccolext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jccolext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jccolext-avx2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jccolext.asm - colorspace conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -28,31 +29,32 @@
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION img_width
|
|
|
+ ; r11 = JSAMPARRAY input_buf
|
|
|
+ ; r12 = JSAMPIMAGE output_buf
|
|
|
+ ; r13d = JDIMENSION output_row
|
|
|
+ ; r14d = int num_rows
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 8
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 5
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, (SIZEOF_YMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 5
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -543,17 +545,17 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
|
|
|
+ add rbx, byte SIZEOF_JSAMPROW
|
|
|
+ add rdx, byte SIZEOF_JSAMPROW
|
|
|
+ dec rax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 5
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 5
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jccolext-sse2.asm b/media/libjpeg/simd/x86_64/jccolext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jccolext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jccolext-sse2.asm
|
|
|
+@@ -1,13 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jccolext.asm - colorspace conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -27,31 +28,32 @@
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION img_width
|
|
|
+ ; r11 = JSAMPARRAY input_buf
|
|
|
+ ; r12 = JSAMPIMAGE output_buf
|
|
|
+ ; r13d = JDIMENSION output_row
|
|
|
+ ; r14d = int num_rows
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 8
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 5
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 5
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -468,17 +470,17 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW
|
|
|
+ add rbx, byte SIZEOF_JSAMPROW
|
|
|
+ add rdx, byte SIZEOF_JSAMPROW
|
|
|
+ dec rax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 5
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 5
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jccolor-avx2.asm b/media/libjpeg/simd/x86_64/jccolor-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jccolor-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jccolor-avx2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jccolor.asm - colorspace conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -28,30 +28,30 @@ F_0_299 equ 19595 ; FIX(0
|
|
|
+ F_0_331 equ 21709 ; FIX(0.33126)
|
|
|
+ F_0_418 equ 27439 ; FIX(0.41869)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_ycc_convert_avx2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 8 dw F_0_114, F_0_250
|
|
|
+ PW_MF016_MF033 times 8 dw -F_0_168, -F_0_331
|
|
|
+ PW_MF008_MF041 times 8 dw -F_0_081, -F_0_418
|
|
|
+ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
|
|
|
+ (CENTERJSAMPLE << SCALEBITS)
|
|
|
+ PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jccolext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jccolor-sse2.asm b/media/libjpeg/simd/x86_64/jccolor-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jccolor-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jccolor-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jccolor.asm - colorspace conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -27,30 +27,30 @@ F_0_299 equ 19595 ; FIX(0
|
|
|
+ F_0_331 equ 21709 ; FIX(0.33126)
|
|
|
+ F_0_418 equ 27439 ; FIX(0.41869)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_ycc_convert_sse2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 4 dw F_0_114, F_0_250
|
|
|
+ PW_MF016_MF033 times 4 dw -F_0_168, -F_0_331
|
|
|
+ PW_MF008_MF041 times 4 dw -F_0_081, -F_0_418
|
|
|
+ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
|
|
|
+ (CENTERJSAMPLE << SCALEBITS)
|
|
|
+ PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jccolext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcgray-avx2.asm b/media/libjpeg/simd/x86_64/jcgray-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcgray-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcgray-avx2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jcgray.asm - grayscale colorspace conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -24,26 +24,26 @@ F_0_114 equ 7471 ; FIX(0
|
|
|
+ F_0_250 equ 16384 ; FIX(0.25000)
|
|
|
+ F_0_299 equ 19595 ; FIX(0.29900)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_gray_convert_avx2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 8 dw F_0_114, F_0_250
|
|
|
+ PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jcgryext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcgray-sse2.asm b/media/libjpeg/simd/x86_64/jcgray-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcgray-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcgray-sse2.asm
|
|
|
+@@ -1,12 +1,12 @@
|
|
|
+ ;
|
|
|
+ ; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -23,26 +23,26 @@ F_0_114 equ 7471 ; FIX(0
|
|
|
+ F_0_250 equ 16384 ; FIX(0.25000)
|
|
|
+ F_0_299 equ 19595 ; FIX(0.29900)
|
|
|
+ F_0_587 equ 38470 ; FIX(0.58700)
|
|
|
+ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_rgb_gray_convert_sse2):
|
|
|
+
|
|
|
+ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
|
|
|
+ PW_F0114_F0250 times 4 dw F_0_114, F_0_250
|
|
|
+ PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jcgryext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcgryext-avx2.asm b/media/libjpeg/simd/x86_64/jcgryext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcgryext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcgryext-avx2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -28,31 +29,32 @@
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION img_width
|
|
|
+ ; r11 = JSAMPARRAY input_buf
|
|
|
+ ; r12 = JSAMPIMAGE output_buf
|
|
|
+ ; r13d = JDIMENSION output_row
|
|
|
+ ; r14d = int num_rows
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 5
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_YMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 5
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -422,17 +424,17 @@ EXTN(jsimd_rgb_gray_convert_avx2):
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW
|
|
|
+ dec rax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 5
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 5
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcgryext-sse2.asm b/media/libjpeg/simd/x86_64/jcgryext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcgryext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcgryext-sse2.asm
|
|
|
+@@ -1,13 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2011, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2011, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -27,31 +28,32 @@
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION img_width
|
|
|
+ ; r11 = JSAMPARRAY input_buf
|
|
|
+ ; r12 = JSAMPIMAGE output_buf
|
|
|
+ ; r13d = JDIMENSION output_row
|
|
|
+ ; r14d = int num_rows
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 5
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 5
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -347,17 +349,17 @@ EXTN(jsimd_rgb_gray_convert_sse2):
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_buf
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW
|
|
|
+ dec rax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 5
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 5
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jchuff-sse2.asm b/media/libjpeg/simd/x86_64/jchuff-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jchuff-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jchuff-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jchuff-sse2.asm - Huffman entropy encoding (64-bit SSE2)
|
|
|
+ ;
|
|
|
+-; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, D. R. Commander.
|
|
|
++; Copyright (C) 2009-2011, 2014-2016, 2019, 2021, 2023-2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Matthieu Darbois.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -33,27 +34,27 @@ struc c_derived_tbl
|
|
|
+ .ehufco: resd 256 ; code for each symbol
|
|
|
+ .ehufsi: resb 256 ; length of code for each symbol
|
|
|
+ ; If no code has been allocated for a symbol S, ehufsi[S] contains 0
|
|
|
+ endstruc
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_huff_encode_one_block)
|
|
|
+
|
|
|
+ EXTN(jconst_huff_encode_one_block):
|
|
|
+
|
|
|
+ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007
|
|
|
+ dd 0x000f, 0x001f, 0x003f, 0x007f
|
|
|
+ dd 0x00ff, 0x01ff, 0x03ff, 0x07ff
|
|
|
+ dd 0x0fff, 0x1fff, 0x3fff, 0x7fff
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ times 1 << 14 db 15
|
|
|
+ times 1 << 13 db 14
|
|
|
+ times 1 << 12 db 13
|
|
|
+ times 1 << 11 db 12
|
|
|
+ times 1 << 10 db 11
|
|
|
+ times 1 << 9 db 10
|
|
|
+ times 1 << 8 db 9
|
|
|
+@@ -61,17 +62,18 @@ times 1 << 7 db 8
|
|
|
+ times 1 << 6 db 7
|
|
|
+ times 1 << 5 db 6
|
|
|
+ times 1 << 4 db 5
|
|
|
+ times 1 << 3 db 4
|
|
|
+ times 1 << 2 db 3
|
|
|
+ times 1 << 1 db 2
|
|
|
+ times 1 << 0 db 1
|
|
|
+ times 1 db 0
|
|
|
+-jpeg_nbits_table:
|
|
|
++GLOBAL_DATA(jpeg_nbits_table)
|
|
|
++EXTN(jpeg_nbits_table):
|
|
|
+ times 1 db 0
|
|
|
+ times 1 << 0 db 1
|
|
|
+ times 1 << 1 db 2
|
|
|
+ times 1 << 2 db 3
|
|
|
+ times 1 << 3 db 4
|
|
|
+ times 1 << 4 db 5
|
|
|
+ times 1 << 5 db 6
|
|
|
+ times 1 << 6 db 7
|
|
|
+@@ -80,20 +82,20 @@ times 1 << 8 db 9
|
|
|
+ times 1 << 9 db 10
|
|
|
+ times 1 << 10 db 11
|
|
|
+ times 1 << 11 db 12
|
|
|
+ times 1 << 12 db 13
|
|
|
+ times 1 << 13 db 14
|
|
|
+ times 1 << 14 db 15
|
|
|
+ times 1 << 15 db 16
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ %define NBITS(x) nbits_base + x
|
|
|
+-%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - jpeg_nbits_table)
|
|
|
++%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ ; Shorthand used to describe SIMD operations:
|
|
|
+ ; wN: xmmN treated as eight signed 16-bit values
|
|
|
+ ; wN[i]: perform the same operation on all eight signed 16-bit values, i=0..7
|
|
|
+@@ -203,126 +205,131 @@ times 1 << 15 db 16
|
|
|
+ ; CPUs usually leave the destination unmodified if the source is zero.) This
|
|
|
+ ; can prevent out-of-order execution, so we clear the destination before
|
|
|
+ ; invoking tzcnt.
|
|
|
+ ;
|
|
|
+ ; Initial register allocation
|
|
|
+ ; rax - buffer
|
|
|
+ ; rbx - temp
|
|
|
+ ; rcx - nbits
|
|
|
+-; rdx - block --> free_bits
|
|
|
++; rdx - code
|
|
|
+ ; rsi - nbits_base
|
|
|
+ ; rdi - t
|
|
|
+-; rbp - code
|
|
|
+ ; r8 - dctbl --> code_temp
|
|
|
+ ; r9 - actbl
|
|
|
+ ; r10 - state
|
|
|
+ ; r11 - index
|
|
|
+ ; r12 - put_buffer
|
|
|
++; r15 - block --> free_bits
|
|
|
+
|
|
|
+ %define buffer rax
|
|
|
+ %ifdef WIN64
|
|
|
+ %define bufferp rax
|
|
|
+ %else
|
|
|
+ %define bufferp raxp
|
|
|
+ %endif
|
|
|
+ %define tempq rbx
|
|
|
+ %define tempd ebx
|
|
|
+ %define tempb bl
|
|
|
+ %define temph bh
|
|
|
+ %define nbitsq rcx
|
|
|
+ %define nbits ecx
|
|
|
+ %define nbitsb cl
|
|
|
+-%define block rdx
|
|
|
++%define codeq rdx
|
|
|
++%define code edx
|
|
|
+ %define nbits_base rsi
|
|
|
+ %define t rdi
|
|
|
+ %define td edi
|
|
|
+-%define codeq rbp
|
|
|
+-%define code ebp
|
|
|
+ %define dctbl r8
|
|
|
+ %define actbl r9
|
|
|
+ %define state r10
|
|
|
+ %define index r11
|
|
|
+ %define indexd r11d
|
|
|
+ %define put_buffer r12
|
|
|
+ %define put_bufferd r12d
|
|
|
++%define block r15
|
|
|
+
|
|
|
+ ; Step 1: Re-arrange input data according to jpeg_natural_order
|
|
|
+ ; xx 01 02 03 04 05 06 07 xx 01 08 16 09 02 03 10
|
|
|
+ ; 08 09 10 11 12 13 14 15 17 24 32 25 18 11 04 05
|
|
|
+ ; 16 17 18 19 20 21 22 23 12 19 26 33 40 48 41 34
|
|
|
+ ; 24 25 26 27 28 29 30 31 ==> 27 20 13 06 07 14 21 28
|
|
|
+ ; 32 33 34 35 36 37 38 39 35 42 49 56 57 50 43 36
|
|
|
+ ; 40 41 42 43 44 45 46 47 29 22 15 23 30 37 44 51
|
|
|
+ ; 48 49 50 51 52 53 54 55 58 59 52 45 38 31 39 46
|
|
|
+ ; 56 57 58 59 60 61 62 63 53 60 61 54 47 55 62 63
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_huff_encode_one_block_sse2):
|
|
|
++ ENDBR64
|
|
|
++ push rbp
|
|
|
++ mov rbp, rsp
|
|
|
+
|
|
|
+ %ifdef WIN64
|
|
|
+
|
|
|
+ ; rcx = working_state *state
|
|
|
+ ; rdx = JOCTET *buffer
|
|
|
+ ; r8 = JCOEFPTR block
|
|
|
+ ; r9 = int last_dc_val
|
|
|
+-; [rax+48] = c_derived_tbl *dctbl
|
|
|
+-; [rax+56] = c_derived_tbl *actbl
|
|
|
++; [rbp+48] = c_derived_tbl *dctbl
|
|
|
++; [rbp+56] = c_derived_tbl *actbl
|
|
|
+
|
|
|
+ ;X: X = code stream
|
|
|
+ mov buffer, rdx
|
|
|
++ push r15
|
|
|
+ mov block, r8
|
|
|
+ movups xmm3, XMMWORD [block + 0 * SIZEOF_WORD] ;D: w3 = xx 01 02 03 04 05 06 07
|
|
|
+ push rbx
|
|
|
+- push rbp
|
|
|
+ movdqa xmm0, xmm3 ;A: w0 = xx 01 02 03 04 05 06 07
|
|
|
+ push rsi
|
|
|
+ push rdi
|
|
|
+ push r12
|
|
|
+ movups xmm1, XMMWORD [block + 8 * SIZEOF_WORD] ;B: w1 = 08 09 10 11 12 13 14 15
|
|
|
+ mov state, rcx
|
|
|
+ movsx code, word [block] ;Z: code = block[0];
|
|
|
+ pxor xmm4, xmm4 ;A: w4[i] = 0;
|
|
|
+ sub code, r9d ;Z: code -= last_dc_val;
|
|
|
+- mov dctbl, POINTER [rsp+6*8+4*8]
|
|
|
+- mov actbl, POINTER [rsp+6*8+5*8]
|
|
|
++ mov dctbl, POINTER [rbp+48]
|
|
|
++ mov actbl, POINTER [rbp+56]
|
|
|
+ punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
|
|
|
+- lea nbits_base, [rel jpeg_nbits_table]
|
|
|
+- add rsp, -DCTSIZE2 * SIZEOF_WORD
|
|
|
+- mov t, rsp
|
|
|
++ lea nbits_base, [rel EXTN(jpeg_nbits_table)]
|
|
|
+
|
|
|
+ %else
|
|
|
+
|
|
|
+ ; rdi = working_state *state
|
|
|
+ ; rsi = JOCTET *buffer
|
|
|
+ ; rdx = JCOEFPTR block
|
|
|
+ ; rcx = int last_dc_val
|
|
|
+ ; r8 = c_derived_tbl *dctbl
|
|
|
+ ; r9 = c_derived_tbl *actbl
|
|
|
+
|
|
|
+ ;X: X = code stream
|
|
|
++ push r15
|
|
|
++ mov block, rdx
|
|
|
+ movups xmm3, XMMWORD [block + 0 * SIZEOF_WORD] ;D: w3 = xx 01 02 03 04 05 06 07
|
|
|
+ push rbx
|
|
|
+- push rbp
|
|
|
+ movdqa xmm0, xmm3 ;A: w0 = xx 01 02 03 04 05 06 07
|
|
|
+ push r12
|
|
|
+ mov state, rdi
|
|
|
+ mov buffer, rsi
|
|
|
+ movups xmm1, XMMWORD [block + 8 * SIZEOF_WORD] ;B: w1 = 08 09 10 11 12 13 14 15
|
|
|
+ movsx codeq, word [block] ;Z: code = block[0];
|
|
|
+- lea nbits_base, [rel jpeg_nbits_table]
|
|
|
++ lea nbits_base, [rel EXTN(jpeg_nbits_table)]
|
|
|
+ pxor xmm4, xmm4 ;A: w4[i] = 0;
|
|
|
+ sub codeq, rcx ;Z: code -= last_dc_val;
|
|
|
+ punpckldq xmm0, xmm1 ;A: w0 = xx 01 08 09 02 03 10 11
|
|
|
+- lea t, [rsp - DCTSIZE2 * SIZEOF_WORD] ; use red zone for t_
|
|
|
+
|
|
|
+ %endif
|
|
|
+
|
|
|
++ ; Allocate stack space for t array, and realign stack.
|
|
|
++ add rsp, -DCTSIZE2 * SIZEOF_WORD - 8
|
|
|
++ mov t, rsp
|
|
|
++
|
|
|
+ pshuflw xmm0, xmm0, 11001001b ;A: w0 = 01 08 xx 09 02 03 10 11
|
|
|
+ pinsrw xmm0, word [block + 16 * SIZEOF_WORD], 2 ;A: w0 = 01 08 16 09 02 03 10 11
|
|
|
+ punpckhdq xmm3, xmm1 ;D: w3 = 04 05 12 13 06 07 14 15
|
|
|
+ punpcklqdq xmm1, xmm3 ;B: w1 = 08 09 10 11 04 05 12 13
|
|
|
+ pinsrw xmm0, word [block + 17 * SIZEOF_WORD], 7 ;A: w0 = 01 08 16 09 02 03 10 17
|
|
|
+ ;A: (Row 0, offset 1)
|
|
|
+ pcmpgtw xmm4, xmm0 ;A: w4[i] = (w0[i] < 0 ? -1 : 0);
|
|
|
+ paddw xmm0, xmm4 ;A: w0[i] += w4[i];
|
|
|
+@@ -438,19 +445,19 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|
|
+ pcmpgtw xmm2, xmm1 ;F: w2[i] = (w1[i] < 0 ? -1 : 0);
|
|
|
+ pmovmskb tempd, xmm4 ;Z: temp = 0; temp |= ((b4[i] >> 7) << i);
|
|
|
+ pinsrw xmm5, word [block + 36 * SIZEOF_WORD], 6 ;E: w5 = 42 49 56 57 50 43 36 59
|
|
|
+ paddw xmm1, xmm2 ;F: w1[i] += w2[i];
|
|
|
+ movaps XMMWORD [t + 40 * SIZEOF_WORD], xmm1 ;F: t[40+i] = w1[i];
|
|
|
+ pinsrw xmm5, word [block + 29 * SIZEOF_WORD], 7 ;E: w5 = 42 49 56 57 50 43 36 29
|
|
|
+ ; (Row 4, offset 1)
|
|
|
+ %undef block
|
|
|
+-%define free_bitsq rdx
|
|
|
+-%define free_bitsd edx
|
|
|
+-%define free_bitsb dl
|
|
|
++%define free_bitsq r15
|
|
|
++%define free_bitsd r15d
|
|
|
++%define free_bitsb r15b
|
|
|
+ pcmpeqw xmm1, xmm0 ;F: w1[i] = (w1[i] == 0 ? -1 : 0);
|
|
|
+ shl tempq, 48 ;Z: temp <<= 48;
|
|
|
+ pxor xmm2, xmm2 ;E: w2[i] = 0;
|
|
|
+ pcmpgtw xmm0, xmm5 ;E: w0[i] = (w5[i] < 0 ? -1 : 0);
|
|
|
+ paddw xmm5, xmm0 ;E: w5[i] += w0[i];
|
|
|
+ or tempq, put_buffer ;Z: temp |= put_buffer;
|
|
|
+ movaps XMMWORD [t + 32 * SIZEOF_WORD], xmm5 ;E: t[32+i] = w5[i];
|
|
|
+ lea t, [dword t - 2] ;Z: t = &t[-1];
|
|
|
+@@ -529,50 +536,45 @@ EXTN(jsimd_huff_encode_one_block_sse2):
|
|
|
+ or code, code_temp ; code |= code_temp;
|
|
|
+ sub free_bitsb, nbitsb ; if ((free_bits -= nbits) <= 0)
|
|
|
+ jle .EMIT_CODE ; goto .EMIT_CODE;
|
|
|
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
|
|
|
+ or put_buffer, codeq ; put_buffer |= code;
|
|
|
+ test index, index
|
|
|
+ jnz .BLOOP ; } while (index != 0);
|
|
|
+ .ELOOP: ; } /* index != 0 */
|
|
|
+- sub td, esp ; t -= (WIN64: &t_[0], UNIX: &t_[64]);
|
|
|
+-%ifdef WIN64
|
|
|
++ sub td, esp ; t -= &t_[0];
|
|
|
+ cmp td, (DCTSIZE2 - 2) * SIZEOF_WORD ; if (t != 62)
|
|
|
+-%else
|
|
|
+- cmp td, -2 * SIZEOF_WORD ; if (t != -2)
|
|
|
+-%endif
|
|
|
+ je .EFN ; {
|
|
|
+ movzx nbits, byte [actbl + c_derived_tbl.ehufsi + 0]
|
|
|
+ ; nbits = actbl->ehufsi[0];
|
|
|
+ mov code, [actbl + c_derived_tbl.ehufco + 0] ; code = actbl->ehufco[0];
|
|
|
+ sub free_bitsb, nbitsb ; if ((free_bits -= nbits) <= 0)
|
|
|
+ jg .EFN_SKIP_EMIT_CODE ; {
|
|
|
+ EMIT_QWORD .EFN ; insert code, flush buffer
|
|
|
+ align 16
|
|
|
+ .EFN_SKIP_EMIT_CODE: ; } else {
|
|
|
+ shl put_buffer, nbitsb ; put_buffer <<= nbits;
|
|
|
+ or put_buffer, codeq ; put_buffer |= code;
|
|
|
+ .EFN: ; } }
|
|
|
+ mov [state + working_state.cur.put_buffer.simd], put_buffer
|
|
|
+ ; state->cur.put_buffer.simd = put_buffer;
|
|
|
+ mov byte [state + working_state.cur.free_bits], free_bitsb
|
|
|
+ ; state->cur.free_bits = free_bits;
|
|
|
++ sub rsp, -DCTSIZE2 * SIZEOF_WORD - 8
|
|
|
++ pop r12
|
|
|
+ %ifdef WIN64
|
|
|
+- sub rsp, -DCTSIZE2 * SIZEOF_WORD
|
|
|
+- pop r12
|
|
|
+ pop rdi
|
|
|
+ pop rsi
|
|
|
+- pop rbp
|
|
|
+ pop rbx
|
|
|
+ %else
|
|
|
+- pop r12
|
|
|
+- pop rbp
|
|
|
+ pop rbx
|
|
|
+ %endif
|
|
|
++ pop r15
|
|
|
++ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
+
|
|
|
+ align 16
|
|
|
+ .EMIT_BRLOOP_CODE:
|
|
|
+ EMIT_QWORD .EMIT_BRLOOP_CODE_END, { mov nbits, code_temp }
|
|
|
+ ; insert code, flush buffer,
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcphuff-sse2.asm b/media/libjpeg/simd/x86_64/jcphuff-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcphuff-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcphuff-sse2.asm
|
|
|
+@@ -1,13 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jcphuff-sse2.asm - prepare data for progressive Huffman encoding
|
|
|
+ ; (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright (C) 2016, 2018, Matthieu Darbois
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
++; Copyright (C) 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -276,26 +278,23 @@
|
|
|
+ %define VALUES r14
|
|
|
+ %define LEN r12d
|
|
|
+ %define LENEND r13d
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [rbp - 16]
|
|
|
+- collect_args 6
|
|
|
+-
|
|
|
+- movdqa XMMWORD [rbp - 16], ZERO
|
|
|
++ sub rsp, SIZEOF_XMMWORD
|
|
|
++ movdqa XMMWORD [rsp], ZERO
|
|
|
++ COLLECT_ARGS 6
|
|
|
+
|
|
|
+ movd AL, r13d
|
|
|
+ pxor ZERO, ZERO
|
|
|
+ mov K, LEN
|
|
|
+ mov LENEND, LEN
|
|
|
+ and K, -16
|
|
|
+ and LENEND, 7
|
|
|
+ shr K, 4
|
|
|
+@@ -379,20 +378,19 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_s
|
|
|
+ add VALUES, 8*2
|
|
|
+ inc K
|
|
|
+ jnz .ZEROLOOP
|
|
|
+ .EPADDING:
|
|
|
+ sub VALUES, DCTSIZE2*2
|
|
|
+
|
|
|
+ REDUCE0
|
|
|
+
|
|
|
+- movdqa ZERO, XMMWORD [rbp - 16]
|
|
|
+- uncollect_args 6
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 6
|
|
|
++ movdqa ZERO, XMMWORD [rsp]
|
|
|
++ mov rsp, rbp
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ %undef ZERO
|
|
|
+ %undef X0
|
|
|
+ %undef X1
|
|
|
+ %undef N0
|
|
|
+ %undef N1
|
|
|
+@@ -444,26 +442,23 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_s
|
|
|
+ %define VALUES r14
|
|
|
+ %define LEN r12d
|
|
|
+ %define LENEND r13d
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [rbp - 16]
|
|
|
+- collect_args 6
|
|
|
+-
|
|
|
+- movdqa XMMWORD [rbp - 16], ZERO
|
|
|
++ sub rsp, SIZEOF_XMMWORD
|
|
|
++ movdqa XMMWORD [rsp], ZERO
|
|
|
++ COLLECT_ARGS 6
|
|
|
+
|
|
|
+ xor SIGN, SIGN
|
|
|
+ xor EOB, EOB
|
|
|
+ xor KK, KK
|
|
|
+ movd AL, r13d
|
|
|
+ pxor ZERO, ZERO
|
|
|
+ pcmpeqw ONE, ONE
|
|
|
+ psrlw ONE, 15
|
|
|
+@@ -601,20 +596,19 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_
|
|
|
+ .EPADDINGR:
|
|
|
+ not SIGN
|
|
|
+ sub VALUES, DCTSIZE2*2
|
|
|
+ mov MMWORD [r15+SIZEOF_MMWORD], SIGN
|
|
|
+
|
|
|
+ REDUCE0
|
|
|
+
|
|
|
+ mov eax, EOB
|
|
|
+- movdqa ZERO, XMMWORD [rbp - 16]
|
|
|
+- uncollect_args 6
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 6
|
|
|
++ movdqa ZERO, XMMWORD [rsp]
|
|
|
++ mov rsp, rbp
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ %undef ZERO
|
|
|
+ %undef ONE
|
|
|
+ %undef X0
|
|
|
+ %undef X1
|
|
|
+ %undef N0
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcsample-avx2.asm b/media/libjpeg/simd/x86_64/jcsample-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcsample-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcsample-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jcsample.asm - downsampling (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+@@ -39,20 +39,20 @@
|
|
|
+ ; r13d = JDIMENSION width_in_blocks
|
|
|
+ ; r14 = JSAMPARRAY input_data
|
|
|
+ ; r15 = JSAMPARRAY output_data
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_downsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 6
|
|
|
++ COLLECT_ARGS 6
|
|
|
+
|
|
|
+ mov ecx, r13d
|
|
|
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov edx, r10d
|
|
|
+
|
|
|
+ ; -- expand_right_edge
|
|
|
+@@ -173,17 +173,17 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rax ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 6
|
|
|
++ UNCOLLECT_ARGS 6
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Downsample pixel values of a single component.
|
|
|
+ ; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
|
|
|
+ ; without smoothing.
|
|
|
+@@ -201,20 +201,20 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
|
|
+ ; r13d = JDIMENSION width_in_blocks
|
|
|
+ ; r14 = JSAMPARRAY input_data
|
|
|
+ ; r15 = JSAMPARRAY output_data
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_downsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 6
|
|
|
++ COLLECT_ARGS 6
|
|
|
+
|
|
|
+ mov ecx, r13d
|
|
|
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov edx, r10d
|
|
|
+
|
|
|
+ ; -- expand_right_edge
|
|
|
+@@ -353,15 +353,15 @@ EXTN(jsimd_h2v2_downsample_avx2):
|
|
|
+
|
|
|
+ add rsi, byte 2*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte 1*SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rax ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 6
|
|
|
++ UNCOLLECT_ARGS 6
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jcsample-sse2.asm b/media/libjpeg/simd/x86_64/jcsample-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jcsample-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jcsample-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jcsample.asm - downsampling (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -38,20 +38,20 @@
|
|
|
+ ; r13d = JDIMENSION width_in_blocks
|
|
|
+ ; r14 = JSAMPARRAY input_data
|
|
|
+ ; r15 = JSAMPARRAY output_data
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_downsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 6
|
|
|
++ COLLECT_ARGS 6
|
|
|
+
|
|
|
+ mov ecx, r13d
|
|
|
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov edx, r10d
|
|
|
+
|
|
|
+ ; -- expand_right_edge
|
|
|
+@@ -155,17 +155,17 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
|
|
+ pop rcx
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rax ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+- uncollect_args 6
|
|
|
++ UNCOLLECT_ARGS 6
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Downsample pixel values of a single component.
|
|
|
+ ; This version handles the standard case of 2:1 horizontal and 2:1 vertical,
|
|
|
+ ; without smoothing.
|
|
|
+@@ -183,20 +183,20 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
|
|
+ ; r13d = JDIMENSION width_in_blocks
|
|
|
+ ; r14 = JSAMPARRAY input_data
|
|
|
+ ; r15 = JSAMPARRAY output_data
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_downsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 6
|
|
|
++ COLLECT_ARGS 6
|
|
|
+
|
|
|
+ mov ecx, r13d
|
|
|
+ shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov edx, r10d
|
|
|
+
|
|
|
+ ; -- expand_right_edge
|
|
|
+@@ -316,15 +316,15 @@ EXTN(jsimd_h2v2_downsample_sse2):
|
|
|
+ pop rcx
|
|
|
+
|
|
|
+ add rsi, byte 2*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte 1*SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rax ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+- uncollect_args 6
|
|
|
++ UNCOLLECT_ARGS 6
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdcolext-avx2.asm b/media/libjpeg/simd/x86_64/jdcolext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdcolext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdcolext-avx2.asm
|
|
|
+@@ -1,15 +1,16 @@
|
|
|
+ ;
|
|
|
+ ; jdcolext.asm - colorspace conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -29,31 +30,32 @@
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION out_width
|
|
|
+ ; r11 = JSAMPIMAGE input_buf
|
|
|
+ ; r12d = JDIMENSION input_row
|
|
|
+ ; r13 = JSAMPARRAY output_buf
|
|
|
+ ; r14d = int num_rows
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 5
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (WK_NUM * SIZEOF_YMMWORD)
|
|
|
++ COLLECT_ARGS 5
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d ; num_cols
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -480,17 +482,17 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
|
|
|
+ dec rax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ sfence ; flush the write buffer
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 5
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 5
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdcolext-sse2.asm b/media/libjpeg/simd/x86_64/jdcolext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdcolext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdcolext-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jdcolext.asm - colorspace conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -28,31 +29,32 @@
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION out_width
|
|
|
+ ; r11 = JSAMPIMAGE input_buf
|
|
|
+ ; r12d = JDIMENSION input_row
|
|
|
+ ; r13 = JSAMPARRAY output_buf
|
|
|
+ ; r14d = int num_rows
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 5
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 5
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d ; num_cols
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -423,17 +425,17 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_buf
|
|
|
+ dec rax ; num_rows
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ sfence ; flush the write buffer
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 5
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 5
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdcolor-avx2.asm b/media/libjpeg/simd/x86_64/jdcolor-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdcolor-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdcolor-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolor.asm - colorspace conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -27,28 +27,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_ycc_rgb_convert_avx2):
|
|
|
+
|
|
|
+ PW_F0402 times 16 dw F_0_402
|
|
|
+ PW_MF0228 times 16 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 16 dw 1
|
|
|
+ PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jdcolext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdcolor-sse2.asm b/media/libjpeg/simd/x86_64/jdcolor-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdcolor-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdcolor-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdcolor.asm - colorspace conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -26,28 +26,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_ycc_rgb_convert_sse2):
|
|
|
+
|
|
|
+ PW_F0402 times 8 dw F_0_402
|
|
|
+ PW_MF0228 times 8 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 8 dw 1
|
|
|
+ PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jdcolext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdmerge-avx2.asm b/media/libjpeg/simd/x86_64/jdmerge-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdmerge-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdmerge-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -27,28 +27,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_merged_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_merged_upsample_avx2):
|
|
|
+
|
|
|
+ PW_F0402 times 16 dw F_0_402
|
|
|
+ PW_MF0228 times 16 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 16 dw 1
|
|
|
+ PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jdmrgext-avx2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdmerge-sse2.asm b/media/libjpeg/simd/x86_64/jdmerge-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdmerge-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdmerge-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -26,28 +26,28 @@ F_1_402 equ 91881 ; FIX(1.
|
|
|
+ F_1_772 equ 116130 ; FIX(1.77200)
|
|
|
+ F_0_402 equ (F_1_402 - 65536) ; FIX(1.40200) - FIX(1)
|
|
|
+ F_0_285 equ ( 65536 - F_0_714) ; FIX(1) - FIX(0.71414)
|
|
|
+ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_merged_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_merged_upsample_sse2):
|
|
|
+
|
|
|
+ PW_F0402 times 8 dw F_0_402
|
|
|
+ PW_MF0228 times 8 dw -F_0_228
|
|
|
+ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
|
|
|
+ PW_ONE times 8 dw 1
|
|
|
+ PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+
|
|
|
+ %include "jdmrgext-sse2.asm"
|
|
|
+
|
|
|
+ %undef RGB_RED
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdmrgext-avx2.asm b/media/libjpeg/simd/x86_64/jdmrgext-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdmrgext-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdmrgext-avx2.asm
|
|
|
+@@ -1,15 +1,16 @@
|
|
|
+ ;
|
|
|
+ ; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -29,31 +30,32 @@
|
|
|
+ ; JSAMPARRAY output_buf);
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION output_width
|
|
|
+ ; r11 = JSAMPIMAGE input_buf
|
|
|
+ ; r12d = JDIMENSION in_row_group_ctr
|
|
|
+ ; r13 = JSAMPARRAY output_buf
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 3
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 4
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, SIZEOF_YMMWORD * WK_NUM
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d ; col
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -474,19 +476,19 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+ %endif ; RGB_PIXELSIZE ; ---------------
|
|
|
+
|
|
|
+ .endcolumn:
|
|
|
+ sfence ; flush the write buffer
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -500,20 +502,20 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
|
|
+ ; r11 = JSAMPIMAGE input_buf
|
|
|
+ ; r12d = JDIMENSION in_row_group_ctr
|
|
|
+ ; r13 = JSAMPARRAY output_buf
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_merged_upsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov eax, r10d
|
|
|
+
|
|
|
+ mov rdi, r11
|
|
|
+ mov ecx, r12d
|
|
|
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
|
|
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
|
|
+@@ -582,15 +584,15 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
|
|
|
+ pop rcx
|
|
|
+ pop rdi
|
|
|
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
|
|
|
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
|
|
|
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
|
|
|
+ add rsp, SIZEOF_JSAMPARRAY*4
|
|
|
+
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdmrgext-sse2.asm b/media/libjpeg/simd/x86_64/jdmrgext-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdmrgext-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdmrgext-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2012, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -28,31 +29,32 @@
|
|
|
+ ; JSAMPARRAY output_buf);
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10d = JDIMENSION output_width
|
|
|
+ ; r11 = JSAMPIMAGE input_buf
|
|
|
+ ; r12d = JDIMENSION in_row_group_ctr
|
|
|
+ ; r13 = JSAMPARRAY output_buf
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 3
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 4
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov ecx, r10d ; col
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ push rcx
|
|
|
+
|
|
|
+@@ -416,19 +418,19 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+
|
|
|
+ %endif ; RGB_PIXELSIZE ; ---------------
|
|
|
+
|
|
|
+ .endcolumn:
|
|
|
+ sfence ; flush the write buffer
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -442,20 +444,20 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
|
|
+ ; r11 = JSAMPIMAGE input_buf
|
|
|
+ ; r12d = JDIMENSION in_row_group_ctr
|
|
|
+ ; r13 = JSAMPARRAY output_buf
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_merged_upsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov eax, r10d
|
|
|
+
|
|
|
+ mov rdi, r11
|
|
|
+ mov ecx, r12d
|
|
|
+ mov rsip, JSAMPARRAY [rdi+0*SIZEOF_JSAMPARRAY]
|
|
|
+ mov rbxp, JSAMPARRAY [rdi+1*SIZEOF_JSAMPARRAY]
|
|
|
+@@ -524,15 +526,15 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
|
|
|
+ pop rcx
|
|
|
+ pop rdi
|
|
|
+ mov rsip, JSAMPARRAY [rsp+0*SIZEOF_JSAMPARRAY]
|
|
|
+ mov rbxp, JSAMPARRAY [rsp+1*SIZEOF_JSAMPARRAY]
|
|
|
+ mov rdxp, JSAMPARRAY [rsp+2*SIZEOF_JSAMPARRAY]
|
|
|
+ add rsp, SIZEOF_JSAMPARRAY*4
|
|
|
+
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdsample-avx2.asm b/media/libjpeg/simd/x86_64/jdsample-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdsample-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdsample-avx2.asm
|
|
|
+@@ -1,43 +1,44 @@
|
|
|
+ ;
|
|
|
+ ; jdsample.asm - upsampling (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2015, Intel Corporation.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+ ; NASM is available from http://nasm.sourceforge.net/ or
|
|
|
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fancy_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_fancy_upsample_avx2):
|
|
|
+
|
|
|
+ PW_ONE times 16 dw 1
|
|
|
+ PW_TWO times 16 dw 2
|
|
|
+ PW_THREE times 16 dw 3
|
|
|
+ PW_SEVEN times 16 dw 7
|
|
|
+ PW_EIGHT times 16 dw 8
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ;
|
|
|
+ ; The upsampling algorithm is linear interpolation between pixel centers,
|
|
|
+@@ -56,21 +57,21 @@ PW_EIGHT times 16 dw 8
|
|
|
+ ; r11d = JDIMENSION downsampled_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- push_xmm 3
|
|
|
+- collect_args 4
|
|
|
++ PUSH_XMM 3
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ mov eax, r11d ; colctr
|
|
|
+ test rax, rax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+@@ -181,18 +182,18 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rcx ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 4
|
|
|
+- pop_xmm 3
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ POP_XMM 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; Again a triangle filter; see comments for h2v1 case, above.
|
|
|
+ ;
|
|
|
+@@ -203,32 +204,33 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
|
|
+ ; JSAMPARRAY *output_data_ptr);
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = int max_v_samp_factor
|
|
|
+ ; r11d = JDIMENSION downsampled_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_YMMWORD ; ymmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 4
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
+- and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- push_xmm 3
|
|
|
+- collect_args 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
++ and rsp, byte (-SIZEOF_YMMWORD) ; align to 256 bits
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, (SIZEOF_YMMWORD * WK_NUM)
|
|
|
++ PUSH_XMM 3
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov eax, r11d ; colctr
|
|
|
+ test rax, rax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+ test rcx, rcx
|
|
|
+@@ -493,20 +495,20 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
+ add rsi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub rcx, byte 2 ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 4
|
|
|
+- pop_xmm 3
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ POP_XMM 3
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ; It's still a box filter.
|
|
|
+ ;
|
|
|
+@@ -519,20 +521,20 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
|
|
+ ; r11d = JDIMENSION output_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_upsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ mov edx, r11d
|
|
|
+ add rdx, byte (SIZEOF_YMMWORD-1)
|
|
|
+ and rdx, -SIZEOF_YMMWORD
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+ test rcx, rcx
|
|
|
+@@ -585,17 +587,17 @@ EXTN(jsimd_h2v1_upsample_avx2):
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rcx ; rowctr
|
|
|
+ jg short .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; It's still a box filter.
|
|
|
+ ;
|
|
|
+@@ -608,20 +610,20 @@ EXTN(jsimd_h2v1_upsample_avx2):
|
|
|
+ ; r11d = JDIMENSION output_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_upsample_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov edx, r11d
|
|
|
+ add rdx, byte (SIZEOF_YMMWORD-1)
|
|
|
+ and rdx, -SIZEOF_YMMWORD
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+@@ -682,15 +684,15 @@ EXTN(jsimd_h2v2_upsample_avx2):
|
|
|
+ add rsi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub rcx, byte 2 ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jdsample-sse2.asm b/media/libjpeg/simd/x86_64/jdsample-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jdsample-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jdsample-sse2.asm
|
|
|
+@@ -1,42 +1,43 @@
|
|
|
+ ;
|
|
|
+ ; jdsample.asm - upsampling (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+ ; NASM is available from http://nasm.sourceforge.net/ or
|
|
|
+ ; http://sourceforge.net/project/showfiles.php?group_id=6208
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fancy_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_fancy_upsample_sse2):
|
|
|
+
|
|
|
+ PW_ONE times 8 dw 1
|
|
|
+ PW_TWO times 8 dw 2
|
|
|
+ PW_THREE times 8 dw 3
|
|
|
+ PW_SEVEN times 8 dw 7
|
|
|
+ PW_EIGHT times 8 dw 8
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ;
|
|
|
+ ; The upsampling algorithm is linear interpolation between pixel centers,
|
|
|
+@@ -55,20 +56,20 @@ PW_EIGHT times 8 dw 8
|
|
|
+ ; r11d = JDIMENSION downsampled_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ mov eax, r11d ; colctr
|
|
|
+ test rax, rax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+ test rcx, rcx
|
|
|
+ jz near .return
|
|
|
+@@ -169,17 +170,17 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
+ pop rax
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rcx ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; Again a triangle filter; see comments for h2v1 case, above.
|
|
|
+ ;
|
|
|
+@@ -190,31 +191,32 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
|
|
+ ; JSAMPARRAY *output_data_ptr);
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = int max_v_samp_factor
|
|
|
+ ; r11d = JDIMENSION downsampled_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 4
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 4
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov eax, r11d ; colctr
|
|
|
+ test rax, rax
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+ test rcx, rcx
|
|
|
+@@ -467,19 +469,19 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
+
|
|
|
+ add rsi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub rcx, byte 2 ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
|
|
|
+ ; It's still a box filter.
|
|
|
+ ;
|
|
|
+@@ -492,20 +494,20 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
|
|
+ ; r11d = JDIMENSION output_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v1_upsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ mov edx, r11d
|
|
|
+ add rdx, byte (2*SIZEOF_XMMWORD)-1
|
|
|
+ and rdx, byte -(2*SIZEOF_XMMWORD)
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+ test rcx, rcx
|
|
|
+@@ -556,17 +558,17 @@ EXTN(jsimd_h2v1_upsample_sse2):
|
|
|
+ pop rdi
|
|
|
+
|
|
|
+ add rsi, byte SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte SIZEOF_JSAMPROW ; output_data
|
|
|
+ dec rcx ; rowctr
|
|
|
+ jg short .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
|
|
|
+ ; It's still a box filter.
|
|
|
+ ;
|
|
|
+@@ -579,20 +581,20 @@ EXTN(jsimd_h2v1_upsample_sse2):
|
|
|
+ ; r11d = JDIMENSION output_width
|
|
|
+ ; r12 = JSAMPARRAY input_data
|
|
|
+ ; r13 = JSAMPARRAY *output_data_ptr
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_h2v2_upsample_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ mov edx, r11d
|
|
|
+ add rdx, byte (2*SIZEOF_XMMWORD)-1
|
|
|
+ and rdx, byte -(2*SIZEOF_XMMWORD)
|
|
|
+ jz near .return
|
|
|
+
|
|
|
+ mov rcx, r10 ; rowctr
|
|
|
+@@ -651,15 +653,15 @@ EXTN(jsimd_h2v2_upsample_sse2):
|
|
|
+
|
|
|
+ add rsi, byte 1*SIZEOF_JSAMPROW ; input_data
|
|
|
+ add rdi, byte 2*SIZEOF_JSAMPROW ; output_data
|
|
|
+ sub rcx, byte 2 ; rowctr
|
|
|
+ jg near .rowloop
|
|
|
+
|
|
|
+ .return:
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jfdctflt-sse.asm b/media/libjpeg/simd/x86_64/jfdctflt-sse.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jfdctflt-sse.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jfdctflt-sse.asm
|
|
|
+@@ -1,13 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jfdctflt.asm - floating-point FDCT (64-bit SSE)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -29,55 +30,56 @@
|
|
|
+
|
|
|
+ %macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
+ shufps %1, %2, 0xEE
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_float_sse)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_float_sse):
|
|
|
+
|
|
|
+ PD_0_382 times 4 dd 0.382683432365089771728460
|
|
|
+ PD_0_707 times 4 dd 0.707106781186547524400844
|
|
|
+ PD_0_541 times 4 dd 0.541196100146196984399723
|
|
|
+ PD_1_306 times 4 dd 1.306562964876376527856643
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+ ; jsimd_fdct_float_sse(FAST_FLOAT *data)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = FAST_FLOAT *data
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_fdct_float_sse)
|
|
|
+
|
|
|
+ EXTN(jsimd_fdct_float_sse):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 1
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 1
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov rdx, r10 ; (FAST_FLOAT *)
|
|
|
+ mov rcx, DCTSIZE/4
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_FAST_FLOAT)]
|
|
|
+@@ -339,17 +341,17 @@ EXTN(jsimd_fdct_float_sse):
|
|
|
+ movaps XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_FAST_FLOAT)], xmm5
|
|
|
+ movaps XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_FAST_FLOAT)], xmm7
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_FAST_FLOAT)], xmm4
|
|
|
+
|
|
|
+ add rdx, byte 4*SIZEOF_FAST_FLOAT
|
|
|
+ dec rcx
|
|
|
+ jnz near .columnloop
|
|
|
+
|
|
|
+- uncollect_args 1
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 1
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jfdctfst-sse2.asm b/media/libjpeg/simd/x86_64/jfdctfst-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jfdctfst-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jfdctfst-sse2.asm
|
|
|
+@@ -1,13 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jfdctfst.asm - fast integer FDCT (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -44,55 +45,56 @@ F_1_306 equ DESCALE(1402911301, 30 - CON
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
|
|
|
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
|
|
|
+
|
|
|
+ %define PRE_MULTIPLY_SCALE_BITS 2
|
|
|
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_ifast_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_ifast_sse2):
|
|
|
+
|
|
|
+ PW_F0707 times 8 dw F_0_707 << CONST_SHIFT
|
|
|
+ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
|
|
|
+ PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
|
|
|
+ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+ ; jsimd_fdct_ifast_sse2(DCTELEM *data)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = DCTELEM *data
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_fdct_ifast_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 1
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 1
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov rdx, r10 ; (DCTELEM *)
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
|
|
|
+@@ -373,17 +375,17 @@ EXTN(jsimd_fdct_ifast_sse2):
|
|
|
+ paddw xmm6, xmm4 ; xmm6=data5
|
|
|
+ paddw xmm2, xmm1 ; xmm2=data1
|
|
|
+
|
|
|
+ movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm5
|
|
|
+ movdqa XMMWORD [XMMBLOCK(7,0,rdx,SIZEOF_DCTELEM)], xmm3
|
|
|
+ movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
|
|
|
+ movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
|
|
|
+
|
|
|
+- uncollect_args 1
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 1
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jfdctint-avx2.asm b/media/libjpeg/simd/x86_64/jfdctint-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jfdctint-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jfdctint-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jfdctint.asm - accurate integer FDCT (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -60,17 +60,17 @@ F_2_562 equ DESCALE(2751909506, 30 - CON
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit matrix transpose using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%8: Temp registers
|
|
|
+
|
|
|
+-%macro dotranspose 8
|
|
|
++%macro DOTRANSPOSE 8
|
|
|
+ ; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
|
|
|
+ ; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
|
|
|
+ ; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
|
|
|
+ ; %4=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
|
|
|
+
|
|
|
+ vpunpcklwd %5, %1, %2
|
|
|
+ vpunpckhwd %6, %1, %2
|
|
|
+ vpunpcklwd %7, %3, %4
|
|
|
+@@ -103,17 +103,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit accurate integer forward DCT using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%8: Temp registers
|
|
|
+ ; %9: Pass (1 or 2)
|
|
|
+
|
|
|
+-%macro dodct 9
|
|
|
++%macro DODCT 9
|
|
|
+ vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
|
|
|
+ vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
|
|
|
+ vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
|
|
|
+ vpsubw %8, %2, %3 ; %8=data3_2-data4_5=tmp4_5
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ vperm2i128 %6, %6, %6, 0x01 ; %6=tmp0_1
|
|
|
+@@ -218,17 +218,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ vpsrad %5, %5, DESCALE_P %+ %9
|
|
|
+
|
|
|
+ vpackssdw %2, %8, %5 ; %2=data3_1
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_islow_avx2):
|
|
|
+
|
|
|
+ PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ times 4 dw (F_0_541 - F_1_847), F_0_541
|
|
|
+ PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ times 4 dw (F_1_175 - F_0_390), F_1_175
|
|
|
+@@ -237,17 +237,17 @@ PW_MF060_MF089_MF050_MF256 times 4 dw
|
|
|
+ PW_F050_MF256_F060_MF089 times 4 dw (F_3_072 - F_2_562), -F_2_562
|
|
|
+ times 4 dw (F_1_501 - F_0_899), -F_0_899
|
|
|
+ PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
|
|
|
+ PW_1_NEG1 times 8 dw 1
|
|
|
+ times 8 dw -1
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -255,20 +255,20 @@ PW_1_NEG1 times 8 dw
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = DCTELEM *data
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_fdct_islow_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 1
|
|
|
++ COLLECT_ARGS 1
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ vmovdqu ymm4, YMMWORD [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm5, YMMWORD [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm6, YMMWORD [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm7, YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)]
|
|
|
+ ; ymm4=(00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17)
|
|
|
+@@ -280,41 +280,41 @@ EXTN(jsimd_fdct_islow_avx2):
|
|
|
+ vperm2i128 ymm1, ymm4, ymm6, 0x31
|
|
|
+ vperm2i128 ymm2, ymm5, ymm7, 0x20
|
|
|
+ vperm2i128 ymm3, ymm5, ymm7, 0x31
|
|
|
+ ; ymm0=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
|
|
|
+ ; ymm1=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
|
|
|
+ ; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
|
|
|
+ ; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
|
|
|
+ ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
|
|
|
+
|
|
|
+ ; ---- Pass 2: process columns.
|
|
|
+
|
|
|
+ vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
|
|
|
+ vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
|
|
|
+ ; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
|
|
|
+
|
|
|
+ vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
|
|
|
+ vperm2i128 ymm5, ymm2, ymm1, 0x20 ; ymm5=data2_3
|
|
|
+ vperm2i128 ymm6, ymm0, ymm4, 0x31 ; ymm6=data4_5
|
|
|
+ vperm2i128 ymm7, ymm2, ymm4, 0x21 ; ymm7=data6_7
|
|
|
+
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm3
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm5
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm6
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7
|
|
|
+
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 1
|
|
|
++ UNCOLLECT_ARGS 1
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jfdctint-sse2.asm b/media/libjpeg/simd/x86_64/jfdctint-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jfdctint-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jfdctint-sse2.asm
|
|
|
+@@ -1,13 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jfdctint.asm - accurate integer FDCT (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -58,62 +59,63 @@ F_1_961 equ DESCALE(2106220350, 30 - CON
|
|
|
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_fdct_islow_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_fdct_islow_sse2):
|
|
|
+
|
|
|
+ PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847)
|
|
|
+ PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390)
|
|
|
+ PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899
|
|
|
+ PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562
|
|
|
+ PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform the forward DCT on one block of samples.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+ ; jsimd_fdct_islow_sse2(DCTELEM *data)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = DCTELEM *data
|
|
|
+
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 6
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_fdct_islow_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 1
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 1
|
|
|
+
|
|
|
+ ; ---- Pass 1: process rows.
|
|
|
+
|
|
|
+ mov rdx, r10 ; (DCTELEM *)
|
|
|
+
|
|
|
+ movdqa xmm0, XMMWORD [XMMBLOCK(0,0,rdx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm1, XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm2, XMMWORD [XMMBLOCK(2,0,rdx,SIZEOF_DCTELEM)]
|
|
|
+@@ -603,17 +605,17 @@ EXTN(jsimd_fdct_islow_sse2):
|
|
|
+ psrad xmm7, DESCALE_P2
|
|
|
+
|
|
|
+ packssdw xmm1, xmm5 ; xmm1=data5
|
|
|
+ packssdw xmm3, xmm7 ; xmm3=data3
|
|
|
+
|
|
|
+ movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
|
|
|
+ movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
|
|
|
+
|
|
|
+- uncollect_args 1
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 1
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jidctflt-sse2.asm b/media/libjpeg/simd/x86_64/jidctflt-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jidctflt-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jidctflt-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -19,40 +20,40 @@
|
|
|
+ ; (Discrete Cosine Transform). The following code is based directly on
|
|
|
+ ; the IJG's original jidctflt.c; see the jidctflt.c for more details.
|
|
|
+
|
|
|
+ %include "jsimdext.inc"
|
|
|
+ %include "jdct.inc"
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+
|
|
|
+-%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
|
|
|
++%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
|
|
|
+ shufps %1, %2, 0x44
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+-%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
++%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
|
|
|
+ shufps %1, %2, 0xEE
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_float_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_float_sse2):
|
|
|
+
|
|
|
+ PD_1_414 times 4 dd 1.414213562373095048801689
|
|
|
+ PD_1_847 times 4 dd 1.847759065022573512256366
|
|
|
+ PD_1_082 times 4 dd 1.082392200292393968799446
|
|
|
+ PD_M2_613 times 4 dd -2.613125929752753055713286
|
|
|
+ PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -60,35 +61,35 @@ PB_CENTERJSAMP times 16 db CENTERJSAMP
|
|
|
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = void *dct_table
|
|
|
+ ; r11 = JCOEFPTR coef_block
|
|
|
+ ; r12 = JSAMPARRAY output_buf
|
|
|
+ ; r13d = JDIMENSION output_col
|
|
|
+
|
|
|
+-%define original_rbp rbp + 0
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
+ ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+ %define workspace wk(0) - DCTSIZE2 * SIZEOF_FAST_FLOAT
|
|
|
+ ; FAST_FLOAT workspace[DCTSIZE2]
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_idct_float_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_idct_float_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
+ lea rsp, [workspace]
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input, store into work array.
|
|
|
+
|
|
|
+ mov rdx, r10 ; quantptr
|
|
|
+ mov rsi, r11 ; inptr
|
|
|
+ lea rdi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov rcx, DCTSIZE/4 ; ctr
|
|
|
+@@ -275,36 +276,36 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+ movaps xmm2, xmm7 ; transpose coefficients(phase 1)
|
|
|
+ unpcklps xmm7, xmm3 ; xmm7=(20 30 21 31)
|
|
|
+ unpckhps xmm2, xmm3 ; xmm2=(22 32 23 33)
|
|
|
+ movaps xmm4, xmm5 ; transpose coefficients(phase 1)
|
|
|
+ unpcklps xmm5, xmm0 ; xmm5=(40 50 41 51)
|
|
|
+ unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
|
|
|
+
|
|
|
+ movaps xmm3, xmm6 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
|
|
|
+- unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
|
|
|
++ UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
|
|
|
++ UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
|
|
|
+ movaps xmm0, xmm1 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
|
|
|
+- unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
|
|
|
++ UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
|
|
|
++ UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
|
|
|
+
|
|
|
+ movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
|
|
|
+ movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
|
|
|
+
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,0,rdi,SIZEOF_FAST_FLOAT)], xmm6
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,0,rdi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
|
|
|
+
|
|
|
+ movaps xmm6, xmm5 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
|
|
|
+- unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
|
|
|
++ UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
|
|
|
++ UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
|
|
|
+ movaps xmm3, xmm4 ; transpose coefficients(phase 2)
|
|
|
+- unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
|
|
|
+- unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
|
|
|
++ UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
|
|
|
++ UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
|
|
|
+
|
|
|
+ movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
|
|
|
+ movaps XMMWORD [XMMBLOCK(2,1,rdi,SIZEOF_FAST_FLOAT)], xmm4
|
|
|
+ movaps XMMWORD [XMMBLOCK(3,1,rdi,SIZEOF_FAST_FLOAT)], xmm3
|
|
|
+
|
|
|
+ .nextcolumn:
|
|
|
+ add rsi, byte 4*SIZEOF_JCOEF ; coef_block
|
|
|
+@@ -317,17 +318,16 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+
|
|
|
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 0*32]
|
|
|
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 1*32]
|
|
|
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 2*32]
|
|
|
+ prefetchnta [rsi + (DCTSIZE2-8)*SIZEOF_JCOEF + 3*32]
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+- mov rax, [original_rbp]
|
|
|
+ lea rsi, [workspace] ; FAST_FLOAT *wsptr
|
|
|
+ mov rdi, r12 ; (JSAMPROW *)
|
|
|
+ mov eax, r13d
|
|
|
+ mov rcx, DCTSIZE/4 ; ctr
|
|
|
+ .rowloop:
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+@@ -466,17 +466,17 @@ EXTN(jsimd_idct_float_sse2):
|
|
|
+ movq XMM_MMWORD [rbx+rax*SIZEOF_JSAMPLE], xmm3
|
|
|
+
|
|
|
+ add rsi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
|
|
|
+ add rdi, byte 4*SIZEOF_JSAMPROW
|
|
|
+ dec rcx ; ctr
|
|
|
+ jnz near .rowloop
|
|
|
+
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jidctfst-sse2.asm b/media/libjpeg/simd/x86_64/jidctfst-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jidctfst-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jidctfst-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jidctfst.asm - fast integer IDCT (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -52,28 +53,28 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+ ; PRE_MULTIPLY_SCALE_BITS <= 2 (to avoid overflow)
|
|
|
+ ; CONST_BITS + CONST_SHIFT + PRE_MULTIPLY_SCALE_BITS == 16 (for pmulhw)
|
|
|
+
|
|
|
+ %define PRE_MULTIPLY_SCALE_BITS 2
|
|
|
+ %define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_ifast_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_ifast_sse2):
|
|
|
+
|
|
|
+ PW_F1414 times 8 dw F_1_414 << CONST_SHIFT
|
|
|
+ PW_F1847 times 8 dw F_1_847 << CONST_SHIFT
|
|
|
+ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
|
|
|
+ PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -81,33 +82,33 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPL
|
|
|
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = jpeg_component_info *compptr
|
|
|
+ ; r11 = JCOEFPTR coef_block
|
|
|
+ ; r12 = JSAMPARRAY output_buf
|
|
|
+ ; r13d = JDIMENSION output_col
|
|
|
+
|
|
|
+-%define original_rbp rbp + 0
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
+ ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_idct_ifast_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 4
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ mov rdx, r10 ; quantptr
|
|
|
+ mov rsi, r11 ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
|
|
|
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
|
|
+@@ -315,17 +316,16 @@ EXTN(jsimd_idct_ifast_sse2):
|
|
|
+
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+- mov rax, [original_rbp]
|
|
|
+ mov rdi, r12 ; (JSAMPROW *)
|
|
|
+ mov eax, r13d
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ ; xmm6=col0, xmm5=col2, xmm1=col4, xmm3=col6
|
|
|
+
|
|
|
+ movdqa xmm2, xmm6
|
|
|
+@@ -474,18 +474,18 @@ EXTN(jsimd_idct_ifast_sse2):
|
|
|
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm5
|
|
|
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
|
|
|
+ mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
|
|
|
+ mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
|
|
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
|
|
|
+
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jidctint-avx2.asm b/media/libjpeg/simd/x86_64/jidctint-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jidctint-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jidctint-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jidctint.asm - accurate integer IDCT (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -61,17 +61,17 @@ F_2_562 equ DESCALE(2751909506, 30 - CON
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit inverse matrix transpose using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%8: Temp registers
|
|
|
+
|
|
|
+-%macro dotranspose 8
|
|
|
++%macro DOTRANSPOSE 8
|
|
|
+ ; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
|
|
|
+ ; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
|
|
|
+ ; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
|
|
|
+ ; %8=(07 17 27 37 47 57 67 77 06 16 26 36 46 56 66 76)
|
|
|
+
|
|
|
+ vpermq %5, %1, 0xD8
|
|
|
+ vpermq %6, %2, 0x72
|
|
|
+ vpermq %7, %3, 0xD8
|
|
|
+@@ -114,17 +114,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ; In-place 8x8x16-bit accurate integer inverse DCT using AVX2 instructions
|
|
|
+ ; %1-%4: Input/output registers
|
|
|
+ ; %5-%12: Temp registers
|
|
|
+ ; %9: Pass (1 or 2)
|
|
|
+
|
|
|
+-%macro dodct 13
|
|
|
++%macro DODCT 13
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ ; (Original)
|
|
|
+ ; z1 = (z2 + z3) * 0.541196100;
|
|
|
+ ; tmp2 = z1 + z3 * -1.847759065;
|
|
|
+ ; tmp3 = z1 + z2 * 0.765366865;
|
|
|
+ ;
|
|
|
+ ; (This implementation)
|
|
|
+@@ -236,17 +236,17 @@ F_3_072 equ DESCALE(3299298341, 30 - CON
|
|
|
+ vpsrad %7, %7, DESCALE_P %+ %13
|
|
|
+ vpsrad %8, %8, DESCALE_P %+ %13
|
|
|
+ vpackssdw %3, %7, %8 ; %3=data4_5
|
|
|
+ %endmacro
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_islow_avx2):
|
|
|
+
|
|
|
+ PW_F130_F054_MF130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ times 4 dw (F_0_541 - F_1_847), F_0_541
|
|
|
+ PW_MF078_F117_F078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ times 4 dw (F_1_175 - F_0_390), F_1_175
|
|
|
+@@ -255,17 +255,17 @@ PW_MF060_MF089_MF050_MF256 times 4 dw
|
|
|
+ PW_MF089_F060_MF256_F050 times 4 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ times 4 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 8 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 8 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
|
|
|
+ PW_1_NEG1 times 8 dw 1
|
|
|
+ times 8 dw -1
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -277,21 +277,21 @@ PW_1_NEG1 times 8 dw
|
|
|
+ ; r11 = JCOEFPTR coef_block
|
|
|
+ ; r12 = JSAMPARRAY output_buf
|
|
|
+ ; r13d = JDIMENSION output_col
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_idct_islow_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+ mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- push_xmm 4
|
|
|
+- collect_args 4
|
|
|
++ PUSH_XMM 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns.
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
|
|
|
+ mov eax, dword [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
|
|
|
+ or eax, dword [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
|
|
|
+ jnz near .columnDCT
|
|
|
+
|
|
|
+@@ -338,40 +338,40 @@ EXTN(jsimd_idct_islow_avx2):
|
|
|
+ vpmullw ymm6, ymm6, YMMWORD [YMMBLOCK(4,0,r10,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+ vpmullw ymm7, ymm7, YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_ISLOW_MULT_TYPE)]
|
|
|
+
|
|
|
+ vperm2i128 ymm0, ymm4, ymm6, 0x20 ; ymm0=in0_4
|
|
|
+ vperm2i128 ymm1, ymm5, ymm4, 0x31 ; ymm1=in3_1
|
|
|
+ vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
|
|
|
+ vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
|
|
|
+ ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
|
|
|
+ ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
|
|
|
+
|
|
|
+ .column_end:
|
|
|
+
|
|
|
+ ; -- Prefetch the next coefficient block
|
|
|
+
|
|
|
+ prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 0*32]
|
|
|
+ prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 1*32]
|
|
|
+ prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 2*32]
|
|
|
+ prefetchnta [r11 + DCTSIZE2*SIZEOF_JCOEF + 3*32]
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows.
|
|
|
+
|
|
|
+ vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm3=in7_5
|
|
|
+ vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
|
|
|
+
|
|
|
+- dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
|
|
|
++ DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
|
|
|
+ ; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
|
|
|
+
|
|
|
+- dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
++ DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
|
|
|
+ ; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
|
|
|
+
|
|
|
+ vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
|
|
|
+ vpacksswb ymm1, ymm2, ymm4 ; ymm1=data23_67
|
|
|
+ vpaddb ymm0, ymm0, [rel PB_CENTERJSAMP]
|
|
|
+ vpaddb ymm1, ymm1, [rel PB_CENTERJSAMP]
|
|
|
+
|
|
|
+ vextracti128 xmm6, ymm1, 1 ; xmm3=data67
|
|
|
+@@ -403,16 +403,16 @@ EXTN(jsimd_idct_islow_avx2):
|
|
|
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
|
|
|
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
|
|
|
+
|
|
|
+ mov rdxp, JSAMPROW [r12+6*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov rsip, JSAMPROW [r12+7*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
|
|
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
|
|
|
+
|
|
|
+- uncollect_args 4
|
|
|
+- pop_xmm 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ POP_XMM 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jidctint-sse2.asm b/media/libjpeg/simd/x86_64/jidctint-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jidctint-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jidctint-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jidctint.asm - accurate integer IDCT (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2020, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -59,34 +60,34 @@ F_1_961 equ DESCALE(2106220350, 30 - CON
|
|
|
+ F_2_053 equ DESCALE(2204520673, 30 - CONST_BITS) ; FIX(2.053119869)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_islow_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_islow_sse2):
|
|
|
+
|
|
|
+ PW_F130_F054 times 4 dw (F_0_541 + F_0_765), F_0_541
|
|
|
+ PW_F054_MF130 times 4 dw F_0_541, (F_0_541 - F_1_847)
|
|
|
+ PW_MF078_F117 times 4 dw (F_1_175 - F_1_961), F_1_175
|
|
|
+ PW_F117_F078 times 4 dw F_1_175, (F_1_175 - F_0_390)
|
|
|
+ PW_MF060_MF089 times 4 dw (F_0_298 - F_0_899), -F_0_899
|
|
|
+ PW_MF089_F060 times 4 dw -F_0_899, (F_1_501 - F_0_899)
|
|
|
+ PW_MF050_MF256 times 4 dw (F_2_053 - F_2_562), -F_2_562
|
|
|
+ PW_MF256_F050 times 4 dw -F_2_562, (F_3_072 - F_2_562)
|
|
|
+ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
|
|
|
+ PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients.
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -94,33 +95,33 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPL
|
|
|
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = jpeg_component_info *compptr
|
|
|
+ ; r11 = JCOEFPTR coef_block
|
|
|
+ ; r12 = JSAMPARRAY output_buf
|
|
|
+ ; r13d = JDIMENSION output_col
|
|
|
+
|
|
|
+-%define original_rbp rbp + 0
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
+ ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 12
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_idct_islow_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 4
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ mov rdx, r10 ; quantptr
|
|
|
+ mov rsi, r11 ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
|
|
|
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
|
|
+@@ -507,17 +508,16 @@ EXTN(jsimd_idct_islow_sse2):
|
|
|
+
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows from work array, store into output array.
|
|
|
+
|
|
|
+- mov rax, [original_rbp]
|
|
|
+ mov rdi, r12 ; (JSAMPROW *)
|
|
|
+ mov eax, r13d
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ ; xmm7=col0, xmm1=col2, xmm0=col4, xmm2=col6
|
|
|
+
|
|
|
+ ; (Original)
|
|
|
+@@ -831,17 +831,17 @@ EXTN(jsimd_idct_islow_sse2):
|
|
|
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
|
|
|
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm0
|
|
|
+ mov rdxp, JSAMPROW [rdi+5*SIZEOF_JSAMPROW]
|
|
|
+ mov rsip, JSAMPROW [rdi+7*SIZEOF_JSAMPROW]
|
|
|
+ movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
|
|
|
+ movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
|
|
|
+
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jidctred-sse2.asm b/media/libjpeg/simd/x86_64/jidctred-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jidctred-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jidctred-sse2.asm
|
|
|
+@@ -1,14 +1,15 @@
|
|
|
+ ;
|
|
|
+ ; jidctred.asm - reduced-size IDCT (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+ ; assembler (including Borland's Turbo Assembler).
|
|
|
+@@ -65,17 +66,17 @@ F_1_847 equ DESCALE(1984016188, 30 - CON
|
|
|
+ F_2_172 equ DESCALE(2332956230, 30 - CONST_BITS) ; FIX(2.172734803)
|
|
|
+ F_2_562 equ DESCALE(2751909506, 30 - CONST_BITS) ; FIX(2.562915447)
|
|
|
+ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
|
|
|
+ %endif
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_CONST
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+ GLOBAL_DATA(jconst_idct_red_sse2)
|
|
|
+
|
|
|
+ EXTN(jconst_idct_red_sse2):
|
|
|
+
|
|
|
+ PW_F184_MF076 times 4 dw F_1_847, -F_0_765
|
|
|
+ PW_F256_F089 times 4 dw F_2_562, F_0_899
|
|
|
+ PW_F106_MF217 times 4 dw F_1_061, -F_2_172
|
|
|
+ PW_MF060_MF050 times 4 dw -F_0_601, -F_0_509
|
|
|
+@@ -83,17 +84,17 @@ PW_F145_MF021 times 4 dw F_1_451, -F
|
|
|
+ PW_F362_MF127 times 4 dw F_3_624, -F_1_272
|
|
|
+ PW_F085_MF072 times 4 dw F_0_850, -F_0_720
|
|
|
+ PD_DESCALE_P1_4 times 4 dd 1 << (DESCALE_P1_4 - 1)
|
|
|
+ PD_DESCALE_P2_4 times 4 dd 1 << (DESCALE_P2_4 - 1)
|
|
|
+ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
|
|
|
+ PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
|
|
|
+ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
|
|
+
|
|
|
+- alignz 32
|
|
|
++ ALIGNZ 32
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ SECTION SEG_TEXT
|
|
|
+ BITS 64
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ ; producing a reduced-size 4x4 output block.
|
|
|
+ ;
|
|
|
+@@ -102,33 +103,33 @@ PB_CENTERJSAMP times 16 db CENTERJSAMP
|
|
|
+ ; JSAMPARRAY output_buf, JDIMENSION output_col)
|
|
|
+ ;
|
|
|
+
|
|
|
+ ; r10 = void *dct_table
|
|
|
+ ; r11 = JCOEFPTR coef_block
|
|
|
+ ; r12 = JSAMPARRAY output_buf
|
|
|
+ ; r13d = JDIMENSION output_col
|
|
|
+
|
|
|
+-%define original_rbp rbp + 0
|
|
|
+-%define wk(i) rbp - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
++%define wk(i) r15 - (WK_NUM - (i)) * SIZEOF_XMMWORD
|
|
|
+ ; xmmword wk[WK_NUM]
|
|
|
+ %define WK_NUM 2
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_idct_4x4_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp ; rax = original rbp
|
|
|
+- sub rsp, byte 4
|
|
|
++ mov rbp, rsp
|
|
|
++ push r15
|
|
|
+ and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
|
|
+- mov [rsp], rax
|
|
|
+- mov rbp, rsp ; rbp = aligned rbp
|
|
|
+- lea rsp, [wk(0)]
|
|
|
+- collect_args 4
|
|
|
++ ; Allocate stack space for wk array. r15 is used to access it.
|
|
|
++ mov r15, rsp
|
|
|
++ sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
|
|
|
++ COLLECT_ARGS 4
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ mov rdx, r10 ; quantptr
|
|
|
+ mov rsi, r11 ; inptr
|
|
|
+
|
|
|
+ %ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
|
|
|
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
|
|
|
+@@ -304,17 +305,16 @@ EXTN(jsimd_idct_4x4_sse2):
|
|
|
+
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 0*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 1*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 2*32]
|
|
|
+ prefetchnta [rsi + DCTSIZE2*SIZEOF_JCOEF + 3*32]
|
|
|
+
|
|
|
+ ; ---- Pass 2: process rows, store into output array.
|
|
|
+
|
|
|
+- mov rax, [original_rbp]
|
|
|
+ mov rdi, r12 ; (JSAMPROW *)
|
|
|
+ mov eax, r13d
|
|
|
+
|
|
|
+ ; -- Even part
|
|
|
+
|
|
|
+ pxor xmm4, xmm4
|
|
|
+ punpcklwd xmm4, xmm1 ; xmm4=tmp0
|
|
|
+ psrad xmm4, (16-CONST_BITS-1) ; psrad xmm4,16 & pslld xmm4,CONST_BITS+1
|
|
|
+@@ -384,19 +384,19 @@ EXTN(jsimd_idct_4x4_sse2):
|
|
|
+ mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
|
|
+ movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm4
|
|
|
+ movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
|
|
|
+ mov rdxp, JSAMPROW [rdi+2*SIZEOF_JSAMPROW]
|
|
|
+ mov rsip, JSAMPROW [rdi+3*SIZEOF_JSAMPROW]
|
|
|
+ movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
|
|
|
+ movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
|
|
|
+
|
|
|
+- uncollect_args 4
|
|
|
+- mov rsp, rbp ; rsp <- aligned rbp
|
|
|
+- pop rsp ; rsp <- original rbp
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
++ lea rsp, [rbp-8]
|
|
|
++ pop r15
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Perform dequantization and inverse DCT on one block of coefficients,
|
|
|
+ ; producing a reduced-size 2x2 output block.
|
|
|
+ ;
|
|
|
+@@ -409,20 +409,20 @@ EXTN(jsimd_idct_4x4_sse2):
|
|
|
+ ; r11 = JCOEFPTR coef_block
|
|
|
+ ; r12 = JSAMPARRAY output_buf
|
|
|
+ ; r13d = JDIMENSION output_col
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_idct_2x2_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 4
|
|
|
++ COLLECT_ARGS 4
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ ; ---- Pass 1: process columns from input.
|
|
|
+
|
|
|
+ mov rdx, r10 ; quantptr
|
|
|
+ mov rsi, r11 ; inptr
|
|
|
+
|
|
|
+ ; | input: | result: |
|
|
|
+@@ -560,15 +560,15 @@ EXTN(jsimd_idct_2x2_sse2):
|
|
|
+ pextrw ecx, xmm6, 0x01 ; ecx=(C1 D1 -- --)
|
|
|
+
|
|
|
+ mov rdxp, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
|
|
|
+ mov rsip, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
|
|
|
+ mov word [rdx+rax*SIZEOF_JSAMPLE], bx
|
|
|
+ mov word [rsi+rax*SIZEOF_JSAMPLE], cx
|
|
|
+
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 4
|
|
|
++ UNCOLLECT_ARGS 4
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jquantf-sse2.asm b/media/libjpeg/simd/x86_64/jquantf-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jquantf-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jquantf-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -32,20 +32,20 @@
|
|
|
+ ; r10 = JSAMPARRAY sample_data
|
|
|
+ ; r11d = JDIMENSION start_col
|
|
|
+ ; r12 = FAST_FLOAT *workspace
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_convsamp_float_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 3
|
|
|
++ COLLECT_ARGS 3
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ pcmpeqw xmm7, xmm7
|
|
|
+ psllw xmm7, 7
|
|
|
+ packsswb xmm7, xmm7 ; xmm7 = PB_CENTERJSAMPLE (0x808080..)
|
|
|
+
|
|
|
+ mov rsi, r10
|
|
|
+ mov eax, r11d
|
|
|
+@@ -84,17 +84,17 @@ EXTN(jsimd_convsamp_float_sse2):
|
|
|
+ movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm1
|
|
|
+
|
|
|
+ add rsi, byte 2*SIZEOF_JSAMPROW
|
|
|
+ add rdi, byte 2*DCTSIZE*SIZEOF_FAST_FLOAT
|
|
|
+ dec rcx
|
|
|
+ jnz short .convloop
|
|
|
+
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 3
|
|
|
++ UNCOLLECT_ARGS 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Quantize/descale the coefficients, and store into coef_block
|
|
|
+ ;
|
|
|
+ ; GLOBAL(void)
|
|
|
+@@ -105,20 +105,20 @@ EXTN(jsimd_convsamp_float_sse2):
|
|
|
+ ; r10 = JCOEFPTR coef_block
|
|
|
+ ; r11 = FAST_FLOAT *divisors
|
|
|
+ ; r12 = FAST_FLOAT *workspace
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_quantize_float_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 3
|
|
|
++ COLLECT_ARGS 3
|
|
|
+
|
|
|
+ mov rsi, r12
|
|
|
+ mov rdx, r11
|
|
|
+ mov rdi, r10
|
|
|
+ mov rax, DCTSIZE2/16
|
|
|
+ .quantloop:
|
|
|
+ movaps xmm0, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_FAST_FLOAT)]
|
|
|
+ movaps xmm1, XMMWORD [XMMBLOCK(0,1,rsi,SIZEOF_FAST_FLOAT)]
|
|
|
+@@ -141,15 +141,15 @@ EXTN(jsimd_quantize_float_sse2):
|
|
|
+ movdqa XMMWORD [XMMBLOCK(1,0,rdi,SIZEOF_JCOEF)], xmm2
|
|
|
+
|
|
|
+ add rsi, byte 16*SIZEOF_FAST_FLOAT
|
|
|
+ add rdx, byte 16*SIZEOF_FAST_FLOAT
|
|
|
+ add rdi, byte 16*SIZEOF_JCOEF
|
|
|
+ dec rax
|
|
|
+ jnz short .quantloop
|
|
|
+
|
|
|
+- uncollect_args 3
|
|
|
++ UNCOLLECT_ARGS 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jquanti-avx2.asm b/media/libjpeg/simd/x86_64/jquanti-avx2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jquanti-avx2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jquanti-avx2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, 2018, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2018, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2016, Matthieu Darbois.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+@@ -33,20 +33,20 @@
|
|
|
+ ; r10 = JSAMPARRAY sample_data
|
|
|
+ ; r11d = JDIMENSION start_col
|
|
|
+ ; r12 = DCTELEM *workspace
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_convsamp_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_convsamp_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 3
|
|
|
++ COLLECT_ARGS 3
|
|
|
+
|
|
|
+ mov eax, r11d
|
|
|
+
|
|
|
+ mov rsip, JSAMPROW [r10+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ mov rdip, JSAMPROW [r10+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
|
|
|
+ movq xmm0, XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE]
|
|
|
+ pinsrq xmm0, XMM_MMWORD [rdi+rax*SIZEOF_JSAMPLE], 1
|
|
|
+
|
|
|
+@@ -79,17 +79,17 @@ EXTN(jsimd_convsamp_avx2):
|
|
|
+ vpaddw ymm3, ymm3, ymm7
|
|
|
+
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)], ymm0
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)], ymm1
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)], ymm2
|
|
|
+ vmovdqu YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
|
|
|
+
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 3
|
|
|
++ UNCOLLECT_ARGS 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Quantize/descale the coefficients, and store into coef_block
|
|
|
+ ;
|
|
|
+ ; This implementation is based on an algorithm described in
|
|
|
+@@ -111,20 +111,20 @@ EXTN(jsimd_convsamp_avx2):
|
|
|
+ ; r10 = JCOEFPTR coef_block
|
|
|
+ ; r11 = DCTELEM *divisors
|
|
|
+ ; r12 = DCTELEM *workspace
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_quantize_avx2)
|
|
|
+
|
|
|
+ EXTN(jsimd_quantize_avx2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 3
|
|
|
++ COLLECT_ARGS 3
|
|
|
+
|
|
|
+ vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm6, [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
|
|
|
+ vmovdqu ymm7, [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
|
|
|
+ vpabsw ymm0, ymm4
|
|
|
+ vpabsw ymm1, ymm5
|
|
|
+ vpabsw ymm2, ymm6
|
|
|
+@@ -149,15 +149,15 @@ EXTN(jsimd_quantize_avx2):
|
|
|
+ vpsignw ymm3, ymm3, ymm7
|
|
|
+
|
|
|
+ vmovdqu [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
|
|
|
+ vmovdqu [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
|
|
|
+ vmovdqu [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
|
|
|
+ vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
|
|
|
+
|
|
|
+ vzeroupper
|
|
|
+- uncollect_args 3
|
|
|
++ UNCOLLECT_ARGS 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jquanti-sse2.asm b/media/libjpeg/simd/x86_64/jquanti-sse2.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jquanti-sse2.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jquanti-sse2.asm
|
|
|
+@@ -1,13 +1,13 @@
|
|
|
+ ;
|
|
|
+ ; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+-; Copyright (C) 2009, 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2009, 2016, 2024, D. R. Commander.
|
|
|
+ ; Copyright (C) 2018, Matthias Räncker.
|
|
|
+ ;
|
|
|
+ ; Based on the x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -32,20 +32,20 @@
|
|
|
+ ; r10 = JSAMPARRAY sample_data
|
|
|
+ ; r11d = JDIMENSION start_col
|
|
|
+ ; r12 = DCTELEM *workspace
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_convsamp_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_convsamp_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 3
|
|
|
++ COLLECT_ARGS 3
|
|
|
+ push rbx
|
|
|
+
|
|
|
+ pxor xmm6, xmm6 ; xmm6=(all 0's)
|
|
|
+ pcmpeqw xmm7, xmm7
|
|
|
+ psllw xmm7, 7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
|
|
|
+
|
|
|
+ mov rsi, r10
|
|
|
+ mov eax, r11d
|
|
|
+@@ -79,17 +79,17 @@ EXTN(jsimd_convsamp_sse2):
|
|
|
+ movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
|
|
|
+
|
|
|
+ add rsi, byte 4*SIZEOF_JSAMPROW
|
|
|
+ add rdi, byte 4*DCTSIZE*SIZEOF_DCTELEM
|
|
|
+ dec rcx
|
|
|
+ jnz short .convloop
|
|
|
+
|
|
|
+ pop rbx
|
|
|
+- uncollect_args 3
|
|
|
++ UNCOLLECT_ARGS 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; --------------------------------------------------------------------------
|
|
|
+ ;
|
|
|
+ ; Quantize/descale the coefficients, and store into coef_block
|
|
|
+ ;
|
|
|
+ ; This implementation is based on an algorithm described in
|
|
|
+@@ -111,20 +111,20 @@ EXTN(jsimd_convsamp_sse2):
|
|
|
+ ; r10 = JCOEFPTR coef_block
|
|
|
+ ; r11 = DCTELEM *divisors
|
|
|
+ ; r12 = DCTELEM *workspace
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jsimd_quantize_sse2)
|
|
|
+
|
|
|
+ EXTN(jsimd_quantize_sse2):
|
|
|
++ ENDBR64
|
|
|
+ push rbp
|
|
|
+- mov rax, rsp
|
|
|
+ mov rbp, rsp
|
|
|
+- collect_args 3
|
|
|
++ COLLECT_ARGS 3
|
|
|
+
|
|
|
+ mov rsi, r12
|
|
|
+ mov rdx, r11
|
|
|
+ mov rdi, r10
|
|
|
+ mov rax, DCTSIZE2/32
|
|
|
+ .quantloop:
|
|
|
+ movdqa xmm4, XMMWORD [XMMBLOCK(0,0,rsi,SIZEOF_DCTELEM)]
|
|
|
+ movdqa xmm5, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_DCTELEM)]
|
|
|
+@@ -174,15 +174,15 @@ EXTN(jsimd_quantize_sse2):
|
|
|
+ movdqa XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_DCTELEM)], xmm3
|
|
|
+
|
|
|
+ add rsi, byte 32*SIZEOF_DCTELEM
|
|
|
+ add rdx, byte 32*SIZEOF_DCTELEM
|
|
|
+ add rdi, byte 32*SIZEOF_JCOEF
|
|
|
+ dec rax
|
|
|
+ jnz near .quantloop
|
|
|
+
|
|
|
+- uncollect_args 3
|
|
|
++ UNCOLLECT_ARGS 3
|
|
|
+ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|
|
|
+diff --git a/media/libjpeg/simd/x86_64/jsimdcpu.asm b/media/libjpeg/simd/x86_64/jsimdcpu.asm
|
|
|
+--- a/media/libjpeg/simd/x86_64/jsimdcpu.asm
|
|
|
++++ b/media/libjpeg/simd/x86_64/jsimdcpu.asm
|
|
|
+@@ -1,13 +1,14 @@
|
|
|
+ ;
|
|
|
+ ; jsimdcpu.asm - SIMD instruction support check
|
|
|
+ ;
|
|
|
+ ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
|
+ ; Copyright (C) 2016, D. R. Commander.
|
|
|
++; Copyright (C) 2023, Aliaksiej Kandracienka.
|
|
|
+ ;
|
|
|
+ ; Based on
|
|
|
+ ; x86 SIMD extension for IJG JPEG library
|
|
|
+ ; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
|
+ ; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
|
+ ;
|
|
|
+ ; This file should be assembled with NASM (Netwide Assembler),
|
|
|
+ ; can *not* be assembled with Microsoft's MASM or any compatible
|
|
|
+@@ -26,16 +27,18 @@
|
|
|
+ ; GLOBAL(unsigned int)
|
|
|
+ ; jpeg_simd_cpu_support(void)
|
|
|
+ ;
|
|
|
+
|
|
|
+ align 32
|
|
|
+ GLOBAL_FUNCTION(jpeg_simd_cpu_support)
|
|
|
+
|
|
|
+ EXTN(jpeg_simd_cpu_support):
|
|
|
++ push rbp
|
|
|
++ mov rbp, rsp
|
|
|
+ push rbx
|
|
|
+ push rdi
|
|
|
+
|
|
|
+ xor rdi, rdi ; simd support flag
|
|
|
+
|
|
|
+ ; Assume that all x86-64 processors support SSE & SSE2 instructions
|
|
|
+ or rdi, JSIMD_SSE2
|
|
|
+ or rdi, JSIMD_SSE
|
|
|
+@@ -74,13 +77,14 @@ EXTN(jpeg_simd_cpu_support):
|
|
|
+
|
|
|
+ or rdi, JSIMD_AVX2
|
|
|
+
|
|
|
+ .return:
|
|
|
+ mov rax, rdi
|
|
|
+
|
|
|
+ pop rdi
|
|
|
+ pop rbx
|
|
|
++ pop rbp
|
|
|
+ ret
|
|
|
+
|
|
|
+ ; For some reason, the OS X linker does not honor the request to align the
|
|
|
+ ; segment unless we do this.
|
|
|
+ align 32
|