1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
8 // Image transforms and color space conversion methods for lossless decoder.
10 // Authors: Vikas Arora (vikaas.arora@gmail.com)
11 // Jyrki Alakuijala (jyrki@google.com)
12 // Urvang Joshi (urvang@google.com)
16 // Define the following if target arch is sure to have SSE2
17 // #define WEBP_TARGET_HAS_SSE2
19 #if defined(__cplusplus) || defined(c_plusplus)
23 #if defined(WEBP_TARGET_HAS_SSE2)
24 #include <emmintrin.h>
29 #include "./lossless.h"
30 #include "../dec/vp8li.h"
33 #define MAX_DIFF_COST (1e30f)
35 // lookup table for small values of log2(int)
36 #define APPROX_LOG_MAX 4096
37 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
38 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
39 0.0000000000000000f, 0.0000000000000000f,
40 1.0000000000000000f, 1.5849625007211560f,
41 2.0000000000000000f, 2.3219280948873621f,
42 2.5849625007211560f, 2.8073549220576041f,
43 3.0000000000000000f, 3.1699250014423121f,
44 3.3219280948873621f, 3.4594316186372973f,
45 3.5849625007211560f, 3.7004397181410921f,
46 3.8073549220576041f, 3.9068905956085187f,
47 4.0000000000000000f, 4.0874628412503390f,
48 4.1699250014423121f, 4.2479275134435852f,
49 4.3219280948873626f, 4.3923174227787606f,
50 4.4594316186372973f, 4.5235619560570130f,
51 4.5849625007211560f, 4.6438561897747243f,
52 4.7004397181410917f, 4.7548875021634682f,
53 4.8073549220576037f, 4.8579809951275718f,
54 4.9068905956085187f, 4.9541963103868749f,
55 5.0000000000000000f, 5.0443941193584533f,
56 5.0874628412503390f, 5.1292830169449663f,
57 5.1699250014423121f, 5.2094533656289501f,
58 5.2479275134435852f, 5.2854022188622487f,
59 5.3219280948873626f, 5.3575520046180837f,
60 5.3923174227787606f, 5.4262647547020979f,
61 5.4594316186372973f, 5.4918530963296747f,
62 5.5235619560570130f, 5.5545888516776376f,
63 5.5849625007211560f, 5.6147098441152083f,
64 5.6438561897747243f, 5.6724253419714951f,
65 5.7004397181410917f, 5.7279204545631987f,
66 5.7548875021634682f, 5.7813597135246599f,
67 5.8073549220576037f, 5.8328900141647412f,
68 5.8579809951275718f, 5.8826430493618415f,
69 5.9068905956085187f, 5.9307373375628866f,
70 5.9541963103868749f, 5.9772799234999167f,
71 6.0000000000000000f, 6.0223678130284543f,
72 6.0443941193584533f, 6.0660891904577720f,
73 6.0874628412503390f, 6.1085244567781691f,
74 6.1292830169449663f, 6.1497471195046822f,
75 6.1699250014423121f, 6.1898245588800175f,
76 6.2094533656289501f, 6.2288186904958804f,
77 6.2479275134435852f, 6.2667865406949010f,
78 6.2854022188622487f, 6.3037807481771030f,
79 6.3219280948873626f, 6.3398500028846243f,
80 6.3575520046180837f, 6.3750394313469245f,
81 6.3923174227787606f, 6.4093909361377017f,
82 6.4262647547020979f, 6.4429434958487279f,
83 6.4594316186372973f, 6.4757334309663976f,
84 6.4918530963296747f, 6.5077946401986963f,
85 6.5235619560570130f, 6.5391588111080309f,
86 6.5545888516776376f, 6.5698556083309478f,
87 6.5849625007211560f, 6.5999128421871278f,
88 6.6147098441152083f, 6.6293566200796094f,
89 6.6438561897747243f, 6.6582114827517946f,
90 6.6724253419714951f, 6.6865005271832185f,
91 6.7004397181410917f, 6.7142455176661224f,
92 6.7279204545631987f, 6.7414669864011464f,
93 6.7548875021634682f, 6.7681843247769259f,
94 6.7813597135246599f, 6.7944158663501061f,
95 6.8073549220576037f, 6.8201789624151878f,
96 6.8328900141647412f, 6.8454900509443747f,
97 6.8579809951275718f, 6.8703647195834047f,
98 6.8826430493618415f, 6.8948177633079437f,
99 6.9068905956085187f, 6.9188632372745946f,
100 6.9307373375628866f, 6.9425145053392398f,
101 6.9541963103868749f, 6.9657842846620869f,
102 6.9772799234999167f, 6.9886846867721654f,
103 7.0000000000000000f, 7.0112272554232539f,
104 7.0223678130284543f, 7.0334230015374501f,
105 7.0443941193584533f, 7.0552824355011898f,
106 7.0660891904577720f, 7.0768155970508308f,
107 7.0874628412503390f, 7.0980320829605263f,
108 7.1085244567781691f, 7.1189410727235076f,
109 7.1292830169449663f, 7.1395513523987936f,
110 7.1497471195046822f, 7.1598713367783890f,
111 7.1699250014423121f, 7.1799090900149344f,
112 7.1898245588800175f, 7.1996723448363644f,
113 7.2094533656289501f, 7.2191685204621611f,
114 7.2288186904958804f, 7.2384047393250785f,
115 7.2479275134435852f, 7.2573878426926521f,
116 7.2667865406949010f, 7.2761244052742375f,
117 7.2854022188622487f, 7.2946207488916270f,
118 7.3037807481771030f, 7.3128829552843557f,
119 7.3219280948873626f, 7.3309168781146167f,
120 7.3398500028846243f, 7.3487281542310771f,
121 7.3575520046180837f, 7.3663222142458160f,
122 7.3750394313469245f, 7.3837042924740519f,
123 7.3923174227787606f, 7.4008794362821843f,
124 7.4093909361377017f, 7.4178525148858982f,
125 7.4262647547020979f, 7.4346282276367245f,
126 7.4429434958487279f, 7.4512111118323289f,
127 7.4594316186372973f, 7.4676055500829976f,
128 7.4757334309663976f, 7.4838157772642563f,
129 7.4918530963296747f, 7.4998458870832056f,
130 7.5077946401986963f, 7.5156998382840427f,
131 7.5235619560570130f, 7.5313814605163118f,
132 7.5391588111080309f, 7.5468944598876364f,
133 7.5545888516776376f, 7.5622424242210728f,
134 7.5698556083309478f, 7.5774288280357486f,
135 7.5849625007211560f, 7.5924570372680806f,
136 7.5999128421871278f, 7.6073303137496104f,
137 7.6147098441152083f, 7.6220518194563764f,
138 7.6293566200796094f, 7.6366246205436487f,
139 7.6438561897747243f, 7.6510516911789281f,
140 7.6582114827517946f, 7.6653359171851764f,
141 7.6724253419714951f, 7.6794800995054464f,
142 7.6865005271832185f, 7.6934869574993252f,
143 7.7004397181410917f, 7.7073591320808825f,
144 7.7142455176661224f, 7.7210991887071855f,
145 7.7279204545631987f, 7.7347096202258383f,
146 7.7414669864011464f, 7.7481928495894605f,
147 7.7548875021634682f, 7.7615512324444795f,
148 7.7681843247769259f, 7.7747870596011736f,
149 7.7813597135246599f, 7.7879025593914317f,
150 7.7944158663501061f, 7.8008998999203047f,
151 7.8073549220576037f, 7.8137811912170374f,
152 7.8201789624151878f, 7.8265484872909150f,
153 7.8328900141647412f, 7.8392037880969436f,
154 7.8454900509443747f, 7.8517490414160571f,
155 7.8579809951275718f, 7.8641861446542797f,
156 7.8703647195834047f, 7.8765169465649993f,
157 7.8826430493618415f, 7.8887432488982591f,
158 7.8948177633079437f, 7.9008668079807486f,
159 7.9068905956085187f, 7.9128893362299619f,
160 7.9188632372745946f, 7.9248125036057812f,
161 7.9307373375628866f, 7.9366379390025709f,
162 7.9425145053392398f, 7.9483672315846778f,
163 7.9541963103868749f, 7.9600019320680805f,
164 7.9657842846620869f, 7.9715435539507719f,
165 7.9772799234999167f, 7.9829935746943103f,
166 7.9886846867721654f, 7.9943534368588577f
169 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
170 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
171 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
172 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
173 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
174 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
175 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
176 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
177 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
178 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
179 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
180 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
181 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
182 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
183 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
184 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
185 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
186 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
187 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
188 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
189 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
190 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
191 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
192 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
193 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
194 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
195 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
196 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
197 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
198 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
199 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
200 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
201 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
202 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
203 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
204 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
205 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
206 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
207 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
208 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
209 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
210 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
211 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
212 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
213 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
214 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
215 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
216 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
217 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
218 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
219 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
220 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
221 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
222 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
223 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
224 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
225 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
226 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
227 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
228 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
229 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
230 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
231 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
232 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
233 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
236 float VP8LFastSLog2Slow(int v) {
237 assert(v >= LOG_LOOKUP_IDX_MAX);
238 if (v < APPROX_LOG_MAX) {
240 const float v_f = (float)v;
241 while (v >= LOG_LOOKUP_IDX_MAX) {
245 return v_f * (kLog2Table[v] + log_cnt);
247 return (float)(LOG_2_RECIPROCAL * v * log((double)v));
251 float VP8LFastLog2Slow(int v) {
252 assert(v >= LOG_LOOKUP_IDX_MAX);
253 if (v < APPROX_LOG_MAX) {
255 while (v >= LOG_LOOKUP_IDX_MAX) {
259 return kLog2Table[v] + log_cnt;
261 return (float)(LOG_2_RECIPROCAL * log((double)v));
265 //------------------------------------------------------------------------------
268 // In-place sum of each component with mod 256.
269 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
270 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
271 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
272 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
275 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
276 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
279 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
280 return Average2(Average2(a0, a2), a1);
283 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
284 uint32_t a2, uint32_t a3) {
285 return Average2(Average2(a0, a1), Average2(a2, a3));
288 #if defined(WEBP_TARGET_HAS_SSE2)
289 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
291 const __m128i zero = _mm_setzero_si128();
292 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
293 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
294 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
295 const __m128i V1 = _mm_add_epi16(C0, C1);
296 const __m128i V2 = _mm_sub_epi16(V1, C2);
297 const __m128i b = _mm_packus_epi16(V2, V2);
298 const uint32_t output = _mm_cvtsi128_si32(b);
302 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
304 const uint32_t ave = Average2(c0, c1);
305 const __m128i zero = _mm_setzero_si128();
306 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
307 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
308 const __m128i A1 = _mm_sub_epi16(A0, B0);
309 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
310 const __m128i A2 = _mm_sub_epi16(A1, BgtA);
311 const __m128i A3 = _mm_srai_epi16(A2, 1);
312 const __m128i A4 = _mm_add_epi16(A0, A3);
313 const __m128i A5 = _mm_packus_epi16(A4, A4);
314 const uint32_t output = _mm_cvtsi128_si32(A5);
318 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
320 const __m128i zero = _mm_setzero_si128();
321 const __m128i A0 = _mm_cvtsi32_si128(a);
322 const __m128i B0 = _mm_cvtsi32_si128(b);
323 const __m128i C0 = _mm_cvtsi32_si128(c);
324 const __m128i AC0 = _mm_subs_epu8(A0, C0);
325 const __m128i CA0 = _mm_subs_epu8(C0, A0);
326 const __m128i BC0 = _mm_subs_epu8(B0, C0);
327 const __m128i CB0 = _mm_subs_epu8(C0, B0);
328 const __m128i AC = _mm_or_si128(AC0, CA0);
329 const __m128i BC = _mm_or_si128(BC0, CB0);
330 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
331 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
332 const __m128i diff = _mm_sub_epi16(pb, pa);
335 _mm_storeu_si128((__m128i*)out, diff);
336 pa_minus_pb = out[0] + out[1] + out[2] + out[3];
338 return (pa_minus_pb <= 0) ? a : b;
343 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
347 // return 0, when a is a negative integer.
348 // return 255, when a is positive.
352 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
353 return Clip255(a + b - c);
356 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
358 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
359 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
362 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
365 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
366 return (a << 24) | (r << 16) | (g << 8) | b;
369 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
370 return Clip255(a + (a - b) / 2);
373 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
375 const uint32_t ave = Average2(c0, c1);
376 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
377 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
378 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
379 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
380 return (a << 24) | (r << 16) | (g << 8) | b;
383 static WEBP_INLINE int Sub3(int a, int b, int c) {
384 const int pb = b - c;
385 const int pa = a - c;
386 return abs(pb) - abs(pa);
389 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
390 const int pa_minus_pb =
391 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
392 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
393 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
394 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
395 return (pa_minus_pb <= 0) ? a : b;
399 //------------------------------------------------------------------------------
402 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
407 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
411 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
415 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
419 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
423 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
424 const uint32_t pred = Average3(left, top[0], top[1]);
427 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
428 const uint32_t pred = Average2(left, top[-1]);
431 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
432 const uint32_t pred = Average2(left, top[0]);
435 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
436 const uint32_t pred = Average2(top[-1], top[0]);
440 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
441 const uint32_t pred = Average2(top[0], top[1]);
445 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
446 const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
449 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
450 const uint32_t pred = Select(top[0], left, top[-1]);
453 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
454 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
457 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
458 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
462 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
463 static const PredictorFunc kPredictors[16] = {
464 Predictor0, Predictor1, Predictor2, Predictor3,
465 Predictor4, Predictor5, Predictor6, Predictor7,
466 Predictor8, Predictor9, Predictor10, Predictor11,
467 Predictor12, Predictor13,
468 Predictor0, Predictor0 // <- padding security sentinels
471 // TODO(vikasa): Replace 256 etc with defines.
472 static float PredictionCostSpatial(const int* counts,
473 int weight_0, double exp_val) {
474 const int significant_symbols = 16;
475 const double exp_decay_factor = 0.6;
476 double bits = weight_0 * counts[0];
478 for (i = 1; i < significant_symbols; ++i) {
479 bits += exp_val * (counts[i] + counts[256 - i]);
480 exp_val *= exp_decay_factor;
482 return (float)(-0.1 * bits);
485 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
486 static float CombinedShannonEntropy(const int* const X,
487 const int* const Y, int n) {
490 int sumX = 0, sumXY = 0;
491 for (i = 0; i < n; ++i) {
493 const int xy = X[i] + Y[i];
496 retval -= VP8LFastSLog2(x);
500 retval -= VP8LFastSLog2(xy);
503 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
504 return (float)retval;
507 static float PredictionCostSpatialHistogram(int accumulated[4][256],
511 for (i = 0; i < 4; ++i) {
512 const double kExpValue = 0.94;
513 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
514 retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
516 return (float)retval;
519 static int GetBestPredictorForTile(int width, int height,
520 int tile_x, int tile_y, int bits,
521 int accumulated[4][256],
522 const uint32_t* const argb_scratch) {
523 const int kNumPredModes = 14;
524 const int col_start = tile_x << bits;
525 const int row_start = tile_y << bits;
526 const int tile_size = 1 << bits;
527 const int ymax = (tile_size <= height - row_start) ?
528 tile_size : height - row_start;
529 const int xmax = (tile_size <= width - col_start) ?
530 tile_size : width - col_start;
532 float best_diff = MAX_DIFF_COST;
536 for (mode = 0; mode < kNumPredModes; ++mode) {
537 const uint32_t* current_row = argb_scratch;
538 const PredictorFunc pred_func = kPredictors[mode];
541 memset(&histo[0][0], 0, sizeof(histo));
542 for (y = 0; y < ymax; ++y) {
544 const int row = row_start + y;
545 const uint32_t* const upper_row = current_row;
546 current_row = upper_row + width;
547 for (x = 0; x < xmax; ++x) {
548 const int col = col_start + x;
550 uint32_t predict_diff;
552 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
553 } else if (col == 0) {
554 predict = upper_row[col]; // Top.
556 predict = pred_func(current_row[col - 1], upper_row + col);
558 predict_diff = VP8LSubPixels(current_row[col], predict);
559 ++histo[0][predict_diff >> 24];
560 ++histo[1][((predict_diff >> 16) & 0xff)];
561 ++histo[2][((predict_diff >> 8) & 0xff)];
562 ++histo[3][(predict_diff & 0xff)];
565 cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
566 if (cur_diff < best_diff) {
567 best_diff = cur_diff;
575 static void CopyTileWithPrediction(int width, int height,
576 int tile_x, int tile_y, int bits, int mode,
577 const uint32_t* const argb_scratch,
578 uint32_t* const argb) {
579 const int col_start = tile_x << bits;
580 const int row_start = tile_y << bits;
581 const int tile_size = 1 << bits;
582 const int ymax = (tile_size <= height - row_start) ?
583 tile_size : height - row_start;
584 const int xmax = (tile_size <= width - col_start) ?
585 tile_size : width - col_start;
586 const PredictorFunc pred_func = kPredictors[mode];
587 const uint32_t* current_row = argb_scratch;
590 for (y = 0; y < ymax; ++y) {
592 const int row = row_start + y;
593 const uint32_t* const upper_row = current_row;
594 current_row = upper_row + width;
595 for (x = 0; x < xmax; ++x) {
596 const int col = col_start + x;
597 const int pix = row * width + col;
600 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
601 } else if (col == 0) {
602 predict = upper_row[col]; // Top.
604 predict = pred_func(current_row[col - 1], upper_row + col);
606 argb[pix] = VP8LSubPixels(current_row[col], predict);
611 void VP8LResidualImage(int width, int height, int bits,
612 uint32_t* const argb, uint32_t* const argb_scratch,
613 uint32_t* const image) {
614 const int max_tile_size = 1 << bits;
615 const int tiles_per_row = VP8LSubSampleSize(width, bits);
616 const int tiles_per_col = VP8LSubSampleSize(height, bits);
617 uint32_t* const upper_row = argb_scratch;
618 uint32_t* const current_tile_rows = argb_scratch + width;
621 memset(histo, 0, sizeof(histo));
622 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
623 const int tile_y_offset = tile_y * max_tile_size;
624 const int this_tile_height =
625 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
628 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
629 width * sizeof(*upper_row));
631 memcpy(current_tile_rows, &argb[tile_y_offset * width],
632 this_tile_height * width * sizeof(*current_tile_rows));
633 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
636 const int tile_x_offset = tile_x * max_tile_size;
637 int all_x_max = tile_x_offset + max_tile_size;
638 if (all_x_max > width) {
641 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
643 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
644 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
646 for (y = 0; y < max_tile_size; ++y) {
649 int all_y = tile_y_offset + y;
650 if (all_y >= height) {
653 ix = all_y * width + tile_x_offset;
654 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
655 const uint32_t a = argb[ix];
657 ++histo[1][((a >> 16) & 0xff)];
658 ++histo[2][((a >> 8) & 0xff)];
659 ++histo[3][(a & 0xff)];
666 // Inverse prediction.
667 static void PredictorInverseTransform(const VP8LTransform* const transform,
668 int y_start, int y_end, uint32_t* data) {
669 const int width = transform->xsize_;
670 if (y_start == 0) { // First Row follows the L (mode=1) mode.
672 const uint32_t pred0 = Predictor0(data[-1], NULL);
673 AddPixelsEq(data, pred0);
674 for (x = 1; x < width; ++x) {
675 const uint32_t pred1 = Predictor1(data[x - 1], NULL);
676 AddPixelsEq(data + x, pred1);
684 const int mask = (1 << transform->bits_) - 1;
685 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
686 const uint32_t* pred_mode_base =
687 transform->data_ + (y >> transform->bits_) * tiles_per_row;
691 const uint32_t pred2 = Predictor2(data[-1], data - width);
692 const uint32_t* pred_mode_src = pred_mode_base;
693 PredictorFunc pred_func;
695 // First pixel follows the T (mode=2) mode.
696 AddPixelsEq(data, pred2);
699 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
700 for (x = 1; x < width; ++x) {
702 if ((x & mask) == 0) { // start of tile. Read predictor function.
703 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
705 pred = pred_func(data[x - 1], data + x - width);
706 AddPixelsEq(data + x, pred);
710 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
711 pred_mode_base += tiles_per_row;
717 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
719 #if defined(WEBP_TARGET_HAS_SSE2)
720 const __m128i mask = _mm_set1_epi32(0x0000ff00);
721 for (; i + 4 < num_pixs; i += 4) {
722 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
723 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
724 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
725 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
726 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
727 const __m128i out = _mm_sub_epi8(in, in_0g0g);
728 _mm_storeu_si128((__m128i*)&argb_data[i], out);
730 // fallthrough and finish off with plain-C
732 for (; i < num_pixs; ++i) {
733 const uint32_t argb = argb_data[i];
734 const uint32_t green = (argb >> 8) & 0xff;
735 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
736 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
737 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
741 // Add green to blue and red channels (i.e. perform the inverse transform of
742 // 'subtract green').
743 static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
744 int y_start, int y_end, uint32_t* data) {
745 const int width = transform->xsize_;
746 const uint32_t* const data_end = data + (y_end - y_start) * width;
747 #if defined(WEBP_TARGET_HAS_SSE2)
748 const __m128i mask = _mm_set1_epi32(0x0000ff00);
749 for (; data + 4 < data_end; data += 4) {
750 const __m128i in = _mm_loadu_si128((__m128i*)data);
751 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
752 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
753 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
754 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
755 const __m128i out = _mm_add_epi8(in, in_0g0g);
756 _mm_storeu_si128((__m128i*)data, out);
758 // fallthrough and finish off with plain-C
760 while (data < data_end) {
761 const uint32_t argb = *data;
762 const uint32_t green = ((argb >> 8) & 0xff);
763 uint32_t red_blue = (argb & 0x00ff00ffu);
764 red_blue += (green << 16) | green;
765 red_blue &= 0x00ff00ffu;
766 *data++ = (argb & 0xff00ff00u) | red_blue;
771 // Note: the members are uint8_t, so that any negative values are
772 // automatically converted to "mod 256" values.
773 uint8_t green_to_red_;
774 uint8_t green_to_blue_;
775 uint8_t red_to_blue_;
778 static WEBP_INLINE void MultipliersClear(Multipliers* m) {
779 m->green_to_red_ = 0;
780 m->green_to_blue_ = 0;
784 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
786 return (uint32_t)((int)(color_pred) * color) >> 5;
789 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
790 Multipliers* const m) {
791 m->green_to_red_ = (color_code >> 0) & 0xff;
792 m->green_to_blue_ = (color_code >> 8) & 0xff;
793 m->red_to_blue_ = (color_code >> 16) & 0xff;
796 static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
798 ((uint32_t)(m->red_to_blue_) << 16) |
799 ((uint32_t)(m->green_to_blue_) << 8) |
803 static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
804 uint32_t argb, int inverse) {
805 const uint32_t green = argb >> 8;
806 const uint32_t red = argb >> 16;
807 uint32_t new_red = red;
808 uint32_t new_blue = argb;
811 new_red += ColorTransformDelta(m->green_to_red_, green);
813 new_blue += ColorTransformDelta(m->green_to_blue_, green);
814 new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
817 new_red -= ColorTransformDelta(m->green_to_red_, green);
819 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
820 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
823 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
826 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
828 const uint32_t green = argb >> 8;
829 uint32_t new_red = argb >> 16;
830 new_red -= ColorTransformDelta(green_to_red, green);
831 return (new_red & 0xff);
834 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
837 const uint32_t green = argb >> 8;
838 const uint32_t red = argb >> 16;
839 uint8_t new_blue = argb;
840 new_blue -= ColorTransformDelta(green_to_blue, green);
841 new_blue -= ColorTransformDelta(red_to_blue, red);
842 return (new_blue & 0xff);
845 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
847 const uint32_t v = argb[ix];
848 if (ix >= xsize + 3) {
849 if (v == argb[ix - xsize] &&
850 argb[ix - 1] == argb[ix - xsize - 1] &&
851 argb[ix - 2] == argb[ix - xsize - 2] &&
852 argb[ix - 3] == argb[ix - xsize - 3]) {
855 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
856 } else if (ix >= 3) {
857 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
862 static float PredictionCostCrossColor(const int accumulated[256],
863 const int counts[256]) {
864 // Favor low entropy, locally and globally.
865 // Favor small absolute values for PredictionCostSpatial
866 static const double kExpValue = 2.4;
867 return CombinedShannonEntropy(counts, accumulated, 256) +
868 PredictionCostSpatial(counts, 3, kExpValue);
871 static Multipliers GetBestColorTransformForTile(
872 int tile_x, int tile_y, int bits,
875 int step, int xsize, int ysize,
876 int* accumulated_red_histo,
877 int* accumulated_blue_histo,
878 const uint32_t* const argb) {
879 float best_diff = MAX_DIFF_COST;
881 const int halfstep = step / 2;
882 const int max_tile_size = 1 << bits;
883 const int tile_y_offset = tile_y * max_tile_size;
884 const int tile_x_offset = tile_x * max_tile_size;
888 int all_x_max = tile_x_offset + max_tile_size;
889 int all_y_max = tile_y_offset + max_tile_size;
891 MultipliersClear(&best_tx);
892 if (all_x_max > xsize) {
895 if (all_y_max > ysize) {
899 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
900 int histo[256] = { 0 };
903 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
904 int ix = all_y * xsize + tile_x_offset;
906 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
907 if (SkipRepeatedPixels(argb, ix, xsize)) {
910 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
913 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
914 if ((uint8_t)green_to_red == prevX.green_to_red_) {
915 cur_diff -= 3; // favor keeping the areas locally similar
917 if ((uint8_t)green_to_red == prevY.green_to_red_) {
918 cur_diff -= 3; // favor keeping the areas locally similar
920 if (green_to_red == 0) {
923 if (cur_diff < best_diff) {
924 best_diff = cur_diff;
925 best_tx.green_to_red_ = green_to_red;
928 best_diff = MAX_DIFF_COST;
929 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
930 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
932 int histo[256] = { 0 };
933 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
935 int ix = all_y * xsize + tile_x_offset;
936 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
937 if (SkipRepeatedPixels(argb, ix, xsize)) {
940 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
944 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
945 if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
946 cur_diff -= 3; // favor keeping the areas locally similar
948 if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
949 cur_diff -= 3; // favor keeping the areas locally similar
951 if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
952 cur_diff -= 3; // favor keeping the areas locally similar
954 if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
955 cur_diff -= 3; // favor keeping the areas locally similar
957 if (green_to_blue == 0) {
960 if (red_to_blue == 0) {
963 if (cur_diff < best_diff) {
964 best_diff = cur_diff;
965 best_tx.green_to_blue_ = green_to_blue;
966 best_tx.red_to_blue_ = red_to_blue;
973 static void CopyTileWithColorTransform(int xsize, int ysize,
974 int tile_x, int tile_y, int bits,
975 Multipliers color_transform,
976 uint32_t* const argb) {
978 int xscan = 1 << bits;
979 int yscan = 1 << bits;
982 if (xscan > xsize - tile_x) {
983 xscan = xsize - tile_x;
985 if (yscan > ysize - tile_y) {
986 yscan = ysize - tile_y;
989 for (y = tile_y; y < yscan; ++y) {
990 int ix = y * xsize + tile_x;
991 const int end_ix = ix + xscan;
992 for (; ix < end_ix; ++ix) {
993 argb[ix] = TransformColor(&color_transform, argb[ix], 0);
998 void VP8LColorSpaceTransform(int width, int height, int bits, int step,
999 uint32_t* const argb, uint32_t* image) {
1000 const int max_tile_size = 1 << bits;
1001 int tile_xsize = VP8LSubSampleSize(width, bits);
1002 int tile_ysize = VP8LSubSampleSize(height, bits);
1003 int accumulated_red_histo[256] = { 0 };
1004 int accumulated_blue_histo[256] = { 0 };
1009 MultipliersClear(&prevY);
1010 MultipliersClear(&prevX);
1011 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1012 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1013 Multipliers color_transform;
1016 const int tile_y_offset = tile_y * max_tile_size;
1017 const int tile_x_offset = tile_x * max_tile_size;
1019 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1020 ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
1022 } else if (tile_x != 0) {
1023 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1026 GetBestColorTransformForTile(tile_x, tile_y, bits,
1028 step, width, height,
1029 &accumulated_red_histo[0],
1030 &accumulated_blue_histo[0],
1032 image[tile_y * tile_xsize + tile_x] =
1033 MultipliersToColorCode(&color_transform);
1034 CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
1035 color_transform, argb);
1037 // Gather accumulated histogram data.
1038 all_x_max = tile_x_offset + max_tile_size;
1039 if (all_x_max > width) {
1042 for (y = 0; y < max_tile_size; ++y) {
1045 int all_y = tile_y_offset + y;
1046 if (all_y >= height) {
1049 ix = all_y * width + tile_x_offset;
1050 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
1052 argb[ix] == argb[ix - 2] &&
1053 argb[ix] == argb[ix - 1]) {
1054 continue; // repeated pixels are handled by backward references
1056 if (ix >= width + 2 &&
1057 argb[ix - 2] == argb[ix - width - 2] &&
1058 argb[ix - 1] == argb[ix - width - 1] &&
1059 argb[ix] == argb[ix - width]) {
1060 continue; // repeated pixels are handled by backward references
1062 ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
1063 ++accumulated_blue_histo[argb[ix] & 0xff];
1070 // Color space inverse transform.
1071 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1072 int y_start, int y_end, uint32_t* data) {
1073 const int width = transform->xsize_;
1074 const int mask = (1 << transform->bits_) - 1;
1075 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1077 const uint32_t* pred_row =
1078 transform->data_ + (y >> transform->bits_) * tiles_per_row;
1081 const uint32_t* pred = pred_row;
1082 Multipliers m = { 0, 0, 0 };
1085 for (x = 0; x < width; ++x) {
1086 if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
1087 data[x] = TransformColor(&m, data[x], 1);
1091 if ((y & mask) == 0) pred_row += tiles_per_row;;
1095 // Separate out pixels packed together using pixel-bundling.
1096 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
1097 #define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
1098 void FUNC_NAME(const VP8LTransform* const transform, \
1099 int y_start, int y_end, const TYPE* src, TYPE* dst) { \
1101 const int bits_per_pixel = 8 >> transform->bits_; \
1102 const int width = transform->xsize_; \
1103 const uint32_t* const color_map = transform->data_; \
1104 if (bits_per_pixel < 8) { \
1105 const int pixels_per_byte = 1 << transform->bits_; \
1106 const int count_mask = pixels_per_byte - 1; \
1107 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
1108 for (y = y_start; y < y_end; ++y) { \
1109 uint32_t packed_pixels = 0; \
1111 for (x = 0; x < width; ++x) { \
1112 /* We need to load fresh 'packed_pixels' once every */ \
1113 /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
1114 /* is a power of 2, so can just use a mask for that, instead of */ \
1115 /* decrementing a counter. */ \
1116 if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
1117 *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
1118 packed_pixels >>= bits_per_pixel; \
1122 for (y = y_start; y < y_end; ++y) { \
1124 for (x = 0; x < width; ++x) { \
1125 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
1131 static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
1132 return (idx >> 8) & 0xff;
1135 static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
1139 static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
1143 static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
1144 return (val >> 8) & 0xff;
1147 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
1149 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
1152 #undef COLOR_INDEX_INVERSE
1154 void VP8LInverseTransform(const VP8LTransform* const transform,
1155 int row_start, int row_end,
1156 const uint32_t* const in, uint32_t* const out) {
1157 assert(row_start < row_end);
1158 assert(row_end <= transform->ysize_);
1159 switch (transform->type_) {
1160 case SUBTRACT_GREEN:
1161 AddGreenToBlueAndRed(transform, row_start, row_end, out);
1163 case PREDICTOR_TRANSFORM:
1164 PredictorInverseTransform(transform, row_start, row_end, out);
1165 if (row_end != transform->ysize_) {
1166 // The last predicted row in this iteration will be the top-pred row
1167 // for the first row in next iteration.
1168 const int width = transform->xsize_;
1169 memcpy(out - width, out + (row_end - row_start - 1) * width,
1170 width * sizeof(*out));
1173 case CROSS_COLOR_TRANSFORM:
1174 ColorSpaceInverseTransform(transform, row_start, row_end, out);
1176 case COLOR_INDEXING_TRANSFORM:
1177 if (in == out && transform->bits_ > 0) {
1178 // Move packed pixels to the end of unpacked region, so that unpacking
1179 // can occur seamlessly.
1180 // Also, note that this is the only transform that applies on
1181 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1182 // transforms work on effective width of xsize_.
1183 const int out_stride = (row_end - row_start) * transform->xsize_;
1184 const int in_stride = (row_end - row_start) *
1185 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1186 uint32_t* const src = out + out_stride - in_stride;
1187 memmove(src, out, in_stride * sizeof(*src));
1188 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1190 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1196 //------------------------------------------------------------------------------
1197 // Color space conversion.
1199 static int is_big_endian(void) {
1200 static const union {
1204 return (tmp.b[0] != 1);
1207 static void ConvertBGRAToRGB(const uint32_t* src,
1208 int num_pixels, uint8_t* dst) {
1209 const uint32_t* const src_end = src + num_pixels;
1210 while (src < src_end) {
1211 const uint32_t argb = *src++;
1212 *dst++ = (argb >> 16) & 0xff;
1213 *dst++ = (argb >> 8) & 0xff;
1214 *dst++ = (argb >> 0) & 0xff;
1218 static void ConvertBGRAToRGBA(const uint32_t* src,
1219 int num_pixels, uint8_t* dst) {
1220 const uint32_t* const src_end = src + num_pixels;
1221 while (src < src_end) {
1222 const uint32_t argb = *src++;
1223 *dst++ = (argb >> 16) & 0xff;
1224 *dst++ = (argb >> 8) & 0xff;
1225 *dst++ = (argb >> 0) & 0xff;
1226 *dst++ = (argb >> 24) & 0xff;
1230 static void ConvertBGRAToRGBA4444(const uint32_t* src,
1231 int num_pixels, uint8_t* dst) {
1232 const uint32_t* const src_end = src + num_pixels;
1233 while (src < src_end) {
1234 const uint32_t argb = *src++;
1235 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1236 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
1237 #ifdef WEBP_SWAP_16BIT_CSP
1247 static void ConvertBGRAToRGB565(const uint32_t* src,
1248 int num_pixels, uint8_t* dst) {
1249 const uint32_t* const src_end = src + num_pixels;
1250 while (src < src_end) {
1251 const uint32_t argb = *src++;
1252 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1253 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
1254 #ifdef WEBP_SWAP_16BIT_CSP
1264 static void ConvertBGRAToBGR(const uint32_t* src,
1265 int num_pixels, uint8_t* dst) {
1266 const uint32_t* const src_end = src + num_pixels;
1267 while (src < src_end) {
1268 const uint32_t argb = *src++;
1269 *dst++ = (argb >> 0) & 0xff;
1270 *dst++ = (argb >> 8) & 0xff;
1271 *dst++ = (argb >> 16) & 0xff;
1275 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1276 int swap_on_big_endian) {
1277 if (is_big_endian() == swap_on_big_endian) {
1278 const uint32_t* const src_end = src + num_pixels;
1279 while (src < src_end) {
1280 uint32_t argb = *src++;
1282 #if !defined(__BIG_ENDIAN__)
1283 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1284 #if defined(__i386__) || defined(__x86_64__)
1285 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
1286 *(uint32_t*)dst = argb;
1287 #elif defined(_MSC_VER)
1288 argb = _byteswap_ulong(argb);
1289 *(uint32_t*)dst = argb;
1291 dst[0] = (argb >> 24) & 0xff;
1292 dst[1] = (argb >> 16) & 0xff;
1293 dst[2] = (argb >> 8) & 0xff;
1294 dst[3] = (argb >> 0) & 0xff;
1296 #else // WEBP_REFERENCE_IMPLEMENTATION
1297 dst[0] = (argb >> 24) & 0xff;
1298 dst[1] = (argb >> 16) & 0xff;
1299 dst[2] = (argb >> 8) & 0xff;
1300 dst[3] = (argb >> 0) & 0xff;
1302 #else // __BIG_ENDIAN__
1303 dst[0] = (argb >> 0) & 0xff;
1304 dst[1] = (argb >> 8) & 0xff;
1305 dst[2] = (argb >> 16) & 0xff;
1306 dst[3] = (argb >> 24) & 0xff;
1308 dst += sizeof(argb);
1311 memcpy(dst, src, num_pixels * sizeof(*src));
1315 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1316 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1317 switch (out_colorspace) {
1319 ConvertBGRAToRGB(in_data, num_pixels, rgba);
1322 ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1325 ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1326 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1329 ConvertBGRAToBGR(in_data, num_pixels, rgba);
1332 CopyOrSwap(in_data, num_pixels, rgba, 1);
1335 CopyOrSwap(in_data, num_pixels, rgba, 1);
1336 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1339 CopyOrSwap(in_data, num_pixels, rgba, 0);
1342 CopyOrSwap(in_data, num_pixels, rgba, 0);
1343 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1345 case MODE_RGBA_4444:
1346 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1348 case MODE_rgbA_4444:
1349 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1350 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1353 ConvertBGRAToRGB565(in_data, num_pixels, rgba);
1356 assert(0); // Code flow should not reach here.
1360 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
1361 void VP8LBundleColorMap(const uint8_t* const row, int width,
1362 int xbits, uint32_t* const dst) {
1365 const int bit_depth = 1 << (3 - xbits);
1366 const int mask = (1 << xbits) - 1;
1367 uint32_t code = 0xff000000;
1368 for (x = 0; x < width; ++x) {
1369 const int xsub = x & mask;
1373 code |= row[x] << (8 + bit_depth * xsub);
1374 dst[x >> xbits] = code;
1377 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1381 //------------------------------------------------------------------------------
1383 #if defined(__cplusplus) || defined(c_plusplus)