1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
8 // WebP encoder: internal header.
10 // Author: Skal (pascal.massimino@gmail.com)
12 #ifndef WEBP_ENC_VP8ENCI_H_
13 #define WEBP_ENC_VP8ENCI_H_
15 #include <string.h> // for memcpy()
16 #include "../webp/encode.h"
17 #include "../dsp/dsp.h"
18 #include "../utils/bit_writer.h"
19 #include "../utils/thread.h"
21 #if defined(__cplusplus) || defined(c_plusplus)
25 //------------------------------------------------------------------------------
26 // Various defines and enums
29 #define ENC_MAJ_VERSION 0
30 #define ENC_MIN_VERSION 3
31 #define ENC_REV_VERSION 0
33 // intra prediction modes
34 enum { B_DC_PRED = 0, // 4x4 modes
44 NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
47 DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
48 H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
52 enum { NUM_MB_SEGMENTS = 4,
53 MAX_NUM_PARTITIONS = 8,
54 NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
58 MAX_LF_LEVELS = 64, // Maximum loop filter level
59 MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
60 MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67)
63 typedef enum { // Rate-distortion optimization levels
64 RD_OPT_NONE = 0, // no rd-opt
65 RD_OPT_BASIC = 1, // basic scoring (no trellis)
66 RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only
67 RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
70 // YUV-cache parameters. Cache is 16-pixels wide.
71 // The original or reconstructed samples can be accessed using VP8Scan[]
72 // The predicted blocks can be accessed using offsets to yuv_p_ and
73 // the arrays VP8*ModeOffsets[];
74 // +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
75 // Y_OFF |YYYY| <- original samples (enc->yuv_in_)
79 // U_OFF |UUVV| V_OFF (=U_OFF + 8)
82 // Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_')
83 // |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_')
89 // +----+ Prediction area ('yuv_p_', size = PRED_SIZE)
90 // I16DC16 |YYYY| Intra16 predictions (16x16 block each)
106 // +----+ Chroma U/V predictions (16x8 block each)
115 // +----+ Intra 4x4 predictions (4x4 block each)
116 // |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
117 // |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
118 // |YY..| I4HD4 I4HU4 I4TMP
// Sizes and offsets of the 16-pixel-wide YUV sample cache.
120 #define BPS 16 // this is the common stride (bytes per row) of all cached blocks
// Luma area: 16 rows of BPS bytes.
121 #define Y_SIZE (BPS * 16)
// Chroma area: 8 rows of BPS bytes; U and V sit side by side in each row.
122 #define UV_SIZE (BPS * 8)
// One full sample buffer: luma area followed by the chroma area.
123 #define YUV_SIZE (Y_SIZE + UV_SIZE)
// Prediction area: six 16-row slots (see the I16* and C8* offsets) followed by
// 12 rows holding the 4x4 predictions (see the I4* offsets).
124 #define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
// U starts right after the luma area; V starts 8 bytes further in the same row.
126 #define U_OFF (Y_SIZE)
127 #define V_OFF (U_OFF + 8)
129 #define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
131 extern const int VP8Scan[16 + 4 + 4]; // in quant.c
132 extern const int VP8UVModeOffsets[4]; // in analyze.c
133 extern const int VP8I16ModeOffsets[4];
134 extern const int VP8I4ModeOffsets[NUM_BMODES];
136 // Layout of prediction blocks
// Every value below is a byte offset into the prediction buffer ('yuv_p_'),
// whose rows are BPS bytes wide.
// Intra 16x16 luma: one 16-row slot per prediction.
138 #define I16DC16 (0 * 16 * BPS)
139 #define I16TM16 (1 * 16 * BPS)
140 #define I16VE16 (2 * 16 * BPS)
141 #define I16HE16 (3 * 16 * BPS)
142 // chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
// Two chroma predictions share each 16-row slot, stacked 8 rows apart.
143 #define C8DC8 (4 * 16 * BPS)
144 #define C8TM8 (4 * 16 * BPS + 8 * BPS)
145 #define C8VE8 (5 * 16 * BPS)
146 #define C8HE8 (5 * 16 * BPS + 8 * BPS)
// Intra 4x4: blocks start after the six 16-row slots and are laid out on a
// grid, 4 bytes apart horizontally and 4 rows (4 * BPS) apart vertically.
148 #define I4DC4 (6 * 16 * BPS + 0)
149 #define I4TM4 (6 * 16 * BPS + 4)
150 #define I4VE4 (6 * 16 * BPS + 8)
151 #define I4HE4 (6 * 16 * BPS + 12)
152 #define I4RD4 (6 * 16 * BPS + 4 * BPS + 0)
153 #define I4VR4 (6 * 16 * BPS + 4 * BPS + 4)
154 #define I4LD4 (6 * 16 * BPS + 4 * BPS + 8)
155 #define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
156 #define I4HD4 (6 * 16 * BPS + 8 * BPS + 0)
157 #define I4HU4 (6 * 16 * BPS + 8 * BPS + 4)
// I4TMP occupies the grid cell right after I4HU4 in the third row of blocks.
158 #define I4TMP (6 * 16 * BPS + 8 * BPS + 8)
160 typedef int64_t score_t; // type used for scores, rate, distortion
161 #define MAX_COST ((score_t)0x7fffffffffffffLL)
164 #define BIAS(b) ((b) << (QFIX - 8))
165 // Fun fact: this is the _only_ line where we're actually being lossy and
167 static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
168 return (n * iQ + B) >> QFIX;
171 // size of histogram used by CollectHistogram.
172 #define MAX_COEFF_THRESH 31
173 typedef struct VP8Histogram VP8Histogram;
174 struct VP8Histogram {
175 // TODO(skal): we only need to store the max_value and last_non_zero actually.
176 int distribution[MAX_COEFF_THRESH + 1];
179 // Uncomment the following to remove token-buffer code:
180 // #define DISABLE_TOKEN_BUFFER
182 //------------------------------------------------------------------------------
185 typedef uint32_t proba_t; // 16b + 16b
186 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
187 typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
188 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
189 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
191 typedef struct VP8Encoder VP8Encoder;
195 int num_segments_; // Actual number of segments. 1 segment only = unused.
196 int update_map_; // whether to update the segment map or not.
197 // must be 0 if there's only 1 segment.
198 int size_; // bit-cost for transmitting the segment map
201 // Struct collecting all frame-persistent probabilities.
203 uint8_t segments_[3]; // probabilities for segment tree
204 uint8_t skip_proba_; // final probability of being skipped.
205 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes
206 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
207 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k
208 int dirty_; // if true, need to call VP8CalculateLevelCosts()
209 int use_skip_proba_; // Note: we always use skip_proba for now.
210 int nb_skip_; // number of skipped blocks
213 // Filter parameters. Not actually used in the code (we don't perform
214 // the in-loop filtering), but filled from user's config
216 int simple_; // filtering type: 0=complex, 1=simple
217 int level_; // base filter level [0..63]
218 int sharpness_; // [0..7]
219 int i4x4_lf_delta_; // delta filter level for i4x4 relative to i16x16
222 //------------------------------------------------------------------------------
223 // Information about the macroblocks.
227 unsigned int type_:2; // 0=i4x4, 1=i16x16
228 unsigned int uv_mode_:2;
229 unsigned int skip_:1;
230 unsigned int segment_:2;
231 uint8_t alpha_; // quantization-susceptibility
234 typedef struct VP8Matrix {
235 uint16_t q_[16]; // quantizer steps
236 uint16_t iq_[16]; // reciprocals, fixed point.
237 uint16_t bias_[16]; // rounding bias
238 uint16_t zthresh_[16]; // value under which a coefficient is zeroed
239 uint16_t sharpen_[16]; // frequency boosters for slight sharpening
243 VP8Matrix y1_, y2_, uv_; // quantization matrices
244 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral.
245 // Lower values indicate a lower risk of blurriness.
246 int beta_; // filter-susceptibility, range [0,255].
247 int quant_; // final segment quantizer.
248 int fstrength_; // final in-loop filtering strength
250 int lambda_i16_, lambda_i4_, lambda_uv_;
251 int lambda_mode_, lambda_trellis_, tlambda_;
252 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
255 // Handy transient struct to accumulate score and info during RD-optimization
256 // and mode evaluation.
258 score_t D, SD, R, score; // Distortion, spectral distortion, rate, score.
259 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.
260 int16_t y_ac_levels[16][16];
261 int16_t uv_levels[4 + 4][16];
262 int mode_i16; // mode number for intra16 prediction
263 uint8_t modes_i4[16]; // mode numbers for intra4 predictions
264 int mode_uv; // mode number of chroma prediction
265 uint32_t nz; // non-zero blocks
268 // Iterator structure to iterate through macroblocks, pointing to the
269 // right neighbouring data (samples, predictions, contexts, ...)
271 int x_, y_; // current macroblock
272 int y_offset_, uv_offset_; // offset to the luma / chroma planes
273 int y_stride_, uv_stride_; // respective strides
274 uint8_t* yuv_in_; // borrowed from enc_ (for now)
275 uint8_t* yuv_out_; // ''
276 uint8_t* yuv_out2_; // ''
277 uint8_t* yuv_p_; // ''
278 VP8Encoder* enc_; // back-pointer
279 VP8MBInfo* mb_; // current macroblock
280 VP8BitWriter* bw_; // current bit-writer
281 uint8_t* preds_; // intra mode predictors (4x4 blocks)
282 uint32_t* nz_; // non-zero pattern
283 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4
284 uint8_t* i4_top_; // pointer to the current top boundary sample
285 int i4_; // current intra4x4 mode being tested
286 int top_nz_[9]; // top-non-zero context.
287 int left_nz_[9]; // left-non-zero. left_nz[8] is independent.
288 uint64_t bit_count_[4][3]; // bit counters for coded levels.
289 uint64_t luma_bits_; // macroblock bit-cost for luma
290 uint64_t uv_bits_; // macroblock bit-cost for chroma
291 LFStats* lf_stats_; // filter stats (borrowed from enc_)
292 int do_trellis_; // if true, perform extra level optimisation
293 int done_; // true when scan is finished
294 int percent0_; // saved initial progress percent
298 // must be called first.
299 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
301 void VP8IteratorReset(VP8EncIterator* const it);
302 // import samples from source
303 void VP8IteratorImport(const VP8EncIterator* const it);
304 // export decimated samples
305 void VP8IteratorExport(const VP8EncIterator* const it);
306 // go to next macroblock. Returns !done_. If block_to_save is non-null, will
307 // save the boundary values to top_/left_ arrays. block_to_save can be
308 // it->yuv_out_ or it->yuv_in_.
309 int VP8IteratorNext(VP8EncIterator* const it,
310 const uint8_t* const block_to_save);
311 // Report progression based on macroblock rows. Return 0 for user-abort request.
312 int VP8IteratorProgress(const VP8EncIterator* const it,
313 int final_delta_percent);
314 // Intra4x4 iterations
315 void VP8IteratorStartI4(VP8EncIterator* const it);
316 // returns true if not done.
317 int VP8IteratorRotateI4(VP8EncIterator* const it,
318 const uint8_t* const yuv_out);
320 // Non-zero context setup/teardown
321 void VP8IteratorNzToBytes(VP8EncIterator* const it);
322 void VP8IteratorBytesToNz(VP8EncIterator* const it);
324 // Helper functions to set mode properties
325 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
326 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
327 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
328 void VP8SetSkip(const VP8EncIterator* const it, int skip);
329 void VP8SetSegment(const VP8EncIterator* const it, int segment);
331 //------------------------------------------------------------------------------
332 // Paginated token buffer
334 typedef struct VP8Tokens VP8Tokens; // struct details in token.c
337 #if !defined(DISABLE_TOKEN_BUFFER)
338 VP8Tokens* pages_; // first page
339 VP8Tokens** last_page_; // last page
340 uint16_t* tokens_; // set to (*last_page_)->tokens_
341 int left_; // how many free tokens left before the page is full.
343 int error_; // true in case of malloc error
346 void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
347 void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
349 #if !defined(DISABLE_TOKEN_BUFFER)
351 // Finalizes bitstream when probabilities are known.
352 // Deletes the allocated token memory if final_pass is true.
353 int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
354 const uint8_t* const probas, int final_pass);
356 // record the coding of coefficients without knowing the probabilities yet
357 int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
358 const int16_t* const coeffs,
359 VP8TBuffer* const tokens);
362 void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
364 #endif // !DISABLE_TOKEN_BUFFER
366 //------------------------------------------------------------------------------
370 const WebPConfig* config_; // user configuration and parameters
371 WebPPicture* pic_; // input / output picture
374 VP8FilterHeader filter_hdr_; // filtering information
375 VP8SegmentHeader segment_hdr_; // segment information
377 int profile_; // VP8's profile, deduced from Config.
379 // dimension, in macroblock units.
381 int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
383 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
386 // per-partition boolean encoders (bit writers).
387 VP8BitWriter bw_; // part0
388 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
389 VP8TBuffer tokens_; // token buffer
391 int percent_; // for progress
395 uint8_t* alpha_data_; // non-NULL if transparency is present
396 uint32_t alpha_data_size_;
397 WebPWorker alpha_worker_;
401 VP8BitWriter layer_bw_;
402 uint8_t* layer_data_;
403 size_t layer_data_size_;
405 // quantization info (one set of DC/AC dequant factor per segment)
406 VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
407 int base_quant_; // nominal quantizer value. Only used
408 // for relative coding of segments' quant.
409 int alpha_; // global susceptibility (<=> complexity)
410 int uv_alpha_; // U/V quantization susceptibility
411 // global offset of quantizers, shared by all segments
413 int dq_y2_dc_, dq_y2_ac_;
414 int dq_uv_dc_, dq_uv_ac_;
416 // probabilities and statistics
418 uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
419 uint64_t sse_count_; // pixel count for the sse_[] stats
421 int residual_bytes_[3][4];
424 // quality/speed settings
425 int method_; // 0=fastest, 6=best/slowest.
426 VP8RDLevel rd_opt_level_; // Deduced from method_.
427 int max_i4_header_bits_; // partition #0 safeness factor
428 int thread_level_; // derived from config->thread_level
429 int do_search_; // derived from config->target_XXX
430 int use_tokens_; // if true, use token buffer
433 VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
434 uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
435 uint32_t* nz_; // non-zero bit context: mb_w+1
436 uint8_t* yuv_in_; // input samples
437 uint8_t* yuv_out_; // output samples
438 uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_.
439 uint8_t* yuv_p_; // scratch buffer for prediction
440 uint8_t *y_top_; // top luma samples.
441 uint8_t *uv_top_; // top u/v samples.
442 // U and V are packed into 16 pixels (8 U + 8 V)
443 uint8_t *y_left_; // left luma samples (addressable from index -1 to 15).
444 uint8_t *u_left_; // left u samples (addressable from index -1 to 7)
445 uint8_t *v_left_; // left v samples (addressable from index -1 to 7)
447 LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off)
450 //------------------------------------------------------------------------------
451 // internal functions. Not public.
454 extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
456 VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
457 // Reset the token probabilities to their initial (default) values
458 void VP8DefaultProbas(VP8Encoder* const enc);
459 // Write the token probabilities
460 void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
461 // Writes the partition #0 modes (that is: all intra modes)
462 void VP8CodeIntraModes(VP8Encoder* const enc);
465 // Generates the final bitstream by coding the partition0 and headers,
466 // and appending an assembly of all the pre-coded token partitions.
467 // Return true if everything is ok.
468 int VP8EncWrite(VP8Encoder* const enc);
469 // Release memory allocated for bit-writing in VP8EncLoop & seq.
470 void VP8EncFreeBitWriters(VP8Encoder* const enc);
473 extern const uint8_t VP8EncBands[16 + 1];
474 extern const uint8_t VP8Cat3[];
475 extern const uint8_t VP8Cat4[];
476 extern const uint8_t VP8Cat5[];
477 extern const uint8_t VP8Cat6[];
479 // Form all the four Intra16x16 predictions in the yuv_p_ cache
480 void VP8MakeLuma16Preds(const VP8EncIterator* const it);
481 // Form all the four Chroma8x8 predictions in the yuv_p_ cache
482 void VP8MakeChroma8Preds(const VP8EncIterator* const it);
483 // Form all the ten Intra4x4 predictions in the yuv_p_ cache
484 // for the 4x4 block it->i4_
485 void VP8MakeIntra4Preds(const VP8EncIterator* const it);
487 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
488 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
489 int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
491 int VP8EncLoop(VP8Encoder* const enc);
492 int VP8EncTokenLoop(VP8Encoder* const enc);
495 // Assign an error code to a picture. Return false for convenience.
496 int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
497 int WebPReportProgress(const WebPPicture* const pic,
498 int percent, int* const percent_store);
501 // Main analysis loop. Decides the segmentations and complexity.
502 // Assigns a first guess for Intra16 and uvmode_ prediction modes.
503 int VP8EncAnalyze(VP8Encoder* const enc);
506 // Sets up segment's quantization values, base_quant_ and filter strengths.
507 void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
508 // Pick best modes and fills the levels. Returns true if skipped.
509 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
513 void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
514 int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process
515 int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
516 int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
519 void VP8EncInitLayer(VP8Encoder* const enc); // init everything
520 void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock
521 int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding
522 void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory
528 double w, xm, ym, xxm, xym, yym;
530 void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
531 void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
532 const uint8_t* src2, int stride2,
533 int W, int H, DistoStats* const stats);
534 double VP8SSIMGet(const DistoStats* const stats);
535 double VP8SSIMGetSquaredError(const DistoStats* const stats);
538 void VP8InitFilter(VP8EncIterator* const it);
539 void VP8StoreFilterStats(VP8EncIterator* const it);
540 void VP8AdjustFilterStrength(VP8EncIterator* const it);
542 //------------------------------------------------------------------------------
544 #if defined(__cplusplus) || defined(c_plusplus)
548 #endif /* WEBP_ENC_VP8ENCI_H_ */