1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 unsigned int bpc,
34 double BPP,
35 unsigned int sliceWidth,
36 unsigned int numSlices,
37 enum output_format_class pixelFormat,
38 enum output_encoder_class Output)
39 {
40 // valid bpc = source bits per component in the set of {8, 10, 12}
41 // valid bpp = increments of 1/16 of a bit
42 // min = 6/7/8 in N420/N422/444, respectively
43 // max = such that compression is 1:1
44 //valid sliceWidth = number of pixels per slice line,
45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49 // fixed value
50 unsigned int rcModelSize = 8192;
51
52 // N422/N420 operate at 2 pixels per clock
53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 Delay, pixels;
55
56 if (pixelFormat == dm_420)
57 pixelsPerClock = 2;
58 else if (pixelFormat == dm_n422)
59 pixelsPerClock = 2;
60 // #all other modes operate at 1 pixel per clock
61 else
62 pixelsPerClock = 1;
63
64 //initial transmit delay as per PPS
65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67 //compute ssm delay
68 if (bpc == 8)
69 D = 81;
70 else if (bpc == 10)
71 D = 89;
72 else
73 D = 113;
74
75 //divide by pixel per cycle to compute slice width as seen by DSC
76 w = sliceWidth / pixelsPerClock;
77
78 //422 mode has an additional cycle of delay
79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 s = 0;
81 else
82 s = 1;
83
84 //main calculation for the dscce
85 ix = initalXmitDelay + 45;
86 wx = (w + 2) / 3;
87 p = 3 * wx - w;
88 l0 = ix / w;
89 a = ix + p * l0;
90 ax = (a + 2) / 3 + D + 6 + 1;
91 L = (ax + wx - 1) / wx;
92 if ((ix % w) == 0 && p != 0)
93 lstall = 1;
94 else
95 lstall = 0;
96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 pixels = Delay * 3 * pixelsPerClock;
100
101 #ifdef __DML_VBA_DEBUG__
102 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 dml_print("DML::%s: Output: %d\n", __func__, Output);
108 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110
111 return pixels;
112 }
113
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 unsigned int Delay = 0;
117
118 if (pixelFormat == dm_420) {
119 // sfr
120 Delay = Delay + 2;
121 // dsccif
122 Delay = Delay + 0;
123 // dscc - input deserializer
124 Delay = Delay + 3;
125 // dscc gets pixels every other cycle
126 Delay = Delay + 2;
127 // dscc - input cdc fifo
128 Delay = Delay + 12;
129 // dscc gets pixels every other cycle
130 Delay = Delay + 13;
131 // dscc - cdc uncertainty
132 Delay = Delay + 2;
133 // dscc - output cdc fifo
134 Delay = Delay + 7;
135 // dscc gets pixels every other cycle
136 Delay = Delay + 3;
137 // dscc - cdc uncertainty
138 Delay = Delay + 2;
139 // dscc - output serializer
140 Delay = Delay + 1;
141 // sft
142 Delay = Delay + 1;
143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 // sfr
145 Delay = Delay + 2;
146 // dsccif
147 Delay = Delay + 1;
148 // dscc - input deserializer
149 Delay = Delay + 5;
150 // dscc - input cdc fifo
151 Delay = Delay + 25;
152 // dscc - cdc uncertainty
153 Delay = Delay + 2;
154 // dscc - output cdc fifo
155 Delay = Delay + 10;
156 // dscc - cdc uncertainty
157 Delay = Delay + 2;
158 // dscc - output serializer
159 Delay = Delay + 1;
160 // sft
161 Delay = Delay + 1;
162 } else {
163 // sfr
164 Delay = Delay + 2;
165 // dsccif
166 Delay = Delay + 0;
167 // dscc - input deserializer
168 Delay = Delay + 3;
169 // dscc - input cdc fifo
170 Delay = Delay + 12;
171 // dscc - cdc uncertainty
172 Delay = Delay + 2;
173 // dscc - output cdc fifo
174 Delay = Delay + 7;
175 // dscc - output serializer
176 Delay = Delay + 1;
177 // dscc - cdc uncertainty
178 Delay = Delay + 2;
179 // sft
180 Delay = Delay + 1;
181 }
182
183 return Delay;
184 }
185
186
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 bool is_vert = false;
190
191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 is_vert = true;
193 else
194 is_vert = false;
195 return is_vert;
196 }
197
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 double HRatio,
200 double HRatioChroma,
201 double VRatio,
202 double VRatioChroma,
203 double MaxDCHUBToPSCLThroughput,
204 double MaxPSCLToLBThroughput,
205 double PixelClock,
206 enum source_format_class SourcePixelFormat,
207 unsigned int HTaps,
208 unsigned int HTapsChroma,
209 unsigned int VTaps,
210 unsigned int VTapsChroma,
211
212 /* output */
213 double *PSCL_THROUGHPUT,
214 double *PSCL_THROUGHPUT_CHROMA,
215 double *DPPCLKUsingSingleDPP)
216 {
217 double DPPCLKUsingSingleDPPLuma;
218 double DPPCLKUsingSingleDPPChroma;
219
220 if (HRatio > 1) {
221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 dml_ceil((double) HTaps / 6.0, 1.0));
223 } else {
224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 }
226
227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 *PSCL_THROUGHPUT, 1);
229
230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 SourcePixelFormat != dm_rgbe_alpha)) {
235 *PSCL_THROUGHPUT_CHROMA = 0;
236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 } else {
238 if (HRatioChroma > 1) {
239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 } else {
242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 }
244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 }
250 }
251
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 enum source_format_class SourcePixelFormat,
254 enum dm_swizzle_mode SurfaceTiling,
255
256 /* Output */
257 unsigned int *BytePerPixelY,
258 unsigned int *BytePerPixelC,
259 double *BytePerPixelDETY,
260 double *BytePerPixelDETC,
261 unsigned int *BlockHeight256BytesY,
262 unsigned int *BlockHeight256BytesC,
263 unsigned int *BlockWidth256BytesY,
264 unsigned int *BlockWidth256BytesC,
265 unsigned int *MacroTileHeightY,
266 unsigned int *MacroTileHeightC,
267 unsigned int *MacroTileWidthY,
268 unsigned int *MacroTileWidthC)
269 {
270 if (SourcePixelFormat == dm_444_64) {
271 *BytePerPixelDETY = 8;
272 *BytePerPixelDETC = 0;
273 *BytePerPixelY = 8;
274 *BytePerPixelC = 0;
275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 *BytePerPixelDETY = 4;
277 *BytePerPixelDETC = 0;
278 *BytePerPixelY = 4;
279 *BytePerPixelC = 0;
280 } else if (SourcePixelFormat == dm_444_16) {
281 *BytePerPixelDETY = 2;
282 *BytePerPixelDETC = 0;
283 *BytePerPixelY = 2;
284 *BytePerPixelC = 0;
285 } else if (SourcePixelFormat == dm_444_8) {
286 *BytePerPixelDETY = 1;
287 *BytePerPixelDETC = 0;
288 *BytePerPixelY = 1;
289 *BytePerPixelC = 0;
290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
291 *BytePerPixelDETY = 4;
292 *BytePerPixelDETC = 1;
293 *BytePerPixelY = 4;
294 *BytePerPixelC = 1;
295 } else if (SourcePixelFormat == dm_420_8) {
296 *BytePerPixelDETY = 1;
297 *BytePerPixelDETC = 2;
298 *BytePerPixelY = 1;
299 *BytePerPixelC = 2;
300 } else if (SourcePixelFormat == dm_420_12) {
301 *BytePerPixelDETY = 2;
302 *BytePerPixelDETC = 4;
303 *BytePerPixelY = 2;
304 *BytePerPixelC = 4;
305 } else {
306 *BytePerPixelDETY = 4.0 / 3;
307 *BytePerPixelDETC = 8.0 / 3;
308 *BytePerPixelY = 2;
309 *BytePerPixelC = 4;
310 }
311 #ifdef __DML_VBA_DEBUG__
312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
317 #endif
318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 || SourcePixelFormat == dm_444_16
320 || SourcePixelFormat == dm_444_8
321 || SourcePixelFormat == dm_mono_16
322 || SourcePixelFormat == dm_mono_8
323 || SourcePixelFormat == dm_rgbe)) {
324 if (SurfaceTiling == dm_sw_linear)
325 *BlockHeight256BytesY = 1;
326 else if (SourcePixelFormat == dm_444_64)
327 *BlockHeight256BytesY = 4;
328 else if (SourcePixelFormat == dm_444_8)
329 *BlockHeight256BytesY = 16;
330 else
331 *BlockHeight256BytesY = 8;
332
333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 *BlockHeight256BytesC = 0;
335 *BlockWidth256BytesC = 0;
336 } else {
337 if (SurfaceTiling == dm_sw_linear) {
338 *BlockHeight256BytesY = 1;
339 *BlockHeight256BytesC = 1;
340 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341 *BlockHeight256BytesY = 8;
342 *BlockHeight256BytesC = 16;
343 } else if (SourcePixelFormat == dm_420_8) {
344 *BlockHeight256BytesY = 16;
345 *BlockHeight256BytesC = 8;
346 } else {
347 *BlockHeight256BytesY = 8;
348 *BlockHeight256BytesC = 8;
349 }
350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 }
353 #ifdef __DML_VBA_DEBUG__
354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359
360 if (SurfaceTiling == dm_sw_linear) {
361 *MacroTileHeightY = *BlockHeight256BytesY;
362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 *MacroTileHeightC = *BlockHeight256BytesC;
364 if (*MacroTileHeightC == 0)
365 *MacroTileWidthC = 0;
366 else
367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 if (*MacroTileHeightC == 0)
374 *MacroTileWidthC = 0;
375 else
376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 } else {
378 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 if (*MacroTileHeightC == 0)
382 *MacroTileWidthC = 0;
383 else
384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 }
386
387 #ifdef __DML_VBA_DEBUG__
388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 unsigned int DETSizeOverride[],
397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 unsigned int ConfigReturnBufferSizeInKByte,
399 unsigned int MaxTotalDETInKByte,
400 unsigned int MinCompressedBufferSizeInKByte,
401 double ForceSingleDPP,
402 unsigned int NumberOfActiveSurfaces,
403 unsigned int nomDETInKByte,
404 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 unsigned int PixelChunkSizeKBytes,
407 unsigned int ROBSizeKBytes,
408 unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 enum output_encoder_class Output[],
410 double ReadBandwidthLuma[],
411 double ReadBandwidthChroma[],
412 double MaximumSwathWidthLuma[],
413 double MaximumSwathWidthChroma[],
414 enum dm_rotation_angle SourceRotation[],
415 bool ViewportStationary[],
416 enum source_format_class SourcePixelFormat[],
417 enum dm_swizzle_mode SurfaceTiling[],
418 unsigned int ViewportWidth[],
419 unsigned int ViewportHeight[],
420 unsigned int ViewportXStart[],
421 unsigned int ViewportYStart[],
422 unsigned int ViewportXStartC[],
423 unsigned int ViewportYStartC[],
424 unsigned int SurfaceWidthY[],
425 unsigned int SurfaceWidthC[],
426 unsigned int SurfaceHeightY[],
427 unsigned int SurfaceHeightC[],
428 unsigned int Read256BytesBlockHeightY[],
429 unsigned int Read256BytesBlockHeightC[],
430 unsigned int Read256BytesBlockWidthY[],
431 unsigned int Read256BytesBlockWidthC[],
432 enum odm_combine_mode ODMMode[],
433 unsigned int BlendingAndTiming[],
434 unsigned int BytePerPixY[],
435 unsigned int BytePerPixC[],
436 double BytePerPixDETY[],
437 double BytePerPixDETC[],
438 unsigned int HActive[],
439 double HRatio[],
440 double HRatioChroma[],
441 unsigned int DPPPerSurface[],
442
443 /* Output */
444 unsigned int swath_width_luma_ub[],
445 unsigned int swath_width_chroma_ub[],
446 double SwathWidth[],
447 double SwathWidthChroma[],
448 unsigned int SwathHeightY[],
449 unsigned int SwathHeightC[],
450 unsigned int DETBufferSizeInKByte[],
451 unsigned int DETBufferSizeY[],
452 unsigned int DETBufferSizeC[],
453 bool *UnboundedRequestEnabled,
454 unsigned int *CompressedBufferSizeInkByte,
455 unsigned int *CompBufReservedSpaceKBytes,
456 bool *CompBufReservedSpaceNeedAdjustment,
457 bool ViewportSizeSupportPerSurface[],
458 bool *ViewportSizeSupport)
459 {
460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 unsigned int RoundedUpSwathSizeBytesY;
465 unsigned int RoundedUpSwathSizeBytesC;
466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 unsigned int k;
469 unsigned int TotalActiveDPP = 0;
470 bool NoChromaSurfaces = true;
471 unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473 #ifdef __DML_VBA_DEBUG__
474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 dml32_CalculateSwathWidth(ForceSingleDPP,
479 NumberOfActiveSurfaces,
480 SourcePixelFormat,
481 SourceRotation,
482 ViewportStationary,
483 ViewportWidth,
484 ViewportHeight,
485 ViewportXStart,
486 ViewportYStart,
487 ViewportXStartC,
488 ViewportYStartC,
489 SurfaceWidthY,
490 SurfaceWidthC,
491 SurfaceHeightY,
492 SurfaceHeightC,
493 ODMMode,
494 BytePerPixY,
495 BytePerPixC,
496 Read256BytesBlockHeightY,
497 Read256BytesBlockHeightC,
498 Read256BytesBlockWidthY,
499 Read256BytesBlockWidthC,
500 BlendingAndTiming,
501 HActive,
502 HRatio,
503 DPPPerSurface,
504
505 /* Output */
506 SwathWidthdoubleDPP,
507 SwathWidthdoubleDPPChroma,
508 SwathWidth,
509 SwathWidthChroma,
510 MaximumSwathHeightY,
511 MaximumSwathHeightC,
512 swath_width_luma_ub,
513 swath_width_chroma_ub);
514
515 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 RoundedUpMaxSwathSizeBytesY[k]);
525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531
532 if (SourcePixelFormat[k] == dm_420_10) {
533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 }
536 }
537
538 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 NoChromaSurfaces = false;
543 }
544 }
545
546 // By default, just set the reserved space to 2 pixel chunks size
547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554 if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 }
557
558 #ifdef __DML_VBA_DEBUG__
559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
561 #endif
562
563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565 dml32_CalculateDETBufferSize(DETSizeOverride,
566 UseMALLForPStateChange,
567 ForceSingleDPP,
568 NumberOfActiveSurfaces,
569 *UnboundedRequestEnabled,
570 nomDETInKByte,
571 MaxTotalDETInKByte,
572 ConfigReturnBufferSizeInKByte,
573 MinCompressedBufferSizeInKByte,
574 CompressedBufferSegmentSizeInkByteFinal,
575 SourcePixelFormat,
576 ReadBandwidthLuma,
577 ReadBandwidthChroma,
578 RoundedUpMaxSwathSizeBytesY,
579 RoundedUpMaxSwathSizeBytesC,
580 DPPPerSurface,
581
582 /* Output */
583 DETBufferSizeInKByte, // per hubp pipe
584 CompressedBufferSizeInkByte);
585
586 #ifdef __DML_VBA_DEBUG__
587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594
595 *ViewportSizeSupport = true;
596 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 DETBufferSizeInKByteForSwathCalculation);
603 #endif
604
605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 SwathHeightY[k] = MaximumSwathHeightY[k];
608 SwathHeightC[k] = MaximumSwathHeightC[k];
609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 SwathHeightC[k] = MaximumSwathHeightC[k];
616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 SwathHeightY[k] = MaximumSwathHeightY[k];
622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 } else {
626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 }
631
632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 *ViewportSizeSupport = false;
637 ViewportSizeSupportPerSurface[k] = false;
638 } else {
639 ViewportSizeSupportPerSurface[k] = true;
640 }
641
642 if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 DETBufferSizeC[k] = 0;
648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 } else {
655 #ifdef __DML_VBA_DEBUG__
656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 }
661
662 #ifdef __DML_VBA_DEBUG__
663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 k, RoundedUpMaxSwathSizeBytesY[k]);
667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 k, RoundedUpMaxSwathSizeBytesC[k]);
669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 ViewportSizeSupportPerSurface[k]);
676 #endif
677
678 }
679 } // CalculateSwathAndDETConfiguration
680
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 bool ForceSingleDPP,
683 unsigned int NumberOfActiveSurfaces,
684 enum source_format_class SourcePixelFormat[],
685 enum dm_rotation_angle SourceRotation[],
686 bool ViewportStationary[],
687 unsigned int ViewportWidth[],
688 unsigned int ViewportHeight[],
689 unsigned int ViewportXStart[],
690 unsigned int ViewportYStart[],
691 unsigned int ViewportXStartC[],
692 unsigned int ViewportYStartC[],
693 unsigned int SurfaceWidthY[],
694 unsigned int SurfaceWidthC[],
695 unsigned int SurfaceHeightY[],
696 unsigned int SurfaceHeightC[],
697 enum odm_combine_mode ODMMode[],
698 unsigned int BytePerPixY[],
699 unsigned int BytePerPixC[],
700 unsigned int Read256BytesBlockHeightY[],
701 unsigned int Read256BytesBlockHeightC[],
702 unsigned int Read256BytesBlockWidthY[],
703 unsigned int Read256BytesBlockWidthC[],
704 unsigned int BlendingAndTiming[],
705 unsigned int HActive[],
706 double HRatio[],
707 unsigned int DPPPerSurface[],
708
709 /* Output */
710 double SwathWidthdoubleDPPY[],
711 double SwathWidthdoubleDPPC[],
712 double SwathWidthY[], // per-pipe
713 double SwathWidthC[], // per-pipe
714 unsigned int MaximumSwathHeightY[],
715 unsigned int MaximumSwathHeightC[],
716 unsigned int swath_width_luma_ub[], // per-pipe
717 unsigned int swath_width_chroma_ub[]) // per-pipe
718 {
719 unsigned int k, j;
720 enum odm_combine_mode MainSurfaceODMMode;
721
722 unsigned int surface_width_ub_l;
723 unsigned int surface_height_ub_l;
724 unsigned int surface_width_ub_c = 0;
725 unsigned int surface_height_ub_c = 0;
726
727 #ifdef __DML_VBA_DEBUG__
728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731
732 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 if (!IsVertical(SourceRotation[k]))
734 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 else
736 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738 #ifdef __DML_VBA_DEBUG__
739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742
743 MainSurfaceODMMode = ODMMode[k];
744 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 if (BlendingAndTiming[k] == j)
746 MainSurfaceODMMode = ODMMode[j];
747 }
748
749 if (ForceSingleDPP) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 } else {
752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 dml_round(HActive[k] / 4.0 * HRatio[k]));
755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 dml_round(HActive[k] / 2.0 * HRatio[k]));
758 } else if (DPPPerSurface[k] == 2) {
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 } else {
761 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 }
763 }
764
765 #ifdef __DML_VBA_DEBUG__
766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772
773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 SourcePixelFormat[k] == dm_420_12) {
775 SwathWidthC[k] = SwathWidthY[k] / 2;
776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 } else {
778 SwathWidthC[k] = SwathWidthY[k];
779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 }
781
782 if (ForceSingleDPP == true) {
783 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 }
786
787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790 if (!IsVertical(SourceRotation[k])) {
791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 dml_floor(ViewportXStart[k] +
796 SwathWidthY[k] +
797 Read256BytesBlockWidthY[k] - 1,
798 Read256BytesBlockWidthY[k]) -
799 dml_floor(ViewportXStart[k],
800 Read256BytesBlockWidthY[k]));
801 } else {
802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 dml_ceil(SwathWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) +
805 Read256BytesBlockWidthY[k]);
806 }
807 if (BytePerPixC[k] > 0) {
808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 Read256BytesBlockWidthC[k] - 1,
813 Read256BytesBlockWidthC[k]) -
814 dml_floor(ViewportXStartC[k],
815 Read256BytesBlockWidthC[k]));
816 } else {
817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 dml_ceil(SwathWidthC[k] - 1,
819 Read256BytesBlockWidthC[k]) +
820 Read256BytesBlockWidthC[k]);
821 }
822 } else {
823 swath_width_chroma_ub[k] = 0;
824 }
825 } else {
826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 Read256BytesBlockHeightY[k]) -
833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 } else {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 }
838 if (BytePerPixC[k] > 0) {
839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 Read256BytesBlockHeightC[k] - 1,
844 Read256BytesBlockHeightC[k]) -
845 dml_floor(ViewportYStartC[k],
846 Read256BytesBlockHeightC[k]));
847 } else {
848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 Read256BytesBlockHeightC[k]);
851 }
852 } else {
853 swath_width_chroma_ub[k] = 0;
854 }
855 }
856
857 #ifdef __DML_VBA_DEBUG__
858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873
874 }
875 } // CalculateSwathWidth
876
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 unsigned int TotalNumberOfActiveDPP,
879 bool NoChroma,
880 enum output_encoder_class Output,
881 enum dm_swizzle_mode SurfaceTiling,
882 bool CompBufReservedSpaceNeedAdjustment,
883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 bool ret_val = false;
886
887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 TotalNumberOfActiveDPP == 1 && NoChroma);
889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 ret_val = false;
891
892 if (SurfaceTiling == dm_sw_linear)
893 ret_val = false;
894
895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 ret_val = false;
897
898 #ifdef __DML_VBA_DEBUG__
899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
902 #endif
903
904 return (ret_val);
905 }
906
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 unsigned int DETSizeOverride[],
909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 bool ForceSingleDPP,
911 unsigned int NumberOfActiveSurfaces,
912 bool UnboundedRequestEnabled,
913 unsigned int nomDETInKByte,
914 unsigned int MaxTotalDETInKByte,
915 unsigned int ConfigReturnBufferSizeInKByte,
916 unsigned int MinCompressedBufferSizeInKByte,
917 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 enum source_format_class SourcePixelFormat[],
919 double ReadBandwidthLuma[],
920 double ReadBandwidthChroma[],
921 unsigned int RoundedUpMaxSwathSizeBytesY[],
922 unsigned int RoundedUpMaxSwathSizeBytesC[],
923 unsigned int DPPPerSurface[],
924 /* Output */
925 unsigned int DETBufferSizeInKByte[],
926 unsigned int *CompressedBufferSizeInkByte)
927 {
928 unsigned int DETBufferSizePoolInKByte;
929 unsigned int NextDETBufferPieceInKByte;
930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 bool NextPotentialSurfaceToAssignDETPieceFound;
932 unsigned int NextSurfaceToAssignDETPiece;
933 double TotalBandwidth;
934 double BandwidthOfSurfacesNotAssignedDETPiece;
935 unsigned int max_minDET;
936 unsigned int minDET;
937 unsigned int minDET_pipe;
938 unsigned int j, k;
939
940 #ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951
952 // Note: Will use default det size if that fits 2 swaths
953 if (UnboundedRequestEnabled) {
954 if (DETSizeOverride[0] > 0) {
955 DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 } else {
957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 ((double) RoundedUpMaxSwathSizeBytesY[0] +
959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 }
961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 } else {
963 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 DETBufferSizeInKByte[k] = nomDETInKByte;
966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 SourcePixelFormat[k] == dm_420_12) {
968 max_minDET = nomDETInKByte - 64;
969 } else {
970 max_minDET = nomDETInKByte;
971 }
972 minDET = 128;
973 minDET_pipe = 0;
974
975 // add DET resource until can hold 2 full swaths
976 while (minDET <= max_minDET && minDET_pipe == 0) {
977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 minDET_pipe = minDET;
980 minDET = minDET + 64;
981 }
982
983 #ifdef __DML_VBA_DEBUG__
984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 RoundedUpMaxSwathSizeBytesY[k]);
989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992
993 if (minDET_pipe == 0) {
994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 __func__, k, minDET_pipe);
999 #endif
1000 }
1001
1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 DETBufferSizeInKByte[k] = 0;
1004 } else if (DETSizeOverride[k] > 0) {
1005 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 DETBufferSizeInKByte[k] = minDET_pipe;
1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 }
1013
1014 #ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 }
1021
1022 TotalBandwidth = 0;
1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 }
1027 #ifdef __DML_VBA_DEBUG__
1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 } else {
1046 DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 }
1048 #ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 DETPieceAssignedToThisSurfaceAlready[k]);
1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 }
1055
1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 NextPotentialSurfaceToAssignDETPieceFound = false;
1058 NextSurfaceToAssignDETPiece = 0;
1059
1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 ReadBandwidthLuma[k]);
1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 ReadBandwidthChroma[k]);
1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 NextSurfaceToAssignDETPiece);
1072 #endif
1073 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 NextSurfaceToAssignDETPiece = k;
1079 NextPotentialSurfaceToAssignDETPieceFound = true;
1080 }
1081 #ifdef __DML_VBA_DEBUG__
1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 }
1088
1089 if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 // Note: To show the banker's rounding behavior in VBA and also the fact
1091 // that the DET buffer size varies due to precision issue
1092 //
1093 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 // BandwidthOfSurfacesNotAssignedDETPiece /
1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 //
1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107 NextDETBufferPieceInKByte = dml_min(
1108 dml_round((double) DETBufferSizePoolInKByte *
1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 BandwidthOfSurfacesNotAssignedDETPiece /
1112 ((ForceSingleDPP ? 1 :
1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 (ForceSingleDPP ? 1 :
1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 dml_floor((double) DETBufferSizePoolInKByte,
1117 (ForceSingleDPP ? 1 :
1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 } else {
1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 // already has the max per-pipe value
1130 NextDETBufferPieceInKByte = 0;
1131 }
1132 }
1133
1134 #ifdef __DML_VBA_DEBUG__
1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 DETBufferSizePoolInKByte);
1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 NextSurfaceToAssignDETPiece);
1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 NextDETBufferPieceInKByte);
1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 __func__, j, NextSurfaceToAssignDETPiece,
1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151
1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 + NextDETBufferPieceInKByte
1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159
1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 }
1166 }
1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 }
1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171 #ifdef __DML_VBA_DEBUG__
1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 }
1178 #endif
1179 } // CalculateDETBufferSize
1180
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 unsigned int HActive,
1184 enum output_format_class OutFormat,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1188 double MaxDispclk,
1189 bool DSCEnable,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1192 double PixelClock,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1197
1198 /* Output */
1199 bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1203 {
1204
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 MaxDispclk);
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 MaxDispclk);
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 MaxDispclk);
1218 *TotalAvailablePipesSupport = true;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 *NumberOfDPP = 0;
1226
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 *NumberOfDPP = 4;
1238 } else {
1239 *TotalAvailablePipesSupport = false;
1240 }
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 *NumberOfDPP = 2;
1250 } else {
1251 *TotalAvailablePipesSupport = false;
1252 }
1253 } else {
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 *NumberOfDPP = 1;
1256 else
1257 *TotalAvailablePipesSupport = false;
1258 }
1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 ODMUse != dm_odm_combine_policy_4to1) {
1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 *ODMMode = dm_odm_combine_mode_disabled;
1263 *NumberOfDPP = 0;
1264 *TotalAvailablePipesSupport = false;
1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 *ODMMode == dm_odm_combine_mode_4to1) {
1267 *ODMMode = dm_odm_combine_mode_4to1;
1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 *NumberOfDPP = 4;
1270 } else {
1271 *ODMMode = dm_odm_combine_mode_2to1;
1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 *NumberOfDPP = 2;
1274 }
1275 }
1276 if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 *ODMMode = dm_odm_combine_mode_disabled;
1279 *NumberOfDPP = 0;
1280 *TotalAvailablePipesSupport = false;
1281 }
1282 }
1283
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 enum odm_combine_mode ODMMode,
1286 double PixelClock,
1287 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 double DISPCLKRampingMargin,
1289 double DISPCLKDPPCLKVCOSpeed,
1290 double MaxDispclk)
1291 {
1292 double RequiredDispclk = 0.;
1293 double PixelClockAfterODM;
1294 double DISPCLKWithRampingRoundedToDFSGranularity;
1295 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 double MaxDispclkRoundedDownToDFSGranularity;
1297
1298 if (ODMMode == dm_odm_combine_mode_4to1)
1299 PixelClockAfterODM = PixelClock / 4;
1300 else if (ODMMode == dm_odm_combine_mode_2to1)
1301 PixelClockAfterODM = PixelClock / 2;
1302 else
1303 PixelClockAfterODM = PixelClock;
1304
1305
1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 else
1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322 return RequiredDispclk;
1323 }
1324
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (VCOSpeed * 4) / N with N a whole number.
 *
 * Returns 0.0 for a non-positive Clock. Otherwise the divider
 * (VCOSpeed * 4) / Clock is floored when round_up is set (giving a result at
 * or above Clock) or ceiled when it is clear (giving a result at or below
 * Clock).
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	/* A smaller divider yields a higher clock, hence floor for rounding up. */
	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
1335
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 double PHYCLKPerState,
1338 double PHYCLKD18PerState,
1339 double PHYCLKD32PerState,
1340 double Downspreading,
1341 bool IsMainSurfaceUsingTheIndicatedTiming,
1342 enum output_encoder_class Output,
1343 enum output_format_class OutputFormat,
1344 unsigned int HTotal,
1345 unsigned int HActive,
1346 double PixelClockBackEnd,
1347 double ForcedOutputLinkBPP,
1348 unsigned int DSCInputBitPerComponent,
1349 unsigned int NumberOfDSCSlices,
1350 double AudioSampleRate,
1351 unsigned int AudioSampleLayout,
1352 enum odm_combine_mode ODMModeNoDSC,
1353 enum odm_combine_mode ODMModeDSC,
1354 bool DSCEnable,
1355 unsigned int OutputLinkDPLanes,
1356 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358 /* Output */
1359 bool *RequiresDSC,
1360 double *RequiresFEC,
1361 double *OutBpp,
1362 enum dm_output_type *OutputType,
1363 enum dm_output_rate *OutputRate,
1364 unsigned int *RequiredSlots)
1365 {
1366 bool LinkDSCEnable;
1367 unsigned int dummy;
1368 *RequiresDSC = false;
1369 *RequiresFEC = false;
1370 *OutBpp = 0;
1371 *OutputType = dm_output_type_unknown;
1372 *OutputRate = dm_output_rate_unknown;
1373
1374 if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 if (Output == dm_hdmi) {
1376 *RequiresDSC = false;
1377 *RequiresFEC = false;
1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 ODMModeNoDSC, ODMModeDSC, &dummy);
1382 //OutputTypeAndRate = "HDMI";
1383 *OutputType = dm_output_type_hdmi;
1384
1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 if (DSCEnable == true) {
1387 *RequiresDSC = true;
1388 LinkDSCEnable = true;
1389 if (Output == dm_dp || Output == dm_dp2p0)
1390 *RequiresFEC = true;
1391 else
1392 *RequiresFEC = false;
1393 } else {
1394 *RequiresDSC = false;
1395 LinkDSCEnable = false;
1396 if (Output == dm_dp2p0)
1397 *RequiresFEC = true;
1398 else
1399 *RequiresFEC = false;
1400 }
1401 if (Output == dm_dp2p0) {
1402 *OutBpp = 0;
1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 PHYCLKD32PerState >= 10000 / 32) {
1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true;
1413 LinkDSCEnable = true;
1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 OutputFormat, DSCInputBitPerComponent,
1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 }
1421 //OutputTypeAndRate = Output & " UHBR10";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 }
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 ForcedOutputLinkBPP == 0) {
1435 *RequiresDSC = true;
1436 LinkDSCEnable = true;
1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 OutputFormat, DSCInputBitPerComponent,
1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 }
1444 //OutputTypeAndRate = Output & " UHBR13p5";
1445 *OutputType = dm_output_type_dp2p0;
1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 }
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 *RequiresDSC = true;
1457 LinkDSCEnable = true;
1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 OutputFormat, DSCInputBitPerComponent,
1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 }
1465 //OutputTypeAndRate = Output & " UHBR20";
1466 *OutputType = dm_output_type_dp2p0;
1467 *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 }
1469 } else {
1470 *OutBpp = 0;
1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 PHYCLKPerState >= 270) {
1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 ForcedOutputLinkBPP == 0) {
1480 *RequiresDSC = true;
1481 LinkDSCEnable = true;
1482 if (Output == dm_dp)
1483 *RequiresFEC = true;
1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 OutputFormat, DSCInputBitPerComponent,
1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 }
1491 //OutputTypeAndRate = Output & " HBR";
1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 *OutputRate = dm_output_rate_dp_rate_hbr;
1494 }
1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 ForcedOutputLinkBPP == 0) {
1505 *RequiresDSC = true;
1506 LinkDSCEnable = true;
1507 if (Output == dm_dp)
1508 *RequiresFEC = true;
1509
1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 OutputFormat, DSCInputBitPerComponent,
1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 }
1517 //OutputTypeAndRate = Output & " HBR2";
1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 *OutputRate = dm_output_rate_dp_rate_hbr2;
1520 }
1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 RequiredSlots);
1528
1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 *RequiresDSC = true;
1531 LinkDSCEnable = true;
1532 if (Output == dm_dp)
1533 *RequiresFEC = true;
1534
1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 OutputFormat, DSCInputBitPerComponent,
1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 }
1542 //OutputTypeAndRate = Output & " HBR3";
1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 *OutputRate = dm_output_rate_dp_rate_hbr3;
1545 }
1546 }
1547 }
1548 }
1549 }
1550
/*
 * dml32_CalculateDPPCLK - compute the shared DPPCLK and each surface's DPPCLK.
 *
 * For every active surface the single-DPP clock is divided by the number of
 * DPPs on that surface and scaled up by the down-spreading percentage. The
 * largest of those values, passed through dml32_RoundToDFSGranularity(),
 * becomes *GlobalDPPCLK. Each surface clock is then re-expressed as a number
 * of 1/255 steps of *GlobalDPPCLK, with the step count taken from
 * dml_ceil(..., 1.0).
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0 .. NumberOfActiveSurfaces - 1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	/* Down-spreading is given in percent; turn it into a multiplier once. */
	const double SpreadScale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double ClkStep;
	unsigned int surf;

	/* Pass 1: per-surface requirement and running maximum. */
	*GlobalDPPCLK = 0;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * SpreadScale;
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[surf]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	/* Pass 2: snap every surface clock onto the 1/255 grid of the global clock. */
	ClkStep = *GlobalDPPCLK / 255;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		Dppclk[surf] = ClkStep * dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);
}
1572
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 double LinkBitRate,
1575 unsigned int Lanes,
1576 unsigned int HTotal,
1577 unsigned int HActive,
1578 double PixelClock,
1579 double DesiredBPP,
1580 bool DSCEnable,
1581 enum output_encoder_class Output,
1582 enum output_format_class Format,
1583 unsigned int DSCInputBitPerComponent,
1584 unsigned int DSCSlices,
1585 unsigned int AudioRate,
1586 unsigned int AudioLayout,
1587 enum odm_combine_mode ODMModeNoDSC,
1588 enum odm_combine_mode ODMModeDSC,
1589 /* Output */
1590 unsigned int *RequiredSlots)
1591 {
1592 double MaxLinkBPP;
1593 unsigned int MinDSCBPP;
1594 double MaxDSCBPP;
1595 unsigned int NonDSCBPP0;
1596 unsigned int NonDSCBPP1;
1597 unsigned int NonDSCBPP2;
1598
1599 if (Format == dm_420) {
1600 NonDSCBPP0 = 12;
1601 NonDSCBPP1 = 15;
1602 NonDSCBPP2 = 18;
1603 MinDSCBPP = 6;
1604 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1605 } else if (Format == dm_444) {
1606 NonDSCBPP0 = 24;
1607 NonDSCBPP1 = 30;
1608 NonDSCBPP2 = 36;
1609 MinDSCBPP = 8;
1610 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1611 } else {
1612 if (Output == dm_hdmi) {
1613 NonDSCBPP0 = 24;
1614 NonDSCBPP1 = 24;
1615 NonDSCBPP2 = 24;
1616 } else {
1617 NonDSCBPP0 = 16;
1618 NonDSCBPP1 = 20;
1619 NonDSCBPP2 = 24;
1620 }
1621 if (Format == dm_n422) {
1622 MinDSCBPP = 7;
1623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1624 } else {
1625 MinDSCBPP = 8;
1626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1627 }
1628 }
1629 if (Output == dm_dp2p0) {
1630 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1631 } else if (DSCEnable && Output == dm_dp) {
1632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1633 } else {
1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1635 }
1636
1637 if (DSCEnable) {
1638 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1639 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1640 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1641 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1642 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1643 MaxLinkBPP = 2 * MaxLinkBPP;
1644 } else {
1645 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1646 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1647 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1648 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1649 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1650 MaxLinkBPP = 2 * MaxLinkBPP;
1651 }
1652
1653 if (DesiredBPP == 0) {
1654 if (DSCEnable) {
1655 if (MaxLinkBPP < MinDSCBPP)
1656 return BPP_INVALID;
1657 else if (MaxLinkBPP >= MaxDSCBPP)
1658 return MaxDSCBPP;
1659 else
1660 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1661 } else {
1662 if (MaxLinkBPP >= NonDSCBPP2)
1663 return NonDSCBPP2;
1664 else if (MaxLinkBPP >= NonDSCBPP1)
1665 return NonDSCBPP1;
1666 else if (MaxLinkBPP >= NonDSCBPP0)
1667 return 16.0;
1668 else
1669 return BPP_INVALID;
1670 }
1671 } else {
1672 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1673 DesiredBPP <= NonDSCBPP0)) ||
1674 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1675 return BPP_INVALID;
1676 else
1677 return DesiredBPP;
1678 }
1679
1680 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1681
1682 return BPP_INVALID;
1683 } // TruncToValidBPP
1684
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1685 double dml32_RequiredDTBCLK(
1686 bool DSCEnable,
1687 double PixelClock,
1688 enum output_format_class OutputFormat,
1689 double OutputBpp,
1690 unsigned int DSCSlices,
1691 unsigned int HTotal,
1692 unsigned int HActive,
1693 unsigned int AudioRate,
1694 unsigned int AudioLayout)
1695 {
1696 double PixelWordRate;
1697 double HCActive;
1698 double HCBlank;
1699 double AverageTribyteRate;
1700 double HActiveTribyteRate;
1701
1702 if (DSCEnable != true)
1703 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1704
1705 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
1706 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1707 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1708 HCBlank = 64 + 32 *
1709 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1710 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1711 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1712 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1713 }
1714
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1715 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1716 enum odm_combine_mode ODMMode,
1717 unsigned int DSCInputBitPerComponent,
1718 double OutputBpp,
1719 unsigned int HActive,
1720 unsigned int HTotal,
1721 unsigned int NumberOfDSCSlices,
1722 enum output_format_class OutputFormat,
1723 enum output_encoder_class Output,
1724 double PixelClock,
1725 double PixelClockBackEnd,
1726 double dsc_delay_factor_wa)
1727 {
1728 unsigned int DSCDelayRequirement_val;
1729
1730 if (DSCEnabled == true && OutputBpp != 0) {
1731 if (ODMMode == dm_odm_combine_mode_4to1) {
1732 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1733 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1734 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1735 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1736 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739 } else {
1740 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1743 }
1744
1745 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1746 dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1747
1748 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1749
1750 } else {
1751 DSCDelayRequirement_val = 0;
1752 }
1753
1754 #ifdef __DML_VBA_DEBUG__
1755 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1756 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1757 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1758 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1759 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1760 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1761 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1762 #endif
1763
1764 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1765 }
1766
/*
 * dml32_CalculateSurfaceSizeInMall - estimate each surface's MALL footprint
 * and report whether the MALL allocation is exceeded.
 *
 * For every active surface k, SurfaceSizeInMALL[k] is built from:
 *   - the luma area (block-aligned width * block-aligned height *
 *     BytesPerPixelY[k]),
 *   - the chroma area, only when ReadBlockWidthC[k] > 0,
 *   - when DCCEnable[k] is set, the luma area aligned to 8x the 256-byte
 *     block dimensions (using DCCMetaPitchY[k] in place of the surface
 *     width) divided by 256, plus a fixed 64 * 1024, and the equivalent
 *     chroma term (without the fixed part) when
 *     Read256BytesBlockWidthC[k] > 0.
 *
 * With a stationary viewport the aligned extent is the span between the
 * block boundaries enclosing [start, start + size), limited to the aligned
 * surface dimension. Otherwise the viewport start is ignored and the extent
 * is min(surface dimension, viewport size + block size - 1) aligned up.
 * (Assumes dml_ceil()/dml_floor() round to a multiple of their second
 * argument - their definitions are not in this part of the file.)
 *
 * *ExceededMALLSize is set when the total for phantom-pipe (SubVP) surfaces
 * or the total for static-screen surfaces exceeds
 * MALLAllocatedForDCN * 1024 * 1024 bytes.
 */
void dml32_CalculateSurfaceSizeInMall(
		unsigned int NumberOfActiveSurfaces,
		unsigned int MALLAllocatedForDCN,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
		bool DCCEnable[],
		bool ViewportStationary[],
		unsigned int ViewportXStartY[],
		unsigned int ViewportYStartY[],
		unsigned int ViewportXStartC[],
		unsigned int ViewportYStartC[],
		unsigned int ViewportWidthY[],
		unsigned int ViewportHeightY[],
		unsigned int BytesPerPixelY[],
		unsigned int ViewportWidthC[],
		unsigned int ViewportHeightC[],
		unsigned int BytesPerPixelC[],
		unsigned int SurfaceWidthY[],
		unsigned int SurfaceWidthC[],
		unsigned int SurfaceHeightY[],
		unsigned int SurfaceHeightC[],
		unsigned int Read256BytesBlockWidthY[],
		unsigned int Read256BytesBlockWidthC[],
		unsigned int Read256BytesBlockHeightY[],
		unsigned int Read256BytesBlockHeightC[],
		unsigned int ReadBlockWidthY[],
		unsigned int ReadBlockWidthC[],
		unsigned int ReadBlockHeightY[],
		unsigned int ReadBlockHeightC[],
		unsigned int DCCMetaPitchY[],
		unsigned int DCCMetaPitchC[],

		/* Output */
		unsigned int    SurfaceSizeInMALL[],
		bool *ExceededMALLSize)
{
	unsigned int k;
	unsigned int TotalSurfaceSizeInMALLForSS = 0;
	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
	/* Allocation is scaled by 1024 * 1024 to compare against byte totals. */
	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (ViewportStationary[k]) {
			/* Luma: block-aligned span of the viewport, capped at the aligned surface. */
			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
					ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
					ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
					ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
					ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
					dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];

			/* Chroma plane is added only when a chroma read-block width is set. */
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
						dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
						ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
						dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
						dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
						dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
						ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
						dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
						BytesPerPixelC[k];
			}
			if (DCCEnable[k] == true) {
				/*
				 * DCC luma term: area aligned to 8x the 256-byte block,
				 * divided by 256, plus a fixed 64 * 1024.
				 */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
						dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
						Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
						- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
						* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
						Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
						ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
						Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
						Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
				/* DCC chroma term: same shape, no fixed addend. */
				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
							Read256BytesBlockWidthC[k]),
							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
							* Read256BytesBlockWidthC[k] - 1, 8 *
							Read256BytesBlockWidthC[k]) -
							dml_floor(ViewportXStartC[k], 8 *
							Read256BytesBlockWidthC[k])) *
							dml_min(dml_ceil(SurfaceHeightC[k], 8 *
							Read256BytesBlockHeightC[k]),
							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
							8 * Read256BytesBlockHeightC[k] - 1, 8 *
							Read256BytesBlockHeightC[k]) -
							dml_floor(ViewportYStartC[k], 8 *
							Read256BytesBlockHeightC[k])) *
							BytesPerPixelC[k] / 256;
				}
			}
		} else {
			/*
			 * Viewport may move: the start offsets are not used. Each
			 * dimension is min(surface, viewport + block - 1) aligned up.
			 */
			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
							BytesPerPixelY[k];
			if (ReadBlockWidthC[k] > 0) {
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
						ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
						ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
						BytesPerPixelC[k];
			}
			if (DCCEnable[k] == true) {
				/* DCC luma term, including the fixed 64 * 1024 addend. */
				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
						Read256BytesBlockWidthY[k] - 1), 8 *
						Read256BytesBlockWidthY[k]) *
						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
						Read256BytesBlockHeightY[k] - 1), 8 *
						Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);

				/* DCC chroma term. */
				if (Read256BytesBlockWidthC[k] > 0) {
					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
							Read256BytesBlockWidthC[k] - 1), 8 *
							Read256BytesBlockWidthC[k]) *
							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
							Read256BytesBlockHeightC[k] - 1), 8 *
							Read256BytesBlockHeightC[k]) *
							BytesPerPixelC[k] / 256;
				}
			}
		}
	}

	/*
	 * Accumulate per-use totals. A surface marked as phantom pipe counts
	 * toward the SubVP total; otherwise it counts toward the static-screen
	 * total only if static-screen MALL use is enabled for it. Surfaces
	 * matching neither condition are not counted.
	 */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* SS and Subvp counted separate as they are never used at the same time */
		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
	}
	/* Either total going over the allocation flags the configuration. */
	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
} // CalculateSurfaceSizeInMall
1907
dml32_CalculateVMRowAndSwath(unsigned int NumberOfActiveSurfaces,DmlPipe myPipe[],unsigned int SurfaceSizeInMALL[],unsigned int PTEBufferSizeInRequestsLuma,unsigned int PTEBufferSizeInRequestsChroma,unsigned int DCCMetaBufferSizeBytes,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int MALLAllocatedForDCN,double SwathWidthY[],double SwathWidthC[],bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes[],unsigned int HostVMMinPageSize,bool PTEBufferSizeNotExceeded[],bool DCCMetaBufferSizeNotExceeded[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int dpte_row_height_luma[],unsigned int dpte_row_height_chroma[],unsigned int dpte_row_height_linear_luma[],unsigned int dpte_row_height_linear_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int vm_group_bytes[],unsigned int dpte_group_bytes[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PTERequestSizeY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int PTERequestSizeC[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_c[],double PrefetchSourceLinesY[],double PrefetchSourceLinesC[],double VInitPreFillY[],double VInitPreFillC[],unsigned int MaxNumSwathY[],unsigned int MaxNumSwathC[],double meta_row_bw[],double dpte_row_bw[],double PixelPTEBytesPerRow[],double PDEAndMetaPTEBytesFrame[],double MetaRowByte[],bool use_one_row_for_frame[],bool use_one_row_for_frame_flip[],bool 
UsesMALLForStaticScreen[],bool PTE_BUFFER_MODE[],unsigned int BIGK_FRAGMENT_SIZE[])1908 void dml32_CalculateVMRowAndSwath(
1909 unsigned int NumberOfActiveSurfaces,
1910 DmlPipe myPipe[],
1911 unsigned int SurfaceSizeInMALL[],
1912 unsigned int PTEBufferSizeInRequestsLuma,
1913 unsigned int PTEBufferSizeInRequestsChroma,
1914 unsigned int DCCMetaBufferSizeBytes,
1915 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1917 unsigned int MALLAllocatedForDCN,
1918 double SwathWidthY[],
1919 double SwathWidthC[],
1920 bool GPUVMEnable,
1921 bool HostVMEnable,
1922 unsigned int HostVMMaxNonCachedPageTableLevels,
1923 unsigned int GPUVMMaxPageTableLevels,
1924 unsigned int GPUVMMinPageSizeKBytes[],
1925 unsigned int HostVMMinPageSize,
1926
1927 /* Output */
1928 bool PTEBufferSizeNotExceeded[],
1929 bool DCCMetaBufferSizeNotExceeded[],
1930 unsigned int dpte_row_width_luma_ub[],
1931 unsigned int dpte_row_width_chroma_ub[],
1932 unsigned int dpte_row_height_luma[],
1933 unsigned int dpte_row_height_chroma[],
1934 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1935 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1936 unsigned int meta_req_width[],
1937 unsigned int meta_req_width_chroma[],
1938 unsigned int meta_req_height[],
1939 unsigned int meta_req_height_chroma[],
1940 unsigned int meta_row_width[],
1941 unsigned int meta_row_width_chroma[],
1942 unsigned int meta_row_height[],
1943 unsigned int meta_row_height_chroma[],
1944 unsigned int vm_group_bytes[],
1945 unsigned int dpte_group_bytes[],
1946 unsigned int PixelPTEReqWidthY[],
1947 unsigned int PixelPTEReqHeightY[],
1948 unsigned int PTERequestSizeY[],
1949 unsigned int PixelPTEReqWidthC[],
1950 unsigned int PixelPTEReqHeightC[],
1951 unsigned int PTERequestSizeC[],
1952 unsigned int dpde0_bytes_per_frame_ub_l[],
1953 unsigned int meta_pte_bytes_per_frame_ub_l[],
1954 unsigned int dpde0_bytes_per_frame_ub_c[],
1955 unsigned int meta_pte_bytes_per_frame_ub_c[],
1956 double PrefetchSourceLinesY[],
1957 double PrefetchSourceLinesC[],
1958 double VInitPreFillY[],
1959 double VInitPreFillC[],
1960 unsigned int MaxNumSwathY[],
1961 unsigned int MaxNumSwathC[],
1962 double meta_row_bw[],
1963 double dpte_row_bw[],
1964 double PixelPTEBytesPerRow[],
1965 double PDEAndMetaPTEBytesFrame[],
1966 double MetaRowByte[],
1967 bool use_one_row_for_frame[],
1968 bool use_one_row_for_frame_flip[],
1969 bool UsesMALLForStaticScreen[],
1970 bool PTE_BUFFER_MODE[],
1971 unsigned int BIGK_FRAGMENT_SIZE[])
1972 {
1973 unsigned int k;
1974 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1975 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1976 unsigned int PDEAndMetaPTEBytesFrameY;
1977 unsigned int PDEAndMetaPTEBytesFrameC;
1978 unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1979 unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1980 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1981 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1982 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1983 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1984 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1985 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1986 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1987 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1988 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1989
1990 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1991 if (HostVMEnable == true) {
1992 vm_group_bytes[k] = 512;
1993 dpte_group_bytes[k] = 512;
1994 } else if (GPUVMEnable == true) {
1995 vm_group_bytes[k] = 2048;
1996 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1997 dpte_group_bytes[k] = 512;
1998 else
1999 dpte_group_bytes[k] = 2048;
2000 } else {
2001 vm_group_bytes[k] = 0;
2002 dpte_group_bytes[k] = 0;
2003 }
2004
2005 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2006 myPipe[k].SourcePixelFormat == dm_420_12 ||
2007 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2008 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2009 !IsVertical(myPipe[k].SourceRotation)) {
2010 PTEBufferSizeInRequestsForLuma[k] =
2011 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2013 } else {
2014 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2015 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2016 }
2017
2018 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2019 myPipe[k].ViewportStationary,
2020 myPipe[k].DCCEnable,
2021 myPipe[k].DPPPerSurface,
2022 myPipe[k].BlockHeight256BytesC,
2023 myPipe[k].BlockWidth256BytesC,
2024 myPipe[k].SourcePixelFormat,
2025 myPipe[k].SurfaceTiling,
2026 myPipe[k].BytePerPixelC,
2027 myPipe[k].SourceRotation,
2028 SwathWidthC[k],
2029 myPipe[k].ViewportHeightChroma,
2030 myPipe[k].ViewportXStartC,
2031 myPipe[k].ViewportYStartC,
2032 GPUVMEnable,
2033 HostVMEnable,
2034 HostVMMaxNonCachedPageTableLevels,
2035 GPUVMMaxPageTableLevels,
2036 GPUVMMinPageSizeKBytes[k],
2037 HostVMMinPageSize,
2038 PTEBufferSizeInRequestsForChroma[k],
2039 myPipe[k].PitchC,
2040 myPipe[k].DCCMetaPitchC,
2041 myPipe[k].BlockWidthC,
2042 myPipe[k].BlockHeightC,
2043
2044 /* Output */
2045 &MetaRowByteC[k],
2046 &PixelPTEBytesPerRowC[k],
2047 &dpte_row_width_chroma_ub[k],
2048 &dpte_row_height_chroma[k],
2049 &dpte_row_height_linear_chroma[k],
2050 &PixelPTEBytesPerRowC_one_row_per_frame[k],
2051 &dpte_row_width_chroma_ub_one_row_per_frame[k],
2052 &dpte_row_height_chroma_one_row_per_frame[k],
2053 &meta_req_width_chroma[k],
2054 &meta_req_height_chroma[k],
2055 &meta_row_width_chroma[k],
2056 &meta_row_height_chroma[k],
2057 &PixelPTEReqWidthC[k],
2058 &PixelPTEReqHeightC[k],
2059 &PTERequestSizeC[k],
2060 &dpde0_bytes_per_frame_ub_c[k],
2061 &meta_pte_bytes_per_frame_ub_c[k]);
2062
2063 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2064 myPipe[k].VRatioChroma,
2065 myPipe[k].VTapsChroma,
2066 myPipe[k].InterlaceEnable,
2067 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2068 myPipe[k].SwathHeightC,
2069 myPipe[k].SourceRotation,
2070 myPipe[k].ViewportStationary,
2071 SwathWidthC[k],
2072 myPipe[k].ViewportHeightChroma,
2073 myPipe[k].ViewportXStartC,
2074 myPipe[k].ViewportYStartC,
2075
2076 /* Output */
2077 &VInitPreFillC[k],
2078 &MaxNumSwathC[k]);
2079 } else {
2080 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2081 PTEBufferSizeInRequestsForChroma[k] = 0;
2082 PixelPTEBytesPerRowC[k] = 0;
2083 PDEAndMetaPTEBytesFrameC = 0;
2084 MetaRowByteC[k] = 0;
2085 MaxNumSwathC[k] = 0;
2086 PrefetchSourceLinesC[k] = 0;
2087 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2088 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2089 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2090 }
2091
2092 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2093 myPipe[k].ViewportStationary,
2094 myPipe[k].DCCEnable,
2095 myPipe[k].DPPPerSurface,
2096 myPipe[k].BlockHeight256BytesY,
2097 myPipe[k].BlockWidth256BytesY,
2098 myPipe[k].SourcePixelFormat,
2099 myPipe[k].SurfaceTiling,
2100 myPipe[k].BytePerPixelY,
2101 myPipe[k].SourceRotation,
2102 SwathWidthY[k],
2103 myPipe[k].ViewportHeight,
2104 myPipe[k].ViewportXStart,
2105 myPipe[k].ViewportYStart,
2106 GPUVMEnable,
2107 HostVMEnable,
2108 HostVMMaxNonCachedPageTableLevels,
2109 GPUVMMaxPageTableLevels,
2110 GPUVMMinPageSizeKBytes[k],
2111 HostVMMinPageSize,
2112 PTEBufferSizeInRequestsForLuma[k],
2113 myPipe[k].PitchY,
2114 myPipe[k].DCCMetaPitchY,
2115 myPipe[k].BlockWidthY,
2116 myPipe[k].BlockHeightY,
2117
2118 /* Output */
2119 &MetaRowByteY[k],
2120 &PixelPTEBytesPerRowY[k],
2121 &dpte_row_width_luma_ub[k],
2122 &dpte_row_height_luma[k],
2123 &dpte_row_height_linear_luma[k],
2124 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2125 &dpte_row_width_luma_ub_one_row_per_frame[k],
2126 &dpte_row_height_luma_one_row_per_frame[k],
2127 &meta_req_width[k],
2128 &meta_req_height[k],
2129 &meta_row_width[k],
2130 &meta_row_height[k],
2131 &PixelPTEReqWidthY[k],
2132 &PixelPTEReqHeightY[k],
2133 &PTERequestSizeY[k],
2134 &dpde0_bytes_per_frame_ub_l[k],
2135 &meta_pte_bytes_per_frame_ub_l[k]);
2136
2137 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2138 myPipe[k].VRatio,
2139 myPipe[k].VTaps,
2140 myPipe[k].InterlaceEnable,
2141 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2142 myPipe[k].SwathHeightY,
2143 myPipe[k].SourceRotation,
2144 myPipe[k].ViewportStationary,
2145 SwathWidthY[k],
2146 myPipe[k].ViewportHeight,
2147 myPipe[k].ViewportXStart,
2148 myPipe[k].ViewportYStart,
2149
2150 /* Output */
2151 &VInitPreFillY[k],
2152 &MaxNumSwathY[k]);
2153
2154 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2155 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2156
2157 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2158 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2159 PTEBufferSizeNotExceeded[k] = true;
2160 } else {
2161 PTEBufferSizeNotExceeded[k] = false;
2162 }
2163
2164 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2165 PTEBufferSizeInRequestsForLuma[k] &&
2166 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2167 }
2168
2169 dml32_CalculateMALLUseForStaticScreen(
2170 NumberOfActiveSurfaces,
2171 MALLAllocatedForDCN,
2172 UseMALLForStaticScreen, // mode
2173 SurfaceSizeInMALL,
2174 one_row_per_frame_fits_in_buffer,
2175 /* Output */
2176 UsesMALLForStaticScreen); // boolen
2177
2178 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2179 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2180 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2181 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2182 (GPUVMMinPageSizeKBytes[k] > 64);
2183 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2184 }
2185
2186 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2187 #ifdef __DML_VBA_DEBUG__
2188 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2189 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2190 #endif
2191 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2192 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2193 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2194 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2195
2196 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2197 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2198
2199 if (use_one_row_for_frame[k]) {
2200 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2201 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2202 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2203 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2204 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2205 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2206 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2207 }
2208
2209 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2210 DCCMetaBufferSizeNotExceeded[k] = true;
2211 else
2212 DCCMetaBufferSizeNotExceeded[k] = false;
2213
2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2215 if (use_one_row_for_frame[k])
2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2217
2218 dml32_CalculateRowBandwidth(
2219 GPUVMEnable,
2220 myPipe[k].SourcePixelFormat,
2221 myPipe[k].VRatio,
2222 myPipe[k].VRatioChroma,
2223 myPipe[k].DCCEnable,
2224 myPipe[k].HTotal / myPipe[k].PixelClock,
2225 MetaRowByteY[k], MetaRowByteC[k],
2226 meta_row_height[k],
2227 meta_row_height_chroma[k],
2228 PixelPTEBytesPerRowY[k],
2229 PixelPTEBytesPerRowC[k],
2230 dpte_row_height_luma[k],
2231 dpte_row_height_chroma[k],
2232
2233 /* Output */
2234 &meta_row_bw[k],
2235 &dpte_row_bw[k]);
2236 #ifdef __DML_VBA_DEBUG__
2237 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2238 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2239 __func__, k, use_one_row_for_frame_flip[k]);
2240 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2241 __func__, k, UseMALLForPStateChange[k]);
2242 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2243 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2244 __func__, k, dpte_row_width_luma_ub[k]);
2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2246 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2247 __func__, k, dpte_row_height_chroma[k]);
2248 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2249 __func__, k, dpte_row_width_chroma_ub[k]);
2250 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
2251 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2252 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2253 __func__, k, PTEBufferSizeNotExceeded[k]);
2254 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2255 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2256 #endif
2257 }
2258 } // CalculateVMRowAndSwath
2259
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2260 unsigned int dml32_CalculateVMAndRowBytes(
2261 bool ViewportStationary,
2262 bool DCCEnable,
2263 unsigned int NumberOfDPPs,
2264 unsigned int BlockHeight256Bytes,
2265 unsigned int BlockWidth256Bytes,
2266 enum source_format_class SourcePixelFormat,
2267 unsigned int SurfaceTiling,
2268 unsigned int BytePerPixel,
2269 enum dm_rotation_angle SourceRotation,
2270 double SwathWidth,
2271 unsigned int ViewportHeight,
2272 unsigned int ViewportXStart,
2273 unsigned int ViewportYStart,
2274 bool GPUVMEnable,
2275 bool HostVMEnable,
2276 unsigned int HostVMMaxNonCachedPageTableLevels,
2277 unsigned int GPUVMMaxPageTableLevels,
2278 unsigned int GPUVMMinPageSizeKBytes,
2279 unsigned int HostVMMinPageSize,
2280 unsigned int PTEBufferSizeInRequests,
2281 unsigned int Pitch,
2282 unsigned int DCCMetaPitch,
2283 unsigned int MacroTileWidth,
2284 unsigned int MacroTileHeight,
2285
2286 /* Output */
2287 unsigned int *MetaRowByte,
2288 unsigned int *PixelPTEBytesPerRow,
2289 unsigned int *dpte_row_width_ub,
2290 unsigned int *dpte_row_height,
2291 unsigned int *dpte_row_height_linear,
2292 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2293 unsigned int *dpte_row_width_ub_one_row_per_frame,
2294 unsigned int *dpte_row_height_one_row_per_frame,
2295 unsigned int *MetaRequestWidth,
2296 unsigned int *MetaRequestHeight,
2297 unsigned int *meta_row_width,
2298 unsigned int *meta_row_height,
2299 unsigned int *PixelPTEReqWidth,
2300 unsigned int *PixelPTEReqHeight,
2301 unsigned int *PTERequestSize,
2302 unsigned int *DPDE0BytesFrame,
2303 unsigned int *MetaPTEBytesFrame)
2304 {
2305 unsigned int MPDEBytesFrame;
2306 unsigned int DCCMetaSurfaceBytes;
2307 unsigned int ExtraDPDEBytesFrame;
2308 unsigned int PDEAndMetaPTEBytesFrame;
2309 unsigned int HostVMDynamicLevels = 0;
2310 unsigned int MacroTileSizeBytes;
2311 unsigned int vp_height_meta_ub;
2312 unsigned int vp_height_dpte_ub;
2313 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2314
2315 if (GPUVMEnable == true && HostVMEnable == true) {
2316 if (HostVMMinPageSize < 2048)
2317 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2318 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2319 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2320 else
2321 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2322 }
2323
2324 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2325 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2326 if (SurfaceTiling == dm_sw_linear) {
2327 *meta_row_height = 32;
2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2329 - dml_floor(ViewportXStart, *MetaRequestWidth);
2330 } else if (!IsVertical(SourceRotation)) {
2331 *meta_row_height = *MetaRequestHeight;
2332 if (ViewportStationary && NumberOfDPPs == 1) {
2333 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2334 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2335 } else {
2336 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2337 }
2338 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2339 } else {
2340 *meta_row_height = *MetaRequestWidth;
2341 if (ViewportStationary && NumberOfDPPs == 1) {
2342 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2343 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2344 } else {
2345 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2346 }
2347 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2348 }
2349
2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2351 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2352 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2353 } else if (!IsVertical(SourceRotation)) {
2354 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355 } else {
2356 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357 }
2358
2359 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2360
2361 if (GPUVMEnable == true) {
2362 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2363 (8 * 4.0 * 1024), 1) + 1) * 64;
2364 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2365 } else {
2366 *MetaPTEBytesFrame = 0;
2367 MPDEBytesFrame = 0;
2368 }
2369
2370 if (DCCEnable != true) {
2371 *MetaPTEBytesFrame = 0;
2372 MPDEBytesFrame = 0;
2373 *MetaRowByte = 0;
2374 }
2375
2376 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2377
2378 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2379 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2380 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2381 MacroTileHeight - 1, MacroTileHeight) -
2382 dml_floor(ViewportYStart, MacroTileHeight);
2383 } else if (!IsVertical(SourceRotation)) {
2384 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2385 } else {
2386 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2387 }
2388 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2389 (8 * 2097152), 1) + 1);
2390 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2391 } else {
2392 *DPDE0BytesFrame = 0;
2393 ExtraDPDEBytesFrame = 0;
2394 vp_height_dpte_ub = 0;
2395 }
2396
2397 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2398
2399 #ifdef __DML_VBA_DEBUG__
2400 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2401 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2402 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2403 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2404 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2405 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2406 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2407 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2408 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2409 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2410 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2411 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2412 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2413 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2414 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2415 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2416 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2417 #endif
2418
2419 if (HostVMEnable == true)
2420 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2421
2422 if (SurfaceTiling == dm_sw_linear) {
2423 *PixelPTEReqHeight = 1;
2424 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2425 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2426 *PTERequestSize = 64;
2427 } else if (GPUVMMinPageSizeKBytes == 4) {
2428 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2429 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2430 *PTERequestSize = 128;
2431 } else {
2432 *PixelPTEReqHeight = MacroTileHeight;
2433 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2434 *PTERequestSize = 64;
2435 }
2436 #ifdef __DML_VBA_DEBUG__
2437 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2438 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2439 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2440 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2441 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2442 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2443 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2444 #endif
2445
2446 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2447 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2448 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2449 (double) *PixelPTEReqWidth;
2450 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2451 *PTERequestSize;
2452
2453 if (SurfaceTiling == dm_sw_linear) {
2454 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2455 *PixelPTEReqWidth / Pitch), 1));
2456 #ifdef __DML_VBA_DEBUG__
2457 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2458 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2459 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2460 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2461 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2462 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2463 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2464 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2465 *PixelPTEReqWidth / Pitch), 1));
2466 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2467 #endif
2468 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2469 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2470 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2471
2472 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2473 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2474 PixelPTEReqWidth_linear / Pitch), 1);
2475 if (*dpte_row_height_linear > 128)
2476 *dpte_row_height_linear = 128;
2477
2478 } else if (!IsVertical(SourceRotation)) {
2479 *dpte_row_height = *PixelPTEReqHeight;
2480
2481 if (GPUVMMinPageSizeKBytes > 64) {
2482 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2483 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2484 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2485 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2486 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2487 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2488 } else {
2489 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2490 *PixelPTEReqWidth;
2491 }
2492
2493 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2494 } else {
2495 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2496
2497 if (ViewportStationary && (NumberOfDPPs == 1)) {
2498 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2499 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2500 } else {
2501 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2502 * *PixelPTEReqHeight;
2503 }
2504
2505 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2506 }
2507
2508 if (GPUVMEnable != true)
2509 *PixelPTEBytesPerRow = 0;
2510 if (HostVMEnable == true)
2511 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2512
2513 #ifdef __DML_VBA_DEBUG__
2514 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2515 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2516 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2517 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2518 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2519 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2520 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2521 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2522 __func__, *dpte_row_width_ub_one_row_per_frame);
2523 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2524 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2525 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2526 *MetaPTEBytesFrame);
2527 #endif
2528
2529 return PDEAndMetaPTEBytesFrame;
2530 } // CalculateVMAndRowBytes
2531
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2532 double dml32_CalculatePrefetchSourceLines(
2533 double VRatio,
2534 unsigned int VTaps,
2535 bool Interlace,
2536 bool ProgressiveToInterlaceUnitInOPP,
2537 unsigned int SwathHeight,
2538 enum dm_rotation_angle SourceRotation,
2539 bool ViewportStationary,
2540 double SwathWidth,
2541 unsigned int ViewportHeight,
2542 unsigned int ViewportXStart,
2543 unsigned int ViewportYStart,
2544
2545 /* Output */
2546 double *VInitPreFill,
2547 unsigned int *MaxNumSwath)
2548 {
2549
2550 unsigned int vp_start_rot;
2551 unsigned int sw0_tmp;
2552 unsigned int MaxPartialSwath;
2553 double numLines;
2554
2555 #ifdef __DML_VBA_DEBUG__
2556 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2557 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2558 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2559 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2560 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2561 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2562 #endif
2563 if (ProgressiveToInterlaceUnitInOPP)
2564 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2565 else
2566 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2567
2568 if (ViewportStationary) {
2569 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2570 vp_start_rot = SwathHeight -
2571 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2572 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2573 vp_start_rot = ViewportXStart;
2574 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2575 vp_start_rot = SwathHeight -
2576 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2577 } else {
2578 vp_start_rot = ViewportYStart;
2579 }
2580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2581 if (sw0_tmp < *VInitPreFill)
2582 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2583 else
2584 *MaxNumSwath = 1;
2585 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2586 } else {
2587 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2588 if (*VInitPreFill > 1)
2589 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2590 else
2591 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2592 }
2593 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2594
2595 #ifdef __DML_VBA_DEBUG__
2596 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2597 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2598 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2599 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2600 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2601 #endif
2602 return numLines;
2603
2604 } // CalculatePrefetchSourceLines
2605
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2606 void dml32_CalculateMALLUseForStaticScreen(
2607 unsigned int NumberOfActiveSurfaces,
2608 unsigned int MALLAllocatedForDCNFinal,
2609 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2610 unsigned int SurfaceSizeInMALL[],
2611 bool one_row_per_frame_fits_in_buffer[],
2612
2613 /* output */
2614 bool UsesMALLForStaticScreen[])
2615 {
2616 unsigned int k;
2617 unsigned int SurfaceToAddToMALL;
2618 bool CanAddAnotherSurfaceToMALL;
2619 unsigned int TotalSurfaceSizeInMALL;
2620
2621 TotalSurfaceSizeInMALL = 0;
2622 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2623 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2624 if (UsesMALLForStaticScreen[k])
2625 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2626 #ifdef __DML_VBA_DEBUG__
2627 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2628 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2629 #endif
2630 }
2631
2632 SurfaceToAddToMALL = 0;
2633 CanAddAnotherSurfaceToMALL = true;
2634 while (CanAddAnotherSurfaceToMALL) {
2635 CanAddAnotherSurfaceToMALL = false;
2636 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2637 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2638 !UsesMALLForStaticScreen[k] &&
2639 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2640 one_row_per_frame_fits_in_buffer[k] &&
2641 (!CanAddAnotherSurfaceToMALL ||
2642 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2643 CanAddAnotherSurfaceToMALL = true;
2644 SurfaceToAddToMALL = k;
2645 #ifdef __DML_VBA_DEBUG__
2646 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2647 __func__, k, UseMALLForStaticScreen[k]);
2648 #endif
2649 }
2650 }
2651 if (CanAddAnotherSurfaceToMALL) {
2652 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2653 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2654
2655 #ifdef __DML_VBA_DEBUG__
2656 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2657 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2658 #endif
2659
2660 }
2661 }
2662 }
2663
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2664 void dml32_CalculateRowBandwidth(
2665 bool GPUVMEnable,
2666 enum source_format_class SourcePixelFormat,
2667 double VRatio,
2668 double VRatioChroma,
2669 bool DCCEnable,
2670 double LineTime,
2671 unsigned int MetaRowByteLuma,
2672 unsigned int MetaRowByteChroma,
2673 unsigned int meta_row_height_luma,
2674 unsigned int meta_row_height_chroma,
2675 unsigned int PixelPTEBytesPerRowLuma,
2676 unsigned int PixelPTEBytesPerRowChroma,
2677 unsigned int dpte_row_height_luma,
2678 unsigned int dpte_row_height_chroma,
2679 /* Output */
2680 double *meta_row_bw,
2681 double *dpte_row_bw)
2682 {
2683 if (DCCEnable != true) {
2684 *meta_row_bw = 0;
2685 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2686 SourcePixelFormat == dm_rgbe_alpha) {
2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2688 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2689 } else {
2690 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2691 }
2692
2693 if (GPUVMEnable != true) {
2694 *dpte_row_bw = 0;
2695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2696 SourcePixelFormat == dm_rgbe_alpha) {
2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2698 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2699 } else {
2700 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2701 }
2702 }
2703
/*
 * dml32_CalculateUrgentLatency - select the urgent latency to use.
 *
 * Starts from dml_max3() of the three supplied latencies. When
 * DoUrgentLatencyAdjustment is set, adds
 *     UrgentLatencyAdjustmentFabricClockComponent *
 *     (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Return: the resulting latency.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double latency;

	latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	/* No fabric clock correction requested. */
	if (!DoUrgentLatencyAdjustment)
		return latency;

	return latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2722
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2723 void dml32_CalculateUrgentBurstFactor(
2724 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2725 unsigned int swath_width_luma_ub,
2726 unsigned int swath_width_chroma_ub,
2727 unsigned int SwathHeightY,
2728 unsigned int SwathHeightC,
2729 double LineTime,
2730 double UrgentLatency,
2731 double CursorBufferSize,
2732 unsigned int CursorWidth,
2733 unsigned int CursorBPP,
2734 double VRatio,
2735 double VRatioC,
2736 double BytePerPixelInDETY,
2737 double BytePerPixelInDETC,
2738 unsigned int DETBufferSizeY,
2739 unsigned int DETBufferSizeC,
2740 /* Output */
2741 double *UrgentBurstFactorCursor,
2742 double *UrgentBurstFactorLuma,
2743 double *UrgentBurstFactorChroma,
2744 bool *NotEnoughUrgentLatencyHiding)
2745 {
2746 double LinesInDETLuma;
2747 double LinesInDETChroma;
2748 unsigned int LinesInCursorBuffer;
2749 double CursorBufferSizeInTime;
2750 double DETBufferSizeInTimeLuma;
2751 double DETBufferSizeInTimeChroma;
2752
2753 *NotEnoughUrgentLatencyHiding = 0;
2754
2755 if (CursorWidth > 0) {
2756 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2757 (CursorWidth * CursorBPP / 8.0)), 1.0);
2758 if (VRatio > 0) {
2759 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2760 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2761 *NotEnoughUrgentLatencyHiding = 1;
2762 *UrgentBurstFactorCursor = 0;
2763 } else {
2764 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2765 (CursorBufferSizeInTime - UrgentLatency);
2766 }
2767 } else {
2768 *UrgentBurstFactorCursor = 1;
2769 }
2770 }
2771
2772 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2773 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2774
2775 if (VRatio > 0) {
2776 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2777 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2778 *NotEnoughUrgentLatencyHiding = 1;
2779 *UrgentBurstFactorLuma = 0;
2780 } else {
2781 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2782 }
2783 } else {
2784 *UrgentBurstFactorLuma = 1;
2785 }
2786
2787 if (BytePerPixelInDETC > 0) {
2788 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2789 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2790 / swath_width_chroma_ub;
2791
2792 if (VRatio > 0) {
2793 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2794 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2795 *NotEnoughUrgentLatencyHiding = 1;
2796 *UrgentBurstFactorChroma = 0;
2797 } else {
2798 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2799 / (DETBufferSizeInTimeChroma - UrgentLatency);
2800 }
2801 } else {
2802 *UrgentBurstFactorChroma = 1;
2803 }
2804 }
2805 } // CalculateUrgentBurstFactor
2806
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2807 void dml32_CalculateDCFCLKDeepSleep(
2808 unsigned int NumberOfActiveSurfaces,
2809 unsigned int BytePerPixelY[],
2810 unsigned int BytePerPixelC[],
2811 double VRatio[],
2812 double VRatioChroma[],
2813 double SwathWidthY[],
2814 double SwathWidthC[],
2815 unsigned int DPPPerSurface[],
2816 double HRatio[],
2817 double HRatioChroma[],
2818 double PixelClock[],
2819 double PSCL_THROUGHPUT[],
2820 double PSCL_THROUGHPUT_CHROMA[],
2821 double Dppclk[],
2822 double ReadBandwidthLuma[],
2823 double ReadBandwidthChroma[],
2824 unsigned int ReturnBusWidth,
2825
2826 /* Output */
2827 double *DCFClkDeepSleep)
2828 {
2829 unsigned int k;
2830 double DisplayPipeLineDeliveryTimeLuma;
2831 double DisplayPipeLineDeliveryTimeChroma;
2832 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2833 double ReadBandwidth = 0.0;
2834
2835 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2836
2837 if (VRatio[k] <= 1) {
2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2839 / PixelClock[k];
2840 } else {
2841 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2842 }
2843 if (BytePerPixelC[k] == 0) {
2844 DisplayPipeLineDeliveryTimeChroma = 0;
2845 } else {
2846 if (VRatioChroma[k] <= 1) {
2847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2848 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2849 } else {
2850 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2851 / Dppclk[k];
2852 }
2853 }
2854
2855 if (BytePerPixelC[k] > 0) {
2856 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2857 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2858 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2859 32.0 / DisplayPipeLineDeliveryTimeChroma);
2860 } else {
2861 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2862 64.0 / DisplayPipeLineDeliveryTimeLuma;
2863 }
2864 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2865
2866 #ifdef __DML_VBA_DEBUG__
2867 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2868 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2869 #endif
2870 }
2871
2872 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2873 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2874
2875 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2876
2877 #ifdef __DML_VBA_DEBUG__
2878 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2879 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2880 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2881 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2882 #endif
2883
2884 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2885 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2886 #ifdef __DML_VBA_DEBUG__
2887 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2888 #endif
2889 } // CalculateDCFCLKDeepSleep
2890
/*
 * Compute the writeback delay.
 *
 * Returns 0 when the unclamped count of last output lines is negative;
 * otherwise that count times the line length, plus
 * (HTotal - WritebackDestinationWidth), plus 80.
 * WritebackPixelFormat and WritebackHRatio are not read.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int WritebackDestinationWidth,
		unsigned int WritebackDestinationHeight,
		unsigned int WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double SourceLinesAfterInit = (double) WritebackSourceHeight - (double) VInit;
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double) WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double LastOutputLines = WritebackDestinationHeight - 1 -
			dml_ceil(SourceLinesAfterInit / (double) WritebackVRatio, 1.0);

	if (LastOutputLines < 0)
		return 0;

	return LastOutputLines * LineLength + (HTotal - WritebackDestinationWidth) + 80;
}
2920
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2921 void dml32_UseMinimumDCFCLK(
2922 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2923 bool DRRDisplay[],
2924 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2925 unsigned int MaxInterDCNTileRepeaters,
2926 unsigned int MaxPrefetchMode,
2927 double DRAMClockChangeLatencyFinal,
2928 double FCLKChangeLatency,
2929 double SREnterPlusExitTime,
2930 unsigned int ReturnBusWidth,
2931 unsigned int RoundTripPingLatencyCycles,
2932 unsigned int ReorderingBytes,
2933 unsigned int PixelChunkSizeInKByte,
2934 unsigned int MetaChunkSize,
2935 bool GPUVMEnable,
2936 unsigned int GPUVMMaxPageTableLevels,
2937 bool HostVMEnable,
2938 unsigned int NumberOfActiveSurfaces,
2939 double HostVMMinPageSize,
2940 unsigned int HostVMMaxNonCachedPageTableLevels,
2941 bool DynamicMetadataVMEnabled,
2942 bool ImmediateFlipRequirement,
2943 bool ProgressiveToInterlaceUnitInOPP,
2944 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2945 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2946 unsigned int VTotal[],
2947 unsigned int VActive[],
2948 unsigned int DynamicMetadataTransmittedBytes[],
2949 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2950 bool Interlace[],
2951 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2952 double RequiredDISPCLK[][2],
2953 double UrgLatency[],
2954 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2955 double ProjectedDCFClkDeepSleep[][2],
2956 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2957 unsigned int TotalNumberOfActiveDPP[][2],
2958 unsigned int TotalNumberOfDCCActiveDPP[][2],
2959 unsigned int dpte_group_bytes[],
2960 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2961 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2962 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2963 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2964 unsigned int BytePerPixelY[],
2965 unsigned int BytePerPixelC[],
2966 unsigned int HTotal[],
2967 double PixelClock[],
2968 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2969 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2970 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2971 bool DynamicMetadataEnable[],
2972 double ReadBandwidthLuma[],
2973 double ReadBandwidthChroma[],
2974 double DCFCLKPerState[],
2975 /* Output */
2976 double DCFCLKState[][2])
2977 {
2978 unsigned int i, j, k;
2979 unsigned int dummy1;
2980 double dummy2, dummy3;
2981 double NormalEfficiency;
2982 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2983
2984 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2985 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2986 for (j = 0; j <= 1; ++j) {
2987 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2988 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2989 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2990 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2991 double MinimumTWait = 0.0;
2992 double DPTEBandwidth;
2993 double DCFCLKRequiredForAverageBandwidth;
2994 unsigned int ExtraLatencyBytes;
2995 double ExtraLatencyCycles;
2996 double DCFCLKRequiredForPeakBandwidth;
2997 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2998 double MinimumTvmPlus2Tr0;
2999
3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3001 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3003 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3004 / (15.75 * HTotal[k] / PixelClock[k]);
3005 }
3006
3007 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3008 NoOfDPPState[k] = NoOfDPP[i][j][k];
3009
3010 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3011 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3012
3013 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3014 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3015 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3016 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3017 HostVMMaxNonCachedPageTableLevels);
3018 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3019 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3020 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3021 double DCFCLKCyclesRequiredInPrefetch;
3022 double PrefetchTime;
3023
3024 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3025 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3026 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3027 * BytePerPixelC[k]) / NormalEfficiency
3028 / ReturnBusWidth;
3029 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3030 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3031 / NormalEfficiency / ReturnBusWidth
3032 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3033 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3034 / ReturnBusWidth
3035 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3036 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3037 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3038 * HTotal[k] / PixelClock[k];
3039 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3040 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3041 UrgLatency[i] * GPUVMMaxPageTableLevels *
3042 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3043
3044 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3045 UseMALLForPStateChange[k],
3046 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3047 DRRDisplay[k],
3048 DRAMClockChangeLatencyFinal,
3049 FCLKChangeLatency,
3050 UrgLatency[i],
3051 SREnterPlusExitTime);
3052
3053 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3054 MinimumTWait - UrgLatency[i] *
3055 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3056 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3057 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3058 DynamicMetadataVMExtraLatency[k];
3059
3060 if (PrefetchTime > 0) {
3061 double ExpectedVRatioPrefetch;
3062
3063 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3064 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3065 DCFCLKCyclesRequiredInPrefetch);
3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3067 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3068 PrefetchPixelLinesTime[k] *
3069 dml_max(1.0, ExpectedVRatioPrefetch) *
3070 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3071 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3074 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3075 NormalEfficiency / ReturnBusWidth;
3076 }
3077 } else {
3078 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3079 }
3080 if (DynamicMetadataEnable[k] == true) {
3081 double TSetupPipe;
3082 double TdmbfPipe;
3083 double TdmsksPipe;
3084 double TdmecPipe;
3085 double AllowedTimeForUrgentExtraLatency;
3086
3087 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3088 MaxInterDCNTileRepeaters,
3089 RequiredDPPCLKPerSurface[i][j][k],
3090 RequiredDISPCLK[i][j],
3091 ProjectedDCFClkDeepSleep[i][j],
3092 PixelClock[k],
3093 HTotal[k],
3094 VTotal[k] - VActive[k],
3095 DynamicMetadataTransmittedBytes[k],
3096 DynamicMetadataLinesBeforeActiveRequired[k],
3097 Interlace[k],
3098 ProgressiveToInterlaceUnitInOPP,
3099
3100 /* output */
3101 &TSetupPipe,
3102 &TdmbfPipe,
3103 &TdmecPipe,
3104 &TdmsksPipe,
3105 &dummy1,
3106 &dummy2,
3107 &dummy3);
3108 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3109 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3110 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3111 if (AllowedTimeForUrgentExtraLatency > 0)
3112 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3113 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3114 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3115 else
3116 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3117 }
3118 }
3119 DCFCLKRequiredForPeakBandwidth = 0;
3120 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3121 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3122 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3123 }
3124 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3125 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3126 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3127 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3128 double MaximumTvmPlus2Tr0PlusTsw;
3129
3130 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3131 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3132 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3133 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3134 } else {
3135 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3136 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3137 MinimumTvmPlus2Tr0 -
3138 PrefetchPixelLinesTime[k] / 4),
3139 (2 * ExtraLatencyCycles +
3140 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3141 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3142 }
3143 }
3144 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3145 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3146 }
3147 }
3148 }
3149
/*
 * Compute the extra latency in bytes.
 *
 * The total is ReorderingBytes plus the pixel and meta chunk bytes of all
 * active DPPs (chunk sizes are multiplied by 1024). When GPUVMEnable is
 * set, each surface adds NumberOfDPP * dpte_group_bytes *
 * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVM and HostVM are enabled; then
 * it is HostVMMaxNonCachedPageTableLevels for a min page size below 2048,
 * one level fewer below 1048576 and two levels fewer otherwise, never
 * negative.
 *
 * The double total is converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int DynamicLevels = 0;
	unsigned int PerGroupFactor;
	unsigned int s;
	double TotalBytes;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels -
					(HostVMMinPageSize < 1048576 ? 1 : 2));
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return TotalBytes;

	PerGroupFactor = 1 + 8 * DynamicLevels;
	for (s = 0; s < NumberOfActiveSurfaces; ++s)
		TotalBytes += NumberOfDPP[s] * dpte_group_bytes[s] *
				PerGroupFactor * HostVMInefficiencyFactor;

	return TotalBytes;
}
3190
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic metadata
 * timing terms.
 *
 * Outputs:
 *  *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                      repeater delay) * PixelClock)
 *  *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *  *VUpdateOffsetPix - ceil(HTotal / 4)
 *  *TSetup           - sum of the three pixel values above / PixelClock
 *  *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *  *Tdmec            - HTotal / PixelClock
 *  *Tdmsks           - half of the VBlank time when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that many line times; halved again for
 *                      interlaced output not handled in the OPP.
 *
 * The repeater delay is
 * MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3245
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3246 double dml32_CalculateTWait(
3247 unsigned int PrefetchMode,
3248 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3249 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3250 bool DRRDisplay,
3251 double DRAMClockChangeLatency,
3252 double FCLKChangeLatency,
3253 double UrgentLatency,
3254 double SREnterPlusExitTime)
3255 {
3256 double TWait = 0.0;
3257
3258 if (PrefetchMode == 0 &&
3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3260 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3262 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3263 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3264 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3266 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3268 } else {
3269 TWait = UrgentLatency;
3270 }
3271
3272 #ifdef __DML_VBA_DEBUG__
3273 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3274 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3275 #endif
3276 return TWait;
3277 } // CalculateTWait
3278
3279 // Function: get_return_bw_mbps
3280 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3281 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3282 const int VoltageLevel,
3283 const bool HostVMEnable,
3284 const double DCFCLK,
3285 const double FabricClock,
3286 const double DRAMSpeed)
3287 {
3288 double ReturnBW = 0.;
3289 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3290 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3291 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3292 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3293 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3294 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3295 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3296 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3297 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3298 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3299 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3300
3301 if (HostVMEnable != true)
3302 ReturnBW = PixelDataOnlyReturnBW;
3303 else
3304 ReturnBW = PixelMixedWithVMDataReturnBW;
3305
3306 #ifdef __DML_VBA_DEBUG__
3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3308 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3309 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3310 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3311 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3312 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3313 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3314 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3315 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3316 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3317 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3318 #endif
3319 return ReturnBW;
3320 }
3321
3322 // Function: get_return_bw_mbps_vm_only
3323 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3324 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3325 const int VoltageLevel,
3326 const double DCFCLK,
3327 const double FabricClock,
3328 const double DRAMSpeed)
3329 {
3330 double VMDataOnlyReturnBW = dml_min3(
3331 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3333 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3335 * (VoltageLevel < 2 ?
3336 soc->pct_ideal_dram_bw_after_urgent_strobe :
3337 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3338 #ifdef __DML_VBA_DEBUG__
3339 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3340 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3341 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3342 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3343 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3344 #endif
3345 return VMDataOnlyReturnBW;
3346 }
3347
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3348 double dml32_CalculateExtraLatency(
3349 unsigned int RoundTripPingLatencyCycles,
3350 unsigned int ReorderingBytes,
3351 double DCFCLK,
3352 unsigned int TotalNumberOfActiveDPP,
3353 unsigned int PixelChunkSizeInKByte,
3354 unsigned int TotalNumberOfDCCActiveDPP,
3355 unsigned int MetaChunkSize,
3356 double ReturnBW,
3357 bool GPUVMEnable,
3358 bool HostVMEnable,
3359 unsigned int NumberOfActiveSurfaces,
3360 unsigned int NumberOfDPP[],
3361 unsigned int dpte_group_bytes[],
3362 double HostVMInefficiencyFactor,
3363 double HostVMMinPageSize,
3364 unsigned int HostVMMaxNonCachedPageTableLevels)
3365 {
3366 double ExtraLatencyBytes;
3367 double ExtraLatency;
3368
3369 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3370 ReorderingBytes,
3371 TotalNumberOfActiveDPP,
3372 PixelChunkSizeInKByte,
3373 TotalNumberOfDCCActiveDPP,
3374 MetaChunkSize,
3375 GPUVMEnable,
3376 HostVMEnable,
3377 NumberOfActiveSurfaces,
3378 NumberOfDPP,
3379 dpte_group_bytes,
3380 HostVMInefficiencyFactor,
3381 HostVMMinPageSize,
3382 HostVMMaxNonCachedPageTableLevels);
3383
3384 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3385
3386 #ifdef __DML_VBA_DEBUG__
3387 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3388 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3389 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3390 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3391 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3392 #endif
3393
3394 return ExtraLatency;
3395 } // CalculateExtraLatency
3396
/*
 * dml32_CalculatePrefetchSchedule - derive the prefetch schedule for index k.
 *
 * What the body does, in order:
 *  1. Calls dml32_CalculateVUpdateAndDynamicMetadataParameters(), which fills
 *     the local Tdmbf/Tdmec/Tdmsks and the VUpdateOffsetPix, VUpdateWidthPix
 *     and VReadyOffsetPix outputs.  TSetup is handed to that helper as well
 *     and *TSetup is read afterwards.
 *  2. Computes *Tdmdl and *Tdmdl_vm, and sets *NotEnoughTimeForDynamicMetadata
 *     when dynamic metadata is enabled for k and VStartup * LineTime is
 *     smaller than *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks.
 *  3. Computes the after-scaler delay, split into whole lines
 *     (*DSTYAfterScaler) and remaining pixels (*DSTXAfterScaler).
 *  4. Builds two candidate schedules, the "_oto" one (sized from
 *     prefetch_bw_oto) and the "_equ" one (sized from the lines left between
 *     VStartup and the setup/wait/after-scaler delays), selects one of them
 *     and derives the prefetch bandwidth, the lines needed to request VM and
 *     row data in vblank, the prefetch vertical ratios and the required
 *     prefetch pixel-data bandwidths.
 *  5. Computes *prefetch_vmrow_bw as the larger of the VM and row bandwidths.
 *
 * Inputs:
 *  v                         - mode library state; only read here.
 *  k                         - index into the per-element arrays of v
 *                              (DynamicMetadata*[k], OutputFormat[k]).
 *  HostVMInefficiencyFactor  - multiplier applied to PDE/PTE byte counts.
 *  myPipe                    - pipe description (clocks, HTotal, ODM mode, ...).
 *  DSCDelay, DPP_RECOUT_WIDTH - added into the after-scaler pixel delay.
 *  VStartup, MaxVStartup     - current and maximum VStartup, in lines.
 *  UrgentLatency, UrgentExtraLatency, TCalc, TWait, TPreReq - timing terms,
 *                              combined with LineTime (HTotal / PixelClock).
 *  PDEAndMetaPTEBytesFrame, MetaRowByte, PixelPTEBytesPerRow - byte counts
 *                              used for the VM / row fetch times.
 *  PrefetchSourceLinesY/C, swath_width_luma_ub/chroma_ub, SwathHeightY/C,
 *  VInitPreFillY/C, MaxNumSwathY/C - swath data used for prefetch_sw_bytes
 *                              and the prefetch vertical ratios.
 *  SwathWidthY, SwathWidthC  - not referenced by this function.
 *  ExtendPrefetchIfPossible  - when set together with TPreReq == 0 and
 *                              VStartup < MaxVStartup, the "_equ" schedule is
 *                              rejected (error) instead of accepted.
 *
 * Outputs: all pointer parameters after the "Output" marker, plus TSetup
 * related values produced by the helper in step 1.
 *
 * Return: true if an error was detected (MyError), false otherwise.  On the
 * normal error path the schedule outputs are zeroed before returning.  The
 * early return taken when Dppclk or Dispclk is 0.0 also returns true but
 * leaves every output that had not been written by then untouched.
 */
bool dml32_CalculatePrefetchSchedule(
		struct vba_vars_st *v,
		unsigned int k,
		double HostVMInefficiencyFactor,
		DmlPipe *myPipe,
		unsigned int DSCDelay,
		unsigned int DPP_RECOUT_WIDTH,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		unsigned int VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		unsigned int VInitPreFillC,
		unsigned int MaxNumSwathC,
		unsigned int swath_width_luma_ub,
		unsigned int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double TPreReq,
		bool ExtendPrefetchIfPossible,
		/* Output */
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double prefetch_bw_oto;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double LinesForPrefetchBandwidth = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double trip_to_mem;
	double Tvm_trips;
	double Tr0_trips;
	double Tvm_trips_rounded;
	double Tr0_trips_rounded;
	double Lsw_oto;
	double Tpre_rounded;
	double prefetch_bw_equ;
	double Tvm_equ;
	double Tr0_equ;
	double Tdmbf;
	double Tdmec;
	double Tdmsks;
	double prefetch_sw_bytes;
	double bytes_pp;
	double dep_bytes;
	/*
	 * NOTE(review): stored as unsigned int, so a fractional v->MaxVRatioPre
	 * would be truncated here - confirm against the type/value of MaxVRatioPre.
	 */
	unsigned int max_vratio_pre = v->MaxVRatioPre;
	double min_Lsw;
	double Tsw_est1 = 0;
	double Tsw_est3 = 0;

	/* Host VM levels only add trips when both GPUVM and HostVM are enabled. */
	if (v->GPUVMEnable == true && v->HostVMEnable == true)
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	else
		HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
#endif
	dml32_CalculateVUpdateAndDynamicMetadataParameters(
			v->MaxInterDCNTileRepeaters,
			myPipe->Dppclk,
			myPipe->Dispclk,
			myPipe->DCFClkDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			v->DynamicMetadataTransmittedBytes[k],
			v->DynamicMetadataLinesBeforeActiveRequired[k],
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,

			/* output */
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (v->DynamicMetadataVMEnabled == true)
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	else
		*Tdmdl = TWait + UrgentExtraLatency;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (v->DynamicMetadataEnable[k] == false)
		*Tdmdl = 0.0;
#endif

	/* Dynamic metadata must fit between VStartup and the start of active. */
	if (v->DynamicMetadataEnable[k] == true) {
		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
					__func__, Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
					__func__, Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
					__func__, *Tdmdl);
#endif
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);

	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;

	DISPCLKCycles = v->DISPCLKDelaySubtotal;

	/*
	 * Both clocks are divisors below.  This path reports an error without
	 * writing the outputs that have not been assigned yet.
	 */
	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
					myPipe->HActive / 2 : 0)
			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
	dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
	/* *DSTXAfterScaler is a double, so it must be printed with %f. */
	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
#endif

	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Normalize: whole lines go to Y, the remaining pixels stay in X. */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
	/* Both values are doubles, so they must be printed with %f. */
	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler: %f (final)\n", __func__, *DSTYAfterScaler);
#endif

	MyError = false;

	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);

	/* Trip times rounded up to a multiple of a quarter line, and *Tno_bw. */
	if (v->GPUVMEnable == true) {
		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		if (v->GPUVMMaxPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem *
					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
					4.0 * LineTime; // VBA_ERROR
			*Tno_bw = UrgentExtraLatency;
		} else {
			*Tno_bw = 0;
		}
	} else if (myPipe->DCCEnable == true) {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		*Tno_bw = 0;
	} else {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = LineTime / 2.0;
		*Tno_bw = 0;
	}
	/* Never allow less than a quarter line for either stage. */
	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);

	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
			|| myPipe->SourcePixelFormat == dm_420_12) {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	} else {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
	}

	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));

	/* Minimum swath lines: bounded by max_vratio_pre and at least one line. */
	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
	min_Lsw = dml_max(min_Lsw, 1.0);
	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;

	if (v->GPUVMEnable == true) {
		Tvm_oto = dml_max3(
				Tvm_trips,
				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				LineTime / 4.0);
	} else
		Tvm_oto = LineTime / 4.0;

	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4(
				Tr0_trips,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				(LineTime - Tvm_oto)/2.0,
				LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
		/*
		 * NOTE(review): "max2" prints LineTime - Tvm_oto, whereas the
		 * operand used in dml_max4() above is (LineTime - Tvm_oto)/2.0.
		 */
		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
	} else
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	/* Lines left for prefetch after setup/wait and the after-scaler delay. */
	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);

	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif

	/* Snap to a quarter-line grid (floor after adding an eighth of a line). */
	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
			__func__, VStartup * LineTime);
	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
	/* *DSTYAfterScaler is a double, so it must be printed with %f. */
	dml_print("DML::%s: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n",
			__func__, *DSTYAfterScaler);
#endif
	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	/* Swath bytes smaller than the dependent bytes are replaced by 2x the latter. */
	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1 &&
			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;

		/*
		 * Four candidate bandwidths; each is 0 when the time window in
		 * its denominator is not positive.
		 */
		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else
			PrefetchBandwidth1 = 0;

		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
					(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else
			PrefetchBandwidth3 = 0;


		if (VStartup == MaxVStartup &&
				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
				LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
			PrefetchBandwidth4 = prefetch_sw_bytes /
					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		} else {
			PrefetchBandwidth4 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
#endif
		{
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			/*
			 * A case is OK when its candidate bandwidth is positive and
			 * the VM / row fetch times it implies compare as required
			 * against the rounded trip times.  Case 1 wins over 2 over 3;
			 * candidate 4 is the fallback.
			 */
			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
						>= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
						>= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
								Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			if (Case1OK)
				prefetch_bw_equ = PrefetchBandwidth1;
			else if (Case2OK)
				prefetch_bw_equ = PrefetchBandwidth2;
			else if (Case3OK)
				prefetch_bw_equ = PrefetchBandwidth3;
			else
				prefetch_bw_equ = PrefetchBandwidth4;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

			if (prefetch_bw_equ > 0) {
				if (v->GPUVMEnable == true) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
							HostVMInefficiencyFactor / prefetch_bw_equ,
							Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
							(LineTime - Tvm_equ) / 2, LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
			}
		}

		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			if (dst_y_prefetch_oto * LineTime < TPreReq) {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			}
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
			/* Clamp to oto for bandwidth calculation */
			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
		} else {
			/* For mode programming we want to extend the prefetch as much as possible
			 * (up to oto, or as long as we can for equ) if we're not already applying
			 * the 60us prefetch requirement. This is to avoid intermittent underflow
			 * issues during prefetch.
			 *
			 * The prefetch extension is applied under the following scenarios:
			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
			 * 2. We're using subvp or drr methods of p-state switch, in which case we
			 *    we don't care if prefetch takes up more of the blanking time
			 *
			 * Mode programming typically chooses the smallest prefetch time possible
			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
			 * apply this prefetch extension when p-state in vblank is not required (UCLK
			 * p-states take up the most vblank time).
			 */
			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
				MyError = true;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
				TimeForFetchingMetaPTE = Tvm_equ;
				TimeForFetchingRowInVBlank = Tr0_equ;
				*PrefetchBandwidth = prefetch_bw_equ;
				/* Clamp to equ for bandwidth calculation */
				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
			}
		}

		/* Fetch times converted to lines, rounded up to a quarter line. */
		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank =
				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

		/* Lines left for pixel data after one VM and two row requests. */
		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
				__func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY =
							dml_max((double) PrefetchSourceLinesY /
									LinesToRequestPrefetchPixelData,
									(double) MaxNumSwathY * SwathHeightY /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
			/*
			 * NOTE(review): unlike the luma branch above there is no
			 * (VInitPreFillC > 3) guard here - confirm this is intended.
			 */
			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC =
							dml_max(*VRatioPrefetchC,
									(double) MaxNumSwathC * SwathHeightC /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
					/ LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
					__func__, *RequiredPrefetchPixDataBWLuma);
#endif
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
					LinesToRequestPrefetchPixelData
					* myPipe->BytePerPixelC
					* swath_width_chroma_ub / LineTime;
		} else {
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
					__func__, LinesToRequestPrefetchPixelData);
#endif
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
				(double)LinesToRequestPrefetchPixelData * LineTime +
				2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
				(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
				TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
				((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
				PixelPTEBytesPerRow);
#endif
	} else {
		MyError = true;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
				__func__, dst_y_prefetch_equ);
#endif
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		/* Zero bytes need no bandwidth; otherwise zero lines is an error. */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
					(*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
#endif
		}

		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
					(*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
#endif
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error, hand back a zeroed schedule. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
} // CalculatePrefetchSchedule
4116
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4117 void dml32_CalculateFlipSchedule(
4118 double HostVMInefficiencyFactor,
4119 double UrgentExtraLatency,
4120 double UrgentLatency,
4121 unsigned int GPUVMMaxPageTableLevels,
4122 bool HostVMEnable,
4123 unsigned int HostVMMaxNonCachedPageTableLevels,
4124 bool GPUVMEnable,
4125 double HostVMMinPageSize,
4126 double PDEAndMetaPTEBytesPerFrame,
4127 double MetaRowBytes,
4128 double DPTEBytesPerRow,
4129 double BandwidthAvailableForImmediateFlip,
4130 unsigned int TotImmediateFlipBytes,
4131 enum source_format_class SourcePixelFormat,
4132 double LineTime,
4133 double VRatio,
4134 double VRatioChroma,
4135 double Tno_bw,
4136 bool DCCEnable,
4137 unsigned int dpte_row_height,
4138 unsigned int meta_row_height,
4139 unsigned int dpte_row_height_chroma,
4140 unsigned int meta_row_height_chroma,
4141 bool use_one_row_for_frame_flip,
4142
4143 /* Output */
4144 double *DestinationLinesToRequestVMInImmediateFlip,
4145 double *DestinationLinesToRequestRowInImmediateFlip,
4146 double *final_flip_bw,
4147 bool *ImmediateFlipSupportedForPipe)
4148 {
4149 double min_row_time = 0.0;
4150 unsigned int HostVMDynamicLevelsTrips;
4151 double TimeForFetchingMetaPTEImmediateFlip;
4152 double TimeForFetchingRowInVBlankImmediateFlip;
4153 double ImmediateFlipBW = 1.0;
4154
4155 if (GPUVMEnable == true && HostVMEnable == true)
4156 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4157 else
4158 HostVMDynamicLevelsTrips = 0;
4159
4160 #ifdef __DML_VBA_DEBUG__
4161 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4162 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4163 #endif
4164
4165 if (TotImmediateFlipBytes > 0) {
4166 if (use_one_row_for_frame_flip) {
4167 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4168 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4169 } else {
4170 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4171 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4172 }
4173 if (GPUVMEnable == true) {
4174 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4175 HostVMInefficiencyFactor / ImmediateFlipBW,
4176 UrgentExtraLatency + UrgentLatency *
4177 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4178 LineTime / 4.0);
4179 } else {
4180 TimeForFetchingMetaPTEImmediateFlip = 0;
4181 }
4182 if ((GPUVMEnable == true || DCCEnable == true)) {
4183 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4184 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4185 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4186 } else {
4187 TimeForFetchingRowInVBlankImmediateFlip = 0;
4188 }
4189
4190 *DestinationLinesToRequestVMInImmediateFlip =
4191 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4192 *DestinationLinesToRequestRowInImmediateFlip =
4193 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4194
4195 if (GPUVMEnable == true) {
4196 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4197 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4198 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4199 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4200 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4201 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4202 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4203 } else {
4204 *final_flip_bw = 0;
4205 }
4206 } else {
4207 TimeForFetchingMetaPTEImmediateFlip = 0;
4208 TimeForFetchingRowInVBlankImmediateFlip = 0;
4209 *DestinationLinesToRequestVMInImmediateFlip = 0;
4210 *DestinationLinesToRequestRowInImmediateFlip = 0;
4211 *final_flip_bw = 0;
4212 }
4213
4214 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4215 if (GPUVMEnable == true && DCCEnable != true) {
4216 min_row_time = dml_min(dpte_row_height *
4217 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4218 } else if (GPUVMEnable != true && DCCEnable == true) {
4219 min_row_time = dml_min(meta_row_height *
4220 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4221 } else {
4222 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4223 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4224 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4225 }
4226 } else {
4227 if (GPUVMEnable == true && DCCEnable != true) {
4228 min_row_time = dpte_row_height * LineTime / VRatio;
4229 } else if (GPUVMEnable != true && DCCEnable == true) {
4230 min_row_time = meta_row_height * LineTime / VRatio;
4231 } else {
4232 min_row_time =
4233 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4234 }
4235 }
4236
4237 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4238 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4239 > min_row_time) {
4240 *ImmediateFlipSupportedForPipe = false;
4241 } else {
4242 *ImmediateFlipSupportedForPipe = true;
4243 }
4244
4245 #ifdef __DML_VBA_DEBUG__
4246 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4247 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4248 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4249 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4250 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4251 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4252 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4253 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4254 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4255 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4256 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4257 #endif
4258 } // CalculateFlipSchedule
4259
/*
 * Fill in v->Watermark from the SOC latency parameters, then compute for each
 * active surface the latency-hiding time provided by the line buffer and DET
 * buffer terms below, and compare it against the watermarks to obtain DRAM
 * clock change, FCLK change and USR retraining margins.
 *
 * From those margins the function reports:
 *   DRAMClockChangeSupport                   - vactive / vblank / unsupported,
 *                                              qualified by MALL full-frame or
 *                                              sub-viewport use
 *   MaxActiveDRAMClockChangeLatencySupported - per surface; 0 for phantom pipes
 *   SubViewportLinesNeededInMALL             - per surface, max of luma/chroma
 *   FCLKChangeSupport                        - vactive / vblank / unsupported
 *   MinActiveFCLKChangeLatencySupported      - smallest FCLK margin plus the
 *                                              FCLK change latency
 *   USRRetrainingSupport                     - false if any non-phantom surface
 *                                              has a negative retraining margin
 *   ActiveDRAMClockChangeLatencyMargin       - per surface
 *
 * DCFCLK and ReturnBW are accepted but not read by this function.
 */
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
		struct vba_vars_st *v,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		SOCParametersList mmSOCParameters,
		double SOCCLK,
		double DCFClkDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerSurface[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double DSTXAfterScaler[],
		double DSTYAfterScaler[],
		bool UnboundedRequestEnabled,
		unsigned int CompressedBufferSizeInkByte,

		/* Output */
		enum clock_change_support *DRAMClockChangeSupport,
		double MaxActiveDRAMClockChangeLatencySupported[],
		unsigned int SubViewportLinesNeededInMALL[],
		enum dm_fclock_change_support *FCLKChangeSupport,
		double *MinActiveFCLKChangeLatencySupported,
		bool *USRRetrainingSupport,
		double ActiveDRAMClockChangeLatencyMargin[])
{
	unsigned int i, j, k;
	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
	unsigned int DRAMClockChangeSupportNumber = 0;
	/* Only read after being set together with DRAMClockChangeSupportNumber = 1. */
	unsigned int LastSurfaceWithoutMargin = 0;
	unsigned int DRAMClockChangeMethod = 0;
	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
	double MinActiveFCLKChangeMargin = 0.;
	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
	double ActiveClockChangeLatencyHidingY;
	double ActiveClockChangeLatencyHidingC;
	double ActiveClockChangeLatencyHiding;
	double EffectiveDETBufferSizeY;
	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
	double TotalPixelBW = 0.0;
	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
	double     EffectiveLBLatencyHidingY;
	double     EffectiveLBLatencyHidingC;
	double     LinesInDETY[DC__NUM_DPP__MAX];
	double     LinesInDETC[DC__NUM_DPP__MAX];
	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
	double     FullDETBufferingTimeY;
	double     FullDETBufferingTimeC;
	double     WritebackDRAMClockChangeLatencyMargin;
	double     WritebackFCLKChangeLatencyMargin;
	double     WritebackLatencyHiding;
	bool    SameTimingForFCLKChange;

	unsigned int    TotalActiveWriteback = 0;
	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];

	/* Non-writeback watermarks: sums of the relevant SOC latencies. */
	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
#endif


	TotalActiveWriteback = 0;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->WritebackEnable[k] == true)
			TotalActiveWriteback = TotalActiveWriteback + 1;
	}

	/* With more than one active writeback a chunk-transfer term is added. */
	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}
	/*
	 * This adjustment used to read-modify-write
	 * WritebackDRAMClockChangeWatermark, which is unconditionally
	 * overwritten a few lines below, so it had no effect.  It follows the
	 * WritebackUrgentWatermark computation the same way the two later
	 * adjustments follow theirs, so it is applied to that watermark.
	 * NOTE(review): confirm against the DML specification that the
	 * writeback urgent watermark is meant to include USRRetrainingLatency.
	 */
	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
		/* Floating-point constants, matching the two sibling terms above. */
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
#endif

	/* Total pixel bandwidth over all surfaces; used to apportion the compressed buffer. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
	}

	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {

		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);


#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
		dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
		dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
#endif

		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveDETBufferSizeY = DETBufferSizeY[k];

		/* Unbounded request: add this surface's luma share of the compressed buffer. */
		if (UnboundedRequestEnabled) {
			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
					+ CompressedBufferSizeInkByte * 1024
							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
		}

		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];

		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];

		if (v->NumberOfActiveSurfaces > 1) {
			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
							/ v->PixelClock[k] / v->VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
					/ v->VRatioChroma[k];
			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
							/ v->PixelClock[k];
			if (v->NumberOfActiveSurfaces > 1) {
				/*
				 * Must be floating point, as in the luma term: with
				 * integer operands 1 / NumberOfActiveSurfaces is 0 and
				 * the whole factor collapses to 1.
				 */
				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
						- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
								/ v->PixelClock[k] / v->VRatioChroma[k];
			}
			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
					ActiveClockChangeLatencyHidingC);
		} else {
			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
		}

		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.DRAMClockChangeWatermark;
		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.FCLKChangeWatermark;
		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;

		if (v->WritebackEnable[k]) {
			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
			if (v->WritebackPixelFormat[k] == dm_444_64)
				WritebackLatencyHiding = WritebackLatencyHiding / 2;

			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackDRAMClockChangeWatermark;

			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackFCLKChangeWatermark;

			/*
			 * Each margin is limited by the writeback margin of the
			 * same clock domain (these two were previously crossed).
			 */
			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
					WritebackDRAMClockChangeLatencyMargin);
			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
					WritebackFCLKChangeLatencyMargin);
		}
		MaxActiveDRAMClockChangeLatencySupported[k] =
				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
						0 :
						(ActiveDRAMClockChangeLatencyMargin[k]
								+ mmSOCParameters.DRAMClockChangeLatency);
	}

	/*
	 * Mark which surface pairs are treated as synchronized: same surface,
	 * one blended onto the other's timing, both sharing a third surface's
	 * timing, identical timings with SynchronizeTimingsFinal, or either
	 * being a DRR display with SynchronizeDRRDisplaysForUCLKPStateChangeFinal.
	 */
	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
			if (i == j ||
					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
				SynchronizedSurfaces[i][j] = true;
			} else {
				SynchronizedSurfaces[i][j] = false;
			}
		}
	}

	/* Smallest FCLK margin among non-phantom surfaces, and the surface that has it. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
			SurfaceWithMinActiveFCLKChangeMargin = k;
		}
	}

	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;

	/* Smallest FCLK margin among surfaces not synchronized with that surface. */
	SameTimingForFCLKChange = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
					(SameTimingForFCLKChange ||
					ActiveFCLKChangeLatencyMargin[k] <
					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
			}
			SameTimingForFCLKChange = false;
		}
	}

	if (MinActiveFCLKChangeMargin > 0) {
		*FCLKChangeSupport = dm_fclock_change_vactive;
	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
			(PrefetchMode <= 1)) {
		*FCLKChangeSupport = dm_fclock_change_vblank;
	} else {
		*FCLKChangeSupport = dm_fclock_change_unsupported;
	}

	*USRRetrainingSupport = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(USRRetrainingLatencyMargin[k] < 0)) {
			*USRRetrainingSupport = false;
		}
	}

	/*
	 * DRAMClockChangeSupportNumber: 0 = no non-MALL surface lacks margin,
	 * 1 = the surfaces lacking margin are all synchronized with the first
	 * such surface (PrefetchMode 0 only), 2 = anything else.
	 */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
			if (PrefetchMode > 0) {
				DRAMClockChangeSupportNumber = 2;
			} else if (DRAMClockChangeSupportNumber == 0) {
				DRAMClockChangeSupportNumber = 1;
				LastSurfaceWithoutMargin = k;
			} else if (DRAMClockChangeSupportNumber == 1 &&
					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
				DRAMClockChangeSupportNumber = 2;
			}
		}
	}

	/* DRAMClockChangeMethod: 0 = no MALL, 1 = full frame, 2 = sub-viewport (last match wins). */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
			DRAMClockChangeMethod = 1;
		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
			DRAMClockChangeMethod = 2;
	}

	if (DRAMClockChangeMethod == 0) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else if (DRAMClockChangeMethod == 1) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/* Source lines needed in MALL per surface: max of the luma and chroma requirements. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		unsigned int dst_y_pstate;
		unsigned int src_y_pstate_l;
		unsigned int src_y_pstate_c;
		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;

		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
		/* SwathWidthY is a double, so it must be printed with %f. */
		dml_print("DML::%s: k=%d, SwathWidthY                  = %f\n", __func__, k, SwathWidthY[k]);
		dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
		dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
		dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
		dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
		dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
		dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
#endif
		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;

		if (BytePerPixelDETC[k] > 0) {
			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
			dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
			dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
			dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
#endif
		}
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
			__func__, *MinActiveFCLKChangeLatencySupported);
	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
#endif
} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4645
/*
 * Return the DISPCLK needed for writeback: the largest of three candidate
 * clocks (derived from the horizontal taps, the vertical taps, and the line
 * buffer size), passed through dml32_RoundToDFSGranularity() with
 * DISPCLKDPPCLKVCOSpeed.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not read here.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int WritebackSourceWidth,
		unsigned int WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	/* Whole-number ceilings of HTaps / 8 and destination width / 6. */
	const double htaps_ceil8 = dml_ceil(WritebackHTaps / 8.0, 1);
	const double dst_width_ceil6 = dml_ceil(WritebackDestinationWidth / 6.0, 1);

	const double clk_from_htaps = PixelClock * htaps_ceil8 / WritebackHRatio;
	const double clk_from_vtaps =
			PixelClock * (WritebackVTaps * dst_width_ceil6 + 8.0) / HTotal;
	const double clk_from_line_buffer = PixelClock * WritebackVTaps *
			(WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) /
			6.0 / WritebackSourceWidth;

	const double required_clk = dml_max3(clk_from_htaps, clk_from_vtaps, clk_from_line_buffer);

	return dml32_RoundToDFSGranularity(required_clk, 1, DISPCLKDPPCLKVCOSpeed);
}
4667
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4668 void dml32_CalculateMinAndMaxPrefetchMode(
4669 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4670 unsigned int *MinPrefetchMode,
4671 unsigned int *MaxPrefetchMode)
4672 {
4673 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4674 *MinPrefetchMode = 3;
4675 *MaxPrefetchMode = 3;
4676 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4677 *MinPrefetchMode = 2;
4678 *MaxPrefetchMode = 2;
4679 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4680 *MinPrefetchMode = 1;
4681 *MaxPrefetchMode = 1;
4682 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4683 *MinPrefetchMode = 0;
4684 *MaxPrefetchMode = 0;
4685 } else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4686 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4687 *MinPrefetchMode = 0;
4688 *MaxPrefetchMode = 3;
4689 } else {
4690 *MinPrefetchMode = 0;
4691 *MaxPrefetchMode = 3;
4692 }
4693 } // CalculateMinAndMaxPrefetchMode
4694
/*
 * Time to deliver one swath line of a single plane.
 *
 * For a vertical ratio of at most 1 the result is
 * swath_width_ub * dpp_per_surface / hratio / pixel_clock (the product is
 * formed in unsigned arithmetic first); otherwise it is
 * swath_width_ub / pscl_throughput / dppclk.
 */
static double swath_line_delivery_time(
		double vratio,
		unsigned int swath_width_ub,
		unsigned int dpp_per_surface,
		double hratio,
		double pixel_clock,
		double pscl_throughput,
		double dppclk)
{
	if (vratio <= 1)
		return swath_width_ub * dpp_per_surface / hratio / pixel_clock;

	return swath_width_ub / pscl_throughput / dppclk;
}

/*
 * Delivery time of one cursor request: the cursor width divided by
 * hratio and pixel_clock (vertical ratio <= 1) or by pscl_throughput and
 * dppclk (otherwise), and finally by the number of requests per cursor line.
 */
static double cursor_request_delivery_time(
		double vratio,
		unsigned int cursor_width,
		double hratio,
		double pixel_clock,
		double pscl_throughput,
		double dppclk,
		unsigned int req_per_width)
{
	if (vratio <= 1)
		return (double) cursor_width / hratio / pixel_clock / req_per_width;

	return (double) cursor_width / pscl_throughput / dppclk / req_per_width;
}

/*
 * Fill the per-surface line, request and cursor delivery time arrays.
 *
 * For every k < NumberOfActiveSurfaces this function writes:
 *  - DisplayPipeLineDeliveryTime{Luma,Chroma}[k] and their *Prefetch
 *    counterparts, selected by VRatio/VRatioChroma and
 *    VRatioPrefetchY/VRatioPrefetchC respectively;
 *  - DisplayPipeRequestDeliveryTime{Luma,Chroma}[k] and their *Prefetch
 *    counterparts, i.e. the line delivery time divided by the number of
 *    requests per swath (swath width divided by the 256-byte block width, or
 *    by the block height when IsVertical(SourceRotation[k]) holds);
 *  - CursorRequestDeliveryTime[k] and CursorRequestDeliveryTimePrefetch[k].
 *
 * All chroma outputs are 0 when BytePerPixelC[k] is 0, and both cursor
 * outputs are 0 when NumberOfCursors[k] is 0.  Only cursor index 0 of
 * CursorWidth/CursorBPP is consulted.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int NumberOfActiveSurfaces,
		double VRatio[],
		double VRatioChroma[],
		double VRatioPrefetchY[],
		double VRatioPrefetchC[],
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		unsigned int DPPPerSurface[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double Dppclk[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int NumberOfCursors[],
		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int BlockWidth256BytesY[],
		unsigned int BlockHeight256BytesY[],
		unsigned int BlockWidth256BytesC[],
		unsigned int BlockHeight256BytesC[],

		/* Output */
		double DisplayPipeLineDeliveryTimeLuma[],
		double DisplayPipeLineDeliveryTimeChroma[],
		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double DisplayPipeRequestDeliveryTimeLuma[],
		double DisplayPipeRequestDeliveryTimeChroma[],
		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double CursorRequestDeliveryTime[],
		double CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: per-line delivery times (active and prefetch, luma and chroma). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const bool has_chroma = BytePerPixelC[k] != 0;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		DisplayPipeLineDeliveryTimeLuma[k] = swath_line_delivery_time(
				VRatio[k], swath_width_luma_ub[k], DPPPerSurface[k],
				HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);

		DisplayPipeLineDeliveryTimeChroma[k] = !has_chroma ? 0 :
				swath_line_delivery_time(
					VRatioChroma[k], swath_width_chroma_ub[k], DPPPerSurface[k],
					HRatioChroma[k], PixelClock[k],
					PSCL_THROUGHPUT_CHROMA[k], Dppclk[k]);

		/* Prefetch variants use the same formulas, gated by the prefetch ratios. */
		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_line_delivery_time(
				VRatioPrefetchY[k], swath_width_luma_ub[k], DPPPerSurface[k],
				HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);

		DisplayPipeLineDeliveryTimeChromaPrefetch[k] = !has_chroma ? 0 :
				swath_line_delivery_time(
					VRatioPrefetchC[k], swath_width_chroma_ub[k], DPPPerSurface[k],
					HRatioChroma[k], PixelClock[k],
					PSCL_THROUGHPUT_CHROMA[k], Dppclk[k]);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: per-request delivery times = line time / requests per swath. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		double req_per_swath_ub;

		/* The quotient is an unsigned integer division, widened afterwards. */
		req_per_swath_ub = swath_width_luma_ub[k] /
				(!IsVertical(SourceRotation[k]) ?
					BlockWidth256BytesY[k] : BlockHeight256BytesY[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] =
				DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;

		if (BytePerPixelC[k] != 0) {
			req_per_swath_ub = swath_width_chroma_ub[k] /
					(!IsVertical(SourceRotation[k]) ?
						BlockWidth256BytesC[k] : BlockHeight256BytesC[k]);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery times (cursor 0 of each surface only). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Width * bpp / 256 / 8, rounded up to a whole request count. */
		unsigned int cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] *
				(double) CursorBPP[k][0] / 256.0 / 8.0, 1.0);

		if (NumberOfCursors[k] > 0) {
			CursorRequestDeliveryTime[k] = cursor_request_delivery_time(
					VRatio[k], CursorWidth[k][0], HRatio[k], PixelClock[k],
					PSCL_THROUGHPUT[k], Dppclk[k], cursor_req_per_width);
			CursorRequestDeliveryTimePrefetch[k] = cursor_request_delivery_time(
					VRatioPrefetchY[k], CursorWidth[k][0], HRatio[k], PixelClock[k],
					PSCL_THROUGHPUT[k], Dppclk[k], cursor_req_per_width);
		} else {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} /* dml32_CalculatePixelDeliveryTimes */
4875
/*
 * Time available per fetch unit when dst_lines destination lines are shared
 * by units units: dst_lines * htotal / pixel_clock / units, evaluated left to
 * right.
 */
static double meta_pte_time_per_unit(
		double dst_lines,
		unsigned int htotal,
		double pixel_clock,
		unsigned int units)
{
	return dst_lines * htotal / pixel_clock / units;
}

/*
 * Compute, for every k < NumberOfActiveSurfaces:
 *  - DST_Y_PER_{PTE,META}_ROW_NOM_{L,C}[k]: the row height divided by the
 *    matching vertical ratio (chroma entries are 0 when BytePerPixelC[k] is 0);
 *  - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}[k]: time per meta chunk,
 *    all 0 when DCCEnable[k] is false;
 *  - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k]: time per PTE
 *    group, all 0 when GPUVMEnable is false.
 *
 * The meta chunk count per row is the number of whole chunks in the meta row
 * plus 1, or plus 2 when the remainder exceeds
 * 2 * min_chunk_width - meta request width (height for vertical rotation).
 * The PTE group count per row is the row width divided by the group width,
 * halved when use_one_row_for_frame[k] is set, rounded up.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int PTERequestSizeY[],
		unsigned int PTERequestSizeC[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int k;

	/* Destination lines covered by one PTE row / one meta row. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const bool has_chroma = BytePerPixelC[k] != 0;

		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		DST_Y_PER_PTE_ROW_NOM_C[k] = has_chroma ?
				dpte_row_height_chroma[k] / VRatioChroma[k] : 0;
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		DST_Y_PER_META_ROW_NOM_C[k] = has_chroma ?
				meta_row_height_chroma[k] / VRatioChroma[k] : 0;
	}

	/* Meta chunk timing; only meaningful for DCC-enabled surfaces. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int whole_chunks;
		unsigned int leftover;
		unsigned int threshold;
		unsigned int chunks_per_row_ub;

		if (!DCCEnable[k]) {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Luma plane: all of this is unsigned integer arithmetic. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		whole_chunks = meta_row_width[k] / chunk_width;
		leftover = meta_row_width[k] % chunk_width;
		threshold = 2 * min_chunk_width -
				(!IsVertical(SourceRotation[k]) ? meta_req_width[k] : meta_req_height[k]);
		chunks_per_row_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerMetaChunkNominal[k] = meta_pte_time_per_unit(
				meta_row_height[k] / VRatio[k],
				HTotal[k], PixelClock[k], chunks_per_row_ub);
		TimePerMetaChunkVBlank[k] = meta_pte_time_per_unit(
				DestinationLinesToRequestRowInVBlank[k],
				HTotal[k], PixelClock[k], chunks_per_row_ub);
		TimePerMetaChunkFlip[k] = meta_pte_time_per_unit(
				DestinationLinesToRequestRowInImmediateFlip[k],
				HTotal[k], PixelClock[k], chunks_per_row_ub);

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Chroma plane: same scheme with the chroma inputs. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		/* Here the quotient is formed in double and truncated on assignment. */
		whole_chunks = (double) meta_row_width_chroma[k] / chunk_width;
		leftover = meta_row_width_chroma[k] % chunk_width;
		threshold = 2 * min_chunk_width -
				(!IsVertical(SourceRotation[k]) ?
					meta_req_width_chroma[k] : meta_req_height_chroma[k]);
		chunks_per_row_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerChromaMetaChunkNominal[k] = meta_pte_time_per_unit(
				meta_row_height_chroma[k] / VRatioChroma[k],
				HTotal[k], PixelClock[k], chunks_per_row_ub);
		TimePerChromaMetaChunkVBlank[k] = meta_pte_time_per_unit(
				DestinationLinesToRequestRowInVBlank[k],
				HTotal[k], PixelClock[k], chunks_per_row_ub);
		TimePerChromaMetaChunkFlip[k] = meta_pte_time_per_unit(
				DestinationLinesToRequestRowInImmediateFlip[k],
				HTotal[k], PixelClock[k], chunks_per_row_ub);
	}

	/* PTE group timing; only meaningful when GPUVM is enabled. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int dpte_group_width_luma;
		unsigned int dpte_groups_per_row_luma_ub;
		unsigned int dpte_group_width_chroma;
		unsigned int dpte_groups_per_row_chroma_ub;
		double groups_per_row;

		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		} else {
			/* Requests per group times the request extent along the scan. */
			dpte_group_width_luma = (double) dpte_group_bytes[k] /
					(double) PTERequestSizeY[k] *
					(!IsVertical(SourceRotation[k]) ?
						PixelPTEReqWidthY[k] : PixelPTEReqHeightY[k]);

			groups_per_row = (double) dpte_row_width_luma_ub[k] /
					(double) dpte_group_width_luma;
			if (use_one_row_for_frame[k])
				groups_per_row = groups_per_row / 2.0;
			dpte_groups_per_row_luma_ub = dml_ceil(groups_per_row, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
					__func__, k, dpte_group_width_luma);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
					__func__, k, dpte_groups_per_row_luma_ub);
#endif

			time_per_pte_group_nom_luma[k] = meta_pte_time_per_unit(
					DST_Y_PER_PTE_ROW_NOM_L[k],
					HTotal[k], PixelClock[k], dpte_groups_per_row_luma_ub);
			time_per_pte_group_vblank_luma[k] = meta_pte_time_per_unit(
					DestinationLinesToRequestRowInVBlank[k],
					HTotal[k], PixelClock[k], dpte_groups_per_row_luma_ub);
			time_per_pte_group_flip_luma[k] = meta_pte_time_per_unit(
					DestinationLinesToRequestRowInImmediateFlip[k],
					HTotal[k], PixelClock[k], dpte_groups_per_row_luma_ub);

			if (BytePerPixelC[k] == 0) {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			} else {
				dpte_group_width_chroma = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeC[k] *
						(!IsVertical(SourceRotation[k]) ?
							PixelPTEReqWidthC[k] : PixelPTEReqHeightC[k]);

				groups_per_row = (double) dpte_row_width_chroma_ub[k] /
						(double) dpte_group_width_chroma;
				if (use_one_row_for_frame[k])
					groups_per_row = groups_per_row / 2.0;
				dpte_groups_per_row_chroma_ub = dml_ceil(groups_per_row, 1.0);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
						__func__, k, dpte_group_width_chroma);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
						__func__, k, dpte_groups_per_row_chroma_ub);
#endif
				time_per_pte_group_nom_chroma[k] = meta_pte_time_per_unit(
						DST_Y_PER_PTE_ROW_NOM_C[k],
						HTotal[k], PixelClock[k], dpte_groups_per_row_chroma_ub);
				time_per_pte_group_vblank_chroma[k] = meta_pte_time_per_unit(
						DestinationLinesToRequestRowInVBlank[k],
						HTotal[k], PixelClock[k], dpte_groups_per_row_chroma_ub);
				time_per_pte_group_flip_chroma[k] = meta_pte_time_per_unit(
						DestinationLinesToRequestRowInImmediateFlip[k],
						HTotal[k], PixelClock[k], dpte_groups_per_row_chroma_ub);
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} /* dml32_CalculateMetaAndPTETimes */
5149
/* Number of VM groups needed to hold bytes, rounded up to a whole group. */
static double vm_group_count(unsigned int bytes, unsigned int group_bytes)
{
	return dml_ceil((double) bytes / (double) group_bytes, 1.0);
}

/*
 * Time per unit when dst_lines destination lines are shared by units units:
 * dst_lines * htotal / pixel_clock / units, evaluated left to right.
 */
static double vm_time_per_unit(
		double dst_lines,
		unsigned int htotal,
		double pixel_clock,
		unsigned int units)
{
	return dst_lines * htotal / pixel_clock / units;
}

/*
 * Compute TimePerVMGroup{VBlank,Flip}[k] and TimePerVMRequest{VBlank,Flip}[k]
 * for every k < NumberOfActiveSurfaces.
 *
 * The outputs are 0 unless GPUVMEnable is set and either DCCEnable[k] is set
 * or GPUVMMaxPageTableLevels is greater than 1.  Otherwise the group and
 * request counts of the lower VM stage are built from:
 *  - the dpde0 bytes only, when DCC is disabled;
 *  - the meta PTE bytes only, when DCC is enabled and
 *    GPUVMMaxPageTableLevels == 1;
 *  - both, plus one extra group (two with chroma), in the remaining case.
 * Chroma byte counts contribute only when BytePerPixelC[k] > 0.  Requests are
 * counted in units of 64 bytes.  All four times are halved when
 * GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * referenced by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1)) {
			const bool has_chroma = BytePerPixelC[k] > 0;
			/* Which tables the lower VM stage has to walk. */
			const bool with_meta = DCCEnable[k];
			const bool with_dpde = !(with_meta && GPUVMMaxPageTableLevels == 1);
			/* Walking both tables adds one group per plane. */
			double group_sum = (with_dpde && with_meta) ? (has_chroma ? 2 : 1) : 0;
			unsigned int num_group_per_lower_vm_stage;
			unsigned int num_req_per_lower_vm_stage = 0;

			if (with_dpde) {
				group_sum += vm_group_count(dpde0_bytes_per_frame_ub_l[k],
						vm_group_bytes[k]);
				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += vm_group_count(dpde0_bytes_per_frame_ub_c[k],
							vm_group_bytes[k]);
					num_req_per_lower_vm_stage +=
							dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (with_meta) {
				group_sum += vm_group_count(meta_pte_bytes_per_frame_ub_l[k],
						vm_group_bytes[k]);
				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += vm_group_count(meta_pte_bytes_per_frame_ub_c[k],
							vm_group_bytes[k]);
					num_req_per_lower_vm_stage +=
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* The summed group count is converted to an integer once. */
			num_group_per_lower_vm_stage = group_sum;

			TimePerVMGroupVBlank[k] = vm_time_per_unit(
					DestinationLinesToRequestVMInVBlank[k],
					HTotal[k], PixelClock[k], num_group_per_lower_vm_stage);
			TimePerVMGroupFlip[k] = vm_time_per_unit(
					DestinationLinesToRequestVMInImmediateFlip[k],
					HTotal[k], PixelClock[k], num_group_per_lower_vm_stage);
			TimePerVMRequestVBlank[k] = vm_time_per_unit(
					DestinationLinesToRequestVMInVBlank[k],
					HTotal[k], PixelClock[k], num_req_per_lower_vm_stage);
			TimePerVMRequestFlip[k] = vm_time_per_unit(
					DestinationLinesToRequestVMInImmediateFlip[k],
					HTotal[k], PixelClock[k], num_req_per_lower_vm_stage);

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} /* dml32_CalculateVMGroupAndRequestTimes */
5303
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5304 void dml32_CalculateDCCConfiguration(
5305 bool DCCEnabled,
5306 bool DCCProgrammingAssumesScanDirectionUnknown,
5307 enum source_format_class SourcePixelFormat,
5308 unsigned int SurfaceWidthLuma,
5309 unsigned int SurfaceWidthChroma,
5310 unsigned int SurfaceHeightLuma,
5311 unsigned int SurfaceHeightChroma,
5312 unsigned int nomDETInKByte,
5313 unsigned int RequestHeight256ByteLuma,
5314 unsigned int RequestHeight256ByteChroma,
5315 enum dm_swizzle_mode TilingFormat,
5316 unsigned int BytePerPixelY,
5317 unsigned int BytePerPixelC,
5318 double BytePerPixelDETY,
5319 double BytePerPixelDETC,
5320 enum dm_rotation_angle SourceRotation,
5321 /* Output */
5322 unsigned int *MaxUncompressedBlockLuma,
5323 unsigned int *MaxUncompressedBlockChroma,
5324 unsigned int *MaxCompressedBlockLuma,
5325 unsigned int *MaxCompressedBlockChroma,
5326 unsigned int *IndependentBlockLuma,
5327 unsigned int *IndependentBlockChroma)
5328 {
5329 typedef enum {
5330 REQ_256Bytes,
5331 REQ_128BytesNonContiguous,
5332 REQ_128BytesContiguous,
5333 REQ_NA
5334 } RequestType;
5335
5336 RequestType RequestLuma;
5337 RequestType RequestChroma;
5338
5339 unsigned int segment_order_horz_contiguous_luma;
5340 unsigned int segment_order_horz_contiguous_chroma;
5341 unsigned int segment_order_vert_contiguous_luma;
5342 unsigned int segment_order_vert_contiguous_chroma;
5343 unsigned int req128_horz_wc_l;
5344 unsigned int req128_horz_wc_c;
5345 unsigned int req128_vert_wc_l;
5346 unsigned int req128_vert_wc_c;
5347 unsigned int MAS_vp_horz_limit;
5348 unsigned int MAS_vp_vert_limit;
5349 unsigned int max_vp_horz_width;
5350 unsigned int max_vp_vert_height;
5351 unsigned int eff_surf_width_l;
5352 unsigned int eff_surf_width_c;
5353 unsigned int eff_surf_height_l;
5354 unsigned int eff_surf_height_c;
5355 unsigned int full_swath_bytes_horz_wc_l;
5356 unsigned int full_swath_bytes_horz_wc_c;
5357 unsigned int full_swath_bytes_vert_wc_l;
5358 unsigned int full_swath_bytes_vert_wc_c;
5359 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5360
5361 unsigned int yuv420;
5362 unsigned int horz_div_l;
5363 unsigned int horz_div_c;
5364 unsigned int vert_div_l;
5365 unsigned int vert_div_c;
5366
5367 unsigned int swath_buf_size;
5368 double detile_buf_vp_horz_limit;
5369 double detile_buf_vp_vert_limit;
5370
5371 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5372 SourcePixelFormat == dm_420_12) ? 1 : 0);
5373 horz_div_l = 1;
5374 horz_div_c = 1;
5375 vert_div_l = 1;
5376 vert_div_c = 1;
5377
5378 if (BytePerPixelY == 1)
5379 vert_div_l = 0;
5380 if (BytePerPixelC == 1)
5381 vert_div_c = 0;
5382
5383 if (BytePerPixelC == 0) {
5384 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5385 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5386 BytePerPixelY / (1 + horz_div_l));
5387 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5388 (1 + vert_div_l));
5389 } else {
5390 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5391 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5392 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5393 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5394 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5395 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5396 (1 + vert_div_c) / (1 + yuv420));
5397 }
5398
5399 if (SourcePixelFormat == dm_420_10) {
5400 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5401 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5402 }
5403
5404 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5405 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5406
5407 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5408 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5409 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5410 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5411 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5412 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5413 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5414 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5415
5416 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5417 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5418 if (BytePerPixelC > 0) {
5419 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5420 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5421 } else {
5422 full_swath_bytes_horz_wc_c = 0;
5423 full_swath_bytes_vert_wc_c = 0;
5424 }
5425
5426 if (SourcePixelFormat == dm_420_10) {
5427 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5428 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5429 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5430 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5431 }
5432
5433 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5434 req128_horz_wc_l = 0;
5435 req128_horz_wc_c = 0;
5436 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5437 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5438 req128_horz_wc_l = 0;
5439 req128_horz_wc_c = 1;
5440 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5441 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5442 req128_horz_wc_l = 1;
5443 req128_horz_wc_c = 0;
5444 } else {
5445 req128_horz_wc_l = 1;
5446 req128_horz_wc_c = 1;
5447 }
5448
5449 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5450 req128_vert_wc_l = 0;
5451 req128_vert_wc_c = 0;
5452 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5453 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5454 req128_vert_wc_l = 0;
5455 req128_vert_wc_c = 1;
5456 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5457 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5458 req128_vert_wc_l = 1;
5459 req128_vert_wc_c = 0;
5460 } else {
5461 req128_vert_wc_l = 1;
5462 req128_vert_wc_c = 1;
5463 }
5464
5465 if (BytePerPixelY == 2) {
5466 segment_order_horz_contiguous_luma = 0;
5467 segment_order_vert_contiguous_luma = 1;
5468 } else {
5469 segment_order_horz_contiguous_luma = 1;
5470 segment_order_vert_contiguous_luma = 0;
5471 }
5472
5473 if (BytePerPixelC == 2) {
5474 segment_order_horz_contiguous_chroma = 0;
5475 segment_order_vert_contiguous_chroma = 1;
5476 } else {
5477 segment_order_horz_contiguous_chroma = 1;
5478 segment_order_vert_contiguous_chroma = 0;
5479 }
5480 #ifdef __DML_VBA_DEBUG__
5481 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5482 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5483 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5484 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5485 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5486 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5487 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5488 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5489 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5490 __func__, segment_order_horz_contiguous_chroma);
5491 #endif
5492
5493 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5494 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5495 RequestLuma = REQ_256Bytes;
5496 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5497 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5498 RequestLuma = REQ_128BytesNonContiguous;
5499 else
5500 RequestLuma = REQ_128BytesContiguous;
5501
5502 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5503 RequestChroma = REQ_256Bytes;
5504 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5505 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5506 RequestChroma = REQ_128BytesNonContiguous;
5507 else
5508 RequestChroma = REQ_128BytesContiguous;
5509
5510 } else if (!IsVertical(SourceRotation)) {
5511 if (req128_horz_wc_l == 0)
5512 RequestLuma = REQ_256Bytes;
5513 else if (segment_order_horz_contiguous_luma == 0)
5514 RequestLuma = REQ_128BytesNonContiguous;
5515 else
5516 RequestLuma = REQ_128BytesContiguous;
5517
5518 if (req128_horz_wc_c == 0)
5519 RequestChroma = REQ_256Bytes;
5520 else if (segment_order_horz_contiguous_chroma == 0)
5521 RequestChroma = REQ_128BytesNonContiguous;
5522 else
5523 RequestChroma = REQ_128BytesContiguous;
5524
5525 } else {
5526 if (req128_vert_wc_l == 0)
5527 RequestLuma = REQ_256Bytes;
5528 else if (segment_order_vert_contiguous_luma == 0)
5529 RequestLuma = REQ_128BytesNonContiguous;
5530 else
5531 RequestLuma = REQ_128BytesContiguous;
5532
5533 if (req128_vert_wc_c == 0)
5534 RequestChroma = REQ_256Bytes;
5535 else if (segment_order_vert_contiguous_chroma == 0)
5536 RequestChroma = REQ_128BytesNonContiguous;
5537 else
5538 RequestChroma = REQ_128BytesContiguous;
5539 }
5540
5541 if (RequestLuma == REQ_256Bytes) {
5542 *MaxUncompressedBlockLuma = 256;
5543 *MaxCompressedBlockLuma = 256;
5544 *IndependentBlockLuma = 0;
5545 } else if (RequestLuma == REQ_128BytesContiguous) {
5546 *MaxUncompressedBlockLuma = 256;
5547 *MaxCompressedBlockLuma = 128;
5548 *IndependentBlockLuma = 128;
5549 } else {
5550 *MaxUncompressedBlockLuma = 256;
5551 *MaxCompressedBlockLuma = 64;
5552 *IndependentBlockLuma = 64;
5553 }
5554
5555 if (RequestChroma == REQ_256Bytes) {
5556 *MaxUncompressedBlockChroma = 256;
5557 *MaxCompressedBlockChroma = 256;
5558 *IndependentBlockChroma = 0;
5559 } else if (RequestChroma == REQ_128BytesContiguous) {
5560 *MaxUncompressedBlockChroma = 256;
5561 *MaxCompressedBlockChroma = 128;
5562 *IndependentBlockChroma = 128;
5563 } else {
5564 *MaxUncompressedBlockChroma = 256;
5565 *MaxCompressedBlockChroma = 64;
5566 *IndependentBlockChroma = 64;
5567 }
5568
5569 if (DCCEnabled != true || BytePerPixelC == 0) {
5570 *MaxUncompressedBlockChroma = 0;
5571 *MaxCompressedBlockChroma = 0;
5572 *IndependentBlockChroma = 0;
5573 }
5574
5575 if (DCCEnabled != true) {
5576 *MaxUncompressedBlockLuma = 0;
5577 *MaxCompressedBlockLuma = 0;
5578 *IndependentBlockLuma = 0;
5579 }
5580
5581 #ifdef __DML_VBA_DEBUG__
5582 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5583 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5584 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5585 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5586 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5587 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5588 #endif
5589
5590 } // CalculateDCCConfiguration
5591
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5592 void dml32_CalculateStutterEfficiency(
5593 unsigned int CompressedBufferSizeInkByte,
5594 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5595 bool UnboundedRequestEnabled,
5596 unsigned int MetaFIFOSizeInKEntries,
5597 unsigned int ZeroSizeBufferEntries,
5598 unsigned int PixelChunkSizeInKByte,
5599 unsigned int NumberOfActiveSurfaces,
5600 unsigned int ROBBufferSizeInKByte,
5601 double TotalDataReadBandwidth,
5602 double DCFCLK,
5603 double ReturnBW,
5604 unsigned int CompbufReservedSpace64B,
5605 unsigned int CompbufReservedSpaceZs,
5606 double SRExitTime,
5607 double SRExitZ8Time,
5608 bool SynchronizeTimingsFinal,
5609 unsigned int BlendingAndTiming[],
5610 double StutterEnterPlusExitWatermark,
5611 double Z8StutterEnterPlusExitWatermark,
5612 bool ProgressiveToInterlaceUnitInOPP,
5613 bool Interlace[],
5614 double MinTTUVBlank[],
5615 unsigned int DPPPerSurface[],
5616 unsigned int DETBufferSizeY[],
5617 unsigned int BytePerPixelY[],
5618 double BytePerPixelDETY[],
5619 double SwathWidthY[],
5620 unsigned int SwathHeightY[],
5621 unsigned int SwathHeightC[],
5622 double NetDCCRateLuma[],
5623 double NetDCCRateChroma[],
5624 double DCCFractionOfZeroSizeRequestsLuma[],
5625 double DCCFractionOfZeroSizeRequestsChroma[],
5626 unsigned int HTotal[],
5627 unsigned int VTotal[],
5628 double PixelClock[],
5629 double VRatio[],
5630 enum dm_rotation_angle SourceRotation[],
5631 unsigned int BlockHeight256BytesY[],
5632 unsigned int BlockWidth256BytesY[],
5633 unsigned int BlockHeight256BytesC[],
5634 unsigned int BlockWidth256BytesC[],
5635 unsigned int DCCYMaxUncompressedBlock[],
5636 unsigned int DCCCMaxUncompressedBlock[],
5637 unsigned int VActive[],
5638 bool DCCEnable[],
5639 bool WritebackEnable[],
5640 double ReadBandwidthSurfaceLuma[],
5641 double ReadBandwidthSurfaceChroma[],
5642 double meta_row_bw[],
5643 double dpte_row_bw[],
5644
5645 /* Output */
5646 double *StutterEfficiencyNotIncludingVBlank,
5647 double *StutterEfficiency,
5648 unsigned int *NumberOfStutterBurstsPerFrame,
5649 double *Z8StutterEfficiencyNotIncludingVBlank,
5650 double *Z8StutterEfficiency,
5651 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5652 double *StutterPeriod,
5653 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5654 {
5655
5656 bool FoundCriticalSurface = false;
5657 unsigned int SwathSizeCriticalSurface = 0;
5658 unsigned int LastChunkOfSwathSize;
5659 unsigned int MissingPartOfLastSwathOfDETSize;
5660 double LastZ8StutterPeriod = 0.0;
5661 double LastStutterPeriod = 0.0;
5662 unsigned int TotalNumberOfActiveOTG = 0;
5663 double doublePixelClock;
5664 unsigned int doubleHTotal;
5665 unsigned int doubleVTotal;
5666 bool SameTiming = true;
5667 double DETBufferingTimeY;
5668 double SwathWidthYCriticalSurface = 0.0;
5669 double SwathHeightYCriticalSurface = 0.0;
5670 double VActiveTimeCriticalSurface = 0.0;
5671 double FrameTimeCriticalSurface = 0.0;
5672 unsigned int BytePerPixelYCriticalSurface = 0;
5673 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5674 unsigned int DETBufferSizeYCriticalSurface = 0;
5675 double MinTTUVBlankCriticalSurface = 0.0;
5676 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5677 bool doublePlaneCriticalSurface = 0;
5678 bool doublePipeCriticalSurface = 0;
5679 double TotalCompressedReadBandwidth;
5680 double TotalRowReadBandwidth;
5681 double AverageDCCCompressionRate;
5682 double EffectiveCompressedBufferSize;
5683 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5684 double StutterBurstTime;
5685 unsigned int TotalActiveWriteback;
5686 double LinesInDETY;
5687 double LinesInDETYRoundedDownToSwath;
5688 double MaximumEffectiveCompressionLuma;
5689 double MaximumEffectiveCompressionChroma;
5690 double TotalZeroSizeRequestReadBandwidth;
5691 double TotalZeroSizeCompressedReadBandwidth;
5692 double AverageDCCZeroSizeFraction;
5693 double AverageZeroSizeCompressionRate;
5694 unsigned int k;
5695
5696 TotalZeroSizeRequestReadBandwidth = 0;
5697 TotalZeroSizeCompressedReadBandwidth = 0;
5698 TotalRowReadBandwidth = 0;
5699 TotalCompressedReadBandwidth = 0;
5700
5701 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5702 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5703 if (DCCEnable[k] == true) {
5704 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5705 || (!IsVertical(SourceRotation[k])
5706 && BlockHeight256BytesY[k] > SwathHeightY[k])
5707 || DCCYMaxUncompressedBlock[k] < 256) {
5708 MaximumEffectiveCompressionLuma = 2;
5709 } else {
5710 MaximumEffectiveCompressionLuma = 4;
5711 }
5712 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5713 + ReadBandwidthSurfaceLuma[k]
5714 / dml_min(NetDCCRateLuma[k],
5715 MaximumEffectiveCompressionLuma);
5716 #ifdef __DML_VBA_DEBUG__
5717 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5718 __func__, k, ReadBandwidthSurfaceLuma[k]);
5719 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5720 __func__, k, NetDCCRateLuma[k]);
5721 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5722 __func__, k, MaximumEffectiveCompressionLuma);
5723 #endif
5724 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5725 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5726 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5727 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5728 / MaximumEffectiveCompressionLuma;
5729
5730 if (ReadBandwidthSurfaceChroma[k] > 0) {
5731 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5732 || (!IsVertical(SourceRotation[k])
5733 && BlockHeight256BytesC[k] > SwathHeightC[k])
5734 || DCCCMaxUncompressedBlock[k] < 256) {
5735 MaximumEffectiveCompressionChroma = 2;
5736 } else {
5737 MaximumEffectiveCompressionChroma = 4;
5738 }
5739 TotalCompressedReadBandwidth =
5740 TotalCompressedReadBandwidth
5741 + ReadBandwidthSurfaceChroma[k]
5742 / dml_min(NetDCCRateChroma[k],
5743 MaximumEffectiveCompressionChroma);
5744 #ifdef __DML_VBA_DEBUG__
5745 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5746 __func__, k, ReadBandwidthSurfaceChroma[k]);
5747 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5748 __func__, k, NetDCCRateChroma[k]);
5749 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5750 __func__, k, MaximumEffectiveCompressionChroma);
5751 #endif
5752 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5753 + ReadBandwidthSurfaceChroma[k]
5754 * DCCFractionOfZeroSizeRequestsChroma[k];
5755 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5756 + ReadBandwidthSurfaceChroma[k]
5757 * DCCFractionOfZeroSizeRequestsChroma[k]
5758 / MaximumEffectiveCompressionChroma;
5759 }
5760 } else {
5761 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5762 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5763 }
5764 TotalRowReadBandwidth = TotalRowReadBandwidth
5765 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5766 }
5767 }
5768
5769 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5770 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5771
5772 #ifdef __DML_VBA_DEBUG__
5773 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5774 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5775 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5776 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5777 __func__, TotalZeroSizeCompressedReadBandwidth);
5778 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5779 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5780 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5781 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5782 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5783 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5784 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5785 #endif
5786 if (AverageDCCZeroSizeFraction == 1) {
5787 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5788 / TotalZeroSizeCompressedReadBandwidth;
5789 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5790 * AverageZeroSizeCompressionRate
5791 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5792 * AverageZeroSizeCompressionRate;
5793 } else if (AverageDCCZeroSizeFraction > 0) {
5794 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5795 / TotalZeroSizeCompressedReadBandwidth;
5796 EffectiveCompressedBufferSize = dml_min(
5797 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5798 (double) MetaFIFOSizeInKEntries * 1024 * 64
5799 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5800 + 1 / AverageDCCCompressionRate))
5801 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5802 * AverageDCCCompressionRate,
5803 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5804 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5805
5806 #ifdef __DML_VBA_DEBUG__
5807 dml_print("DML::%s: min 1 = %f\n", __func__,
5808 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5809 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5810 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5811 AverageDCCCompressionRate));
5812 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5813 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5814 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5815 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5816 #endif
5817 } else {
5818 EffectiveCompressedBufferSize = dml_min(
5819 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5820 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5821 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5822 * AverageDCCCompressionRate;
5823
5824 #ifdef __DML_VBA_DEBUG__
5825 dml_print("DML::%s: min 1 = %f\n", __func__,
5826 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5827 dml_print("DML::%s: min 2 = %f\n", __func__,
5828 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5829 #endif
5830 }
5831
5832 #ifdef __DML_VBA_DEBUG__
5833 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5834 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5835 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5836 #endif
5837
5838 *StutterPeriod = 0;
5839
5840 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5841 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5842 LinesInDETY = ((double) DETBufferSizeY[k]
5843 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5844 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5845 / BytePerPixelDETY[k] / SwathWidthY[k];
5846 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5847 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5848 / VRatio[k];
5849 #ifdef __DML_VBA_DEBUG__
5850 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5851 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5852 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5853 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5854 __func__, k, ReadBandwidthSurfaceLuma[k]);
5855 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5856 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5857 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5858 __func__, k, LinesInDETYRoundedDownToSwath);
5859 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5860 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5861 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5862 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5863 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5864 #endif
5865
5866 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5867 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5868
5869 FoundCriticalSurface = true;
5870 *StutterPeriod = DETBufferingTimeY;
5871 FrameTimeCriticalSurface = (
5872 isInterlaceTiming ?
5873 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5874 * (double) HTotal[k] / PixelClock[k];
5875 VActiveTimeCriticalSurface = (
5876 isInterlaceTiming ?
5877 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5878 * (double) HTotal[k] / PixelClock[k];
5879 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5880 SwathWidthYCriticalSurface = SwathWidthY[k];
5881 SwathHeightYCriticalSurface = SwathHeightY[k];
5882 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5883 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5884 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5885 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5886 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5887 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5888 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5889
5890 #ifdef __DML_VBA_DEBUG__
5891 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5892 __func__, k, FoundCriticalSurface);
5893 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5894 __func__, k, *StutterPeriod);
5895 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5896 __func__, k, MinTTUVBlankCriticalSurface);
5897 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5898 __func__, k, FrameTimeCriticalSurface);
5899 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5900 __func__, k, VActiveTimeCriticalSurface);
5901 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5902 __func__, k, BytePerPixelYCriticalSurface);
5903 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5904 __func__, k, SwathWidthYCriticalSurface);
5905 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5906 __func__, k, SwathHeightYCriticalSurface);
5907 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5908 __func__, k, BlockWidth256BytesYCriticalSurface);
5909 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5910 __func__, k, doublePlaneCriticalSurface);
5911 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5912 __func__, k, doublePipeCriticalSurface);
5913 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5914 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5915 #endif
5916 }
5917 }
5918 }
5919
5920 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5921 EffectiveCompressedBufferSize);
5922 #ifdef __DML_VBA_DEBUG__
5923 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5924 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5925 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5926 __func__, *StutterPeriod * TotalDataReadBandwidth);
5927 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5928 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5929 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5930 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5931 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5932 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5933 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5934 #endif
5935
5936 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5937 / ReturnBW
5938 + (*StutterPeriod * TotalDataReadBandwidth
5939 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5940 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5941 #ifdef __DML_VBA_DEBUG__
5942 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5943 AverageDCCCompressionRate / ReturnBW);
5944 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5945 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5946 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5947 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5948 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5949 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5950 #endif
5951 StutterBurstTime = dml_max(StutterBurstTime,
5952 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5953 * SwathWidthYCriticalSurface / ReturnBW);
5954
5955 #ifdef __DML_VBA_DEBUG__
5956 dml_print("DML::%s: Time to finish residue swath=%f\n",
5957 __func__,
5958 LinesToFinishSwathTransferStutterCriticalSurface *
5959 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5960 #endif
5961
5962 TotalActiveWriteback = 0;
5963 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5964 if (WritebackEnable[k])
5965 TotalActiveWriteback = TotalActiveWriteback + 1;
5966 }
5967
5968 if (TotalActiveWriteback == 0) {
5969 #ifdef __DML_VBA_DEBUG__
5970 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5971 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5972 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5973 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5974 #endif
5975 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5976 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5977 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5978 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5979 *NumberOfStutterBurstsPerFrame = (
5980 *StutterEfficiencyNotIncludingVBlank > 0 ?
5981 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5982 *Z8NumberOfStutterBurstsPerFrame = (
5983 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5984 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5985 } else {
5986 *StutterEfficiencyNotIncludingVBlank = 0.;
5987 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5988 *NumberOfStutterBurstsPerFrame = 0;
5989 *Z8NumberOfStutterBurstsPerFrame = 0;
5990 }
5991 #ifdef __DML_VBA_DEBUG__
5992 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5993 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5994 __func__, *StutterEfficiencyNotIncludingVBlank);
5995 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5996 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5997 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5998 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5999 #endif
6000
6001 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6002 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6003 if (BlendingAndTiming[k] == k) {
6004 if (TotalNumberOfActiveOTG == 0) {
6005 doublePixelClock = PixelClock[k];
6006 doubleHTotal = HTotal[k];
6007 doubleVTotal = VTotal[k];
6008 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6009 || doubleVTotal != VTotal[k]) {
6010 SameTiming = false;
6011 }
6012 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6013 }
6014 }
6015 }
6016
6017 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6018 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6019
6020 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6021 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6022 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6023 + StutterBurstTime * VActiveTimeCriticalSurface
6024 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6025 } else {
6026 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6027 }
6028 } else {
6029 *StutterEfficiency = 0;
6030 }
6031
6032 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6033 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6034 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6035 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6036 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6037 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6038 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6039 } else {
6040 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6041 }
6042 } else {
6043 *Z8StutterEfficiency = 0.;
6044 }
6045
6046 #ifdef __DML_VBA_DEBUG__
6047 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6048 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6049 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6050 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6051 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6052 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6053 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6054 __func__, *StutterEfficiencyNotIncludingVBlank);
6055 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6056 #endif
6057
6058 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6059 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6060 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6061 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6062 - DETBufferSizeYCriticalSurface;
6063
6064 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6065 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6066 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6067 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6068
6069 #ifdef __DML_VBA_DEBUG__
6070 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6071 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6072 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6073 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6074 #endif
6075 } // CalculateStutterEfficiency
6076
/*
 * Derive the DET size limits and the minimum compressed buffer size from the
 * return buffer and ROB sizes.
 *
 * Values written through the output pointers:
 *   *MaxTotalDETInKByte             = dml_ceil(4/5 * (ConfigReturnBufferSizeInKByte
 *                                     + ROBBufferSizeInKByte), 64)
 *   *nomDETInKByte                  = dml_floor(*MaxTotalDETInKByte / MaxNumDPP, 64),
 *                                     replaced by nomDETInKByteOverrideValue when
 *                                     nomDETInKByteOverrideEnable is set
 *   *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte
 *                                     - *MaxTotalDETInKByte (unsigned arithmetic)
 *
 * NOTE(review): MaxNumDPP is used as a divisor without a zero check; callers
 * are presumably expected to pass a non-zero pipe count.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	/* Sum of both buffers, in double so the 4/5 scaling is not truncated. */
	double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);

	/* Default nominal DET: the total split across the pipes. */
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);

	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The override replaces the computed nominal DET size after it has been logged. */
	if (nomDETInKByteOverrideEnable) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
	}
} // CalculateMaxDETAndMinCompressedBufferSize
6113
/*
 * Decide whether the active-period bandwidth demand of all surfaces fits in
 * ReturnBW.
 *
 * For each of the first NumberOfActiveSurfaces entries the demand is
 *   ReadBandwidthLuma * UrgentBurstFactorLuma
 * + ReadBandwidthChroma * UrgentBurstFactorChroma
 * + cursor_bw * UrgentBurstFactorCursor
 * + NumberOfDPP * meta_row_bandwidth
 * + NumberOfDPP * dpte_row_bandwidth
 *
 * Returns true only when the accumulated demand is <= ReturnBW and no
 * surface has its NotUrgentLatencyHiding flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_failed = false;
	bool supported;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_failed = true;

		/* Terms are added one by one onto the running total, left to right. */
		total_bw = total_bw
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = !latency_hiding_failed && (total_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_failed);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6152
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6153 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6154 double ReturnBW,
6155 bool NotUrgentLatencyHiding[],
6156 double ReadBandwidthLuma[],
6157 double ReadBandwidthChroma[],
6158 double PrefetchBandwidthLuma[],
6159 double PrefetchBandwidthChroma[],
6160 double cursor_bw[],
6161 double meta_row_bandwidth[],
6162 double dpte_row_bandwidth[],
6163 double cursor_bw_pre[],
6164 double prefetch_vmrow_bw[],
6165 unsigned int NumberOfDPP[],
6166 double UrgentBurstFactorLuma[],
6167 double UrgentBurstFactorChroma[],
6168 double UrgentBurstFactorCursor[],
6169 double UrgentBurstFactorLumaPre[],
6170 double UrgentBurstFactorChromaPre[],
6171 double UrgentBurstFactorCursorPre[],
6172 double PrefetchBW[],
6173 double VRatio[],
6174 double MaxVRatioPre,
6175
6176 /* output */
6177 double *MaxPrefetchBandwidth,
6178 double *FractionOfUrgentBandwidth,
6179 bool *PrefetchBandwidthSupport)
6180 {
6181 unsigned int k;
6182 double ActiveBandwidthPerSurface;
6183 bool NotEnoughUrgentLatencyHiding = false;
6184 double TotalActiveBandwidth = 0;
6185 double TotalPrefetchBandwidth = 0;
6186
6187 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6188 if (NotUrgentLatencyHiding[k]) {
6189 NotEnoughUrgentLatencyHiding = true;
6190 }
6191 }
6192
6193 *MaxPrefetchBandwidth = 0;
6194 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6195 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6196
6197 TotalActiveBandwidth += ActiveBandwidthPerSurface;
6198
6199 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6200
6201 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6202 ActiveBandwidthPerSurface,
6203 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6204 }
6205
6206 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6207 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6208 else
6209 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6210
6211 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6212 }
6213
/*
 * Compute the bandwidth left over for immediate flip.
 *
 * Starts from ReturnBW and, for each of the first NumberOfActiveSurfaces
 * entries, subtracts the larger (dml_max) of:
 *   - the active demand: luma, chroma and cursor read bandwidth, each scaled
 *     by its urgent burst factor
 *   - the prefetch demand: NumberOfDPP * (prefetch luma/chroma scaled by the
 *     "Pre" burst factors) plus cursor_bw_pre * UrgentBurstFactorCursorPre
 *
 * Returns the remaining bandwidth; nothing in this function clamps it, so the
 * result may be negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		double prefetch_bw = NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
				+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6240
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6241 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6242 double ReturnBW,
6243 enum immediate_flip_requirement ImmediateFlipRequirement[],
6244 double final_flip_bw[],
6245 double ReadBandwidthLuma[],
6246 double ReadBandwidthChroma[],
6247 double PrefetchBandwidthLuma[],
6248 double PrefetchBandwidthChroma[],
6249 double cursor_bw[],
6250 double meta_row_bandwidth[],
6251 double dpte_row_bandwidth[],
6252 double cursor_bw_pre[],
6253 double prefetch_vmrow_bw[],
6254 unsigned int NumberOfDPP[],
6255 double UrgentBurstFactorLuma[],
6256 double UrgentBurstFactorChroma[],
6257 double UrgentBurstFactorCursor[],
6258 double UrgentBurstFactorLumaPre[],
6259 double UrgentBurstFactorChromaPre[],
6260 double UrgentBurstFactorCursorPre[],
6261
6262 /* output */
6263 double *TotalBandwidth,
6264 double *FractionOfUrgentBandwidth,
6265 bool *ImmediateFlipBandwidthSupport)
6266 {
6267 unsigned int k;
6268 *TotalBandwidth = 0;
6269 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6270 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6271 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6272 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6273 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6274 } else {
6275 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6276 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6277 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6278 }
6279 }
6280 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6281 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6282 }
6283
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6284 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6285 double ReturnBW,
6286 double UrgentLatency,
6287 unsigned int SwathHeightY[],
6288 unsigned int SwathHeightC[],
6289 unsigned int SwathWidthY[],
6290 unsigned int SwathWidthC[],
6291 double BytePerPixelInDETY[],
6292 double BytePerPixelInDETC[],
6293 unsigned int DETBufferSizeY[],
6294 unsigned int DETBufferSizeC[],
6295 unsigned int NumOfDPP[],
6296 unsigned int HTotal[],
6297 double PixelClock[],
6298 double VRatioY[],
6299 double VRatioC[],
6300 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6301 enum unbounded_requesting_policy UseUnboundedRequesting)
6302 {
6303 int k;
6304 double SwathSizeAllSurfaces = 0;
6305 double SwathSizeAllSurfacesInFetchTimeUs;
6306 double DETSwathLatencyHidingUs;
6307 double DETSwathLatencyHidingYUs;
6308 double DETSwathLatencyHidingCUs;
6309 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6310 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6311 bool NotEnoughDETSwathFillLatencyHiding = false;
6312
6313 if (UseUnboundedRequesting == dm_unbounded_requesting)
6314 return false;
6315
6316 /* calculate sum of single swath size for all pipes in bytes */
6317 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6318 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6319
6320 if (SwathHeightC[k] != 0)
6321 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6322 else
6323 SwathSizePerSurfaceC[k] = 0;
6324
6325 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6326 }
6327
6328 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6329
6330 /* ensure all DET - 1 swath can hide a fetch for all surfaces */
6331 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6332 double LineTime = HTotal[k] / PixelClock[k];
6333
6334 /* only care if surface is not phantom */
6335 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6336 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6337
6338 if (SwathHeightC[k] != 0) {
6339 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6340
6341 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6342 } else {
6343 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6344 }
6345
6346 /* DET must be able to hide time to fetch 1 swath for each surface */
6347 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6348 NotEnoughDETSwathFillLatencyHiding = true;
6349 break;
6350 }
6351 }
6352 }
6353
6354 return NotEnoughDETSwathFillLatencyHiding;
6355 }
6356