1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * dml32_RoundToDFSGranularity - snap a clock to (VCOSpeed * 4) / divider.
 *
 * The divider is (VCOSpeed * 4 / Clock) passed through dml_floor(..., 1.0)
 * when round_up is set and through dml_ceil(..., 1.0) otherwise.
 * NOTE(review): presumably those helpers round to a whole number, so a smaller
 * divider yields a clock at or above the request and a larger one a clock at
 * or below it - confirm against dml_inline_defs.h.
 *
 * Returns 0.0 for a non-positive Clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double DfsReference;
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	DfsReference = VCOSpeed * 4.0;
	Divider = round_up ? dml_floor(DfsReference / Clock, 1.0) :
			dml_ceil(DfsReference / Clock, 1.0);

	return DfsReference / Divider;
}
1335 
/*
 * Work out the output link configuration for one timing.
 *
 * All outputs except RequiredSlots are first reset (*RequiresDSC and
 * *RequiresFEC to false, *OutBpp to 0, *OutputType / *OutputRate to their
 * "unknown" values).  Nothing else happens unless
 * IsMainSurfaceUsingTheIndicatedTiming is set.
 *
 * HDMI: *OutBpp comes from dml32_TruncToValidBPP() using
 * min(600, PHYCLKPerState) * 10 as link bit rate, 3 lanes and DSC disabled.
 *
 * DP / DP2.0 / eDP: the link rates are tried from lowest to highest
 * (UHBR10, UHBR13.5, UHBR20 for DP2.0; HBR, HBR2, HBR3 otherwise).  A rate is
 * attempted only if OutputLinkDPRate is dm_dp_rate_na or names that rate, the
 * relevant PHY clock meets the rate's threshold and, for every rate after the
 * first, *OutBpp is still 0.  *OutputType and *OutputRate record the last rate
 * attempted, whether or not it produced a non-zero *OutBpp.
 *
 * Downspreading is applied as a percentage (divided by 100) to the link rate.
 * RequiredSlots is only forwarded to dml32_TruncToValidBPP(); this function
 * never writes it directly.  PHYCLKD18PerState is not referenced.
 * *RequiresFEC is declared double but only ever assigned true/false.
 */
void dml32_CalculateOutputLink(
		double PHYCLKPerState,
		double PHYCLKD18PerState,
		double PHYCLKD32PerState,
		double Downspreading,
		bool IsMainSurfaceUsingTheIndicatedTiming,
		enum output_encoder_class Output,
		enum output_format_class OutputFormat,
		unsigned int HTotal,
		unsigned int HActive,
		double PixelClockBackEnd,
		double ForcedOutputLinkBPP,
		unsigned int DSCInputBitPerComponent,
		unsigned int NumberOfDSCSlices,
		double AudioSampleRate,
		unsigned int AudioSampleLayout,
		enum odm_combine_mode ODMModeNoDSC,
		enum odm_combine_mode ODMModeDSC,
		bool DSCEnable,
		unsigned int OutputLinkDPLanes,
		enum dm_output_link_dp_rate OutputLinkDPRate,

		/* Output */
		bool *RequiresDSC,
		double *RequiresFEC,
		double  *OutBpp,
		enum dm_output_type *OutputType,
		enum dm_output_rate *OutputRate,
		unsigned int *RequiredSlots)
{
	bool LinkDSCEnable;
	unsigned int dummy;
	*RequiresDSC = false;
	*RequiresFEC = false;
	*OutBpp = 0;
	*OutputType = dm_output_type_unknown;
	*OutputRate = dm_output_rate_unknown;

	if (IsMainSurfaceUsingTheIndicatedTiming) {
		if (Output == dm_hdmi) {
			*RequiresDSC = false;
			*RequiresFEC = false;
			/* HDMI never uses DSC here; the slot count goes to a throw-away local. */
			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
					ODMModeNoDSC, ODMModeDSC, &dummy);
			//OutputTypeAndRate = "HDMI";
			*OutputType = dm_output_type_hdmi;

		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
			/*
			 * Initial DSC/FEC decision: with DSC, FEC is required on DP and
			 * DP2.0 but not eDP; without DSC only DP2.0 requires FEC.
			 */
			if (DSCEnable == true) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				if (Output == dm_dp || Output == dm_dp2p0)
					*RequiresFEC = true;
				else
					*RequiresFEC = false;
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				if (Output == dm_dp2p0)
					*RequiresFEC = true;
				else
					*RequiresFEC = false;
			}
			if (Output == dm_dp2p0) {
				*OutBpp = 0;
				/*
				 * NOTE(review): the thresholds below use integer division
				 * (10000 / 32 == 312, 13500 / 32 == 421, 20000 / 32 == 625),
				 * so the fractional part of the UHBR10/UHBR13.5 thresholds is
				 * dropped - confirm this is intended.
				 *
				 * NOTE(review): each "retry with DSC" block requires
				 * DSCEnable == true, in which case LinkDSCEnable and
				 * *RequiresDSC were already set true above, so the retry
				 * repeats the preceding call with the same arguments.
				 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
						PHYCLKD32PerState >= 10000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Retry only if the next rate up is not available from the PHY clock. */
					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR10";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr10;
				}
				/* UHBR13.5 is tried only if UHBR10 left *OutBpp at 0. */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR13p5";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
				}
				/* UHBR20 is the last resort; its DSC retry has no PHY clock upper bound. */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR20";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr20;
				}
			} else {
				/*
				 * DP / eDP: same ladder as above using PHYCLKPerState and the
				 * HBR (2700), HBR2 (5400) and HBR3 (8100) link rates.  A DSC
				 * retry additionally sets *RequiresFEC for DP (not eDP).
				 */
				*OutBpp = 0;
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
						PHYCLKPerState >= 270) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
						*OutBpp == 0 && PHYCLKPerState >= 540) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR2";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr2;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
							RequiredSlots);

					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR3";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr3;
				}
			}
		}
	}
}
1550 
/*
 * Derive the global DPPCLK and the per-surface DPPCLK values.
 *
 * Pass 1: each surface's single-DPP clock is divided by its DPP count and
 * scaled up by the down-spreading percentage; the largest result is tracked.
 * The largest value is then rounded up with dml32_RoundToDFSGranularity()
 * and stored in *GlobalDPPCLK.
 * Pass 2: every Dppclk[k] is rounded up to a whole multiple of
 * *GlobalDPPCLK / 255.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0 .. NumberOfActiveSurfaces - 1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double DownSpreadScale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double LargestDppclk = 0;
	unsigned int Surface;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		Dppclk[Surface] = DPPCLKUsingSingleDPP[Surface] / DPPPerSurface[Surface] * DownSpreadScale;
		LargestDppclk = dml_max(LargestDppclk, Dppclk[Surface]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(LargestDppclk, 1, DISPCLKDPPCLKVCOSpeed);

	/* Quantise each surface clock upwards in steps of GlobalDPPCLK / 255. */
	for (Surface = 0; Surface < NumberOfActiveSurfaces; ++Surface) {
		double Steps = dml_ceil(Dppclk[Surface] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[Surface] = *GlobalDPPCLK / 255 * Steps;
	}
}
1572 
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 	unsigned int   NonDSCBPP3;
1599 
1600 	if (Format == dm_420) {
1601 		NonDSCBPP0 = 12;
1602 		NonDSCBPP1 = 15;
1603 		NonDSCBPP2 = 18;
1604 		MinDSCBPP = 6;
1605 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1606 	} else if (Format == dm_444) {
1607 		NonDSCBPP0 = 18;
1608 		NonDSCBPP1 = 24;
1609 		NonDSCBPP2 = 30;
1610 		NonDSCBPP3 = 36;
1611 		MinDSCBPP = 8;
1612 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 	} else {
1614 		if (Output == dm_hdmi) {
1615 			NonDSCBPP0 = 24;
1616 			NonDSCBPP1 = 24;
1617 			NonDSCBPP2 = 24;
1618 		} else {
1619 			NonDSCBPP0 = 16;
1620 			NonDSCBPP1 = 20;
1621 			NonDSCBPP2 = 24;
1622 		}
1623 		if (Format == dm_n422) {
1624 			MinDSCBPP = 7;
1625 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 		} else {
1627 			MinDSCBPP = 8;
1628 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 		}
1630 	}
1631 	if (Output == dm_dp2p0) {
1632 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 	} else if (DSCEnable && Output == dm_dp) {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 	} else {
1636 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 	}
1638 
1639 	if (DSCEnable) {
1640 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 			MaxLinkBPP = 2 * MaxLinkBPP;
1646 	} else {
1647 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 			MaxLinkBPP = 2 * MaxLinkBPP;
1653 	}
1654 
1655 	if (DesiredBPP == 0) {
1656 		if (DSCEnable) {
1657 			if (MaxLinkBPP < MinDSCBPP)
1658 				return BPP_INVALID;
1659 			else if (MaxLinkBPP >= MaxDSCBPP)
1660 				return MaxDSCBPP;
1661 			else
1662 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 		} else {
1664 			if (MaxLinkBPP >= NonDSCBPP3)
1665 				return NonDSCBPP3;
1666 			else if (MaxLinkBPP >= NonDSCBPP2)
1667 				return NonDSCBPP2;
1668 			else if (MaxLinkBPP >= NonDSCBPP1)
1669 				return NonDSCBPP1;
1670 			else if (MaxLinkBPP >= NonDSCBPP0)
1671 				return 16.0;
1672 			else
1673 				return BPP_INVALID;
1674 		}
1675 	} else {
1676 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1677 				DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1678 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1679 			return BPP_INVALID;
1680 		else
1681 			return DesiredBPP;
1682 	}
1683 
1684 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1685 
1686 	return BPP_INVALID;
1687 } // TruncToValidBPP
1688 
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1689 double dml32_RequiredDTBCLK(
1690 		bool              DSCEnable,
1691 		double               PixelClock,
1692 		enum output_format_class  OutputFormat,
1693 		double               OutputBpp,
1694 		unsigned int              DSCSlices,
1695 		unsigned int                 HTotal,
1696 		unsigned int                 HActive,
1697 		unsigned int              AudioRate,
1698 		unsigned int              AudioLayout)
1699 {
1700 	double PixelWordRate;
1701 	double HCActive;
1702 	double HCBlank;
1703 	double AverageTribyteRate;
1704 	double HActiveTribyteRate;
1705 
1706 	if (DSCEnable != true)
1707 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1708 
1709 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1710 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1711 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1712 	HCBlank = 64 + 32 *
1713 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1714 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1715 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1716 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1717 }
1718 
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1720 		enum odm_combine_mode ODMMode,
1721 		unsigned int DSCInputBitPerComponent,
1722 		double OutputBpp,
1723 		unsigned int HActive,
1724 		unsigned int HTotal,
1725 		unsigned int NumberOfDSCSlices,
1726 		enum output_format_class  OutputFormat,
1727 		enum output_encoder_class Output,
1728 		double PixelClock,
1729 		double PixelClockBackEnd,
1730 		double dsc_delay_factor_wa)
1731 {
1732 	unsigned int DSCDelayRequirement_val;
1733 
1734 	if (DSCEnabled == true && OutputBpp != 0) {
1735 		if (ODMMode == dm_odm_combine_mode_4to1) {
1736 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1738 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1740 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1742 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1743 		} else {
1744 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1745 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1746 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1747 		}
1748 
1749 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1750 				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1751 
1752 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1753 
1754 	} else {
1755 		DSCDelayRequirement_val = 0;
1756 	}
1757 
1758 #ifdef __DML_VBA_DEBUG__
1759 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1760 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1761 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1762 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1763 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1764 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1765 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1766 #endif
1767 
1768 	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1769 }
1770 
dml32_CalculateSurfaceSizeInMall(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],bool DCCEnable[],bool ViewportStationary[],unsigned int ViewportXStartY[],unsigned int ViewportYStartY[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int ViewportWidthY[],unsigned int ViewportHeightY[],unsigned int BytesPerPixelY[],unsigned int ViewportWidthC[],unsigned int ViewportHeightC[],unsigned int BytesPerPixelC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)1771 void dml32_CalculateSurfaceSizeInMall(
1772 		unsigned int NumberOfActiveSurfaces,
1773 		unsigned int MALLAllocatedForDCN,
1774 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1775 		bool DCCEnable[],
1776 		bool ViewportStationary[],
1777 		unsigned int ViewportXStartY[],
1778 		unsigned int ViewportYStartY[],
1779 		unsigned int ViewportXStartC[],
1780 		unsigned int ViewportYStartC[],
1781 		unsigned int ViewportWidthY[],
1782 		unsigned int ViewportHeightY[],
1783 		unsigned int BytesPerPixelY[],
1784 		unsigned int ViewportWidthC[],
1785 		unsigned int ViewportHeightC[],
1786 		unsigned int BytesPerPixelC[],
1787 		unsigned int SurfaceWidthY[],
1788 		unsigned int SurfaceWidthC[],
1789 		unsigned int SurfaceHeightY[],
1790 		unsigned int SurfaceHeightC[],
1791 		unsigned int Read256BytesBlockWidthY[],
1792 		unsigned int Read256BytesBlockWidthC[],
1793 		unsigned int Read256BytesBlockHeightY[],
1794 		unsigned int Read256BytesBlockHeightC[],
1795 		unsigned int ReadBlockWidthY[],
1796 		unsigned int ReadBlockWidthC[],
1797 		unsigned int ReadBlockHeightY[],
1798 		unsigned int ReadBlockHeightC[],
1799 
1800 		/* Output */
1801 		unsigned int    SurfaceSizeInMALL[],
1802 		bool *ExceededMALLSize)
1803 {
1804 	unsigned int TotalSurfaceSizeInMALL  = 0;
1805 	unsigned int k;
1806 
1807 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1808 		if (ViewportStationary[k]) {
1809 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1810 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1811 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1812 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1813 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1814 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1815 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1816 
1817 			if (ReadBlockWidthC[k] > 0) {
1818 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1819 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1820 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1821 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1822 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1823 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1824 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1825 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1826 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1827 							BytesPerPixelC[k];
1828 			}
1829 			if (DCCEnable[k] == true) {
1830 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1831 						dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1832 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1833 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1834 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1835 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1836 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1837 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1838 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1839 							* Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1840 				if (Read256BytesBlockWidthC[k] > 0) {
1841 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1842 							dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1843 								Read256BytesBlockWidthC[k]),
1844 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1845 								* Read256BytesBlockWidthC[k] - 1, 8 *
1846 								Read256BytesBlockWidthC[k]) -
1847 								dml_floor(ViewportXStartC[k], 8 *
1848 								Read256BytesBlockWidthC[k])) *
1849 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1850 								Read256BytesBlockHeightC[k]),
1851 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1852 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1853 								Read256BytesBlockHeightC[k]) -
1854 								dml_floor(ViewportYStartC[k], 8 *
1855 								Read256BytesBlockHeightC[k])) *
1856 								BytesPerPixelC[k] / 256;
1857 				}
1858 			}
1859 		} else {
1860 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1861 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1862 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1863 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1864 							BytesPerPixelY[k];
1865 			if (ReadBlockWidthC[k] > 0) {
1866 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1867 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1868 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1869 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1870 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1871 								BytesPerPixelC[k];
1872 			}
1873 			if (DCCEnable[k] == true) {
1874 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1875 						dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1876 								Read256BytesBlockWidthY[k] - 1), 8 *
1877 								Read256BytesBlockWidthY[k]) *
1878 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1879 								Read256BytesBlockHeightY[k] - 1), 8 *
1880 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1881 
1882 				if (Read256BytesBlockWidthC[k] > 0) {
1883 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1884 							dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1885 									Read256BytesBlockWidthC[k] - 1), 8 *
1886 									Read256BytesBlockWidthC[k]) *
1887 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1888 									Read256BytesBlockHeightC[k] - 1), 8 *
1889 									Read256BytesBlockHeightC[k]) *
1890 									BytesPerPixelC[k] / 256;
1891 				}
1892 			}
1893 		}
1894 	}
1895 
1896 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1897 		if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1898 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1899 	}
1900 	*ExceededMALLSize =  (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
1901 } // CalculateSurfaceSizeInMall
1902 
dml32_CalculateVMRowAndSwath(unsigned int NumberOfActiveSurfaces,DmlPipe myPipe[],unsigned int SurfaceSizeInMALL[],unsigned int PTEBufferSizeInRequestsLuma,unsigned int PTEBufferSizeInRequestsChroma,unsigned int DCCMetaBufferSizeBytes,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int MALLAllocatedForDCN,double SwathWidthY[],double SwathWidthC[],bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes[],unsigned int HostVMMinPageSize,bool PTEBufferSizeNotExceeded[],bool DCCMetaBufferSizeNotExceeded[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int dpte_row_height_luma[],unsigned int dpte_row_height_chroma[],unsigned int dpte_row_height_linear_luma[],unsigned int dpte_row_height_linear_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int vm_group_bytes[],unsigned int dpte_group_bytes[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PTERequestSizeY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int PTERequestSizeC[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_c[],double PrefetchSourceLinesY[],double PrefetchSourceLinesC[],double VInitPreFillY[],double VInitPreFillC[],unsigned int MaxNumSwathY[],unsigned int MaxNumSwathC[],double meta_row_bw[],double dpte_row_bw[],double PixelPTEBytesPerRow[],double PDEAndMetaPTEBytesFrame[],double MetaRowByte[],bool use_one_row_for_frame[],bool use_one_row_for_frame_flip[],bool 
UsesMALLForStaticScreen[],bool PTE_BUFFER_MODE[],unsigned int BIGK_FRAGMENT_SIZE[])1903 void dml32_CalculateVMRowAndSwath(
1904 		unsigned int NumberOfActiveSurfaces,
1905 		DmlPipe myPipe[],
1906 		unsigned int SurfaceSizeInMALL[],
1907 		unsigned int PTEBufferSizeInRequestsLuma,
1908 		unsigned int PTEBufferSizeInRequestsChroma,
1909 		unsigned int DCCMetaBufferSizeBytes,
1910 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1911 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1912 		unsigned int MALLAllocatedForDCN,
1913 		double SwathWidthY[],
1914 		double SwathWidthC[],
1915 		bool GPUVMEnable,
1916 		bool HostVMEnable,
1917 		unsigned int HostVMMaxNonCachedPageTableLevels,
1918 		unsigned int GPUVMMaxPageTableLevels,
1919 		unsigned int GPUVMMinPageSizeKBytes[],
1920 		unsigned int HostVMMinPageSize,
1921 
1922 		/* Output */
1923 		bool PTEBufferSizeNotExceeded[],
1924 		bool DCCMetaBufferSizeNotExceeded[],
1925 		unsigned int dpte_row_width_luma_ub[],
1926 		unsigned int dpte_row_width_chroma_ub[],
1927 		unsigned int dpte_row_height_luma[],
1928 		unsigned int dpte_row_height_chroma[],
1929 		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1930 		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1931 		unsigned int meta_req_width[],
1932 		unsigned int meta_req_width_chroma[],
1933 		unsigned int meta_req_height[],
1934 		unsigned int meta_req_height_chroma[],
1935 		unsigned int meta_row_width[],
1936 		unsigned int meta_row_width_chroma[],
1937 		unsigned int meta_row_height[],
1938 		unsigned int meta_row_height_chroma[],
1939 		unsigned int vm_group_bytes[],
1940 		unsigned int dpte_group_bytes[],
1941 		unsigned int PixelPTEReqWidthY[],
1942 		unsigned int PixelPTEReqHeightY[],
1943 		unsigned int PTERequestSizeY[],
1944 		unsigned int PixelPTEReqWidthC[],
1945 		unsigned int PixelPTEReqHeightC[],
1946 		unsigned int PTERequestSizeC[],
1947 		unsigned int dpde0_bytes_per_frame_ub_l[],
1948 		unsigned int meta_pte_bytes_per_frame_ub_l[],
1949 		unsigned int dpde0_bytes_per_frame_ub_c[],
1950 		unsigned int meta_pte_bytes_per_frame_ub_c[],
1951 		double PrefetchSourceLinesY[],
1952 		double PrefetchSourceLinesC[],
1953 		double VInitPreFillY[],
1954 		double VInitPreFillC[],
1955 		unsigned int MaxNumSwathY[],
1956 		unsigned int MaxNumSwathC[],
1957 		double meta_row_bw[],
1958 		double dpte_row_bw[],
1959 		double PixelPTEBytesPerRow[],
1960 		double PDEAndMetaPTEBytesFrame[],
1961 		double MetaRowByte[],
1962 		bool use_one_row_for_frame[],
1963 		bool use_one_row_for_frame_flip[],
1964 		bool UsesMALLForStaticScreen[],
1965 		bool PTE_BUFFER_MODE[],
1966 		unsigned int BIGK_FRAGMENT_SIZE[])
1967 {
1968 	unsigned int k;
1969 	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1970 	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1971 	unsigned int PDEAndMetaPTEBytesFrameY;
1972 	unsigned int PDEAndMetaPTEBytesFrameC;
1973 	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1974 	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1975 	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1976 	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1977 	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1978 	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1979 	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1980 	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1981 	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1982 	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1983 	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1984 
1985 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1986 		if (HostVMEnable == true) {
1987 			vm_group_bytes[k] = 512;
1988 			dpte_group_bytes[k] = 512;
1989 		} else if (GPUVMEnable == true) {
1990 			vm_group_bytes[k] = 2048;
1991 			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1992 				dpte_group_bytes[k] = 512;
1993 			else
1994 				dpte_group_bytes[k] = 2048;
1995 		} else {
1996 			vm_group_bytes[k] = 0;
1997 			dpte_group_bytes[k] = 0;
1998 		}
1999 
2000 		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2001 				myPipe[k].SourcePixelFormat == dm_420_12 ||
2002 				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2003 			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2004 					!IsVertical(myPipe[k].SourceRotation)) {
2005 				PTEBufferSizeInRequestsForLuma[k] =
2006 						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2007 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2008 			} else {
2009 				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2010 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2011 			}
2012 
2013 			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2014 					myPipe[k].ViewportStationary,
2015 					myPipe[k].DCCEnable,
2016 					myPipe[k].DPPPerSurface,
2017 					myPipe[k].BlockHeight256BytesC,
2018 					myPipe[k].BlockWidth256BytesC,
2019 					myPipe[k].SourcePixelFormat,
2020 					myPipe[k].SurfaceTiling,
2021 					myPipe[k].BytePerPixelC,
2022 					myPipe[k].SourceRotation,
2023 					SwathWidthC[k],
2024 					myPipe[k].ViewportHeightChroma,
2025 					myPipe[k].ViewportXStartC,
2026 					myPipe[k].ViewportYStartC,
2027 					GPUVMEnable,
2028 					HostVMEnable,
2029 					HostVMMaxNonCachedPageTableLevels,
2030 					GPUVMMaxPageTableLevels,
2031 					GPUVMMinPageSizeKBytes[k],
2032 					HostVMMinPageSize,
2033 					PTEBufferSizeInRequestsForChroma[k],
2034 					myPipe[k].PitchC,
2035 					myPipe[k].DCCMetaPitchC,
2036 					myPipe[k].BlockWidthC,
2037 					myPipe[k].BlockHeightC,
2038 
2039 					/* Output */
2040 					&MetaRowByteC[k],
2041 					&PixelPTEBytesPerRowC[k],
2042 					&dpte_row_width_chroma_ub[k],
2043 					&dpte_row_height_chroma[k],
2044 					&dpte_row_height_linear_chroma[k],
2045 					&PixelPTEBytesPerRowC_one_row_per_frame[k],
2046 					&dpte_row_width_chroma_ub_one_row_per_frame[k],
2047 					&dpte_row_height_chroma_one_row_per_frame[k],
2048 					&meta_req_width_chroma[k],
2049 					&meta_req_height_chroma[k],
2050 					&meta_row_width_chroma[k],
2051 					&meta_row_height_chroma[k],
2052 					&PixelPTEReqWidthC[k],
2053 					&PixelPTEReqHeightC[k],
2054 					&PTERequestSizeC[k],
2055 					&dpde0_bytes_per_frame_ub_c[k],
2056 					&meta_pte_bytes_per_frame_ub_c[k]);
2057 
2058 			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2059 					myPipe[k].VRatioChroma,
2060 					myPipe[k].VTapsChroma,
2061 					myPipe[k].InterlaceEnable,
2062 					myPipe[k].ProgressiveToInterlaceUnitInOPP,
2063 					myPipe[k].SwathHeightC,
2064 					myPipe[k].SourceRotation,
2065 					myPipe[k].ViewportStationary,
2066 					SwathWidthC[k],
2067 					myPipe[k].ViewportHeightChroma,
2068 					myPipe[k].ViewportXStartC,
2069 					myPipe[k].ViewportYStartC,
2070 
2071 					/* Output */
2072 					&VInitPreFillC[k],
2073 					&MaxNumSwathC[k]);
2074 		} else {
2075 			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2076 			PTEBufferSizeInRequestsForChroma[k] = 0;
2077 			PixelPTEBytesPerRowC[k] = 0;
2078 			PDEAndMetaPTEBytesFrameC = 0;
2079 			MetaRowByteC[k] = 0;
2080 			MaxNumSwathC[k] = 0;
2081 			PrefetchSourceLinesC[k] = 0;
2082 			dpte_row_height_chroma_one_row_per_frame[k] = 0;
2083 			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2084 			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2085 		}
2086 
2087 		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2088 				myPipe[k].ViewportStationary,
2089 				myPipe[k].DCCEnable,
2090 				myPipe[k].DPPPerSurface,
2091 				myPipe[k].BlockHeight256BytesY,
2092 				myPipe[k].BlockWidth256BytesY,
2093 				myPipe[k].SourcePixelFormat,
2094 				myPipe[k].SurfaceTiling,
2095 				myPipe[k].BytePerPixelY,
2096 				myPipe[k].SourceRotation,
2097 				SwathWidthY[k],
2098 				myPipe[k].ViewportHeight,
2099 				myPipe[k].ViewportXStart,
2100 				myPipe[k].ViewportYStart,
2101 				GPUVMEnable,
2102 				HostVMEnable,
2103 				HostVMMaxNonCachedPageTableLevels,
2104 				GPUVMMaxPageTableLevels,
2105 				GPUVMMinPageSizeKBytes[k],
2106 				HostVMMinPageSize,
2107 				PTEBufferSizeInRequestsForLuma[k],
2108 				myPipe[k].PitchY,
2109 				myPipe[k].DCCMetaPitchY,
2110 				myPipe[k].BlockWidthY,
2111 				myPipe[k].BlockHeightY,
2112 
2113 				/* Output */
2114 				&MetaRowByteY[k],
2115 				&PixelPTEBytesPerRowY[k],
2116 				&dpte_row_width_luma_ub[k],
2117 				&dpte_row_height_luma[k],
2118 				&dpte_row_height_linear_luma[k],
2119 				&PixelPTEBytesPerRowY_one_row_per_frame[k],
2120 				&dpte_row_width_luma_ub_one_row_per_frame[k],
2121 				&dpte_row_height_luma_one_row_per_frame[k],
2122 				&meta_req_width[k],
2123 				&meta_req_height[k],
2124 				&meta_row_width[k],
2125 				&meta_row_height[k],
2126 				&PixelPTEReqWidthY[k],
2127 				&PixelPTEReqHeightY[k],
2128 				&PTERequestSizeY[k],
2129 				&dpde0_bytes_per_frame_ub_l[k],
2130 				&meta_pte_bytes_per_frame_ub_l[k]);
2131 
2132 		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2133 				myPipe[k].VRatio,
2134 				myPipe[k].VTaps,
2135 				myPipe[k].InterlaceEnable,
2136 				myPipe[k].ProgressiveToInterlaceUnitInOPP,
2137 				myPipe[k].SwathHeightY,
2138 				myPipe[k].SourceRotation,
2139 				myPipe[k].ViewportStationary,
2140 				SwathWidthY[k],
2141 				myPipe[k].ViewportHeight,
2142 				myPipe[k].ViewportXStart,
2143 				myPipe[k].ViewportYStart,
2144 
2145 				/* Output */
2146 				&VInitPreFillY[k],
2147 				&MaxNumSwathY[k]);
2148 
2149 		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2150 		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2151 
2152 		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2153 				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2154 			PTEBufferSizeNotExceeded[k] = true;
2155 		} else {
2156 			PTEBufferSizeNotExceeded[k] = false;
2157 		}
2158 
2159 		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2160 			PTEBufferSizeInRequestsForLuma[k] &&
2161 			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2162 	}
2163 
2164 	dml32_CalculateMALLUseForStaticScreen(
2165 			NumberOfActiveSurfaces,
2166 			MALLAllocatedForDCN,
2167 			UseMALLForStaticScreen,   // mode
2168 			SurfaceSizeInMALL,
2169 			one_row_per_frame_fits_in_buffer,
2170 			/* Output */
2171 			UsesMALLForStaticScreen); // boolen
2172 
2173 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2174 		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2175 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2176 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2177 				(GPUVMMinPageSizeKBytes[k] > 64);
2178 		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2179 	}
2180 
2181 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2182 #ifdef __DML_VBA_DEBUG__
2183 		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2184 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2185 #endif
2186 		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2187 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2188 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2189 				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2190 
2191 		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2192 				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2193 
2194 		if (use_one_row_for_frame[k]) {
2195 			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2196 			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2197 			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2198 			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2199 			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2200 			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2201 			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2202 		}
2203 
2204 		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2205 			DCCMetaBufferSizeNotExceeded[k] = true;
2206 		else
2207 			DCCMetaBufferSizeNotExceeded[k] = false;
2208 
2209 		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2210 		if (use_one_row_for_frame[k])
2211 			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2212 
2213 		dml32_CalculateRowBandwidth(
2214 				GPUVMEnable,
2215 				myPipe[k].SourcePixelFormat,
2216 				myPipe[k].VRatio,
2217 				myPipe[k].VRatioChroma,
2218 				myPipe[k].DCCEnable,
2219 				myPipe[k].HTotal / myPipe[k].PixelClock,
2220 				MetaRowByteY[k], MetaRowByteC[k],
2221 				meta_row_height[k],
2222 				meta_row_height_chroma[k],
2223 				PixelPTEBytesPerRowY[k],
2224 				PixelPTEBytesPerRowC[k],
2225 				dpte_row_height_luma[k],
2226 				dpte_row_height_chroma[k],
2227 
2228 				/* Output */
2229 				&meta_row_bw[k],
2230 				&dpte_row_bw[k]);
2231 #ifdef __DML_VBA_DEBUG__
2232 		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2233 		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2234 				__func__, k, use_one_row_for_frame_flip[k]);
2235 		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2236 				__func__, k, UseMALLForPStateChange[k]);
2237 		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2238 		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2239 				__func__, k, dpte_row_width_luma_ub[k]);
2240 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2241 		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2242 				__func__, k, dpte_row_height_chroma[k]);
2243 		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2244 				__func__, k, dpte_row_width_chroma_ub[k]);
2245 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2246 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2247 		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2248 				__func__, k, PTEBufferSizeNotExceeded[k]);
2249 		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2250 		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2251 #endif
2252 	}
2253 } // CalculateVMRowAndSwath
2254 
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2255 unsigned int dml32_CalculateVMAndRowBytes(
2256 		bool ViewportStationary,
2257 		bool DCCEnable,
2258 		unsigned int NumberOfDPPs,
2259 		unsigned int BlockHeight256Bytes,
2260 		unsigned int BlockWidth256Bytes,
2261 		enum source_format_class SourcePixelFormat,
2262 		unsigned int SurfaceTiling,
2263 		unsigned int BytePerPixel,
2264 		enum dm_rotation_angle SourceRotation,
2265 		double SwathWidth,
2266 		unsigned int ViewportHeight,
2267 		unsigned int    ViewportXStart,
2268 		unsigned int    ViewportYStart,
2269 		bool GPUVMEnable,
2270 		bool HostVMEnable,
2271 		unsigned int HostVMMaxNonCachedPageTableLevels,
2272 		unsigned int GPUVMMaxPageTableLevels,
2273 		unsigned int GPUVMMinPageSizeKBytes,
2274 		unsigned int HostVMMinPageSize,
2275 		unsigned int PTEBufferSizeInRequests,
2276 		unsigned int Pitch,
2277 		unsigned int DCCMetaPitch,
2278 		unsigned int MacroTileWidth,
2279 		unsigned int MacroTileHeight,
2280 
2281 		/* Output */
2282 		unsigned int *MetaRowByte,
2283 		unsigned int *PixelPTEBytesPerRow,
2284 		unsigned int    *dpte_row_width_ub,
2285 		unsigned int *dpte_row_height,
2286 		unsigned int *dpte_row_height_linear,
2287 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2288 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2289 		unsigned int    *dpte_row_height_one_row_per_frame,
2290 		unsigned int *MetaRequestWidth,
2291 		unsigned int *MetaRequestHeight,
2292 		unsigned int *meta_row_width,
2293 		unsigned int *meta_row_height,
2294 		unsigned int *PixelPTEReqWidth,
2295 		unsigned int *PixelPTEReqHeight,
2296 		unsigned int *PTERequestSize,
2297 		unsigned int    *DPDE0BytesFrame,
2298 		unsigned int    *MetaPTEBytesFrame)
2299 {
2300 	unsigned int MPDEBytesFrame;
2301 	unsigned int DCCMetaSurfaceBytes;
2302 	unsigned int ExtraDPDEBytesFrame;
2303 	unsigned int PDEAndMetaPTEBytesFrame;
2304 	unsigned int HostVMDynamicLevels = 0;
2305 	unsigned int    MacroTileSizeBytes;
2306 	unsigned int    vp_height_meta_ub;
2307 	unsigned int    vp_height_dpte_ub;
2308 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2309 
2310 	if (GPUVMEnable == true && HostVMEnable == true) {
2311 		if (HostVMMinPageSize < 2048)
2312 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2313 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2314 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2315 		else
2316 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2317 	}
2318 
2319 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2320 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2321 	if (SurfaceTiling == dm_sw_linear) {
2322 		*meta_row_height = 32;
2323 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2324 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2325 	} else if (!IsVertical(SourceRotation)) {
2326 		*meta_row_height = *MetaRequestHeight;
2327 		if (ViewportStationary && NumberOfDPPs == 1) {
2328 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2329 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2330 		} else {
2331 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2332 		}
2333 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2334 	} else {
2335 		*meta_row_height = *MetaRequestWidth;
2336 		if (ViewportStationary && NumberOfDPPs == 1) {
2337 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2338 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2339 		} else {
2340 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2341 		}
2342 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2343 	}
2344 
2345 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2346 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2347 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2348 	} else if (!IsVertical(SourceRotation)) {
2349 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2350 	} else {
2351 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2352 	}
2353 
2354 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2355 
2356 	if (GPUVMEnable == true) {
2357 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2358 				(8 * 4.0 * 1024), 1) + 1) * 64;
2359 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2360 	} else {
2361 		*MetaPTEBytesFrame = 0;
2362 		MPDEBytesFrame = 0;
2363 	}
2364 
2365 	if (DCCEnable != true) {
2366 		*MetaPTEBytesFrame = 0;
2367 		MPDEBytesFrame = 0;
2368 		*MetaRowByte = 0;
2369 	}
2370 
2371 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2372 
2373 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2374 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2375 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2376 					MacroTileHeight - 1, MacroTileHeight) -
2377 					dml_floor(ViewportYStart, MacroTileHeight);
2378 		} else if (!IsVertical(SourceRotation)) {
2379 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2380 		} else {
2381 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2382 		}
2383 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2384 				(8 * 2097152), 1) + 1);
2385 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2386 	} else {
2387 		*DPDE0BytesFrame = 0;
2388 		ExtraDPDEBytesFrame = 0;
2389 		vp_height_dpte_ub = 0;
2390 	}
2391 
2392 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2393 
2394 #ifdef __DML_VBA_DEBUG__
2395 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2396 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2397 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2398 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2399 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2400 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2401 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2402 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2403 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2404 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2405 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2406 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2407 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2408 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2409 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2410 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2411 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2412 #endif
2413 
2414 	if (HostVMEnable == true)
2415 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2416 
2417 	if (SurfaceTiling == dm_sw_linear) {
2418 		*PixelPTEReqHeight = 1;
2419 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2420 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2421 		*PTERequestSize = 64;
2422 	} else if (GPUVMMinPageSizeKBytes == 4) {
2423 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2424 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2425 		*PTERequestSize = 128;
2426 	} else {
2427 		*PixelPTEReqHeight = MacroTileHeight;
2428 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2429 		*PTERequestSize = 64;
2430 	}
2431 #ifdef __DML_VBA_DEBUG__
2432 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2433 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2434 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2435 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2436 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2437 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2438 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2439 #endif
2440 
2441 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2442 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2443 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2444 					(double) *PixelPTEReqWidth;
2445 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2446 			*PTERequestSize;
2447 
2448 	if (SurfaceTiling == dm_sw_linear) {
2449 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2450 				*PixelPTEReqWidth / Pitch), 1));
2451 #ifdef __DML_VBA_DEBUG__
2452 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2453 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2454 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2455 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2456 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2457 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2458 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2459 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2460 						*PixelPTEReqWidth / Pitch), 1));
2461 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2462 #endif
2463 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2464 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2465 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2466 
2467 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2468 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2469 				PixelPTEReqWidth_linear / Pitch), 1);
2470 		if (*dpte_row_height_linear > 128)
2471 			*dpte_row_height_linear = 128;
2472 
2473 	} else if (!IsVertical(SourceRotation)) {
2474 		*dpte_row_height = *PixelPTEReqHeight;
2475 
2476 		if (GPUVMMinPageSizeKBytes > 64) {
2477 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2478 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2479 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2480 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2481 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2482 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2483 		} else {
2484 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2485 					*PixelPTEReqWidth;
2486 		}
2487 
2488 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2489 	} else {
2490 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2491 
2492 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2493 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2494 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2495 		} else {
2496 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2497 					* *PixelPTEReqHeight;
2498 		}
2499 
2500 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2501 	}
2502 
2503 	if (GPUVMEnable != true)
2504 		*PixelPTEBytesPerRow = 0;
2505 	if (HostVMEnable == true)
2506 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2507 
2508 #ifdef __DML_VBA_DEBUG__
2509 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2510 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2511 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2512 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2513 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2514 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2515 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2516 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2517 			__func__, *dpte_row_width_ub_one_row_per_frame);
2518 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2519 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2520 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2521 			*MetaPTEBytesFrame);
2522 #endif
2523 
2524 	return PDEAndMetaPTEBytesFrame;
2525 } // CalculateVMAndRowBytes
2526 
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2527 double dml32_CalculatePrefetchSourceLines(
2528 		double VRatio,
2529 		unsigned int VTaps,
2530 		bool Interlace,
2531 		bool ProgressiveToInterlaceUnitInOPP,
2532 		unsigned int SwathHeight,
2533 		enum dm_rotation_angle SourceRotation,
2534 		bool ViewportStationary,
2535 		double SwathWidth,
2536 		unsigned int ViewportHeight,
2537 		unsigned int ViewportXStart,
2538 		unsigned int ViewportYStart,
2539 
2540 		/* Output */
2541 		double *VInitPreFill,
2542 		unsigned int *MaxNumSwath)
2543 {
2544 
2545 	unsigned int vp_start_rot;
2546 	unsigned int sw0_tmp;
2547 	unsigned int MaxPartialSwath;
2548 	double numLines;
2549 
2550 #ifdef __DML_VBA_DEBUG__
2551 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2552 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2553 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2554 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2555 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2556 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2557 #endif
2558 	if (ProgressiveToInterlaceUnitInOPP)
2559 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2560 	else
2561 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2562 
2563 	if (ViewportStationary) {
2564 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2565 			vp_start_rot = SwathHeight -
2566 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2567 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2568 			vp_start_rot = ViewportXStart;
2569 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2570 			vp_start_rot = SwathHeight -
2571 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2572 		} else {
2573 			vp_start_rot = ViewportYStart;
2574 		}
2575 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2576 		if (sw0_tmp < *VInitPreFill)
2577 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2578 		else
2579 			*MaxNumSwath = 1;
2580 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2581 	} else {
2582 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2583 		if (*VInitPreFill > 1)
2584 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2585 		else
2586 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2587 	}
2588 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2589 
2590 #ifdef __DML_VBA_DEBUG__
2591 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2592 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2593 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2594 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2595 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2596 #endif
2597 	return numLines;
2598 
2599 } // CalculatePrefetchSourceLines
2600 
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2601 void dml32_CalculateMALLUseForStaticScreen(
2602 		unsigned int NumberOfActiveSurfaces,
2603 		unsigned int MALLAllocatedForDCNFinal,
2604 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2605 		unsigned int SurfaceSizeInMALL[],
2606 		bool one_row_per_frame_fits_in_buffer[],
2607 
2608 		/* output */
2609 		bool UsesMALLForStaticScreen[])
2610 {
2611 	unsigned int k;
2612 	unsigned int SurfaceToAddToMALL;
2613 	bool CanAddAnotherSurfaceToMALL;
2614 	unsigned int TotalSurfaceSizeInMALL;
2615 
2616 	TotalSurfaceSizeInMALL = 0;
2617 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2618 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2619 		if (UsesMALLForStaticScreen[k])
2620 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2621 #ifdef __DML_VBA_DEBUG__
2622 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2623 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2624 #endif
2625 	}
2626 
2627 	SurfaceToAddToMALL = 0;
2628 	CanAddAnotherSurfaceToMALL = true;
2629 	while (CanAddAnotherSurfaceToMALL) {
2630 		CanAddAnotherSurfaceToMALL = false;
2631 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2632 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2633 					!UsesMALLForStaticScreen[k] &&
2634 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2635 					one_row_per_frame_fits_in_buffer[k] &&
2636 					(!CanAddAnotherSurfaceToMALL ||
2637 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2638 				CanAddAnotherSurfaceToMALL = true;
2639 				SurfaceToAddToMALL = k;
2640 #ifdef __DML_VBA_DEBUG__
2641 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2642 						__func__, k, UseMALLForStaticScreen[k]);
2643 #endif
2644 			}
2645 		}
2646 		if (CanAddAnotherSurfaceToMALL) {
2647 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2648 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2649 
2650 #ifdef __DML_VBA_DEBUG__
2651 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2652 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2653 #endif
2654 
2655 		}
2656 	}
2657 }
2658 
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2659 void dml32_CalculateRowBandwidth(
2660 		bool GPUVMEnable,
2661 		enum source_format_class SourcePixelFormat,
2662 		double VRatio,
2663 		double VRatioChroma,
2664 		bool DCCEnable,
2665 		double LineTime,
2666 		unsigned int MetaRowByteLuma,
2667 		unsigned int MetaRowByteChroma,
2668 		unsigned int meta_row_height_luma,
2669 		unsigned int meta_row_height_chroma,
2670 		unsigned int PixelPTEBytesPerRowLuma,
2671 		unsigned int PixelPTEBytesPerRowChroma,
2672 		unsigned int dpte_row_height_luma,
2673 		unsigned int dpte_row_height_chroma,
2674 		/* Output */
2675 		double *meta_row_bw,
2676 		double *dpte_row_bw)
2677 {
2678 	if (DCCEnable != true) {
2679 		*meta_row_bw = 0;
2680 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2681 			SourcePixelFormat == dm_rgbe_alpha) {
2682 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2683 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2684 	} else {
2685 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2686 	}
2687 
2688 	if (GPUVMEnable != true) {
2689 		*dpte_row_bw = 0;
2690 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2691 			SourcePixelFormat == dm_rgbe_alpha) {
2692 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2693 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2694 	} else {
2695 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2696 	}
2697 }
2698 
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * plus, when DoUrgentLatencyAdjustment is set,
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double worst_case = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_case;

	/* Adjustment is negative when FabricClock exceeds the reference clock. */
	return worst_case + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2717 
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2718 void dml32_CalculateUrgentBurstFactor(
2719 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2720 		unsigned int    swath_width_luma_ub,
2721 		unsigned int    swath_width_chroma_ub,
2722 		unsigned int SwathHeightY,
2723 		unsigned int SwathHeightC,
2724 		double  LineTime,
2725 		double  UrgentLatency,
2726 		double  CursorBufferSize,
2727 		unsigned int CursorWidth,
2728 		unsigned int CursorBPP,
2729 		double  VRatio,
2730 		double  VRatioC,
2731 		double  BytePerPixelInDETY,
2732 		double  BytePerPixelInDETC,
2733 		unsigned int    DETBufferSizeY,
2734 		unsigned int    DETBufferSizeC,
2735 		/* Output */
2736 		double *UrgentBurstFactorCursor,
2737 		double *UrgentBurstFactorLuma,
2738 		double *UrgentBurstFactorChroma,
2739 		bool   *NotEnoughUrgentLatencyHiding)
2740 {
2741 	double       LinesInDETLuma;
2742 	double       LinesInDETChroma;
2743 	unsigned int LinesInCursorBuffer;
2744 	double       CursorBufferSizeInTime;
2745 	double       DETBufferSizeInTimeLuma;
2746 	double       DETBufferSizeInTimeChroma;
2747 
2748 	*NotEnoughUrgentLatencyHiding = 0;
2749 
2750 	if (CursorWidth > 0) {
2751 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2752 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2753 		if (VRatio > 0) {
2754 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2755 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2756 				*NotEnoughUrgentLatencyHiding = 1;
2757 				*UrgentBurstFactorCursor = 0;
2758 			} else {
2759 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2760 						(CursorBufferSizeInTime - UrgentLatency);
2761 			}
2762 		} else {
2763 			*UrgentBurstFactorCursor = 1;
2764 		}
2765 	}
2766 
2767 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2768 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2769 
2770 	if (VRatio > 0) {
2771 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2772 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2773 			*NotEnoughUrgentLatencyHiding = 1;
2774 			*UrgentBurstFactorLuma = 0;
2775 		} else {
2776 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2777 		}
2778 	} else {
2779 		*UrgentBurstFactorLuma = 1;
2780 	}
2781 
2782 	if (BytePerPixelInDETC > 0) {
2783 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2784 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2785 					/ swath_width_chroma_ub;
2786 
2787 		if (VRatio > 0) {
2788 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2789 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2790 				*NotEnoughUrgentLatencyHiding = 1;
2791 				*UrgentBurstFactorChroma = 0;
2792 			} else {
2793 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2794 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2795 			}
2796 		} else {
2797 			*UrgentBurstFactorChroma = 1;
2798 		}
2799 	}
2800 } // CalculateUrgentBurstFactor
2801 
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2802 void dml32_CalculateDCFCLKDeepSleep(
2803 		unsigned int NumberOfActiveSurfaces,
2804 		unsigned int BytePerPixelY[],
2805 		unsigned int BytePerPixelC[],
2806 		double VRatio[],
2807 		double VRatioChroma[],
2808 		double SwathWidthY[],
2809 		double SwathWidthC[],
2810 		unsigned int DPPPerSurface[],
2811 		double HRatio[],
2812 		double HRatioChroma[],
2813 		double PixelClock[],
2814 		double PSCL_THROUGHPUT[],
2815 		double PSCL_THROUGHPUT_CHROMA[],
2816 		double Dppclk[],
2817 		double ReadBandwidthLuma[],
2818 		double ReadBandwidthChroma[],
2819 		unsigned int ReturnBusWidth,
2820 
2821 		/* Output */
2822 		double *DCFClkDeepSleep)
2823 {
2824 	unsigned int k;
2825 	double   DisplayPipeLineDeliveryTimeLuma;
2826 	double   DisplayPipeLineDeliveryTimeChroma;
2827 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2828 	double ReadBandwidth = 0.0;
2829 
2830 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2831 
2832 		if (VRatio[k] <= 1) {
2833 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2834 					/ PixelClock[k];
2835 		} else {
2836 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2837 		}
2838 		if (BytePerPixelC[k] == 0) {
2839 			DisplayPipeLineDeliveryTimeChroma = 0;
2840 		} else {
2841 			if (VRatioChroma[k] <= 1) {
2842 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2843 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2844 			} else {
2845 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2846 						/ Dppclk[k];
2847 			}
2848 		}
2849 
2850 		if (BytePerPixelC[k] > 0) {
2851 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2852 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2853 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2854 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2855 		} else {
2856 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2857 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2858 		}
2859 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2860 
2861 #ifdef __DML_VBA_DEBUG__
2862 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2863 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2864 #endif
2865 	}
2866 
2867 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2868 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2869 
2870 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2871 
2872 #ifdef __DML_VBA_DEBUG__
2873 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2874 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2875 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2876 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2877 #endif
2878 
2879 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2880 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2881 #ifdef __DML_VBA_DEBUG__
2882 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2883 #endif
2884 } // CalculateDCFCLKDeepSleep
2885 
/*
 * Return the writeback delay.
 *
 * The number of output lines left after the last source line is estimated
 * from the destination height, source height, vertical ratio and tap count.
 * If that estimate is negative the delay is 0; otherwise it is
 *   lines * line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not read here.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int         WritebackDestinationWidth,
		unsigned int         WritebackDestinationHeight,
		unsigned int         WritebackSourceHeight,
		unsigned int HTotal)
{
	double v_init;
	double line_length;
	double trailing_lines;

	v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;

	/* Either the destination width or ceil(width / 6) per tap, whichever is larger. */
	line_length = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);

	trailing_lines = WritebackDestinationHeight - 1 -
			dml_ceil(((double)WritebackSourceHeight -
					(double) v_init) / (double)WritebackVRatio, 1.0);

	if (trailing_lines < 0)
		return 0;

	return trailing_lines * line_length +
			(HTotal - WritebackDestinationWidth) + 80;
}
2915 
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2916 void dml32_UseMinimumDCFCLK(
2917 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2918 		bool DRRDisplay[],
2919 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2920 		unsigned int MaxInterDCNTileRepeaters,
2921 		unsigned int MaxPrefetchMode,
2922 		double DRAMClockChangeLatencyFinal,
2923 		double FCLKChangeLatency,
2924 		double SREnterPlusExitTime,
2925 		unsigned int ReturnBusWidth,
2926 		unsigned int RoundTripPingLatencyCycles,
2927 		unsigned int ReorderingBytes,
2928 		unsigned int PixelChunkSizeInKByte,
2929 		unsigned int MetaChunkSize,
2930 		bool GPUVMEnable,
2931 		unsigned int GPUVMMaxPageTableLevels,
2932 		bool HostVMEnable,
2933 		unsigned int NumberOfActiveSurfaces,
2934 		double HostVMMinPageSize,
2935 		unsigned int HostVMMaxNonCachedPageTableLevels,
2936 		bool DynamicMetadataVMEnabled,
2937 		bool ImmediateFlipRequirement,
2938 		bool ProgressiveToInterlaceUnitInOPP,
2939 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2940 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2941 		unsigned int VTotal[],
2942 		unsigned int VActive[],
2943 		unsigned int DynamicMetadataTransmittedBytes[],
2944 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2945 		bool Interlace[],
2946 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2947 		double RequiredDISPCLK[][2],
2948 		double UrgLatency[],
2949 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2950 		double ProjectedDCFClkDeepSleep[][2],
2951 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2952 		unsigned int TotalNumberOfActiveDPP[][2],
2953 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2954 		unsigned int dpte_group_bytes[],
2955 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2956 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2957 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2958 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2959 		unsigned int BytePerPixelY[],
2960 		unsigned int BytePerPixelC[],
2961 		unsigned int HTotal[],
2962 		double PixelClock[],
2963 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2964 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2965 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2966 		bool DynamicMetadataEnable[],
2967 		double ReadBandwidthLuma[],
2968 		double ReadBandwidthChroma[],
2969 		double DCFCLKPerState[],
2970 		/* Output */
2971 		double DCFCLKState[][2])
2972 {
2973 	unsigned int i, j, k;
2974 	unsigned int     dummy1;
2975 	double dummy2, dummy3;
2976 	double   NormalEfficiency;
2977 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2978 
2979 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2980 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2981 		for  (j = 0; j <= 1; ++j) {
2982 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2983 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2984 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2985 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2986 			double MinimumTWait = 0.0;
2987 			double DPTEBandwidth;
2988 			double DCFCLKRequiredForAverageBandwidth;
2989 			unsigned int ExtraLatencyBytes;
2990 			double ExtraLatencyCycles;
2991 			double DCFCLKRequiredForPeakBandwidth;
2992 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2993 			double MinimumTvmPlus2Tr0;
2994 
2995 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2996 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2997 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2998 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2999 								/ (15.75 * HTotal[k] / PixelClock[k]);
3000 			}
3001 
3002 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3003 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3004 
3005 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3006 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3007 
3008 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3009 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3010 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3011 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3012 					HostVMMaxNonCachedPageTableLevels);
3013 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3014 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3015 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3016 				double DCFCLKCyclesRequiredInPrefetch;
3017 				double PrefetchTime;
3018 
3019 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3020 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3021 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3022 								* BytePerPixelC[k]) / NormalEfficiency
3023 						/ ReturnBusWidth;
3024 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3025 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3026 								/ NormalEfficiency / ReturnBusWidth
3027 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3028 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3029 								/ ReturnBusWidth
3030 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3031 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3032 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3033 						* HTotal[k] / PixelClock[k];
3034 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3035 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3036 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3037 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3038 
3039 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3040 						UseMALLForPStateChange[k],
3041 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3042 						DRRDisplay[k],
3043 						DRAMClockChangeLatencyFinal,
3044 						FCLKChangeLatency,
3045 						UrgLatency[i],
3046 						SREnterPlusExitTime);
3047 
3048 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3049 						MinimumTWait - UrgLatency[i] *
3050 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3051 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3052 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3053 						DynamicMetadataVMExtraLatency[k];
3054 
3055 				if (PrefetchTime > 0) {
3056 					double ExpectedVRatioPrefetch;
3057 
3058 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3059 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3060 							DCFCLKCyclesRequiredInPrefetch);
3061 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3062 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3063 							PrefetchPixelLinesTime[k] *
3064 							dml_max(1.0, ExpectedVRatioPrefetch) *
3065 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3066 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3067 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3068 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3069 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3070 								NormalEfficiency / ReturnBusWidth;
3071 					}
3072 				} else {
3073 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3074 				}
3075 				if (DynamicMetadataEnable[k] == true) {
3076 					double TSetupPipe;
3077 					double TdmbfPipe;
3078 					double TdmsksPipe;
3079 					double TdmecPipe;
3080 					double AllowedTimeForUrgentExtraLatency;
3081 
3082 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3083 							MaxInterDCNTileRepeaters,
3084 							RequiredDPPCLKPerSurface[i][j][k],
3085 							RequiredDISPCLK[i][j],
3086 							ProjectedDCFClkDeepSleep[i][j],
3087 							PixelClock[k],
3088 							HTotal[k],
3089 							VTotal[k] - VActive[k],
3090 							DynamicMetadataTransmittedBytes[k],
3091 							DynamicMetadataLinesBeforeActiveRequired[k],
3092 							Interlace[k],
3093 							ProgressiveToInterlaceUnitInOPP,
3094 
3095 							/* output */
3096 							&TSetupPipe,
3097 							&TdmbfPipe,
3098 							&TdmecPipe,
3099 							&TdmsksPipe,
3100 							&dummy1,
3101 							&dummy2,
3102 							&dummy3);
3103 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3104 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3105 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3106 					if (AllowedTimeForUrgentExtraLatency > 0)
3107 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3108 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3109 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3110 					else
3111 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3112 				}
3113 			}
3114 			DCFCLKRequiredForPeakBandwidth = 0;
3115 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3116 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3117 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3118 			}
3119 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3120 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3121 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3122 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3123 				double MaximumTvmPlus2Tr0PlusTsw;
3124 
3125 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3126 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3127 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3128 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3129 				} else {
3130 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3131 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3132 								MinimumTvmPlus2Tr0 -
3133 								PrefetchPixelLinesTime[k] / 4),
3134 							(2 * ExtraLatencyCycles +
3135 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3136 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3137 				}
3138 			}
3139 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3140 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3141 		}
3142 	}
3143 }
3144 
/*
 * Return the number of extra latency bytes.
 *
 * The base amount is ReorderingBytes plus the pixel and meta chunk bytes
 * ((TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *   TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024).
 * When GPUVMEnable is set, each surface adds
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 *   HostVMInefficiencyFactor,
 * where "levels" is 0 unless both GPUVM and HostVM are enabled, in which case
 * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not below 0)
 * depending on HostVMMinPageSize (< 2048, < 1048576, otherwise).
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	unsigned int dynamic_levels = 0;
	double extra_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	extra_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page table traffic only contributes when GPUVM is in use. */
	if (GPUVMEnable != true)
		return extra_bytes;

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] *
				(1 + 8 * dynamic_levels) * HostVMInefficiencyFactor;
	}

	return extra_bytes;
}
3185 
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic metadata
 * timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix, *VReadyOffsetPix - clock-cycle sums (DCFCLK deep sleep,
 *       DPPCLK and repeater delay terms) multiplied by PixelClock and rounded
 *       up to a whole number.
 *   *VUpdateOffsetPix - ceil(HTotal / 4).
 *   *TSetup - sum of the three values above divided by PixelClock.
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / Dispclk.
 *   *Tdmec  - HTotal / PixelClock.
 *   *Tdmsks - DynamicMetadataLinesBeforeActiveRequired lines of
 *       HTotal / PixelClock each, or half of VBlank lines when that count is
 *       0; halved again when InterlaceEnable is 1 and
 *       ProgressiveToInterlaceUnitInOPP is false.
 *
 * The debug prints use %f for the double outputs and %u for the unsigned
 * values so the conversion specifiers match the argument types.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* A requirement of 0 lines falls back to half of the VBlank period. */
	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3240 
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3241 double dml32_CalculateTWait(
3242 		unsigned int PrefetchMode,
3243 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3244 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3245 		bool DRRDisplay,
3246 		double DRAMClockChangeLatency,
3247 		double FCLKChangeLatency,
3248 		double UrgentLatency,
3249 		double SREnterPlusExitTime)
3250 {
3251 	double TWait = 0.0;
3252 
3253 	if (PrefetchMode == 0 &&
3254 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3255 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3256 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3257 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3258 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3259 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3260 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3261 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3262 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3263 	} else {
3264 		TWait = UrgentLatency;
3265 	}
3266 
3267 #ifdef __DML_VBA_DEBUG__
3268 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3269 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3270 #endif
3271 	return TWait;
3272 } // CalculateTWait
3273 
3274 // Function: get_return_bw_mbps
3275 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3276 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3277 		const int VoltageLevel,
3278 		const bool HostVMEnable,
3279 		const double DCFCLK,
3280 		const double FabricClock,
3281 		const double DRAMSpeed)
3282 {
3283 	double ReturnBW = 0.;
3284 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3285 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3286 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3287 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3288 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3289 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3290 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3291 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3292 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3293 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3294 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3295 
3296 	if (HostVMEnable != true)
3297 		ReturnBW = PixelDataOnlyReturnBW;
3298 	else
3299 		ReturnBW = PixelMixedWithVMDataReturnBW;
3300 
3301 #ifdef __DML_VBA_DEBUG__
3302 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3303 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3304 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3305 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3306 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3307 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3308 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3309 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3310 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3311 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3312 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3313 #endif
3314 	return ReturnBW;
3315 }
3316 
3317 // Function: get_return_bw_mbps_vm_only
3318 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3319 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3320 		const int VoltageLevel,
3321 		const double DCFCLK,
3322 		const double FabricClock,
3323 		const double DRAMSpeed)
3324 {
3325 	double VMDataOnlyReturnBW = dml_min3(
3326 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3327 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3328 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3329 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3330 					* (VoltageLevel < 2 ?
3331 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3332 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3333 #ifdef __DML_VBA_DEBUG__
3334 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3335 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3336 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3337 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3338 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3339 #endif
3340 	return VMDataOnlyReturnBW;
3341 }
3342 
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3343 double dml32_CalculateExtraLatency(
3344 		unsigned int RoundTripPingLatencyCycles,
3345 		unsigned int ReorderingBytes,
3346 		double DCFCLK,
3347 		unsigned int TotalNumberOfActiveDPP,
3348 		unsigned int PixelChunkSizeInKByte,
3349 		unsigned int TotalNumberOfDCCActiveDPP,
3350 		unsigned int MetaChunkSize,
3351 		double ReturnBW,
3352 		bool GPUVMEnable,
3353 		bool HostVMEnable,
3354 		unsigned int NumberOfActiveSurfaces,
3355 		unsigned int NumberOfDPP[],
3356 		unsigned int dpte_group_bytes[],
3357 		double HostVMInefficiencyFactor,
3358 		double HostVMMinPageSize,
3359 		unsigned int HostVMMaxNonCachedPageTableLevels)
3360 {
3361 	double ExtraLatencyBytes;
3362 	double ExtraLatency;
3363 
3364 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3365 			ReorderingBytes,
3366 			TotalNumberOfActiveDPP,
3367 			PixelChunkSizeInKByte,
3368 			TotalNumberOfDCCActiveDPP,
3369 			MetaChunkSize,
3370 			GPUVMEnable,
3371 			HostVMEnable,
3372 			NumberOfActiveSurfaces,
3373 			NumberOfDPP,
3374 			dpte_group_bytes,
3375 			HostVMInefficiencyFactor,
3376 			HostVMMinPageSize,
3377 			HostVMMaxNonCachedPageTableLevels);
3378 
3379 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3380 
3381 #ifdef __DML_VBA_DEBUG__
3382 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3383 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3384 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3385 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3386 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3387 #endif
3388 
3389 	return ExtraLatency;
3390 } // CalculateExtraLatency
3391 
dml32_CalculatePrefetchSchedule(struct vba_vars_st * v,unsigned int k,double HostVMInefficiencyFactor,DmlPipe * myPipe,unsigned int DSCDelay,unsigned int DPP_RECOUT_WIDTH,unsigned int VStartup,unsigned int MaxVStartup,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,unsigned int VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,unsigned int VInitPreFillC,unsigned int MaxNumSwathC,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double TPreReq,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)3392 bool dml32_CalculatePrefetchSchedule(
3393 		struct vba_vars_st *v,
3394 		unsigned int k,
3395 		double HostVMInefficiencyFactor,
3396 		DmlPipe *myPipe,
3397 		unsigned int DSCDelay,
3398 		unsigned int DPP_RECOUT_WIDTH,
3399 		unsigned int VStartup,
3400 		unsigned int MaxVStartup,
3401 		double UrgentLatency,
3402 		double UrgentExtraLatency,
3403 		double TCalc,
3404 		unsigned int PDEAndMetaPTEBytesFrame,
3405 		unsigned int MetaRowByte,
3406 		unsigned int PixelPTEBytesPerRow,
3407 		double PrefetchSourceLinesY,
3408 		unsigned int SwathWidthY,
3409 		unsigned int VInitPreFillY,
3410 		unsigned int MaxNumSwathY,
3411 		double PrefetchSourceLinesC,
3412 		unsigned int SwathWidthC,
3413 		unsigned int VInitPreFillC,
3414 		unsigned int MaxNumSwathC,
3415 		unsigned int swath_width_luma_ub,
3416 		unsigned int swath_width_chroma_ub,
3417 		unsigned int SwathHeightY,
3418 		unsigned int SwathHeightC,
3419 		double TWait,
3420 		double TPreReq,
3421 		/* Output */
3422 		double   *DSTXAfterScaler,
3423 		double   *DSTYAfterScaler,
3424 		double *DestinationLinesForPrefetch,
3425 		double *PrefetchBandwidth,
3426 		double *DestinationLinesToRequestVMInVBlank,
3427 		double *DestinationLinesToRequestRowInVBlank,
3428 		double *VRatioPrefetchY,
3429 		double *VRatioPrefetchC,
3430 		double *RequiredPrefetchPixDataBWLuma,
3431 		double *RequiredPrefetchPixDataBWChroma,
3432 		bool   *NotEnoughTimeForDynamicMetadata,
3433 		double *Tno_bw,
3434 		double *prefetch_vmrow_bw,
3435 		double *Tdmdl_vm,
3436 		double *Tdmdl,
3437 		double *TSetup,
3438 		unsigned int   *VUpdateOffsetPix,
3439 		double   *VUpdateWidthPix,
3440 		double   *VReadyOffsetPix)
3441 {
3442 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3443 	bool MyError = false;
3444 	unsigned int DPPCycles, DISPCLKCycles;
3445 	double DSTTotalPixelsAfterScaler;
3446 	double LineTime;
3447 	double dst_y_prefetch_equ;
3448 	double prefetch_bw_oto;
3449 	double Tvm_oto;
3450 	double Tr0_oto;
3451 	double Tvm_oto_lines;
3452 	double Tr0_oto_lines;
3453 	double dst_y_prefetch_oto;
3454 	double TimeForFetchingMetaPTE = 0;
3455 	double TimeForFetchingRowInVBlank = 0;
3456 	double LinesToRequestPrefetchPixelData = 0;
3457 	unsigned int HostVMDynamicLevelsTrips;
3458 	double  trip_to_mem;
3459 	double  Tvm_trips;
3460 	double  Tr0_trips;
3461 	double  Tvm_trips_rounded;
3462 	double  Tr0_trips_rounded;
3463 	double  Lsw_oto;
3464 	double  Tpre_rounded;
3465 	double  prefetch_bw_equ;
3466 	double  Tvm_equ;
3467 	double  Tr0_equ;
3468 	double  Tdmbf;
3469 	double  Tdmec;
3470 	double  Tdmsks;
3471 	double  prefetch_sw_bytes;
3472 	double  bytes_pp;
3473 	double  dep_bytes;
3474 	unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3475 	double  min_Lsw;
3476 	double  Tsw_est1 = 0;
3477 	double  Tsw_est3 = 0;
3478 
3479 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3480 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3481 	else
3482 		HostVMDynamicLevelsTrips = 0;
3483 #ifdef __DML_VBA_DEBUG__
3484 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3485 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3486 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3487 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3488 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3489 #endif
3490 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3491 			v->MaxInterDCNTileRepeaters,
3492 			myPipe->Dppclk,
3493 			myPipe->Dispclk,
3494 			myPipe->DCFClkDeepSleep,
3495 			myPipe->PixelClock,
3496 			myPipe->HTotal,
3497 			myPipe->VBlank,
3498 			v->DynamicMetadataTransmittedBytes[k],
3499 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3500 			myPipe->InterlaceEnable,
3501 			myPipe->ProgressiveToInterlaceUnitInOPP,
3502 			TSetup,
3503 
3504 			/* output */
3505 			&Tdmbf,
3506 			&Tdmec,
3507 			&Tdmsks,
3508 			VUpdateOffsetPix,
3509 			VUpdateWidthPix,
3510 			VReadyOffsetPix);
3511 
3512 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3513 	trip_to_mem = UrgentLatency;
3514 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3515 
3516 	if (v->DynamicMetadataVMEnabled == true)
3517 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3518 	else
3519 		*Tdmdl = TWait + UrgentExtraLatency;
3520 
3521 #ifdef __DML_VBA_ALLOW_DELTA__
3522 	if (v->DynamicMetadataEnable[k] == false)
3523 		*Tdmdl = 0.0;
3524 #endif
3525 
3526 	if (v->DynamicMetadataEnable[k] == true) {
3527 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3528 			*NotEnoughTimeForDynamicMetadata = true;
3529 #ifdef __DML_VBA_DEBUG__
3530 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3531 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3532 					__func__, Tdmbf);
3533 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3534 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3535 					__func__, Tdmsks);
3536 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3537 					__func__, *Tdmdl);
3538 #endif
3539 		} else {
3540 			*NotEnoughTimeForDynamicMetadata = false;
3541 		}
3542 	} else {
3543 		*NotEnoughTimeForDynamicMetadata = false;
3544 	}
3545 
3546 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3547 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3548 
3549 	if (myPipe->ScalerEnabled)
3550 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3551 	else
3552 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3553 
3554 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3555 
3556 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3557 
3558 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3559 		return true;
3560 
3561 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3562 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3563 
3564 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3565 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3566 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3567 					myPipe->HActive / 2 : 0)
3568 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3569 
3570 #ifdef __DML_VBA_DEBUG__
3571 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3572 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3573 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3574 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3575 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3576 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3577 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3578 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3579 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3580 #endif
3581 
3582 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3583 		*DSTYAfterScaler = 1;
3584 	else
3585 		*DSTYAfterScaler = 0;
3586 
3587 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3588 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3589 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3590 #ifdef __DML_VBA_DEBUG__
3591 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3592 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3593 #endif
3594 
3595 	MyError = false;
3596 
3597 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3598 
3599 	if (v->GPUVMEnable == true) {
3600 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3601 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3602 		if (v->GPUVMMaxPageTableLevels >= 3) {
3603 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3604 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3605 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3606 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3607 					4.0 * LineTime; // VBA_ERROR
3608 			*Tno_bw = UrgentExtraLatency;
3609 		} else {
3610 			*Tno_bw = 0;
3611 		}
3612 	} else if (myPipe->DCCEnable == true) {
3613 		Tvm_trips_rounded = LineTime / 4.0;
3614 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3615 		*Tno_bw = 0;
3616 	} else {
3617 		Tvm_trips_rounded = LineTime / 4.0;
3618 		Tr0_trips_rounded = LineTime / 2.0;
3619 		*Tno_bw = 0;
3620 	}
3621 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3622 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3623 
3624 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3625 			|| myPipe->SourcePixelFormat == dm_420_12) {
3626 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3627 	} else {
3628 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3629 	}
3630 
3631 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3632 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3633 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3634 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3635 
3636 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3637 	min_Lsw = dml_max(min_Lsw, 1.0);
3638 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3639 
3640 	if (v->GPUVMEnable == true) {
3641 		Tvm_oto = dml_max3(
3642 				Tvm_trips,
3643 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3644 				LineTime / 4.0);
3645 	} else
3646 		Tvm_oto = LineTime / 4.0;
3647 
3648 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3649 		Tr0_oto = dml_max4(
3650 				Tr0_trips,
3651 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3652 				(LineTime - Tvm_oto)/2.0,
3653 				LineTime / 4.0);
3654 #ifdef __DML_VBA_DEBUG__
3655 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3656 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3657 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3658 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3659 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3660 #endif
3661 	} else
3662 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3663 
3664 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3665 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3666 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3667 
3668 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3669 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3670 
3671 	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3672 #ifdef __DML_VBA_DEBUG__
3673 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3674 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3675 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3676 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3677 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3678 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3679 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3680 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3681 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3682 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3683 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3684 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3685 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3686 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3687 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3688 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3689 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3690 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3691 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3692 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3693 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3694 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3695 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3696 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3697 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3698 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3699 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3700 #endif
3701 
3702 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3703 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3704 #ifdef __DML_VBA_DEBUG__
3705 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3706 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3707 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3708 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3709 			__func__, VStartup * LineTime);
3710 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3711 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3712 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3713 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3714 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3715 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3716 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3717 			__func__, *DSTYAfterScaler);
3718 #endif
3719 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3720 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3721 
3722 	if (prefetch_sw_bytes < dep_bytes)
3723 		prefetch_sw_bytes = 2 * dep_bytes;
3724 
3725 	*PrefetchBandwidth = 0;
3726 	*DestinationLinesToRequestVMInVBlank = 0;
3727 	*DestinationLinesToRequestRowInVBlank = 0;
3728 	*VRatioPrefetchY = 0;
3729 	*VRatioPrefetchC = 0;
3730 	*RequiredPrefetchPixDataBWLuma = 0;
3731 	if (dst_y_prefetch_equ > 1 &&
3732 			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3733 		double PrefetchBandwidth1;
3734 		double PrefetchBandwidth2;
3735 		double PrefetchBandwidth3;
3736 		double PrefetchBandwidth4;
3737 
3738 		if (Tpre_rounded - *Tno_bw > 0) {
3739 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3740 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3741 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3742 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3743 		} else
3744 			PrefetchBandwidth1 = 0;
3745 
3746 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3747 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3748 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3749 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3750 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3751 		}
3752 
3753 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3754 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3755 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3756 		else
3757 			PrefetchBandwidth2 = 0;
3758 
3759 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3760 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3761 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3762 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3763 		} else
3764 			PrefetchBandwidth3 = 0;
3765 
3766 
3767 		if (VStartup == MaxVStartup &&
3768 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3769 				LineTime - Tvm_trips_rounded > 0) {
3770 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3771 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3772 		}
3773 
3774 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3775 			PrefetchBandwidth4 = prefetch_sw_bytes /
3776 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3777 		} else {
3778 			PrefetchBandwidth4 = 0;
3779 		}
3780 
3781 #ifdef __DML_VBA_DEBUG__
3782 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3783 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3784 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3785 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3786 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3787 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3788 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3789 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3790 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3791 #endif
3792 		{
3793 			bool Case1OK;
3794 			bool Case2OK;
3795 			bool Case3OK;
3796 
3797 			if (PrefetchBandwidth1 > 0) {
3798 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3799 						>= Tvm_trips_rounded
3800 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3801 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3802 					Case1OK = true;
3803 				} else {
3804 					Case1OK = false;
3805 				}
3806 			} else {
3807 				Case1OK = false;
3808 			}
3809 
3810 			if (PrefetchBandwidth2 > 0) {
3811 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3812 						>= Tvm_trips_rounded
3813 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3814 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3815 					Case2OK = true;
3816 				} else {
3817 					Case2OK = false;
3818 				}
3819 			} else {
3820 				Case2OK = false;
3821 			}
3822 
3823 			if (PrefetchBandwidth3 > 0) {
3824 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3825 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3826 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3827 								Tr0_trips_rounded) {
3828 					Case3OK = true;
3829 				} else {
3830 					Case3OK = false;
3831 				}
3832 			} else {
3833 				Case3OK = false;
3834 			}
3835 
3836 			if (Case1OK)
3837 				prefetch_bw_equ = PrefetchBandwidth1;
3838 			else if (Case2OK)
3839 				prefetch_bw_equ = PrefetchBandwidth2;
3840 			else if (Case3OK)
3841 				prefetch_bw_equ = PrefetchBandwidth3;
3842 			else
3843 				prefetch_bw_equ = PrefetchBandwidth4;
3844 
3845 #ifdef __DML_VBA_DEBUG__
3846 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3847 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3848 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3849 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3850 #endif
3851 
3852 			if (prefetch_bw_equ > 0) {
3853 				if (v->GPUVMEnable == true) {
3854 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3855 							HostVMInefficiencyFactor / prefetch_bw_equ,
3856 							Tvm_trips, LineTime / 4);
3857 				} else {
3858 					Tvm_equ = LineTime / 4;
3859 				}
3860 
3861 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3862 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3863 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3864 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3865 				} else {
3866 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3867 				}
3868 			} else {
3869 				Tvm_equ = 0;
3870 				Tr0_equ = 0;
3871 #ifdef __DML_VBA_DEBUG__
3872 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3873 #endif
3874 			}
3875 		}
3876 
3877 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3878 			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3879 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3880 			} else {
3881 				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3882 			}
3883 			TimeForFetchingMetaPTE = Tvm_oto;
3884 			TimeForFetchingRowInVBlank = Tr0_oto;
3885 			*PrefetchBandwidth = prefetch_bw_oto;
3886 		} else {
3887 			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3888 			TimeForFetchingMetaPTE = Tvm_equ;
3889 			TimeForFetchingRowInVBlank = Tr0_equ;
3890 			*PrefetchBandwidth = prefetch_bw_equ;
3891 		}
3892 
3893 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3894 
3895 		*DestinationLinesToRequestRowInVBlank =
3896 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3897 
3898 		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3899 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3900 
3901 #ifdef __DML_VBA_DEBUG__
3902 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3903 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3904 				__func__, *DestinationLinesToRequestVMInVBlank);
3905 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3906 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3907 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3908 				__func__, *DestinationLinesToRequestRowInVBlank);
3909 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3910 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3911 #endif
3912 
3913 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3914 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3915 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3916 #ifdef __DML_VBA_DEBUG__
3917 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3918 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3919 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3920 #endif
3921 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3922 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3923 					*VRatioPrefetchY =
3924 							dml_max((double) PrefetchSourceLinesY /
3925 									LinesToRequestPrefetchPixelData,
3926 									(double) MaxNumSwathY * SwathHeightY /
3927 									(LinesToRequestPrefetchPixelData -
3928 									(VInitPreFillY - 3.0) / 2.0));
3929 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3930 				} else {
3931 					MyError = true;
3932 					*VRatioPrefetchY = 0;
3933 				}
3934 #ifdef __DML_VBA_DEBUG__
3935 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3936 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3937 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3938 #endif
3939 			}
3940 
3941 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3942 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3943 
3944 #ifdef __DML_VBA_DEBUG__
3945 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3946 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3947 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3948 #endif
3949 			if ((SwathHeightC > 4)) {
3950 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3951 					*VRatioPrefetchC =
3952 						dml_max(*VRatioPrefetchC,
3953 							(double) MaxNumSwathC * SwathHeightC /
3954 							(LinesToRequestPrefetchPixelData -
3955 							(VInitPreFillC - 3.0) / 2.0));
3956 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3957 				} else {
3958 					MyError = true;
3959 					*VRatioPrefetchC = 0;
3960 				}
3961 #ifdef __DML_VBA_DEBUG__
3962 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3963 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3964 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3965 #endif
3966 			}
3967 
3968 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3969 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3970 					/ LineTime;
3971 
3972 #ifdef __DML_VBA_DEBUG__
3973 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3974 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3975 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3976 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3977 					__func__, *RequiredPrefetchPixDataBWLuma);
3978 #endif
3979 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3980 					LinesToRequestPrefetchPixelData
3981 					* myPipe->BytePerPixelC
3982 					* swath_width_chroma_ub / LineTime;
3983 		} else {
3984 			MyError = true;
3985 #ifdef __DML_VBA_DEBUG__
3986 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3987 					__func__, LinesToRequestPrefetchPixelData);
3988 #endif
3989 			*VRatioPrefetchY = 0;
3990 			*VRatioPrefetchC = 0;
3991 			*RequiredPrefetchPixDataBWLuma = 0;
3992 			*RequiredPrefetchPixDataBWChroma = 0;
3993 		}
3994 #ifdef __DML_VBA_DEBUG__
3995 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3996 			(double)LinesToRequestPrefetchPixelData * LineTime +
3997 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3998 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3999 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4000 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4001 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4002 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4003 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4004 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4005 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4006 				PixelPTEBytesPerRow);
4007 #endif
4008 	} else {
4009 		MyError = true;
4010 #ifdef __DML_VBA_DEBUG__
4011 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4012 				__func__, dst_y_prefetch_equ);
4013 #endif
4014 	}
4015 
4016 	{
4017 		double prefetch_vm_bw;
4018 		double prefetch_row_bw;
4019 
4020 		if (PDEAndMetaPTEBytesFrame == 0) {
4021 			prefetch_vm_bw = 0;
4022 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4023 #ifdef __DML_VBA_DEBUG__
4024 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4025 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4026 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4027 					__func__, *DestinationLinesToRequestVMInVBlank);
4028 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4029 #endif
4030 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4031 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4032 #ifdef __DML_VBA_DEBUG__
4033 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4034 #endif
4035 		} else {
4036 			prefetch_vm_bw = 0;
4037 			MyError = true;
4038 #ifdef __DML_VBA_DEBUG__
4039 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4040 					__func__, *DestinationLinesToRequestVMInVBlank);
4041 #endif
4042 		}
4043 
4044 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4045 			prefetch_row_bw = 0;
4046 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4047 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4048 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4049 
4050 #ifdef __DML_VBA_DEBUG__
4051 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4052 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4053 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4054 					__func__, *DestinationLinesToRequestRowInVBlank);
4055 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4056 #endif
4057 		} else {
4058 			prefetch_row_bw = 0;
4059 			MyError = true;
4060 #ifdef __DML_VBA_DEBUG__
4061 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4062 					__func__, *DestinationLinesToRequestRowInVBlank);
4063 #endif
4064 		}
4065 
4066 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4067 	}
4068 
4069 	if (MyError) {
4070 		*PrefetchBandwidth = 0;
4071 		TimeForFetchingMetaPTE = 0;
4072 		TimeForFetchingRowInVBlank = 0;
4073 		*DestinationLinesToRequestVMInVBlank = 0;
4074 		*DestinationLinesToRequestRowInVBlank = 0;
4075 		*DestinationLinesForPrefetch = 0;
4076 		LinesToRequestPrefetchPixelData = 0;
4077 		*VRatioPrefetchY = 0;
4078 		*VRatioPrefetchC = 0;
4079 		*RequiredPrefetchPixDataBWLuma = 0;
4080 		*RequiredPrefetchPixDataBWChroma = 0;
4081 	}
4082 
4083 	return MyError;
4084 } // CalculatePrefetchSchedule
4085 
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4086 void dml32_CalculateFlipSchedule(
4087 		double HostVMInefficiencyFactor,
4088 		double UrgentExtraLatency,
4089 		double UrgentLatency,
4090 		unsigned int GPUVMMaxPageTableLevels,
4091 		bool HostVMEnable,
4092 		unsigned int HostVMMaxNonCachedPageTableLevels,
4093 		bool GPUVMEnable,
4094 		double HostVMMinPageSize,
4095 		double PDEAndMetaPTEBytesPerFrame,
4096 		double MetaRowBytes,
4097 		double DPTEBytesPerRow,
4098 		double BandwidthAvailableForImmediateFlip,
4099 		unsigned int TotImmediateFlipBytes,
4100 		enum source_format_class SourcePixelFormat,
4101 		double LineTime,
4102 		double VRatio,
4103 		double VRatioChroma,
4104 		double Tno_bw,
4105 		bool DCCEnable,
4106 		unsigned int dpte_row_height,
4107 		unsigned int meta_row_height,
4108 		unsigned int dpte_row_height_chroma,
4109 		unsigned int meta_row_height_chroma,
4110 		bool    use_one_row_for_frame_flip,
4111 
4112 		/* Output */
4113 		double *DestinationLinesToRequestVMInImmediateFlip,
4114 		double *DestinationLinesToRequestRowInImmediateFlip,
4115 		double *final_flip_bw,
4116 		bool *ImmediateFlipSupportedForPipe)
4117 {
4118 	double min_row_time = 0.0;
4119 	unsigned int HostVMDynamicLevelsTrips;
4120 	double TimeForFetchingMetaPTEImmediateFlip;
4121 	double TimeForFetchingRowInVBlankImmediateFlip;
4122 	double ImmediateFlipBW;
4123 
4124 	if (GPUVMEnable == true && HostVMEnable == true)
4125 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4126 	else
4127 		HostVMDynamicLevelsTrips = 0;
4128 
4129 #ifdef __DML_VBA_DEBUG__
4130 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4131 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4132 #endif
4133 
4134 	if (TotImmediateFlipBytes > 0) {
4135 		if (use_one_row_for_frame_flip) {
4136 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4137 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4138 		} else {
4139 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4140 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4141 		}
4142 		if (GPUVMEnable == true) {
4143 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4144 					HostVMInefficiencyFactor / ImmediateFlipBW,
4145 					UrgentExtraLatency + UrgentLatency *
4146 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4147 					LineTime / 4.0);
4148 		} else {
4149 			TimeForFetchingMetaPTEImmediateFlip = 0;
4150 		}
4151 		if ((GPUVMEnable == true || DCCEnable == true)) {
4152 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4153 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4154 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4155 		} else {
4156 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4157 		}
4158 
4159 		*DestinationLinesToRequestVMInImmediateFlip =
4160 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4161 		*DestinationLinesToRequestRowInImmediateFlip =
4162 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4163 
4164 		if (GPUVMEnable == true) {
4165 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4166 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4167 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4168 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4169 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4170 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4171 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4172 		} else {
4173 			*final_flip_bw = 0;
4174 		}
4175 	} else {
4176 		TimeForFetchingMetaPTEImmediateFlip = 0;
4177 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4178 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4179 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4180 		*final_flip_bw = 0;
4181 	}
4182 
4183 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4184 		if (GPUVMEnable == true && DCCEnable != true) {
4185 			min_row_time = dml_min(dpte_row_height *
4186 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4187 		} else if (GPUVMEnable != true && DCCEnable == true) {
4188 			min_row_time = dml_min(meta_row_height *
4189 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4190 		} else {
4191 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4192 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4193 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4194 		}
4195 	} else {
4196 		if (GPUVMEnable == true && DCCEnable != true) {
4197 			min_row_time = dpte_row_height * LineTime / VRatio;
4198 		} else if (GPUVMEnable != true && DCCEnable == true) {
4199 			min_row_time = meta_row_height * LineTime / VRatio;
4200 		} else {
4201 			min_row_time =
4202 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4203 		}
4204 	}
4205 
4206 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4207 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4208 					> min_row_time) {
4209 		*ImmediateFlipSupportedForPipe = false;
4210 	} else {
4211 		*ImmediateFlipSupportedForPipe = true;
4212 	}
4213 
4214 #ifdef __DML_VBA_DEBUG__
4215 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4216 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4217 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4218 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4219 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4220 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4221 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4222 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4223 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4224 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4225 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4226 #endif
4227 } // CalculateFlipSchedule
4228 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct vba_vars_st * v,unsigned int PrefetchMode,double DCFCLK,double ReturnBW,SOCParametersList mmSOCParameters,double SOCCLK,double DCFClkDeepSleep,unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool UnboundedRequestEnabled,unsigned int CompressedBufferSizeInkByte,enum clock_change_support * DRAMClockChangeSupport,double MaxActiveDRAMClockChangeLatencySupported[],unsigned int SubViewportLinesNeededInMALL[],enum dm_fclock_change_support * FCLKChangeSupport,double * MinActiveFCLKChangeLatencySupported,bool * USRRetrainingSupport,double ActiveDRAMClockChangeLatencyMargin[])4229 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4230 		struct vba_vars_st *v,
4231 		unsigned int PrefetchMode,
4232 		double DCFCLK,
4233 		double ReturnBW,
4234 		SOCParametersList mmSOCParameters,
4235 		double SOCCLK,
4236 		double DCFClkDeepSleep,
4237 		unsigned int DETBufferSizeY[],
4238 		unsigned int DETBufferSizeC[],
4239 		unsigned int SwathHeightY[],
4240 		unsigned int SwathHeightC[],
4241 		double SwathWidthY[],
4242 		double SwathWidthC[],
4243 		unsigned int DPPPerSurface[],
4244 		double BytePerPixelDETY[],
4245 		double BytePerPixelDETC[],
4246 		double DSTXAfterScaler[],
4247 		double DSTYAfterScaler[],
4248 		bool UnboundedRequestEnabled,
4249 		unsigned int CompressedBufferSizeInkByte,
4250 
4251 		/* Output */
4252 		enum clock_change_support *DRAMClockChangeSupport,
4253 		double MaxActiveDRAMClockChangeLatencySupported[],
4254 		unsigned int SubViewportLinesNeededInMALL[],
4255 		enum dm_fclock_change_support *FCLKChangeSupport,
4256 		double *MinActiveFCLKChangeLatencySupported,
4257 		bool *USRRetrainingSupport,
4258 		double ActiveDRAMClockChangeLatencyMargin[])
4259 {
4260 	unsigned int i, j, k;
4261 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4262 	unsigned int DRAMClockChangeSupportNumber = 0;
4263 	unsigned int LastSurfaceWithoutMargin;
4264 	unsigned int DRAMClockChangeMethod = 0;
4265 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4266 	double MinActiveFCLKChangeMargin = 0.;
4267 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4268 	double ActiveClockChangeLatencyHidingY;
4269 	double ActiveClockChangeLatencyHidingC;
4270 	double ActiveClockChangeLatencyHiding;
4271 	double EffectiveDETBufferSizeY;
4272 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4273 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4274 	double TotalPixelBW = 0.0;
4275 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4276 	double     EffectiveLBLatencyHidingY;
4277 	double     EffectiveLBLatencyHidingC;
4278 	double     LinesInDETY[DC__NUM_DPP__MAX];
4279 	double     LinesInDETC[DC__NUM_DPP__MAX];
4280 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4281 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4282 	double     FullDETBufferingTimeY;
4283 	double     FullDETBufferingTimeC;
4284 	double     WritebackDRAMClockChangeLatencyMargin;
4285 	double     WritebackFCLKChangeLatencyMargin;
4286 	double     WritebackLatencyHiding;
4287 	bool    SameTimingForFCLKChange;
4288 
4289 	unsigned int    TotalActiveWriteback = 0;
4290 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4291 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4292 
4293 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4294 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4295 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4296 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4297 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4298 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4299 			+ 10 / DCFClkDeepSleep;
4300 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4301 			+ 10 / DCFClkDeepSleep;
4302 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4303 			+ 10 / DCFClkDeepSleep;
4304 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4305 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4306 
4307 #ifdef __DML_VBA_DEBUG__
4308 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4309 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4310 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4311 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4312 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4313 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4314 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4315 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4316 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4317 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4318 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4319 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4320 #endif
4321 
4322 
4323 	TotalActiveWriteback = 0;
4324 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4325 		if (v->WritebackEnable[k] == true)
4326 			TotalActiveWriteback = TotalActiveWriteback + 1;
4327 	}
4328 
4329 	if (TotalActiveWriteback <= 1) {
4330 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4331 	} else {
4332 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4333 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4334 	}
4335 	if (v->USRRetrainingRequiredFinal)
4336 		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
4337 				+ mmSOCParameters.USRRetrainingLatency;
4338 
4339 	if (TotalActiveWriteback <= 1) {
4340 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4341 				+ mmSOCParameters.WritebackLatency;
4342 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4343 				+ mmSOCParameters.WritebackLatency;
4344 	} else {
4345 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4346 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4347 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4348 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4349 	}
4350 
4351 	if (v->USRRetrainingRequiredFinal)
4352 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4353 				+ mmSOCParameters.USRRetrainingLatency;
4354 
4355 	if (v->USRRetrainingRequiredFinal)
4356 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4357 				+ mmSOCParameters.USRRetrainingLatency;
4358 
4359 #ifdef __DML_VBA_DEBUG__
4360 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4361 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4362 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4363 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4364 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4365 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4366 #endif
4367 
4368 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4369 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4370 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4371 	}
4372 
4373 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4374 
4375 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4376 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4377 
4378 
4379 #ifdef __DML_VBA_DEBUG__
4380 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4381 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4382 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4383 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4384 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4385 #endif
4386 
4387 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4388 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4389 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4390 
4391 		if (UnboundedRequestEnabled) {
4392 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4393 					+ CompressedBufferSizeInkByte * 1024
4394 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4395 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4396 		}
4397 
4398 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4399 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4400 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4401 
4402 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4403 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4404 
4405 		if (v->NumberOfActiveSurfaces > 1) {
4406 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4407 					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4408 							/ v->PixelClock[k] / v->VRatio[k];
4409 		}
4410 
4411 		if (BytePerPixelDETC[k] > 0) {
4412 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4413 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4414 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4415 					/ v->VRatioChroma[k];
4416 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4417 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4418 							/ v->PixelClock[k];
4419 			if (v->NumberOfActiveSurfaces > 1) {
4420 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4421 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4422 								/ v->PixelClock[k] / v->VRatioChroma[k];
4423 			}
4424 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4425 					ActiveClockChangeLatencyHidingC);
4426 		} else {
4427 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4428 		}
4429 
4430 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4431 				- v->Watermark.DRAMClockChangeWatermark;
4432 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4433 				- v->Watermark.FCLKChangeWatermark;
4434 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4435 
4436 		if (v->WritebackEnable[k]) {
4437 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4438 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4439 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4440 			if (v->WritebackPixelFormat[k] == dm_444_64)
4441 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4442 
4443 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4444 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4445 
4446 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4447 					- v->Watermark.WritebackFCLKChangeWatermark;
4448 
4449 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4450 					WritebackFCLKChangeLatencyMargin);
4451 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4452 					WritebackDRAMClockChangeLatencyMargin);
4453 		}
4454 		MaxActiveDRAMClockChangeLatencySupported[k] =
4455 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4456 						0 :
4457 						(ActiveDRAMClockChangeLatencyMargin[k]
4458 								+ mmSOCParameters.DRAMClockChangeLatency);
4459 	}
4460 
4461 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4462 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4463 			if (i == j ||
4464 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4465 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4466 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4467 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4468 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4469 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4470 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4471 				SynchronizedSurfaces[i][j] = true;
4472 			} else {
4473 				SynchronizedSurfaces[i][j] = false;
4474 			}
4475 		}
4476 	}
4477 
4478 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4479 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4480 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4481 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4482 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4483 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4484 			SurfaceWithMinActiveFCLKChangeMargin = k;
4485 		}
4486 	}
4487 
4488 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4489 
4490 	SameTimingForFCLKChange = true;
4491 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4492 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4493 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4494 					(SameTimingForFCLKChange ||
4495 					ActiveFCLKChangeLatencyMargin[k] <
4496 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4497 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4498 			}
4499 			SameTimingForFCLKChange = false;
4500 		}
4501 	}
4502 
4503 	if (MinActiveFCLKChangeMargin > 0) {
4504 		*FCLKChangeSupport = dm_fclock_change_vactive;
4505 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4506 			(PrefetchMode <= 1)) {
4507 		*FCLKChangeSupport = dm_fclock_change_vblank;
4508 	} else {
4509 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4510 	}
4511 
4512 	*USRRetrainingSupport = true;
4513 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4514 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4515 				(USRRetrainingLatencyMargin[k] < 0)) {
4516 			*USRRetrainingSupport = false;
4517 		}
4518 	}
4519 
4520 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4521 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4522 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4523 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4524 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4525 			if (PrefetchMode > 0) {
4526 				DRAMClockChangeSupportNumber = 2;
4527 			} else if (DRAMClockChangeSupportNumber == 0) {
4528 				DRAMClockChangeSupportNumber = 1;
4529 				LastSurfaceWithoutMargin = k;
4530 			} else if (DRAMClockChangeSupportNumber == 1 &&
4531 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4532 				DRAMClockChangeSupportNumber = 2;
4533 			}
4534 		}
4535 	}
4536 
4537 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4538 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4539 			DRAMClockChangeMethod = 1;
4540 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4541 			DRAMClockChangeMethod = 2;
4542 	}
4543 
4544 	if (DRAMClockChangeMethod == 0) {
4545 		if (DRAMClockChangeSupportNumber == 0)
4546 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4547 		else if (DRAMClockChangeSupportNumber == 1)
4548 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4549 		else
4550 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4551 	} else if (DRAMClockChangeMethod == 1) {
4552 		if (DRAMClockChangeSupportNumber == 0)
4553 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4554 		else if (DRAMClockChangeSupportNumber == 1)
4555 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4556 		else
4557 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4558 	} else {
4559 		if (DRAMClockChangeSupportNumber == 0)
4560 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4561 		else if (DRAMClockChangeSupportNumber == 1)
4562 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4563 		else
4564 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4565 	}
4566 
4567 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4568 		unsigned int dst_y_pstate;
4569 		unsigned int src_y_pstate_l;
4570 		unsigned int src_y_pstate_c;
4571 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4572 
4573 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4574 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4575 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4576 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4577 
4578 #ifdef __DML_VBA_DEBUG__
4579 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4580 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4581 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4582 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4583 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4584 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4585 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4586 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4587 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4588 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4589 #endif
4590 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4591 
4592 		if (BytePerPixelDETC[k] > 0) {
4593 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4594 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4595 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4596 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4597 
4598 #ifdef __DML_VBA_DEBUG__
4599 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4600 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4601 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4602 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4603 #endif
4604 		}
4605 	}
4606 #ifdef __DML_VBA_DEBUG__
4607 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4608 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4609 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4610 			__func__, *MinActiveFCLKChangeLatencySupported);
4611 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4612 #endif
4613 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4614 
/*
 * Compute the DISPCLK required by one writeback pipe.
 *
 * Three candidate clocks are derived from PixelClock:
 *   DISPCLK_H  - scaled by the horizontal tap count (in groups of 8) and
 *                divided by WritebackHRatio,
 *   DISPCLK_V  - scaled by the vertical tap count times the destination width
 *                (in groups of 6), plus 8, divided by HTotal,
 *   DISPCLK_HB - scaled by the vertical tap count and a term built from the
 *                destination width and WritebackLineBufferSize, divided by 6
 *                and by WritebackSourceWidth.
 * The largest candidate is handed to dml32_RoundToDFSGranularity() together
 * with DISPCLKDPPCLKVCOSpeed, and that result is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not read here.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int   WritebackSourceWidth,
		unsigned int   WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	/* Tap / width counts rounded up to whole groups. */
	double h_tap_groups = dml_ceil(WritebackHTaps / 8.0, 1);
	double dst_width_groups = dml_ceil(WritebackDestinationWidth / 6.0, 1);
	/* The width * taps product is deliberately left as an integer multiply. */
	double line_buffer_term = WritebackDestinationWidth * WritebackVTaps -
			WritebackLineBufferSize / 57.0;
	double clk_h;
	double clk_v;
	double clk_hb;

	clk_h = PixelClock * h_tap_groups / WritebackHRatio;
	clk_v = PixelClock * (WritebackVTaps * dst_width_groups + 8.0) / HTotal;
	clk_hb = PixelClock * WritebackVTaps * line_buffer_term / 6.0 / WritebackSourceWidth;

	return dml32_RoundToDFSGranularity(dml_max3(clk_h, clk_v, clk_hb), 1, DISPCLKDPPCLKVCOSpeed);
}
4636 
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4637 void dml32_CalculateMinAndMaxPrefetchMode(
4638 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4639 		unsigned int             *MinPrefetchMode,
4640 		unsigned int             *MaxPrefetchMode)
4641 {
4642 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4643 		*MinPrefetchMode = 3;
4644 		*MaxPrefetchMode = 3;
4645 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4646 		*MinPrefetchMode = 2;
4647 		*MaxPrefetchMode = 2;
4648 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4649 		*MinPrefetchMode = 1;
4650 		*MaxPrefetchMode = 1;
4651 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4652 		*MinPrefetchMode = 0;
4653 		*MaxPrefetchMode = 0;
4654 	} else {
4655 		*MinPrefetchMode = 0;
4656 		*MaxPrefetchMode = 3;
4657 	}
4658 } // CalculateMinAndMaxPrefetchMode
4659 
/*
 * Fill the per-surface line, request and cursor delivery time arrays for
 * every surface k < NumberOfActiveSurfaces.
 *
 * Line delivery time (luma and chroma, active and prefetch):
 *   vertical ratio <= 1 : swath_width_ub * DPPPerSurface / HRatio / PixelClock
 *   otherwise           : swath_width_ub / PSCL_THROUGHPUT / Dppclk
 * The active variants test VRatio / VRatioChroma, the prefetch variants test
 * VRatioPrefetchY / VRatioPrefetchC. Chroma outputs are 0 when
 * BytePerPixelC[k] == 0.
 *
 * Request delivery time is the line delivery time divided by the number of
 * requests per swath, i.e. swath_width_ub divided (integer division) by the
 * block width, or by the block height when IsVertical(SourceRotation[k]).
 *
 * Cursor request delivery time uses only cursor 0 of each surface
 * (CursorWidth[k][0], CursorBPP[k][0]) and is 0 when NumberOfCursors[k] == 0.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int             NumberOfActiveSurfaces,
		double              VRatio[],
		double              VRatioChroma[],
		double              VRatioPrefetchY[],
		double              VRatioPrefetchC[],
		unsigned int             swath_width_luma_ub[],
		unsigned int             swath_width_chroma_ub[],
		unsigned int             DPPPerSurface[],
		double              HRatio[],
		double              HRatioChroma[],
		double              PixelClock[],
		double              PSCL_THROUGHPUT[],
		double              PSCL_THROUGHPUT_CHROMA[],
		double              Dppclk[],
		unsigned int             BytePerPixelC[],
		enum dm_rotation_angle   SourceRotation[],
		unsigned int             NumberOfCursors[],
		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int             BlockWidth256BytesY[],
		unsigned int             BlockHeight256BytesY[],
		unsigned int             BlockWidth256BytesC[],
		unsigned int             BlockHeight256BytesC[],

		/* Output */
		double              DisplayPipeLineDeliveryTimeLuma[],
		double              DisplayPipeLineDeliveryTimeChroma[],
		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeLuma[],
		double              DisplayPipeRequestDeliveryTimeChroma[],
		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double              CursorRequestDeliveryTime[],
		double              CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: time to deliver one line of the swath. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		/* Luma: same two formulas, selected by the active or prefetch ratio. */
		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		/* Chroma: zero for surfaces without a chroma plane. */
		if (BytePerPixelC[k] == 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		} else {
			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: time per request = line time / requests per swath. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		double reqs_per_swath;

		/* Integer division; the block dimension used depends on rotation. */
		reqs_per_swath = swath_width_luma_ub[k] /
				(IsVertical(SourceRotation[k]) ? BlockHeight256BytesY[k] : BlockWidth256BytesY[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, reqs_per_swath);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / reqs_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / reqs_per_swath;

		if (BytePerPixelC[k] != 0) {
			reqs_per_swath = swath_width_chroma_ub[k] /
					(IsVertical(SourceRotation[k]) ? BlockHeight256BytesC[k] : BlockWidth256BytesC[k]);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, reqs_per_swath);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / reqs_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / reqs_per_swath;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request times, based on cursor 0 of each surface. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const double cursor_width = (double) CursorWidth[k][0];
		unsigned int reqs_per_cursor_line;

		/* width * bpp / 8 bytes per cursor line, in 256-byte units, rounded up */
		reqs_per_cursor_line = dml_ceil(cursor_width * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);

		if (NumberOfCursors[k] == 0) {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		} else {
			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
					cursor_width / HRatio[k] / PixelClock[k] / reqs_per_cursor_line :
					cursor_width / PSCL_THROUGHPUT[k] / Dppclk[k] / reqs_per_cursor_line;
			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
					cursor_width / HRatio[k] / PixelClock[k] / reqs_per_cursor_line :
					cursor_width / PSCL_THROUGHPUT[k] / Dppclk[k] / reqs_per_cursor_line;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes
4840 
dml32_CalculateMetaAndPTETimes(bool use_one_row_for_frame[],unsigned int NumberOfActiveSurfaces,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],enum dm_rotation_angle SourceRotation[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])4841 void dml32_CalculateMetaAndPTETimes(
4842 		bool use_one_row_for_frame[],
4843 		unsigned int NumberOfActiveSurfaces,
4844 		bool GPUVMEnable,
4845 		unsigned int MetaChunkSize,
4846 		unsigned int MinMetaChunkSizeBytes,
4847 		unsigned int    HTotal[],
4848 		double  VRatio[],
4849 		double  VRatioChroma[],
4850 		double  DestinationLinesToRequestRowInVBlank[],
4851 		double  DestinationLinesToRequestRowInImmediateFlip[],
4852 		bool DCCEnable[],
4853 		double  PixelClock[],
4854 		unsigned int BytePerPixelY[],
4855 		unsigned int BytePerPixelC[],
4856 		enum dm_rotation_angle SourceRotation[],
4857 		unsigned int dpte_row_height[],
4858 		unsigned int dpte_row_height_chroma[],
4859 		unsigned int meta_row_width[],
4860 		unsigned int meta_row_width_chroma[],
4861 		unsigned int meta_row_height[],
4862 		unsigned int meta_row_height_chroma[],
4863 		unsigned int meta_req_width[],
4864 		unsigned int meta_req_width_chroma[],
4865 		unsigned int meta_req_height[],
4866 		unsigned int meta_req_height_chroma[],
4867 		unsigned int dpte_group_bytes[],
4868 		unsigned int    PTERequestSizeY[],
4869 		unsigned int    PTERequestSizeC[],
4870 		unsigned int    PixelPTEReqWidthY[],
4871 		unsigned int    PixelPTEReqHeightY[],
4872 		unsigned int    PixelPTEReqWidthC[],
4873 		unsigned int    PixelPTEReqHeightC[],
4874 		unsigned int    dpte_row_width_luma_ub[],
4875 		unsigned int    dpte_row_width_chroma_ub[],
4876 
4877 		/* Output */
4878 		double DST_Y_PER_PTE_ROW_NOM_L[],
4879 		double DST_Y_PER_PTE_ROW_NOM_C[],
4880 		double DST_Y_PER_META_ROW_NOM_L[],
4881 		double DST_Y_PER_META_ROW_NOM_C[],
4882 		double TimePerMetaChunkNominal[],
4883 		double TimePerChromaMetaChunkNominal[],
4884 		double TimePerMetaChunkVBlank[],
4885 		double TimePerChromaMetaChunkVBlank[],
4886 		double TimePerMetaChunkFlip[],
4887 		double TimePerChromaMetaChunkFlip[],
4888 		double time_per_pte_group_nom_luma[],
4889 		double time_per_pte_group_vblank_luma[],
4890 		double time_per_pte_group_flip_luma[],
4891 		double time_per_pte_group_nom_chroma[],
4892 		double time_per_pte_group_vblank_chroma[],
4893 		double time_per_pte_group_flip_chroma[])
4894 {
4895 	unsigned int   meta_chunk_width;
4896 	unsigned int   min_meta_chunk_width;
4897 	unsigned int   meta_chunk_per_row_int;
4898 	unsigned int   meta_row_remainder;
4899 	unsigned int   meta_chunk_threshold;
4900 	unsigned int   meta_chunks_per_row_ub;
4901 	unsigned int   meta_chunk_width_chroma;
4902 	unsigned int   min_meta_chunk_width_chroma;
4903 	unsigned int   meta_chunk_per_row_int_chroma;
4904 	unsigned int   meta_row_remainder_chroma;
4905 	unsigned int   meta_chunk_threshold_chroma;
4906 	unsigned int   meta_chunks_per_row_ub_chroma;
4907 	unsigned int   dpte_group_width_luma;
4908 	unsigned int   dpte_groups_per_row_luma_ub;
4909 	unsigned int   dpte_group_width_chroma;
4910 	unsigned int   dpte_groups_per_row_chroma_ub;
4911 	unsigned int k;
4912 
4913 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4914 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4915 		if (BytePerPixelC[k] == 0)
4916 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4917 		else
4918 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4919 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4920 		if (BytePerPixelC[k] == 0)
4921 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4922 		else
4923 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4924 	}
4925 
4926 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4927 		if (DCCEnable[k] == true) {
4928 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4929 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4930 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4931 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4932 			if (!IsVertical(SourceRotation[k]))
4933 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4934 			else
4935 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4936 
4937 			if (meta_row_remainder <= meta_chunk_threshold)
4938 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4939 			else
4940 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4941 
4942 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4943 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4944 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4945 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4946 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4947 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4948 			if (BytePerPixelC[k] == 0) {
4949 				TimePerChromaMetaChunkNominal[k] = 0;
4950 				TimePerChromaMetaChunkVBlank[k] = 0;
4951 				TimePerChromaMetaChunkFlip[k] = 0;
4952 			} else {
4953 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4954 						meta_row_height_chroma[k];
4955 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4956 						meta_row_height_chroma[k];
4957 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4958 						meta_chunk_width_chroma;
4959 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4960 				if (!IsVertical(SourceRotation[k])) {
4961 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4962 							meta_req_width_chroma[k];
4963 				} else {
4964 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4965 							meta_req_height_chroma[k];
4966 				}
4967 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4968 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4969 				else
4970 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4971 
4972 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4973 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4974 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4975 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4976 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4977 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4978 			}
4979 		} else {
4980 			TimePerMetaChunkNominal[k] = 0;
4981 			TimePerMetaChunkVBlank[k] = 0;
4982 			TimePerMetaChunkFlip[k] = 0;
4983 			TimePerChromaMetaChunkNominal[k] = 0;
4984 			TimePerChromaMetaChunkVBlank[k] = 0;
4985 			TimePerChromaMetaChunkFlip[k] = 0;
4986 		}
4987 	}
4988 
4989 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4990 		if (GPUVMEnable == true) {
4991 			if (!IsVertical(SourceRotation[k])) {
4992 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
4993 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4994 			} else {
4995 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
4996 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4997 			}
4998 
4999 			if (use_one_row_for_frame[k]) {
5000 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5001 						(double) dpte_group_width_luma / 2.0, 1.0);
5002 			} else {
5003 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5004 						(double) dpte_group_width_luma, 1.0);
5005 			}
5006 #ifdef __DML_VBA_DEBUG__
5007 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5008 					__func__, k, use_one_row_for_frame[k]);
5009 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5010 					__func__, k, dpte_group_bytes[k]);
5011 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5012 					__func__, k, PTERequestSizeY[k]);
5013 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5014 					__func__, k, PixelPTEReqWidthY[k]);
5015 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5016 					__func__, k, PixelPTEReqHeightY[k]);
5017 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5018 					__func__, k, dpte_row_width_luma_ub[k]);
5019 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5020 					__func__, k, dpte_group_width_luma);
5021 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5022 					__func__, k, dpte_groups_per_row_luma_ub);
5023 #endif
5024 
5025 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5026 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5027 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5028 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5029 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5030 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5031 			if (BytePerPixelC[k] == 0) {
5032 				time_per_pte_group_nom_chroma[k] = 0;
5033 				time_per_pte_group_vblank_chroma[k] = 0;
5034 				time_per_pte_group_flip_chroma[k] = 0;
5035 			} else {
5036 				if (!IsVertical(SourceRotation[k])) {
5037 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5038 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5039 				} else {
5040 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5041 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5042 				}
5043 
5044 				if (use_one_row_for_frame[k]) {
5045 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5046 							(double) dpte_group_width_chroma / 2.0, 1.0);
5047 				} else {
5048 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5049 							(double) dpte_group_width_chroma, 1.0);
5050 				}
5051 #ifdef __DML_VBA_DEBUG__
5052 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5053 						__func__, k, dpte_row_width_chroma_ub[k]);
5054 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5055 						__func__, k, dpte_group_width_chroma);
5056 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5057 						__func__, k, dpte_groups_per_row_chroma_ub);
5058 #endif
5059 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5060 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5061 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5062 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5063 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5064 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5065 			}
5066 		} else {
5067 			time_per_pte_group_nom_luma[k] = 0;
5068 			time_per_pte_group_vblank_luma[k] = 0;
5069 			time_per_pte_group_flip_luma[k] = 0;
5070 			time_per_pte_group_nom_chroma[k] = 0;
5071 			time_per_pte_group_vblank_chroma[k] = 0;
5072 			time_per_pte_group_flip_chroma[k] = 0;
5073 		}
5074 #ifdef __DML_VBA_DEBUG__
5075 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5076 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5077 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5078 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5079 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5080 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5081 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5082 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5083 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5084 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5085 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5086 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5087 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5088 				__func__, k, TimePerMetaChunkNominal[k]);
5089 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5090 				__func__, k, TimePerMetaChunkVBlank[k]);
5091 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5092 				__func__, k, TimePerMetaChunkFlip[k]);
5093 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5094 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5095 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5096 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5097 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5098 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5099 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5100 				__func__, k, time_per_pte_group_nom_luma[k]);
5101 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5102 				__func__, k, time_per_pte_group_vblank_luma[k]);
5103 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5104 				__func__, k, time_per_pte_group_flip_luma[k]);
5105 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5106 				__func__, k, time_per_pte_group_nom_chroma[k]);
5107 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5108 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5109 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5110 				__func__, k, time_per_pte_group_flip_chroma[k]);
5111 #endif
5112 	}
5113 } // CalculateMetaAndPTETimes
5114 
/*
 * bytes_per_frame divided by group_bytes, rounded up to the next whole
 * number (dml_ceil with a granularity of 1). The result stays a double so
 * callers can sum several counts before converting to an integer.
 */
static double dml32_vm_group_count(unsigned int bytes_per_frame, unsigned int group_bytes)
{
	return dml_ceil((double) bytes_per_frame / (double) group_bytes, 1.0);
}

/*
 * For every active surface k, fill in the four output arrays:
 *
 *   TimePerVMGroupVBlank[k]   / TimePerVMGroupFlip[k]
 *   TimePerVMRequestVBlank[k] / TimePerVMRequestFlip[k]
 *
 * Each value is (destination lines * HTotal / PixelClock) for the vblank or
 * immediate-flip case, divided by a per-surface group count or request
 * count. The counts are built from the dpde0 and/or meta PTE byte totals:
 *
 *   - DCC off:                        dpde0 bytes only
 *   - DCC on, one page table level:   meta PTE bytes only
 *   - DCC on, any other level count:  dpde0 + meta PTE bytes, plus a fixed
 *                                     1 (no chroma) or 2 (chroma) groups
 *
 * The chroma byte totals are included only when BytePerPixelC[k] > 0.
 * Request counts are the byte totals divided by 64 (integer division).
 * With more than two page table levels all four results are halved.
 *
 * All four outputs are 0 when GPUVMEnable is false, or when the surface has
 * DCC disabled and GPUVMMaxPageTableLevels <= 1.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int group_count;
		unsigned int request_count;
		double groups;
		double vblank_window;
		double flip_window;
		bool has_chroma;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing to fetch for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			has_chroma = BytePerPixelC[k] > 0;

			if (DCCEnable[k] && GPUVMMaxPageTableLevels != 1) {
				/* Both dpde0 and meta PTE bytes contribute. */
				if (has_chroma) {
					groups = 2 +
						dml32_vm_group_count(dpde0_bytes_per_frame_ub_l[k],
								vm_group_bytes[k]) +
						dml32_vm_group_count(dpde0_bytes_per_frame_ub_c[k],
								vm_group_bytes[k]) +
						dml32_vm_group_count(meta_pte_bytes_per_frame_ub_l[k],
								vm_group_bytes[k]) +
						dml32_vm_group_count(meta_pte_bytes_per_frame_ub_c[k],
								vm_group_bytes[k]);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64 +
							meta_pte_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					groups = 1 +
						dml32_vm_group_count(dpde0_bytes_per_frame_ub_l[k],
								vm_group_bytes[k]) +
						dml32_vm_group_count(meta_pte_bytes_per_frame_ub_l[k],
								vm_group_bytes[k]);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				/*
				 * Only one kind of byte total contributes: dpde0
				 * with DCC off, meta PTE with DCC on and a single
				 * page table level.
				 */
				const unsigned int *bytes_l = DCCEnable[k] ?
						meta_pte_bytes_per_frame_ub_l : dpde0_bytes_per_frame_ub_l;
				const unsigned int *bytes_c = DCCEnable[k] ?
						meta_pte_bytes_per_frame_ub_c : dpde0_bytes_per_frame_ub_c;

				groups = dml32_vm_group_count(bytes_l[k], vm_group_bytes[k]);
				request_count = bytes_l[k] / 64;
				if (has_chroma) {
					groups += dml32_vm_group_count(bytes_c[k], vm_group_bytes[k]);
					request_count += bytes_c[k] / 64;
				}
			}

			/* The group total is carried as an unsigned integer from here on. */
			group_count = groups;

			vblank_window = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k];
			flip_window = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];

			TimePerVMGroupVBlank[k] = vblank_window / group_count;
			TimePerVMGroupFlip[k] = flip_window / group_count;
			TimePerVMRequestVBlank[k] = vblank_window / request_count;
			TimePerVMRequestFlip[k] = flip_window / request_count;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5268 
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5269 void dml32_CalculateDCCConfiguration(
5270 		bool             DCCEnabled,
5271 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5272 		enum source_format_class SourcePixelFormat,
5273 		unsigned int             SurfaceWidthLuma,
5274 		unsigned int             SurfaceWidthChroma,
5275 		unsigned int             SurfaceHeightLuma,
5276 		unsigned int             SurfaceHeightChroma,
5277 		unsigned int                nomDETInKByte,
5278 		unsigned int             RequestHeight256ByteLuma,
5279 		unsigned int             RequestHeight256ByteChroma,
5280 		enum dm_swizzle_mode     TilingFormat,
5281 		unsigned int             BytePerPixelY,
5282 		unsigned int             BytePerPixelC,
5283 		double              BytePerPixelDETY,
5284 		double              BytePerPixelDETC,
5285 		enum dm_rotation_angle   SourceRotation,
5286 		/* Output */
5287 		unsigned int        *MaxUncompressedBlockLuma,
5288 		unsigned int        *MaxUncompressedBlockChroma,
5289 		unsigned int        *MaxCompressedBlockLuma,
5290 		unsigned int        *MaxCompressedBlockChroma,
5291 		unsigned int        *IndependentBlockLuma,
5292 		unsigned int        *IndependentBlockChroma)
5293 {
5294 	typedef enum {
5295 		REQ_256Bytes,
5296 		REQ_128BytesNonContiguous,
5297 		REQ_128BytesContiguous,
5298 		REQ_NA
5299 	} RequestType;
5300 
5301 	RequestType   RequestLuma;
5302 	RequestType   RequestChroma;
5303 
5304 	unsigned int   segment_order_horz_contiguous_luma;
5305 	unsigned int   segment_order_horz_contiguous_chroma;
5306 	unsigned int   segment_order_vert_contiguous_luma;
5307 	unsigned int   segment_order_vert_contiguous_chroma;
5308 	unsigned int req128_horz_wc_l;
5309 	unsigned int req128_horz_wc_c;
5310 	unsigned int req128_vert_wc_l;
5311 	unsigned int req128_vert_wc_c;
5312 	unsigned int MAS_vp_horz_limit;
5313 	unsigned int MAS_vp_vert_limit;
5314 	unsigned int max_vp_horz_width;
5315 	unsigned int max_vp_vert_height;
5316 	unsigned int eff_surf_width_l;
5317 	unsigned int eff_surf_width_c;
5318 	unsigned int eff_surf_height_l;
5319 	unsigned int eff_surf_height_c;
5320 	unsigned int full_swath_bytes_horz_wc_l;
5321 	unsigned int full_swath_bytes_horz_wc_c;
5322 	unsigned int full_swath_bytes_vert_wc_l;
5323 	unsigned int full_swath_bytes_vert_wc_c;
5324 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5325 
5326 	unsigned int   yuv420;
5327 	unsigned int   horz_div_l;
5328 	unsigned int   horz_div_c;
5329 	unsigned int   vert_div_l;
5330 	unsigned int   vert_div_c;
5331 
5332 	unsigned int     swath_buf_size;
5333 	double   detile_buf_vp_horz_limit;
5334 	double   detile_buf_vp_vert_limit;
5335 
5336 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5337 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5338 	horz_div_l = 1;
5339 	horz_div_c = 1;
5340 	vert_div_l = 1;
5341 	vert_div_c = 1;
5342 
5343 	if (BytePerPixelY == 1)
5344 		vert_div_l = 0;
5345 	if (BytePerPixelC == 1)
5346 		vert_div_c = 0;
5347 
5348 	if (BytePerPixelC == 0) {
5349 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5350 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5351 				BytePerPixelY / (1 + horz_div_l));
5352 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5353 				(1 + vert_div_l));
5354 	} else {
5355 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5356 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5357 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5358 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5359 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5360 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5361 				(1 + vert_div_c) / (1 + yuv420));
5362 	}
5363 
5364 	if (SourcePixelFormat == dm_420_10) {
5365 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5366 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5367 	}
5368 
5369 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5370 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5371 
5372 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5373 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5374 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5375 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5376 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5377 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5378 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5379 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5380 
5381 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5382 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5383 	if (BytePerPixelC > 0) {
5384 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5385 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5386 	} else {
5387 		full_swath_bytes_horz_wc_c = 0;
5388 		full_swath_bytes_vert_wc_c = 0;
5389 	}
5390 
5391 	if (SourcePixelFormat == dm_420_10) {
5392 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5393 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5394 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5395 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5396 	}
5397 
5398 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5399 		req128_horz_wc_l = 0;
5400 		req128_horz_wc_c = 0;
5401 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5402 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5403 		req128_horz_wc_l = 0;
5404 		req128_horz_wc_c = 1;
5405 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5406 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5407 		req128_horz_wc_l = 1;
5408 		req128_horz_wc_c = 0;
5409 	} else {
5410 		req128_horz_wc_l = 1;
5411 		req128_horz_wc_c = 1;
5412 	}
5413 
5414 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5415 		req128_vert_wc_l = 0;
5416 		req128_vert_wc_c = 0;
5417 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5418 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5419 		req128_vert_wc_l = 0;
5420 		req128_vert_wc_c = 1;
5421 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5422 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5423 		req128_vert_wc_l = 1;
5424 		req128_vert_wc_c = 0;
5425 	} else {
5426 		req128_vert_wc_l = 1;
5427 		req128_vert_wc_c = 1;
5428 	}
5429 
5430 	if (BytePerPixelY == 2) {
5431 		segment_order_horz_contiguous_luma = 0;
5432 		segment_order_vert_contiguous_luma = 1;
5433 	} else {
5434 		segment_order_horz_contiguous_luma = 1;
5435 		segment_order_vert_contiguous_luma = 0;
5436 	}
5437 
5438 	if (BytePerPixelC == 2) {
5439 		segment_order_horz_contiguous_chroma = 0;
5440 		segment_order_vert_contiguous_chroma = 1;
5441 	} else {
5442 		segment_order_horz_contiguous_chroma = 1;
5443 		segment_order_vert_contiguous_chroma = 0;
5444 	}
5445 #ifdef __DML_VBA_DEBUG__
5446 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5447 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5448 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5449 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5450 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5451 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5452 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5453 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5454 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5455 			__func__, segment_order_horz_contiguous_chroma);
5456 #endif
5457 
5458 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5459 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5460 			RequestLuma = REQ_256Bytes;
5461 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5462 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5463 			RequestLuma = REQ_128BytesNonContiguous;
5464 		else
5465 			RequestLuma = REQ_128BytesContiguous;
5466 
5467 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5468 			RequestChroma = REQ_256Bytes;
5469 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5470 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5471 			RequestChroma = REQ_128BytesNonContiguous;
5472 		else
5473 			RequestChroma = REQ_128BytesContiguous;
5474 
5475 	} else if (!IsVertical(SourceRotation)) {
5476 		if (req128_horz_wc_l == 0)
5477 			RequestLuma = REQ_256Bytes;
5478 		else if (segment_order_horz_contiguous_luma == 0)
5479 			RequestLuma = REQ_128BytesNonContiguous;
5480 		else
5481 			RequestLuma = REQ_128BytesContiguous;
5482 
5483 		if (req128_horz_wc_c == 0)
5484 			RequestChroma = REQ_256Bytes;
5485 		else if (segment_order_horz_contiguous_chroma == 0)
5486 			RequestChroma = REQ_128BytesNonContiguous;
5487 		else
5488 			RequestChroma = REQ_128BytesContiguous;
5489 
5490 	} else {
5491 		if (req128_vert_wc_l == 0)
5492 			RequestLuma = REQ_256Bytes;
5493 		else if (segment_order_vert_contiguous_luma == 0)
5494 			RequestLuma = REQ_128BytesNonContiguous;
5495 		else
5496 			RequestLuma = REQ_128BytesContiguous;
5497 
5498 		if (req128_vert_wc_c == 0)
5499 			RequestChroma = REQ_256Bytes;
5500 		else if (segment_order_vert_contiguous_chroma == 0)
5501 			RequestChroma = REQ_128BytesNonContiguous;
5502 		else
5503 			RequestChroma = REQ_128BytesContiguous;
5504 	}
5505 
5506 	if (RequestLuma == REQ_256Bytes) {
5507 		*MaxUncompressedBlockLuma = 256;
5508 		*MaxCompressedBlockLuma = 256;
5509 		*IndependentBlockLuma = 0;
5510 	} else if (RequestLuma == REQ_128BytesContiguous) {
5511 		*MaxUncompressedBlockLuma = 256;
5512 		*MaxCompressedBlockLuma = 128;
5513 		*IndependentBlockLuma = 128;
5514 	} else {
5515 		*MaxUncompressedBlockLuma = 256;
5516 		*MaxCompressedBlockLuma = 64;
5517 		*IndependentBlockLuma = 64;
5518 	}
5519 
5520 	if (RequestChroma == REQ_256Bytes) {
5521 		*MaxUncompressedBlockChroma = 256;
5522 		*MaxCompressedBlockChroma = 256;
5523 		*IndependentBlockChroma = 0;
5524 	} else if (RequestChroma == REQ_128BytesContiguous) {
5525 		*MaxUncompressedBlockChroma = 256;
5526 		*MaxCompressedBlockChroma = 128;
5527 		*IndependentBlockChroma = 128;
5528 	} else {
5529 		*MaxUncompressedBlockChroma = 256;
5530 		*MaxCompressedBlockChroma = 64;
5531 		*IndependentBlockChroma = 64;
5532 	}
5533 
5534 	if (DCCEnabled != true || BytePerPixelC == 0) {
5535 		*MaxUncompressedBlockChroma = 0;
5536 		*MaxCompressedBlockChroma = 0;
5537 		*IndependentBlockChroma = 0;
5538 	}
5539 
5540 	if (DCCEnabled != true) {
5541 		*MaxUncompressedBlockLuma = 0;
5542 		*MaxCompressedBlockLuma = 0;
5543 		*IndependentBlockLuma = 0;
5544 	}
5545 
5546 #ifdef __DML_VBA_DEBUG__
5547 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5548 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5549 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5550 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5551 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5552 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5553 #endif
5554 
5555 } // CalculateDCCConfiguration
5556 
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5557 void dml32_CalculateStutterEfficiency(
5558 		unsigned int      CompressedBufferSizeInkByte,
5559 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5560 		bool   UnboundedRequestEnabled,
5561 		unsigned int      MetaFIFOSizeInKEntries,
5562 		unsigned int      ZeroSizeBufferEntries,
5563 		unsigned int      PixelChunkSizeInKByte,
5564 		unsigned int   NumberOfActiveSurfaces,
5565 		unsigned int      ROBBufferSizeInKByte,
5566 		double    TotalDataReadBandwidth,
5567 		double    DCFCLK,
5568 		double    ReturnBW,
5569 		unsigned int      CompbufReservedSpace64B,
5570 		unsigned int      CompbufReservedSpaceZs,
5571 		double    SRExitTime,
5572 		double    SRExitZ8Time,
5573 		bool   SynchronizeTimingsFinal,
5574 		unsigned int   BlendingAndTiming[],
5575 		double    StutterEnterPlusExitWatermark,
5576 		double    Z8StutterEnterPlusExitWatermark,
5577 		bool   ProgressiveToInterlaceUnitInOPP,
5578 		bool   Interlace[],
5579 		double    MinTTUVBlank[],
5580 		unsigned int   DPPPerSurface[],
5581 		unsigned int      DETBufferSizeY[],
5582 		unsigned int   BytePerPixelY[],
5583 		double    BytePerPixelDETY[],
5584 		double      SwathWidthY[],
5585 		unsigned int   SwathHeightY[],
5586 		unsigned int   SwathHeightC[],
5587 		double    NetDCCRateLuma[],
5588 		double    NetDCCRateChroma[],
5589 		double    DCCFractionOfZeroSizeRequestsLuma[],
5590 		double    DCCFractionOfZeroSizeRequestsChroma[],
5591 		unsigned int      HTotal[],
5592 		unsigned int      VTotal[],
5593 		double    PixelClock[],
5594 		double    VRatio[],
5595 		enum dm_rotation_angle SourceRotation[],
5596 		unsigned int   BlockHeight256BytesY[],
5597 		unsigned int   BlockWidth256BytesY[],
5598 		unsigned int   BlockHeight256BytesC[],
5599 		unsigned int   BlockWidth256BytesC[],
5600 		unsigned int   DCCYMaxUncompressedBlock[],
5601 		unsigned int   DCCCMaxUncompressedBlock[],
5602 		unsigned int      VActive[],
5603 		bool   DCCEnable[],
5604 		bool   WritebackEnable[],
5605 		double    ReadBandwidthSurfaceLuma[],
5606 		double    ReadBandwidthSurfaceChroma[],
5607 		double    meta_row_bw[],
5608 		double    dpte_row_bw[],
5609 
5610 		/* Output */
5611 		double   *StutterEfficiencyNotIncludingVBlank,
5612 		double   *StutterEfficiency,
5613 		unsigned int     *NumberOfStutterBurstsPerFrame,
5614 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5615 		double   *Z8StutterEfficiency,
5616 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5617 		double   *StutterPeriod,
5618 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5619 {
5620 
5621 	bool FoundCriticalSurface = false;
5622 	unsigned int SwathSizeCriticalSurface = 0;
5623 	unsigned int LastChunkOfSwathSize;
5624 	unsigned int MissingPartOfLastSwathOfDETSize;
5625 	double LastZ8StutterPeriod = 0.0;
5626 	double LastStutterPeriod = 0.0;
5627 	unsigned int TotalNumberOfActiveOTG = 0;
5628 	double doublePixelClock;
5629 	unsigned int doubleHTotal;
5630 	unsigned int doubleVTotal;
5631 	bool SameTiming = true;
5632 	double DETBufferingTimeY;
5633 	double SwathWidthYCriticalSurface = 0.0;
5634 	double SwathHeightYCriticalSurface = 0.0;
5635 	double VActiveTimeCriticalSurface = 0.0;
5636 	double FrameTimeCriticalSurface = 0.0;
5637 	unsigned int BytePerPixelYCriticalSurface = 0;
5638 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5639 	unsigned int DETBufferSizeYCriticalSurface = 0;
5640 	double MinTTUVBlankCriticalSurface = 0.0;
5641 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5642 	bool doublePlaneCriticalSurface = 0;
5643 	bool doublePipeCriticalSurface = 0;
5644 	double TotalCompressedReadBandwidth;
5645 	double TotalRowReadBandwidth;
5646 	double AverageDCCCompressionRate;
5647 	double EffectiveCompressedBufferSize;
5648 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5649 	double StutterBurstTime;
5650 	unsigned int TotalActiveWriteback;
5651 	double LinesInDETY;
5652 	double LinesInDETYRoundedDownToSwath;
5653 	double MaximumEffectiveCompressionLuma;
5654 	double MaximumEffectiveCompressionChroma;
5655 	double TotalZeroSizeRequestReadBandwidth;
5656 	double TotalZeroSizeCompressedReadBandwidth;
5657 	double AverageDCCZeroSizeFraction;
5658 	double AverageZeroSizeCompressionRate;
5659 	unsigned int k;
5660 
5661 	TotalZeroSizeRequestReadBandwidth = 0;
5662 	TotalZeroSizeCompressedReadBandwidth = 0;
5663 	TotalRowReadBandwidth = 0;
5664 	TotalCompressedReadBandwidth = 0;
5665 
5666 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5667 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5668 			if (DCCEnable[k] == true) {
5669 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5670 						|| (!IsVertical(SourceRotation[k])
5671 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5672 						|| DCCYMaxUncompressedBlock[k] < 256) {
5673 					MaximumEffectiveCompressionLuma = 2;
5674 				} else {
5675 					MaximumEffectiveCompressionLuma = 4;
5676 				}
5677 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5678 						+ ReadBandwidthSurfaceLuma[k]
5679 								/ dml_min(NetDCCRateLuma[k],
5680 										MaximumEffectiveCompressionLuma);
5681 #ifdef __DML_VBA_DEBUG__
5682 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5683 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5684 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5685 						__func__, k, NetDCCRateLuma[k]);
5686 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5687 						__func__, k, MaximumEffectiveCompressionLuma);
5688 #endif
5689 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5690 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5691 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5692 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5693 								/ MaximumEffectiveCompressionLuma;
5694 
5695 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5696 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5697 							|| (!IsVertical(SourceRotation[k])
5698 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5699 							|| DCCCMaxUncompressedBlock[k] < 256) {
5700 						MaximumEffectiveCompressionChroma = 2;
5701 					} else {
5702 						MaximumEffectiveCompressionChroma = 4;
5703 					}
5704 					TotalCompressedReadBandwidth =
5705 							TotalCompressedReadBandwidth
5706 							+ ReadBandwidthSurfaceChroma[k]
5707 							/ dml_min(NetDCCRateChroma[k],
5708 							MaximumEffectiveCompressionChroma);
5709 #ifdef __DML_VBA_DEBUG__
5710 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5711 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5712 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5713 							__func__, k, NetDCCRateChroma[k]);
5714 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5715 							__func__, k, MaximumEffectiveCompressionChroma);
5716 #endif
5717 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5718 							+ ReadBandwidthSurfaceChroma[k]
5719 									* DCCFractionOfZeroSizeRequestsChroma[k];
5720 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5721 							+ ReadBandwidthSurfaceChroma[k]
5722 									* DCCFractionOfZeroSizeRequestsChroma[k]
5723 									/ MaximumEffectiveCompressionChroma;
5724 				}
5725 			} else {
5726 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5727 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5728 			}
5729 			TotalRowReadBandwidth = TotalRowReadBandwidth
5730 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5731 		}
5732 	}
5733 
5734 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5735 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5736 
5737 #ifdef __DML_VBA_DEBUG__
5738 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5739 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5740 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5741 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5742 			__func__, TotalZeroSizeCompressedReadBandwidth);
5743 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5744 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5745 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5746 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5747 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5748 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5749 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5750 #endif
5751 	if (AverageDCCZeroSizeFraction == 1) {
5752 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5753 				/ TotalZeroSizeCompressedReadBandwidth;
5754 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5755 				* AverageZeroSizeCompressionRate
5756 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5757 						* AverageZeroSizeCompressionRate;
5758 	} else if (AverageDCCZeroSizeFraction > 0) {
5759 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5760 				/ TotalZeroSizeCompressedReadBandwidth;
5761 		EffectiveCompressedBufferSize = dml_min(
5762 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5763 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5764 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5765 					+ 1 / AverageDCCCompressionRate))
5766 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5767 					* AverageDCCCompressionRate,
5768 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5769 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5770 
5771 #ifdef __DML_VBA_DEBUG__
5772 		dml_print("DML::%s: min 1 = %f\n", __func__,
5773 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5774 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5775 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5776 						AverageDCCCompressionRate));
5777 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5778 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5779 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5780 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5781 #endif
5782 	} else {
5783 		EffectiveCompressedBufferSize = dml_min(
5784 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5785 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5786 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5787 						* AverageDCCCompressionRate;
5788 
5789 #ifdef __DML_VBA_DEBUG__
5790 		dml_print("DML::%s: min 1 = %f\n", __func__,
5791 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5792 		dml_print("DML::%s: min 2 = %f\n", __func__,
5793 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5794 #endif
5795 	}
5796 
5797 #ifdef __DML_VBA_DEBUG__
5798 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5799 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5800 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5801 #endif
5802 
5803 	*StutterPeriod = 0;
5804 
5805 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5806 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5807 			LinesInDETY = ((double) DETBufferSizeY[k]
5808 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5809 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5810 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5811 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5812 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5813 					/ VRatio[k];
5814 #ifdef __DML_VBA_DEBUG__
5815 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5816 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5817 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5818 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5819 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5820 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5821 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5822 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5823 					__func__, k, LinesInDETYRoundedDownToSwath);
5824 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5825 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5826 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5827 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5828 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5829 #endif
5830 
5831 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5832 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5833 
5834 				FoundCriticalSurface = true;
5835 				*StutterPeriod = DETBufferingTimeY;
5836 				FrameTimeCriticalSurface = (
5837 						isInterlaceTiming ?
5838 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5839 						* (double) HTotal[k] / PixelClock[k];
5840 				VActiveTimeCriticalSurface = (
5841 						isInterlaceTiming ?
5842 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5843 						* (double) HTotal[k] / PixelClock[k];
5844 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5845 				SwathWidthYCriticalSurface = SwathWidthY[k];
5846 				SwathHeightYCriticalSurface = SwathHeightY[k];
5847 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5848 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5849 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5850 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5851 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5852 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5853 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5854 
5855 #ifdef __DML_VBA_DEBUG__
5856 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5857 						__func__, k, FoundCriticalSurface);
5858 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5859 						__func__, k, *StutterPeriod);
5860 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5861 						__func__, k, MinTTUVBlankCriticalSurface);
5862 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5863 						__func__, k, FrameTimeCriticalSurface);
5864 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5865 						__func__, k, VActiveTimeCriticalSurface);
5866 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5867 						__func__, k, BytePerPixelYCriticalSurface);
5868 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5869 						__func__, k, SwathWidthYCriticalSurface);
5870 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5871 						__func__, k, SwathHeightYCriticalSurface);
5872 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5873 						__func__, k, BlockWidth256BytesYCriticalSurface);
5874 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5875 						__func__, k, doublePlaneCriticalSurface);
5876 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5877 						__func__, k, doublePipeCriticalSurface);
5878 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5879 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5880 #endif
5881 			}
5882 		}
5883 	}
5884 
5885 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5886 			EffectiveCompressedBufferSize);
5887 #ifdef __DML_VBA_DEBUG__
5888 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5889 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5890 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5891 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5892 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5893 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5894 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5895 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5896 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5897 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5898 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5899 #endif
5900 
5901 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5902 			/ ReturnBW
5903 			+ (*StutterPeriod * TotalDataReadBandwidth
5904 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5905 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5906 #ifdef __DML_VBA_DEBUG__
5907 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5908 			AverageDCCCompressionRate / ReturnBW);
5909 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5910 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5911 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5912 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5913 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5914 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5915 #endif
5916 	StutterBurstTime = dml_max(StutterBurstTime,
5917 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5918 					* SwathWidthYCriticalSurface / ReturnBW);
5919 
5920 #ifdef __DML_VBA_DEBUG__
5921 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5922 			__func__,
5923 			LinesToFinishSwathTransferStutterCriticalSurface *
5924 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5925 #endif
5926 
5927 	TotalActiveWriteback = 0;
5928 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5929 		if (WritebackEnable[k])
5930 			TotalActiveWriteback = TotalActiveWriteback + 1;
5931 	}
5932 
5933 	if (TotalActiveWriteback == 0) {
5934 #ifdef __DML_VBA_DEBUG__
5935 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5936 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5937 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5938 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5939 #endif
5940 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5941 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5942 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5943 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5944 		*NumberOfStutterBurstsPerFrame = (
5945 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5946 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5947 		*Z8NumberOfStutterBurstsPerFrame = (
5948 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5949 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5950 	} else {
5951 		*StutterEfficiencyNotIncludingVBlank = 0.;
5952 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5953 		*NumberOfStutterBurstsPerFrame = 0;
5954 		*Z8NumberOfStutterBurstsPerFrame = 0;
5955 	}
5956 #ifdef __DML_VBA_DEBUG__
5957 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5958 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5959 			__func__, *StutterEfficiencyNotIncludingVBlank);
5960 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5961 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5962 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5963 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5964 #endif
5965 
5966 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5967 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5968 			if (BlendingAndTiming[k] == k) {
5969 				if (TotalNumberOfActiveOTG == 0) {
5970 					doublePixelClock = PixelClock[k];
5971 					doubleHTotal = HTotal[k];
5972 					doubleVTotal = VTotal[k];
5973 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5974 						|| doubleVTotal != VTotal[k]) {
5975 					SameTiming = false;
5976 				}
5977 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5978 			}
5979 		}
5980 	}
5981 
5982 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
5983 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5984 
5985 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5986 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5987 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5988 						+ StutterBurstTime * VActiveTimeCriticalSurface
5989 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5990 		} else {
5991 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5992 		}
5993 	} else {
5994 		*StutterEfficiency = 0;
5995 	}
5996 
5997 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5998 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
5999 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6000 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6001 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6002 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6003 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6004 		} else {
6005 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6006 		}
6007 	} else {
6008 		*Z8StutterEfficiency = 0.;
6009 	}
6010 
6011 #ifdef __DML_VBA_DEBUG__
6012 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6013 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6014 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6015 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6016 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6017 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6018 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6019 			__func__, *StutterEfficiencyNotIncludingVBlank);
6020 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6021 #endif
6022 
6023 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6024 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6025 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6026 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6027 			- DETBufferSizeYCriticalSurface;
6028 
6029 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6030 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6031 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6032 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6033 
6034 #ifdef __DML_VBA_DEBUG__
6035 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6036 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6037 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6038 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6039 #endif
6040 } // CalculateStutterEfficiency
6041 
/*
 * Derive the total DET budget, the nominal DET size and the minimum
 * compressed buffer size from the return buffer and ROB sizes.
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - 4/5 of (return buffer + ROB), passed
 *                                     through dml_ceil(..., 64)
 *   *nomDETInKByte                  - *MaxTotalDETInKByte / MaxNumDPP, passed
 *                                     through dml_floor(..., 64); replaced by
 *                                     nomDETInKByteOverrideValue when the
 *                                     override is enabled
 *   *MinCompressedBufferSizeInKByte - return buffer size minus
 *                                     *MaxTotalDETInKByte
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, /* VBA_DELTA, allow DV to override default DET size */
		unsigned int nomDETInKByteOverrideValue, /* VBA_DELTA */

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double ReturnAndROBInKByte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(ReturnAndROBInKByte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal DET size stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} /* CalculateMaxDETAndMinCompressedBufferSize */
6078 
/*
 * Sum the per-surface bandwidth terms and compare the total with ReturnBW.
 *
 * For every surface the total accumulates, in this order:
 *   ReadBandwidthLuma   * UrgentBurstFactorLuma
 *   ReadBandwidthChroma * UrgentBurstFactorChroma
 *   cursor_bw           * UrgentBurstFactorCursor
 *   NumberOfDPP         * meta_row_bandwidth
 *   NumberOfDPP         * dpte_row_bandwidth
 *
 * Return: true when the total does not exceed ReturnBW and no surface has
 * its NotUrgentLatencyHiding flag set; false otherwise.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool LatencyHidingShortfall = false;
	bool Supported;
	double DemandBW = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			LatencyHidingShortfall = true;

		/* One term per statement keeps the left-to-right summation order. */
		DemandBW += ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		DemandBW += ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		DemandBW += cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		DemandBW += NumberOfDPP[surf] * meta_row_bandwidth[surf];
		DemandBW += NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	Supported = !LatencyHidingShortfall && (DemandBW <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, LatencyHidingShortfall);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, DemandBW);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, Supported);
#endif
	return Supported;
}
6117 
/*
 * Accumulate, over all surfaces, the largest of three per-surface bandwidth
 * terms and compare the total with ReturnBW.
 *
 * Per-surface terms fed to dml_max3():
 *   - NumberOfDPP * prefetch_vmrow_bw
 *   - urgent-scaled luma/chroma/cursor read bandwidth plus
 *     NumberOfDPP * (meta_row_bandwidth + dpte_row_bandwidth)
 *   - NumberOfDPP * urgent-scaled luma/chroma prefetch bandwidth plus the
 *     urgent-scaled cursor_bw_pre
 *
 * Outputs:
 *   *PrefetchBandwidth         - the accumulated total
 *   *PrefetchBandwidthSupport  - total <= ReturnBW and no surface has its
 *                                NotUrgentLatencyHiding flag set
 *   *FractionOfUrgentBandwidth - total / ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double *PrefetchBandwidth,
		double *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int surf;
	bool LatencyHidingShortfall = false;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			LatencyHidingShortfall = true;
	}

	*PrefetchBandwidth = 0;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double VMRowBW = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];
		const double ActiveBW = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf] + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf] + cursor_bw[surf] * UrgentBurstFactorCursor[surf] + NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);
		const double PrefetchBW = NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf] + PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf]) + cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		*PrefetchBandwidth = *PrefetchBandwidth + dml_max3(VMRowBW, ActiveBW, PrefetchBW);
	}

	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !LatencyHidingShortfall;
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
6161 
/*
 * Start from ReturnBW and subtract, for every surface, the larger of:
 *   - the urgent-scaled luma + chroma + cursor read bandwidth
 *   - NumberOfDPP * urgent-scaled luma/chroma prefetch bandwidth plus the
 *     urgent-scaled cursor_bw_pre
 *
 * Return: the bandwidth left after all surfaces have been subtracted. The
 * result is not clamped, so it can be negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double RemainingBW = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double ActiveBW = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf] + ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf] + cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		const double PrefetchBW = NumberOfDPP[surf] * (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf] + PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf]) + cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		RemainingBW -= dml_max(ActiveBW, PrefetchBW);
	}

	return RemainingBW;
}
6188 
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6189 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6190 		double ReturnBW,
6191 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6192 		double final_flip_bw[],
6193 		double ReadBandwidthLuma[],
6194 		double ReadBandwidthChroma[],
6195 		double PrefetchBandwidthLuma[],
6196 		double PrefetchBandwidthChroma[],
6197 		double cursor_bw[],
6198 		double meta_row_bandwidth[],
6199 		double dpte_row_bandwidth[],
6200 		double cursor_bw_pre[],
6201 		double prefetch_vmrow_bw[],
6202 		unsigned int NumberOfDPP[],
6203 		double UrgentBurstFactorLuma[],
6204 		double UrgentBurstFactorChroma[],
6205 		double UrgentBurstFactorCursor[],
6206 		double UrgentBurstFactorLumaPre[],
6207 		double UrgentBurstFactorChromaPre[],
6208 		double UrgentBurstFactorCursorPre[],
6209 
6210 		/* output */
6211 		double  *TotalBandwidth,
6212 		double  *FractionOfUrgentBandwidth,
6213 		bool *ImmediateFlipBandwidthSupport)
6214 {
6215 	unsigned int k;
6216 	*TotalBandwidth = 0;
6217 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6218 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6219 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6220 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6221 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6222 		} else {
6223 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6224 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6225 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6226 		}
6227 	}
6228 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6229 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6230 }
6231 
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX])6232 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6233 		double ReturnBW,
6234 		double UrgentLatency,
6235 		unsigned int SwathHeightY[],
6236 		unsigned int SwathHeightC[],
6237 		unsigned int SwathWidthY[],
6238 		unsigned int SwathWidthC[],
6239 		double  BytePerPixelInDETY[],
6240 		double  BytePerPixelInDETC[],
6241 		unsigned int    DETBufferSizeY[],
6242 		unsigned int    DETBufferSizeC[],
6243 		unsigned int	NumOfDPP[],
6244 		unsigned int	HTotal[],
6245 		double	PixelClock[],
6246 		double	VRatioY[],
6247 		double	VRatioC[],
6248 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX])
6249 {
6250 	int k;
6251 	double SwathSizeAllSurfaces = 0;
6252 	double SwathSizeAllSurfacesInFetchTimeUs;
6253 	double DETSwathLatencyHidingUs;
6254 	double DETSwathLatencyHidingYUs;
6255 	double DETSwathLatencyHidingCUs;
6256 	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6257 	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6258 	bool NotEnoughDETSwathFillLatencyHiding = false;
6259 
6260 	/* calculate sum of single swath size for all pipes in bytes */
6261 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6262 		SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6263 
6264 		if (SwathHeightC[k] != 0)
6265 			SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6266 		else
6267 			SwathSizePerSurfaceC[k] = 0;
6268 
6269 		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6270 	}
6271 
6272 	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6273 
6274 	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
6275 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6276 		double LineTime = HTotal[k] / PixelClock[k];
6277 
6278 		/* only care if surface is not phantom */
6279 		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6280 			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6281 
6282 			if (SwathHeightC[k] != 0) {
6283 				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6284 
6285 				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6286 			} else {
6287 				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6288 			}
6289 
6290 			/* DET must be able to hide time to fetch 1 swath for each surface */
6291 			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6292 				NotEnoughDETSwathFillLatencyHiding = true;
6293 				break;
6294 			}
6295 		}
6296 	}
6297 
6298 	return NotEnoughDETSwathFillLatencyHiding;
6299 }
6300