1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26
27 #include "../display_mode_lib.h"
28 #include "../dml_inline_defs.h"
29 #include "../display_mode_vba.h"
30 #include "display_mode_vba_21.h"
31
32
33 /*
34 * NOTE:
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 *
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
40 */
41 typedef struct {
42 double DPPCLK;
43 double DISPCLK;
44 double PixelClock;
45 double DCFCLKDeepSleep;
46 unsigned int DPPPerPlane;
47 bool ScalerEnabled;
48 enum scan_direction_class SourceScan;
49 unsigned int BlockWidth256BytesY;
50 unsigned int BlockHeight256BytesY;
51 unsigned int BlockWidth256BytesC;
52 unsigned int BlockHeight256BytesC;
53 unsigned int InterlaceEnable;
54 unsigned int NumberOfCursors;
55 unsigned int VBlank;
56 unsigned int HTotal;
57 } Pipe;
58
/*
 * Host VM settings handed to CalculatePrefetchSchedule() as a single
 * argument (its myHostVM parameter).
 */
typedef struct {
	/* Only takes effect there when GPUVMEnable is also true. */
	bool Enable;
	/*
	 * CalculatePrefetchSchedule() uses
	 * MaxPageTableLevels - CachedPageTableLevels as the number of
	 * dynamic host VM levels.
	 */
	unsigned int MaxPageTableLevels;
	unsigned int CachedPageTableLevels;
} HostVM;
64
/*
 * Sentinel bpp values. Their users are outside this part of the file.
 * NOTE(review): confirm the exact meaning at the use sites.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
/*
 * These equal the 5184 and 4096 (420 mode) slice-width limits quoted in the
 * comments of dscceComputeDelay().
 */
#define DCN21_MAX_DSC_IMAGE_WIDTH 5184
#define DCN21_MAX_420_IMAGE_WIDTH 4096
69
70 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
71 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
72 struct display_mode_lib *mode_lib);
73 static unsigned int dscceComputeDelay(
74 unsigned int bpc,
75 double bpp,
76 unsigned int sliceWidth,
77 unsigned int numSlices,
78 enum output_format_class pixelFormat);
79 static unsigned int dscComputeDelay(enum output_format_class pixelFormat);
80 // Super monster function with some 45 argument
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotal,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCFormater,
91 double DPPCLKDelayCNVCCursor,
92 double DISPCLKDelaySubtotal,
93 unsigned int ScalerRecoutWidth,
94 enum output_format_class OutputFormat,
95 unsigned int MaxInterDCNTileRepeaters,
96 unsigned int VStartup,
97 unsigned int MaxVStartup,
98 unsigned int GPUVMPageTableLevels,
99 bool GPUVMEnable,
100 HostVM *myHostVM,
101 bool DynamicMetadataEnable,
102 int DynamicMetadataLinesBeforeActiveRequired,
103 unsigned int DynamicMetadataTransmittedBytes,
104 bool DCCEnable,
105 double UrgentLatency,
106 double UrgentExtraLatency,
107 double TCalc,
108 unsigned int PDEAndMetaPTEBytesFrame,
109 unsigned int MetaRowByte,
110 unsigned int PixelPTEBytesPerRow,
111 double PrefetchSourceLinesY,
112 unsigned int SwathWidthY,
113 double BytePerPixelDETY,
114 double VInitPreFillY,
115 unsigned int MaxNumSwathY,
116 double PrefetchSourceLinesC,
117 double BytePerPixelDETC,
118 double VInitPreFillC,
119 unsigned int MaxNumSwathC,
120 unsigned int SwathHeightY,
121 unsigned int SwathHeightC,
122 double TWait,
123 bool XFCEnabled,
124 double XFCRemoteSurfaceFlipDelay,
125 bool ProgressiveToInterlaceUnitInOPP,
126 double *DSTXAfterScaler,
127 double *DSTYAfterScaler,
128 double *DestinationLinesForPrefetch,
129 double *PrefetchBandwidth,
130 double *DestinationLinesToRequestVMInVBlank,
131 double *DestinationLinesToRequestRowInVBlank,
132 double *VRatioPrefetchY,
133 double *VRatioPrefetchC,
134 double *RequiredPrefetchPixDataBWLuma,
135 double *RequiredPrefetchPixDataBWChroma,
136 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
137 double *Tno_bw,
138 double *prefetch_vmrow_bw,
139 unsigned int *swath_width_luma_ub,
140 unsigned int *swath_width_chroma_ub,
141 unsigned int *VUpdateOffsetPix,
142 double *VUpdateWidthPix,
143 double *VReadyOffsetPix);
144 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
145 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
146 static double CalculateDCCConfiguration(
147 bool DCCEnabled,
148 bool DCCProgrammingAssumesScanDirectionUnknown,
149 unsigned int ViewportWidth,
150 unsigned int ViewportHeight,
151 unsigned int DETBufferSize,
152 unsigned int RequestHeight256Byte,
153 unsigned int SwathHeight,
154 enum dm_swizzle_mode TilingFormat,
155 unsigned int BytePerPixel,
156 enum scan_direction_class ScanOrientation,
157 unsigned int *MaxUncompressedBlock,
158 unsigned int *MaxCompressedBlock,
159 unsigned int *Independent64ByteBlock);
160 static double CalculatePrefetchSourceLines(
161 struct display_mode_lib *mode_lib,
162 double VRatio,
163 double vtaps,
164 bool Interlace,
165 bool ProgressiveToInterlaceUnitInOPP,
166 unsigned int SwathHeight,
167 unsigned int ViewportYStart,
168 double *VInitPreFill,
169 unsigned int *MaxNumSwath);
170 static unsigned int CalculateVMAndRowBytes(
171 struct display_mode_lib *mode_lib,
172 bool DCCEnable,
173 unsigned int BlockHeight256Bytes,
174 unsigned int BlockWidth256Bytes,
175 enum source_format_class SourcePixelFormat,
176 unsigned int SurfaceTiling,
177 unsigned int BytePerPixel,
178 enum scan_direction_class ScanDirection,
179 unsigned int ViewportWidth,
180 unsigned int ViewportHeight,
181 unsigned int SwathWidthY,
182 bool GPUVMEnable,
183 bool HostVMEnable,
184 unsigned int HostVMMaxPageTableLevels,
185 unsigned int HostVMCachedPageTableLevels,
186 unsigned int VMMPageSize,
187 unsigned int PTEBufferSizeInRequests,
188 unsigned int Pitch,
189 unsigned int DCCMetaPitch,
190 unsigned int *MacroTileWidth,
191 unsigned int *MetaRowByte,
192 unsigned int *PixelPTEBytesPerRow,
193 bool *PTEBufferSizeNotExceeded,
194 unsigned int *dpte_row_width_ub,
195 unsigned int *dpte_row_height,
196 unsigned int *MetaRequestWidth,
197 unsigned int *MetaRequestHeight,
198 unsigned int *meta_row_width,
199 unsigned int *meta_row_height,
200 unsigned int *vm_group_bytes,
201 unsigned int *dpte_group_bytes,
202 unsigned int *PixelPTEReqWidth,
203 unsigned int *PixelPTEReqHeight,
204 unsigned int *PTERequestSize,
205 unsigned int *DPDE0BytesFrame,
206 unsigned int *MetaPTEBytesFrame);
207
208 static double CalculateTWait(
209 unsigned int PrefetchMode,
210 double DRAMClockChangeLatency,
211 double UrgentLatency,
212 double SREnterPlusExitTime);
213 static double CalculateRemoteSurfaceFlipDelay(
214 struct display_mode_lib *mode_lib,
215 double VRatio,
216 double SwathWidth,
217 double Bpp,
218 double LineTime,
219 double XFCTSlvVupdateOffset,
220 double XFCTSlvVupdateWidth,
221 double XFCTSlvVreadyOffset,
222 double XFCXBUFLatencyTolerance,
223 double XFCFillBWOverhead,
224 double XFCSlvChunkSize,
225 double XFCBusTransportTime,
226 double TCalc,
227 double TWait,
228 double *SrcActiveDrainRate,
229 double *TInitXFill,
230 double *TslvChk);
231 static void CalculateActiveRowBandwidth(
232 bool GPUVMEnable,
233 enum source_format_class SourcePixelFormat,
234 double VRatio,
235 bool DCCEnable,
236 double LineTime,
237 unsigned int MetaRowByteLuma,
238 unsigned int MetaRowByteChroma,
239 unsigned int meta_row_height_luma,
240 unsigned int meta_row_height_chroma,
241 unsigned int PixelPTEBytesPerRowLuma,
242 unsigned int PixelPTEBytesPerRowChroma,
243 unsigned int dpte_row_height_luma,
244 unsigned int dpte_row_height_chroma,
245 double *meta_row_bw,
246 double *dpte_row_bw);
247 static void CalculateFlipSchedule(
248 struct display_mode_lib *mode_lib,
249 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
250 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
251 double UrgentExtraLatency,
252 double UrgentLatency,
253 unsigned int GPUVMMaxPageTableLevels,
254 bool HostVMEnable,
255 unsigned int HostVMMaxPageTableLevels,
256 unsigned int HostVMCachedPageTableLevels,
257 bool GPUVMEnable,
258 double PDEAndMetaPTEBytesPerFrame,
259 double MetaRowBytes,
260 double DPTEBytesPerRow,
261 double BandwidthAvailableForImmediateFlip,
262 unsigned int TotImmediateFlipBytes,
263 enum source_format_class SourcePixelFormat,
264 double LineTime,
265 double VRatio,
266 double Tno_bw,
267 bool DCCEnable,
268 unsigned int dpte_row_height,
269 unsigned int meta_row_height,
270 unsigned int dpte_row_height_chroma,
271 unsigned int meta_row_height_chroma,
272 double *DestinationLinesToRequestVMInImmediateFlip,
273 double *DestinationLinesToRequestRowInImmediateFlip,
274 double *final_flip_bw,
275 bool *ImmediateFlipSupportedForPipe);
276 static double CalculateWriteBackDelay(
277 enum source_format_class WritebackPixelFormat,
278 double WritebackHRatio,
279 double WritebackVRatio,
280 unsigned int WritebackLumaHTaps,
281 unsigned int WritebackLumaVTaps,
282 unsigned int WritebackChromaHTaps,
283 unsigned int WritebackChromaVTaps,
284 unsigned int WritebackDestinationWidth);
285 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
286 struct display_mode_lib *mode_lib,
287 unsigned int PrefetchMode,
288 unsigned int NumberOfActivePlanes,
289 unsigned int MaxLineBufferLines,
290 unsigned int LineBufferSize,
291 unsigned int DPPOutputBufferPixels,
292 unsigned int DETBufferSizeInKByte,
293 unsigned int WritebackInterfaceLumaBufferSize,
294 unsigned int WritebackInterfaceChromaBufferSize,
295 double DCFCLK,
296 double UrgentOutOfOrderReturn,
297 double ReturnBW,
298 bool GPUVMEnable,
299 int dpte_group_bytes[],
300 unsigned int MetaChunkSize,
301 double UrgentLatency,
302 double ExtraLatency,
303 double WritebackLatency,
304 double WritebackChunkSize,
305 double SOCCLK,
306 double DRAMClockChangeLatency,
307 double SRExitTime,
308 double SREnterPlusExitTime,
309 double DCFCLKDeepSleep,
310 int DPPPerPlane[],
311 bool DCCEnable[],
312 double DPPCLK[],
313 double SwathWidthSingleDPPY[],
314 unsigned int SwathHeightY[],
315 double ReadBandwidthPlaneLuma[],
316 unsigned int SwathHeightC[],
317 double ReadBandwidthPlaneChroma[],
318 unsigned int LBBitPerPixel[],
319 double SwathWidthY[],
320 double HRatio[],
321 unsigned int vtaps[],
322 unsigned int VTAPsChroma[],
323 double VRatio[],
324 unsigned int HTotal[],
325 double PixelClock[],
326 unsigned int BlendingAndTiming[],
327 double BytePerPixelDETY[],
328 double BytePerPixelDETC[],
329 bool WritebackEnable[],
330 enum source_format_class WritebackPixelFormat[],
331 double WritebackDestinationWidth[],
332 double WritebackDestinationHeight[],
333 double WritebackSourceHeight[],
334 enum clock_change_support *DRAMClockChangeSupport,
335 double *UrgentWatermark,
336 double *WritebackUrgentWatermark,
337 double *DRAMClockChangeWatermark,
338 double *WritebackDRAMClockChangeWatermark,
339 double *StutterExitWatermark,
340 double *StutterEnterPlusExitWatermark,
341 double *MinActiveDRAMClockChangeLatencySupported);
342 static void CalculateDCFCLKDeepSleep(
343 struct display_mode_lib *mode_lib,
344 unsigned int NumberOfActivePlanes,
345 double BytePerPixelDETY[],
346 double BytePerPixelDETC[],
347 double VRatio[],
348 double SwathWidthY[],
349 int DPPPerPlane[],
350 double HRatio[],
351 double PixelClock[],
352 double PSCL_THROUGHPUT[],
353 double PSCL_THROUGHPUT_CHROMA[],
354 double DPPCLK[],
355 double *DCFCLKDeepSleep);
356 static void CalculateDETBufferSize(
357 unsigned int DETBufferSizeInKByte,
358 unsigned int SwathHeightY,
359 unsigned int SwathHeightC,
360 unsigned int *DETBufferSizeY,
361 unsigned int *DETBufferSizeC);
362 static void CalculateUrgentBurstFactor(
363 unsigned int DETBufferSizeInKByte,
364 unsigned int SwathHeightY,
365 unsigned int SwathHeightC,
366 unsigned int SwathWidthY,
367 double LineTime,
368 double UrgentLatency,
369 double CursorBufferSize,
370 unsigned int CursorWidth,
371 unsigned int CursorBPP,
372 double VRatio,
373 double VRatioPreY,
374 double VRatioPreC,
375 double BytePerPixelInDETY,
376 double BytePerPixelInDETC,
377 double *UrgentBurstFactorCursor,
378 double *UrgentBurstFactorCursorPre,
379 double *UrgentBurstFactorLuma,
380 double *UrgentBurstFactorLumaPre,
381 double *UrgentBurstFactorChroma,
382 double *UrgentBurstFactorChromaPre,
383 unsigned int *NotEnoughUrgentLatencyHiding,
384 unsigned int *NotEnoughUrgentLatencyHidingPre);
385
386 static void CalculatePixelDeliveryTimes(
387 unsigned int NumberOfActivePlanes,
388 double VRatio[],
389 double VRatioPrefetchY[],
390 double VRatioPrefetchC[],
391 unsigned int swath_width_luma_ub[],
392 unsigned int swath_width_chroma_ub[],
393 int DPPPerPlane[],
394 double HRatio[],
395 double PixelClock[],
396 double PSCL_THROUGHPUT[],
397 double PSCL_THROUGHPUT_CHROMA[],
398 double DPPCLK[],
399 double BytePerPixelDETC[],
400 enum scan_direction_class SourceScan[],
401 unsigned int BlockWidth256BytesY[],
402 unsigned int BlockHeight256BytesY[],
403 unsigned int BlockWidth256BytesC[],
404 unsigned int BlockHeight256BytesC[],
405 double DisplayPipeLineDeliveryTimeLuma[],
406 double DisplayPipeLineDeliveryTimeChroma[],
407 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
408 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
409 double DisplayPipeRequestDeliveryTimeLuma[],
410 double DisplayPipeRequestDeliveryTimeChroma[],
411 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
412 double DisplayPipeRequestDeliveryTimeChromaPrefetch[]);
413
414 static void CalculateMetaAndPTETimes(
415 unsigned int NumberOfActivePlanes,
416 bool GPUVMEnable,
417 unsigned int MetaChunkSize,
418 unsigned int MinMetaChunkSizeBytes,
419 unsigned int GPUVMMaxPageTableLevels,
420 unsigned int HTotal[],
421 double VRatio[],
422 double VRatioPrefetchY[],
423 double VRatioPrefetchC[],
424 double DestinationLinesToRequestRowInVBlank[],
425 double DestinationLinesToRequestRowInImmediateFlip[],
426 double DestinationLinesToRequestVMInVBlank[],
427 double DestinationLinesToRequestVMInImmediateFlip[],
428 bool DCCEnable[],
429 double PixelClock[],
430 double BytePerPixelDETY[],
431 double BytePerPixelDETC[],
432 enum scan_direction_class SourceScan[],
433 unsigned int dpte_row_height[],
434 unsigned int dpte_row_height_chroma[],
435 unsigned int meta_row_width[],
436 unsigned int meta_row_height[],
437 unsigned int meta_req_width[],
438 unsigned int meta_req_height[],
439 int dpte_group_bytes[],
440 unsigned int PTERequestSizeY[],
441 unsigned int PTERequestSizeC[],
442 unsigned int PixelPTEReqWidthY[],
443 unsigned int PixelPTEReqHeightY[],
444 unsigned int PixelPTEReqWidthC[],
445 unsigned int PixelPTEReqHeightC[],
446 unsigned int dpte_row_width_luma_ub[],
447 unsigned int dpte_row_width_chroma_ub[],
448 unsigned int vm_group_bytes[],
449 unsigned int dpde0_bytes_per_frame_ub_l[],
450 unsigned int dpde0_bytes_per_frame_ub_c[],
451 unsigned int meta_pte_bytes_per_frame_ub_l[],
452 unsigned int meta_pte_bytes_per_frame_ub_c[],
453 double DST_Y_PER_PTE_ROW_NOM_L[],
454 double DST_Y_PER_PTE_ROW_NOM_C[],
455 double DST_Y_PER_META_ROW_NOM_L[],
456 double TimePerMetaChunkNominal[],
457 double TimePerMetaChunkVBlank[],
458 double TimePerMetaChunkFlip[],
459 double time_per_pte_group_nom_luma[],
460 double time_per_pte_group_vblank_luma[],
461 double time_per_pte_group_flip_luma[],
462 double time_per_pte_group_nom_chroma[],
463 double time_per_pte_group_vblank_chroma[],
464 double time_per_pte_group_flip_chroma[],
465 double TimePerVMGroupVBlank[],
466 double TimePerVMGroupFlip[],
467 double TimePerVMRequestVBlank[],
468 double TimePerVMRequestFlip[]);
469
470 static double CalculateExtraLatency(
471 double UrgentRoundTripAndOutOfOrderLatency,
472 int TotalNumberOfActiveDPP,
473 int PixelChunkSizeInKByte,
474 int TotalNumberOfDCCActiveDPP,
475 int MetaChunkSize,
476 double ReturnBW,
477 bool GPUVMEnable,
478 bool HostVMEnable,
479 int NumberOfActivePlanes,
480 int NumberOfDPP[],
481 int dpte_group_bytes[],
482 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
483 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
484 int HostVMMaxPageTableLevels,
485 int HostVMCachedPageTableLevels);
486
/*
 * Entry point for a DML 2.1 recalculation: runs the four calculation stages
 * below on mode_lib, strictly in this order.
 *
 * Not static, so it is visible to other translation units
 * (presumably declared in display_mode_vba_21.h - confirm).
 */
void dml21_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
494
dscceComputeDelay(unsigned int bpc,double bpp,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat)495 static unsigned int dscceComputeDelay(
496 unsigned int bpc,
497 double bpp,
498 unsigned int sliceWidth,
499 unsigned int numSlices,
500 enum output_format_class pixelFormat)
501 {
502 // valid bpc = source bits per component in the set of {8, 10, 12}
503 // valid bpp = increments of 1/16 of a bit
504 // min = 6/7/8 in N420/N422/444, respectively
505 // max = such that compression is 1:1
506 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
507 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
508 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
509
510 // fixed value
511 unsigned int rcModelSize = 8192;
512
513 // N422/N420 operate at 2 pixels per clock
514 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l,
515 Delay, pixels;
516
517 if (pixelFormat == dm_n422 || pixelFormat == dm_420)
518 pixelsPerClock = 2;
519 // #all other modes operate at 1 pixel per clock
520 else
521 pixelsPerClock = 1;
522
523 //initial transmit delay as per PPS
524 initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock);
525
526 //compute ssm delay
527 if (bpc == 8)
528 D = 81;
529 else if (bpc == 10)
530 D = 89;
531 else
532 D = 113;
533
534 //divide by pixel per cycle to compute slice width as seen by DSC
535 w = sliceWidth / pixelsPerClock;
536
537 //422 mode has an additional cycle of delay
538 if (pixelFormat == dm_s422)
539 S = 1;
540 else
541 S = 0;
542
543 //main calculation for the dscce
544 ix = initalXmitDelay + 45;
545 wx = (w + 2) / 3;
546 p = 3 * wx - w;
547 l0 = ix / w;
548 a = ix + p * l0;
549 ax = (a + 2) / 3 + D + 6 + 1;
550 l = (ax + wx - 1) / wx;
551 if ((ix % w) == 0 && p != 0)
552 lstall = 1;
553 else
554 lstall = 0;
555 Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22;
556
557 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
558 pixels = Delay * 3 * pixelsPerClock;
559 return pixels;
560 }
561
dscComputeDelay(enum output_format_class pixelFormat)562 static unsigned int dscComputeDelay(enum output_format_class pixelFormat)
563 {
564 unsigned int Delay = 0;
565
566 if (pixelFormat == dm_420) {
567 // sfr
568 Delay = Delay + 2;
569 // dsccif
570 Delay = Delay + 0;
571 // dscc - input deserializer
572 Delay = Delay + 3;
573 // dscc gets pixels every other cycle
574 Delay = Delay + 2;
575 // dscc - input cdc fifo
576 Delay = Delay + 12;
577 // dscc gets pixels every other cycle
578 Delay = Delay + 13;
579 // dscc - cdc uncertainty
580 Delay = Delay + 2;
581 // dscc - output cdc fifo
582 Delay = Delay + 7;
583 // dscc gets pixels every other cycle
584 Delay = Delay + 3;
585 // dscc - cdc uncertainty
586 Delay = Delay + 2;
587 // dscc - output serializer
588 Delay = Delay + 1;
589 // sft
590 Delay = Delay + 1;
591 } else if (pixelFormat == dm_n422) {
592 // sfr
593 Delay = Delay + 2;
594 // dsccif
595 Delay = Delay + 1;
596 // dscc - input deserializer
597 Delay = Delay + 5;
598 // dscc - input cdc fifo
599 Delay = Delay + 25;
600 // dscc - cdc uncertainty
601 Delay = Delay + 2;
602 // dscc - output cdc fifo
603 Delay = Delay + 10;
604 // dscc - cdc uncertainty
605 Delay = Delay + 2;
606 // dscc - output serializer
607 Delay = Delay + 1;
608 // sft
609 Delay = Delay + 1;
610 } else {
611 // sfr
612 Delay = Delay + 2;
613 // dsccif
614 Delay = Delay + 0;
615 // dscc - input deserializer
616 Delay = Delay + 3;
617 // dscc - input cdc fifo
618 Delay = Delay + 12;
619 // dscc - cdc uncertainty
620 Delay = Delay + 2;
621 // dscc - output cdc fifo
622 Delay = Delay + 7;
623 // dscc - output serializer
624 Delay = Delay + 1;
625 // dscc - cdc uncertainty
626 Delay = Delay + 2;
627 // sft
628 Delay = Delay + 1;
629 }
630
631 return Delay;
632 }
633
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotal,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCFormater,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int ScalerRecoutWidth,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,HostVM * myHostVM,bool DynamicMetadataEnable,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,bool DCCEnable,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double BytePerPixelDETY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,double BytePerPixelDETC,double VInitPreFillC,unsigned int MaxNumSwathC,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool XFCEnabled,double XFCRemoteSurfaceFlipDelay,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,unsigned int * VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,unsigned int * swath_width_luma_ub,unsigned int * swath_width_chroma_ub,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)634 static bool CalculatePrefetchSchedule(
635 struct display_mode_lib *mode_lib,
636 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
637 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
638 Pipe *myPipe,
639 unsigned int DSCDelay,
640 double DPPCLKDelaySubtotal,
641 double DPPCLKDelaySCL,
642 double DPPCLKDelaySCLLBOnly,
643 double DPPCLKDelayCNVCFormater,
644 double DPPCLKDelayCNVCCursor,
645 double DISPCLKDelaySubtotal,
646 unsigned int ScalerRecoutWidth,
647 enum output_format_class OutputFormat,
648 unsigned int MaxInterDCNTileRepeaters,
649 unsigned int VStartup,
650 unsigned int MaxVStartup,
651 unsigned int GPUVMPageTableLevels,
652 bool GPUVMEnable,
653 HostVM *myHostVM,
654 bool DynamicMetadataEnable,
655 int DynamicMetadataLinesBeforeActiveRequired,
656 unsigned int DynamicMetadataTransmittedBytes,
657 bool DCCEnable,
658 double UrgentLatency,
659 double UrgentExtraLatency,
660 double TCalc,
661 unsigned int PDEAndMetaPTEBytesFrame,
662 unsigned int MetaRowByte,
663 unsigned int PixelPTEBytesPerRow,
664 double PrefetchSourceLinesY,
665 unsigned int SwathWidthY,
666 double BytePerPixelDETY,
667 double VInitPreFillY,
668 unsigned int MaxNumSwathY,
669 double PrefetchSourceLinesC,
670 double BytePerPixelDETC,
671 double VInitPreFillC,
672 unsigned int MaxNumSwathC,
673 unsigned int SwathHeightY,
674 unsigned int SwathHeightC,
675 double TWait,
676 bool XFCEnabled,
677 double XFCRemoteSurfaceFlipDelay,
678 bool ProgressiveToInterlaceUnitInOPP,
679 double *DSTXAfterScaler,
680 double *DSTYAfterScaler,
681 double *DestinationLinesForPrefetch,
682 double *PrefetchBandwidth,
683 double *DestinationLinesToRequestVMInVBlank,
684 double *DestinationLinesToRequestRowInVBlank,
685 double *VRatioPrefetchY,
686 double *VRatioPrefetchC,
687 double *RequiredPrefetchPixDataBWLuma,
688 double *RequiredPrefetchPixDataBWChroma,
689 unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
690 double *Tno_bw,
691 double *prefetch_vmrow_bw,
692 unsigned int *swath_width_luma_ub,
693 unsigned int *swath_width_chroma_ub,
694 unsigned int *VUpdateOffsetPix,
695 double *VUpdateWidthPix,
696 double *VReadyOffsetPix)
697 {
698 bool MyError = false;
699 unsigned int DPPCycles, DISPCLKCycles;
700 double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime;
701 double Tdm, LineTime, Tsetup;
702 double dst_y_prefetch_equ;
703 double Tsw_oto;
704 double prefetch_bw_oto;
705 double Tvm_oto;
706 double Tr0_oto;
707 double Tvm_oto_lines;
708 double Tr0_oto_lines;
709 double Tsw_oto_lines;
710 double dst_y_prefetch_oto;
711 double TimeForFetchingMetaPTE = 0;
712 double TimeForFetchingRowInVBlank = 0;
713 double LinesToRequestPrefetchPixelData = 0;
714 double HostVMInefficiencyFactor;
715 unsigned int HostVMDynamicLevels;
716
717 if (GPUVMEnable == true && myHostVM->Enable == true) {
718 HostVMInefficiencyFactor =
719 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
720 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
721 HostVMDynamicLevels = myHostVM->MaxPageTableLevels
722 - myHostVM->CachedPageTableLevels;
723 } else {
724 HostVMInefficiencyFactor = 1;
725 HostVMDynamicLevels = 0;
726 }
727
728 if (myPipe->ScalerEnabled)
729 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL;
730 else
731 DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly;
732
733 DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
734
735 DISPCLKCycles = DISPCLKDelaySubtotal;
736
737 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
738 return true;
739
740 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK
741 + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
742
743 if (myPipe->DPPPerPlane > 1)
744 *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth;
745
746 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
747 *DSTYAfterScaler = 1;
748 else
749 *DSTYAfterScaler = 0;
750
751 DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler;
752 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
753 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
754
755 *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1);
756 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK);
757 *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime)
758 * myPipe->PixelClock;
759
760 *VReadyOffsetPix = dml_max(
761 150.0 / myPipe->DPPCLK,
762 TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK)
763 * myPipe->PixelClock;
764
765 Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock;
766
767 LineTime = (double) myPipe->HTotal / myPipe->PixelClock;
768
769 if (DynamicMetadataEnable) {
770 double Tdmbf, Tdmec, Tdmsks;
771
772 Tdm = dml_max(0.0, UrgentExtraLatency - TCalc);
773 Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK;
774 Tdmec = LineTime;
775 if (DynamicMetadataLinesBeforeActiveRequired == -1)
776 Tdmsks = myPipe->VBlank * LineTime / 2.0;
777 else
778 Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime;
779 if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP)
780 Tdmsks = Tdmsks / 2;
781 if (VStartup * LineTime
782 < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) {
783 MyError = true;
784 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait
785 + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime;
786 } else
787 *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0;
788 } else
789 Tdm = 0;
790
791 if (GPUVMEnable) {
792 if (GPUVMPageTableLevels >= 3)
793 *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1);
794 else
795 *Tno_bw = 0;
796 } else if (!DCCEnable)
797 *Tno_bw = LineTime;
798 else
799 *Tno_bw = LineTime / 4;
800
801 dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime
802 - (Tsetup + Tdm) / LineTime
803 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
804
805 Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
806
807 if (myPipe->SourceScan == dm_horz) {
808 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
809 if (myPipe->BlockWidth256BytesC > 0)
810 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
811 } else {
812 *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
813 if (myPipe->BlockWidth256BytesC > 0)
814 *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
815 }
816
817 prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
818
819
820 if (GPUVMEnable == true) {
821 Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
822 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1),
823 LineTime / 4.0));
824 } else
825 Tvm_oto = LineTime / 4.0;
826
827 if ((GPUVMEnable == true || DCCEnable == true)) {
828 Tr0_oto = dml_max(
829 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
830 dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4)));
831 } else
832 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
833
834 Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0;
835 Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0;
836 Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0;
837 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75;
838
839 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
840
841 if (dst_y_prefetch_oto < dst_y_prefetch_equ)
842 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
843 else
844 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
845
846 // Limit to prevent overflow in DST_Y_PREFETCH register
847 *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75);
848
849 dml_print("DML: VStartup: %d\n", VStartup);
850 dml_print("DML: TCalc: %f\n", TCalc);
851 dml_print("DML: TWait: %f\n", TWait);
852 dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay);
853 dml_print("DML: LineTime: %f\n", LineTime);
854 dml_print("DML: Tsetup: %f\n", Tsetup);
855 dml_print("DML: Tdm: %f\n", Tdm);
856 dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler);
857 dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler);
858 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
859
860 *PrefetchBandwidth = 0;
861 *DestinationLinesToRequestVMInVBlank = 0;
862 *DestinationLinesToRequestRowInVBlank = 0;
863 *VRatioPrefetchY = 0;
864 *VRatioPrefetchC = 0;
865 *RequiredPrefetchPixDataBWLuma = 0;
866 if (*DestinationLinesForPrefetch > 1) {
867 double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
868 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
869 + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1)
870 + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2))
871 / (*DestinationLinesForPrefetch * LineTime - *Tno_bw);
872
873 double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
874 HostVMInefficiencyFactor + PrefetchSourceLinesY *
875 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
876 PrefetchSourceLinesC * *swath_width_chroma_ub *
877 dml_ceil(BytePerPixelDETC, 2)) /
878 (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 *
879 UrgentLatency * (1 + HostVMDynamicLevels));
880
881 double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow
882 * HostVMInefficiencyFactor + PrefetchSourceLinesY *
883 *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) +
884 PrefetchSourceLinesC * *swath_width_chroma_ub *
885 dml_ceil(BytePerPixelDETC, 2)) /
886 (*DestinationLinesForPrefetch * LineTime -
887 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
888 * (HostVMDynamicLevels + 1) - 1));
889
890 double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub *
891 dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC *
892 *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) /
893 (*DestinationLinesForPrefetch * LineTime -
894 UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels
895 * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency *
896 (1 + HostVMDynamicLevels));
897
898 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) {
899 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw);
900 }
901 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
902 *PrefetchBandwidth = PrefetchBandwidth1;
903 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) {
904 *PrefetchBandwidth = PrefetchBandwidth2;
905 } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) {
906 *PrefetchBandwidth = PrefetchBandwidth3;
907 } else {
908 *PrefetchBandwidth = PrefetchBandwidth4;
909 }
910
911 if (GPUVMEnable) {
912 TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth,
913 dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4));
914 } else {
915 // 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor
916 // so if this needs to be reinstated, then it should be officially done in the VBA code as well.
917 // if (mode_lib->NumberOfCursors > 0 || XFCEnabled)
918 TimeForFetchingMetaPTE = LineTime / 4;
919 // else
920 // TimeForFetchingMetaPTE = 0.0;
921 }
922
923 if ((GPUVMEnable == true || DCCEnable == true)) {
924 TimeForFetchingRowInVBlank =
925 dml_max(
926 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
927 / *PrefetchBandwidth,
928 dml_max(
929 UrgentLatency * (1 + HostVMDynamicLevels),
930 dml_max(
931 (LineTime
932 - TimeForFetchingMetaPTE) / 2.0,
933 LineTime
934 / 4.0)));
935 } else {
936 // See note above dated 5/30/2018
937 // if (NumberOfCursors > 0 || XFCEnabled)
938 TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0;
939 // else // TODO: Did someone else add this??
940 // TimeForFetchingRowInVBlank = 0.0;
941 }
942
943 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
944
945 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
946
947 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
948 // See note above dated 5/30/2018
949 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
950 - ((GPUVMEnable || DCCEnable) ?
951 (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) :
952 0.0); // TODO: Did someone else add this??
953
954 if (LinesToRequestPrefetchPixelData > 0) {
955
956 *VRatioPrefetchY = (double) PrefetchSourceLinesY
957 / LinesToRequestPrefetchPixelData;
958 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
959 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
960 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
961 *VRatioPrefetchY =
962 dml_max(
963 (double) PrefetchSourceLinesY
964 / LinesToRequestPrefetchPixelData,
965 (double) MaxNumSwathY
966 * SwathHeightY
967 / (LinesToRequestPrefetchPixelData
968 - (VInitPreFillY
969 - 3.0)
970 / 2.0));
971 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
972 } else {
973 MyError = true;
974 *VRatioPrefetchY = 0;
975 }
976 }
977
978 *VRatioPrefetchC = (double) PrefetchSourceLinesC
979 / LinesToRequestPrefetchPixelData;
980 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
981
982 if ((SwathHeightC > 4)) {
983 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
984 *VRatioPrefetchC =
985 dml_max(
986 *VRatioPrefetchC,
987 (double) MaxNumSwathC
988 * SwathHeightC
989 / (LinesToRequestPrefetchPixelData
990 - (VInitPreFillC
991 - 3.0)
992 / 2.0));
993 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
994 } else {
995 MyError = true;
996 *VRatioPrefetchC = 0;
997 }
998 }
999
1000 *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane
1001 * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData
1002 * dml_ceil(BytePerPixelDETY, 1)
1003 * *swath_width_luma_ub / LineTime;
1004 *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane
1005 * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData
1006 * dml_ceil(BytePerPixelDETC, 2)
1007 * *swath_width_chroma_ub / LineTime;
1008 } else {
1009 MyError = true;
1010 *VRatioPrefetchY = 0;
1011 *VRatioPrefetchC = 0;
1012 *RequiredPrefetchPixDataBWLuma = 0;
1013 *RequiredPrefetchPixDataBWChroma = 0;
1014 }
1015
1016 dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE);
1017 dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank);
1018 dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank);
1019 dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime);
1020 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1021
1022 } else {
1023 MyError = true;
1024 }
1025
1026 {
1027 double prefetch_vm_bw;
1028 double prefetch_row_bw;
1029
1030 if (PDEAndMetaPTEBytesFrame == 0) {
1031 prefetch_vm_bw = 0;
1032 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1033 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1034 } else {
1035 prefetch_vm_bw = 0;
1036 MyError = true;
1037 }
1038 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1039 prefetch_row_bw = 0;
1040 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1041 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1042 } else {
1043 prefetch_row_bw = 0;
1044 MyError = true;
1045 }
1046
1047 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1048 }
1049
1050 if (MyError) {
1051 *PrefetchBandwidth = 0;
1052 TimeForFetchingMetaPTE = 0;
1053 TimeForFetchingRowInVBlank = 0;
1054 *DestinationLinesToRequestVMInVBlank = 0;
1055 *DestinationLinesToRequestRowInVBlank = 0;
1056 *DestinationLinesForPrefetch = 0;
1057 LinesToRequestPrefetchPixelData = 0;
1058 *VRatioPrefetchY = 0;
1059 *VRatioPrefetchC = 0;
1060 *RequiredPrefetchPixDataBWLuma = 0;
1061 *RequiredPrefetchPixDataBWChroma = 0;
1062 }
1063
1064 return MyError;
1065 }
1066
/*
 * Snap Clock onto the grid of values 4 * VCOSpeed / N, where N is the
 * quotient (4 * VCOSpeed / Clock) passed through dml_floor() with a
 * granularity of 1. Using the floored divider means the result is not
 * below Clock for a divider of at least 1.
 *
 * No guard is applied for a divider that floors to 0.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_floor(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1071
/*
 * Snap Clock onto the grid of values 4 * VCOSpeed / N, where N is the
 * quotient (4 * VCOSpeed / Clock) passed through dml_ceil() with a
 * granularity of 1. Using the ceiled divider means the result is not
 * above Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_ceil(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1076
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,unsigned int ViewportWidth,unsigned int ViewportHeight,unsigned int DETBufferSize,unsigned int RequestHeight256Byte,unsigned int SwathHeight,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixel,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlock,unsigned int * MaxCompressedBlock,unsigned int * Independent64ByteBlock)1077 static double CalculateDCCConfiguration(
1078 bool DCCEnabled,
1079 bool DCCProgrammingAssumesScanDirectionUnknown,
1080 unsigned int ViewportWidth,
1081 unsigned int ViewportHeight,
1082 unsigned int DETBufferSize,
1083 unsigned int RequestHeight256Byte,
1084 unsigned int SwathHeight,
1085 enum dm_swizzle_mode TilingFormat,
1086 unsigned int BytePerPixel,
1087 enum scan_direction_class ScanOrientation,
1088 unsigned int *MaxUncompressedBlock,
1089 unsigned int *MaxCompressedBlock,
1090 unsigned int *Independent64ByteBlock)
1091 {
1092 double MaximumDCCCompressionSurface = 0.0;
1093 enum {
1094 REQ_256Bytes,
1095 REQ_128BytesNonContiguous,
1096 REQ_128BytesContiguous,
1097 REQ_NA
1098 } Request = REQ_NA;
1099
1100 if (DCCEnabled == true) {
1101 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1102 if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel
1103 && DETBufferSize
1104 >= 256 / RequestHeight256Byte
1105 * ViewportHeight) {
1106 Request = REQ_256Bytes;
1107 } else if ((DETBufferSize
1108 < RequestHeight256Byte * ViewportWidth * BytePerPixel
1109 && (BytePerPixel == 2 || BytePerPixel == 4))
1110 || (DETBufferSize
1111 < 256 / RequestHeight256Byte
1112 * ViewportHeight
1113 && BytePerPixel == 8
1114 && (TilingFormat == dm_sw_4kb_d
1115 || TilingFormat
1116 == dm_sw_4kb_d_x
1117 || TilingFormat
1118 == dm_sw_var_d
1119 || TilingFormat
1120 == dm_sw_var_d_x
1121 || TilingFormat
1122 == dm_sw_64kb_d
1123 || TilingFormat
1124 == dm_sw_64kb_d_x
1125 || TilingFormat
1126 == dm_sw_64kb_d_t
1127 || TilingFormat
1128 == dm_sw_64kb_r_x))) {
1129 Request = REQ_128BytesNonContiguous;
1130 } else {
1131 Request = REQ_128BytesContiguous;
1132 }
1133 } else {
1134 if (BytePerPixel == 1) {
1135 if (ScanOrientation == dm_vert || SwathHeight == 16) {
1136 Request = REQ_256Bytes;
1137 } else {
1138 Request = REQ_128BytesContiguous;
1139 }
1140 } else if (BytePerPixel == 2) {
1141 if ((ScanOrientation == dm_vert && SwathHeight == 16) || (ScanOrientation != dm_vert && SwathHeight == 8)) {
1142 Request = REQ_256Bytes;
1143 } else if (ScanOrientation == dm_vert) {
1144 Request = REQ_128BytesContiguous;
1145 } else {
1146 Request = REQ_128BytesNonContiguous;
1147 }
1148 } else if (BytePerPixel == 4) {
1149 if (SwathHeight == 8) {
1150 Request = REQ_256Bytes;
1151 } else if (ScanOrientation == dm_vert) {
1152 Request = REQ_128BytesContiguous;
1153 } else {
1154 Request = REQ_128BytesNonContiguous;
1155 }
1156 } else if (BytePerPixel == 8) {
1157 if (TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x
1158 || TilingFormat == dm_sw_var_d
1159 || TilingFormat == dm_sw_var_d_x
1160 || TilingFormat == dm_sw_64kb_d
1161 || TilingFormat == dm_sw_64kb_d_x
1162 || TilingFormat == dm_sw_64kb_d_t
1163 || TilingFormat == dm_sw_64kb_r_x) {
1164 if ((ScanOrientation == dm_vert && SwathHeight == 8)
1165 || (ScanOrientation != dm_vert
1166 && SwathHeight == 4)) {
1167 Request = REQ_256Bytes;
1168 } else if (ScanOrientation != dm_vert) {
1169 Request = REQ_128BytesContiguous;
1170 } else {
1171 Request = REQ_128BytesNonContiguous;
1172 }
1173 } else {
1174 if (ScanOrientation != dm_vert || SwathHeight == 8) {
1175 Request = REQ_256Bytes;
1176 } else {
1177 Request = REQ_128BytesContiguous;
1178 }
1179 }
1180 }
1181 }
1182 } else {
1183 Request = REQ_NA;
1184 }
1185
1186 if (Request == REQ_256Bytes) {
1187 *MaxUncompressedBlock = 256;
1188 *MaxCompressedBlock = 256;
1189 *Independent64ByteBlock = false;
1190 MaximumDCCCompressionSurface = 4.0;
1191 } else if (Request == REQ_128BytesContiguous) {
1192 *MaxUncompressedBlock = 128;
1193 *MaxCompressedBlock = 128;
1194 *Independent64ByteBlock = false;
1195 MaximumDCCCompressionSurface = 2.0;
1196 } else if (Request == REQ_128BytesNonContiguous) {
1197 *MaxUncompressedBlock = 256;
1198 *MaxCompressedBlock = 64;
1199 *Independent64ByteBlock = true;
1200 MaximumDCCCompressionSurface = 4.0;
1201 } else {
1202 *MaxUncompressedBlock = 0;
1203 *MaxCompressedBlock = 0;
1204 *Independent64ByteBlock = 0;
1205 MaximumDCCCompressionSurface = 0.0;
1206 }
1207
1208 return MaximumDCCCompressionSurface;
1209 }
1210
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1211 static double CalculatePrefetchSourceLines(
1212 struct display_mode_lib *mode_lib,
1213 double VRatio,
1214 double vtaps,
1215 bool Interlace,
1216 bool ProgressiveToInterlaceUnitInOPP,
1217 unsigned int SwathHeight,
1218 unsigned int ViewportYStart,
1219 double *VInitPreFill,
1220 unsigned int *MaxNumSwath)
1221 {
1222 unsigned int MaxPartialSwath;
1223
1224 if (ProgressiveToInterlaceUnitInOPP)
1225 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1226 else
1227 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1228
1229 if (!mode_lib->vba.IgnoreViewportPositioning) {
1230
1231 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1232
1233 if (*VInitPreFill > 1.0)
1234 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1235 else
1236 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1237 % SwathHeight;
1238 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1239
1240 } else {
1241
1242 if (ViewportYStart != 0)
1243 dml_print(
1244 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1245
1246 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1247
1248 if (*VInitPreFill > 1.0)
1249 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1250 else
1251 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1252 % SwathHeight;
1253 }
1254
1255 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1256 }
1257
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int ViewportWidth,unsigned int ViewportHeight,unsigned int SwathWidth,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,unsigned int VMMPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1258 static unsigned int CalculateVMAndRowBytes(
1259 struct display_mode_lib *mode_lib,
1260 bool DCCEnable,
1261 unsigned int BlockHeight256Bytes,
1262 unsigned int BlockWidth256Bytes,
1263 enum source_format_class SourcePixelFormat,
1264 unsigned int SurfaceTiling,
1265 unsigned int BytePerPixel,
1266 enum scan_direction_class ScanDirection,
1267 unsigned int ViewportWidth,
1268 unsigned int ViewportHeight,
1269 unsigned int SwathWidth,
1270 bool GPUVMEnable,
1271 bool HostVMEnable,
1272 unsigned int HostVMMaxPageTableLevels,
1273 unsigned int HostVMCachedPageTableLevels,
1274 unsigned int VMMPageSize,
1275 unsigned int PTEBufferSizeInRequests,
1276 unsigned int Pitch,
1277 unsigned int DCCMetaPitch,
1278 unsigned int *MacroTileWidth,
1279 unsigned int *MetaRowByte,
1280 unsigned int *PixelPTEBytesPerRow,
1281 bool *PTEBufferSizeNotExceeded,
1282 unsigned int *dpte_row_width_ub,
1283 unsigned int *dpte_row_height,
1284 unsigned int *MetaRequestWidth,
1285 unsigned int *MetaRequestHeight,
1286 unsigned int *meta_row_width,
1287 unsigned int *meta_row_height,
1288 unsigned int *vm_group_bytes,
1289 unsigned int *dpte_group_bytes,
1290 unsigned int *PixelPTEReqWidth,
1291 unsigned int *PixelPTEReqHeight,
1292 unsigned int *PTERequestSize,
1293 unsigned int *DPDE0BytesFrame,
1294 unsigned int *MetaPTEBytesFrame)
1295 {
1296 unsigned int MPDEBytesFrame;
1297 unsigned int DCCMetaSurfaceBytes;
1298 unsigned int MacroTileSizeBytes;
1299 unsigned int MacroTileHeight;
1300 unsigned int ExtraDPDEBytesFrame;
1301 unsigned int PDEAndMetaPTEBytesFrame;
1302 unsigned int PixelPTEReqHeightPTEs = 0;
1303
1304 if (DCCEnable == true) {
1305 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1306 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1307 if (ScanDirection == dm_horz) {
1308 *meta_row_height = *MetaRequestHeight;
1309 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1310 + *MetaRequestWidth;
1311 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1312 } else {
1313 *meta_row_height = *MetaRequestWidth;
1314 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1315 + *MetaRequestHeight;
1316 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1317 }
1318 if (ScanDirection == dm_horz) {
1319 DCCMetaSurfaceBytes = DCCMetaPitch
1320 * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1321 + 64 * BlockHeight256Bytes) * BytePerPixel
1322 / 256;
1323 } else {
1324 DCCMetaSurfaceBytes = DCCMetaPitch
1325 * (dml_ceil(
1326 (double) ViewportHeight - 1,
1327 64 * BlockHeight256Bytes)
1328 + 64 * BlockHeight256Bytes) * BytePerPixel
1329 / 256;
1330 }
1331 if (GPUVMEnable == true) {
1332 *MetaPTEBytesFrame = (dml_ceil(
1333 (double) (DCCMetaSurfaceBytes - VMMPageSize)
1334 / (8 * VMMPageSize),
1335 1) + 1) * 64;
1336 MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2);
1337 } else {
1338 *MetaPTEBytesFrame = 0;
1339 MPDEBytesFrame = 0;
1340 }
1341 } else {
1342 *MetaPTEBytesFrame = 0;
1343 MPDEBytesFrame = 0;
1344 *MetaRowByte = 0;
1345 }
1346
1347 if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) {
1348 MacroTileSizeBytes = 256;
1349 MacroTileHeight = BlockHeight256Bytes;
1350 } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x
1351 || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) {
1352 MacroTileSizeBytes = 4096;
1353 MacroTileHeight = 4 * BlockHeight256Bytes;
1354 } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t
1355 || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d
1356 || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x
1357 || SurfaceTiling == dm_sw_64kb_r_x) {
1358 MacroTileSizeBytes = 65536;
1359 MacroTileHeight = 16 * BlockHeight256Bytes;
1360 } else {
1361 MacroTileSizeBytes = 262144;
1362 MacroTileHeight = 32 * BlockHeight256Bytes;
1363 }
1364 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1365
1366 if (GPUVMEnable == true && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) {
1367 if (ScanDirection == dm_horz) {
1368 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1369 } else {
1370 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1371 }
1372 ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3);
1373 } else {
1374 *DPDE0BytesFrame = 0;
1375 ExtraDPDEBytesFrame = 0;
1376 }
1377
1378 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1379 + ExtraDPDEBytesFrame;
1380
1381 if (HostVMEnable == true) {
1382 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
1383 }
1384
1385 if (GPUVMEnable == true) {
1386 double FractionOfPTEReturnDrop;
1387
1388 if (SurfaceTiling == dm_sw_linear) {
1389 PixelPTEReqHeightPTEs = 1;
1390 *PixelPTEReqHeight = 1;
1391 *PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel;
1392 *PTERequestSize = 64;
1393 FractionOfPTEReturnDrop = 0;
1394 } else if (MacroTileSizeBytes == 4096) {
1395 PixelPTEReqHeightPTEs = 1;
1396 *PixelPTEReqHeight = MacroTileHeight;
1397 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1398 *PTERequestSize = 64;
1399 if (ScanDirection == dm_horz)
1400 FractionOfPTEReturnDrop = 0;
1401 else
1402 FractionOfPTEReturnDrop = 7 / 8;
1403 } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) {
1404 PixelPTEReqHeightPTEs = 16;
1405 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1406 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1407 *PTERequestSize = 128;
1408 FractionOfPTEReturnDrop = 0;
1409 } else {
1410 PixelPTEReqHeightPTEs = 1;
1411 *PixelPTEReqHeight = MacroTileHeight;
1412 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1413 *PTERequestSize = 64;
1414 FractionOfPTEReturnDrop = 0;
1415 }
1416
1417 if (SurfaceTiling == dm_sw_linear) {
1418 *dpte_row_height = dml_min(128,
1419 1 << (unsigned int) dml_floor(
1420 dml_log2(
1421 (double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch),
1422 1));
1423 *dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1424 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1425 } else if (ScanDirection == dm_horz) {
1426 *dpte_row_height = *PixelPTEReqHeight;
1427 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1428 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1429 } else {
1430 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1431 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1432 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1433 }
1434 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1435 <= 64 * PTEBufferSizeInRequests) {
1436 *PTEBufferSizeNotExceeded = true;
1437 } else {
1438 *PTEBufferSizeNotExceeded = false;
1439 }
1440 } else {
1441 *PixelPTEBytesPerRow = 0;
1442 *PTEBufferSizeNotExceeded = true;
1443 }
1444 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame);
1445
1446 if (HostVMEnable == true) {
1447 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels));
1448 }
1449
1450 if (HostVMEnable == true) {
1451 *vm_group_bytes = 512;
1452 *dpte_group_bytes = 512;
1453 } else if (GPUVMEnable == true) {
1454 *vm_group_bytes = 2048;
1455 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) {
1456 *dpte_group_bytes = 512;
1457 } else {
1458 *dpte_group_bytes = 2048;
1459 }
1460 } else {
1461 *vm_group_bytes = 0;
1462 *dpte_group_bytes = 0;
1463 }
1464
1465 return PDEAndMetaPTEBytesFrame;
1466 }
1467
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1468 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1469 struct display_mode_lib *mode_lib)
1470 {
1471 struct vba_vars_st *locals = &mode_lib->vba;
1472 unsigned int j, k;
1473
1474 mode_lib->vba.WritebackDISPCLK = 0.0;
1475 mode_lib->vba.DISPCLKWithRamping = 0;
1476 mode_lib->vba.DISPCLKWithoutRamping = 0;
1477 mode_lib->vba.GlobalDPPCLK = 0.0;
1478
1479 // DISPCLK and DPPCLK Calculation
1480 //
1481 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1482 if (mode_lib->vba.WritebackEnable[k]) {
1483 mode_lib->vba.WritebackDISPCLK =
1484 dml_max(
1485 mode_lib->vba.WritebackDISPCLK,
1486 CalculateWriteBackDISPCLK(
1487 mode_lib->vba.WritebackPixelFormat[k],
1488 mode_lib->vba.PixelClock[k],
1489 mode_lib->vba.WritebackHRatio[k],
1490 mode_lib->vba.WritebackVRatio[k],
1491 mode_lib->vba.WritebackLumaHTaps[k],
1492 mode_lib->vba.WritebackLumaVTaps[k],
1493 mode_lib->vba.WritebackChromaHTaps[k],
1494 mode_lib->vba.WritebackChromaVTaps[k],
1495 mode_lib->vba.WritebackDestinationWidth[k],
1496 mode_lib->vba.HTotal[k],
1497 mode_lib->vba.WritebackChromaLineBufferWidth));
1498 }
1499 }
1500
1501 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1502 if (mode_lib->vba.HRatio[k] > 1) {
1503 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1504 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1505 mode_lib->vba.MaxPSCLToLBThroughput
1506 * mode_lib->vba.HRatio[k]
1507 / dml_ceil(
1508 mode_lib->vba.htaps[k]
1509 / 6.0,
1510 1));
1511 } else {
1512 locals->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1513 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1514 mode_lib->vba.MaxPSCLToLBThroughput);
1515 }
1516
1517 mode_lib->vba.DPPCLKUsingSingleDPPLuma =
1518 mode_lib->vba.PixelClock[k]
1519 * dml_max(
1520 mode_lib->vba.vtaps[k] / 6.0
1521 * dml_min(
1522 1.0,
1523 mode_lib->vba.HRatio[k]),
1524 dml_max(
1525 mode_lib->vba.HRatio[k]
1526 * mode_lib->vba.VRatio[k]
1527 / locals->PSCL_THROUGHPUT_LUMA[k],
1528 1.0));
1529
1530 if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6)
1531 && mode_lib->vba.DPPCLKUsingSingleDPPLuma
1532 < 2 * mode_lib->vba.PixelClock[k]) {
1533 mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k];
1534 }
1535
1536 if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8
1537 && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) {
1538 locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1539 locals->DPPCLKUsingSingleDPP[k] =
1540 mode_lib->vba.DPPCLKUsingSingleDPPLuma;
1541 } else {
1542 if (mode_lib->vba.HRatio[k] > 1) {
1543 locals->PSCL_THROUGHPUT_CHROMA[k] =
1544 dml_min(
1545 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1546 mode_lib->vba.MaxPSCLToLBThroughput
1547 * mode_lib->vba.HRatio[k]
1548 / 2
1549 / dml_ceil(
1550 mode_lib->vba.HTAPsChroma[k]
1551 / 6.0,
1552 1.0));
1553 } else {
1554 locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1555 mode_lib->vba.MaxDCHUBToPSCLThroughput,
1556 mode_lib->vba.MaxPSCLToLBThroughput);
1557 }
1558 mode_lib->vba.DPPCLKUsingSingleDPPChroma =
1559 mode_lib->vba.PixelClock[k]
1560 * dml_max(
1561 mode_lib->vba.VTAPsChroma[k]
1562 / 6.0
1563 * dml_min(
1564 1.0,
1565 mode_lib->vba.HRatio[k]
1566 / 2),
1567 dml_max(
1568 mode_lib->vba.HRatio[k]
1569 * mode_lib->vba.VRatio[k]
1570 / 4
1571 / locals->PSCL_THROUGHPUT_CHROMA[k],
1572 1.0));
1573
1574 if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6)
1575 && mode_lib->vba.DPPCLKUsingSingleDPPChroma
1576 < 2 * mode_lib->vba.PixelClock[k]) {
1577 mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2
1578 * mode_lib->vba.PixelClock[k];
1579 }
1580
1581 locals->DPPCLKUsingSingleDPP[k] = dml_max(
1582 mode_lib->vba.DPPCLKUsingSingleDPPLuma,
1583 mode_lib->vba.DPPCLKUsingSingleDPPChroma);
1584 }
1585 }
1586
1587 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1588 if (mode_lib->vba.BlendingAndTiming[k] != k)
1589 continue;
1590 if (mode_lib->vba.ODMCombineEnabled[k]) {
1591 mode_lib->vba.DISPCLKWithRamping =
1592 dml_max(
1593 mode_lib->vba.DISPCLKWithRamping,
1594 mode_lib->vba.PixelClock[k] / 2
1595 * (1
1596 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1597 / 100)
1598 * (1
1599 + mode_lib->vba.DISPCLKRampingMargin
1600 / 100));
1601 mode_lib->vba.DISPCLKWithoutRamping =
1602 dml_max(
1603 mode_lib->vba.DISPCLKWithoutRamping,
1604 mode_lib->vba.PixelClock[k] / 2
1605 * (1
1606 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1607 / 100));
1608 } else if (!mode_lib->vba.ODMCombineEnabled[k]) {
1609 mode_lib->vba.DISPCLKWithRamping =
1610 dml_max(
1611 mode_lib->vba.DISPCLKWithRamping,
1612 mode_lib->vba.PixelClock[k]
1613 * (1
1614 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1615 / 100)
1616 * (1
1617 + mode_lib->vba.DISPCLKRampingMargin
1618 / 100));
1619 mode_lib->vba.DISPCLKWithoutRamping =
1620 dml_max(
1621 mode_lib->vba.DISPCLKWithoutRamping,
1622 mode_lib->vba.PixelClock[k]
1623 * (1
1624 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1625 / 100));
1626 }
1627 }
1628
1629 mode_lib->vba.DISPCLKWithRamping = dml_max(
1630 mode_lib->vba.DISPCLKWithRamping,
1631 mode_lib->vba.WritebackDISPCLK);
1632 mode_lib->vba.DISPCLKWithoutRamping = dml_max(
1633 mode_lib->vba.DISPCLKWithoutRamping,
1634 mode_lib->vba.WritebackDISPCLK);
1635
1636 ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0);
1637 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1638 mode_lib->vba.DISPCLKWithRamping,
1639 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1640 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1641 mode_lib->vba.DISPCLKWithoutRamping,
1642 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1643 mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1644 mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz,
1645 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1646 if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity
1647 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1648 mode_lib->vba.DISPCLK_calculated =
1649 mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity;
1650 } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity
1651 > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) {
1652 mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity;
1653 } else {
1654 mode_lib->vba.DISPCLK_calculated =
1655 mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity;
1656 }
1657 DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated);
1658
1659 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1660 mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k]
1661 / mode_lib->vba.DPPPerPlane[k]
1662 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100);
1663 mode_lib->vba.GlobalDPPCLK = dml_max(
1664 mode_lib->vba.GlobalDPPCLK,
1665 mode_lib->vba.DPPCLK_calculated[k]);
1666 }
1667 mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp(
1668 mode_lib->vba.GlobalDPPCLK,
1669 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
1670 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1671 mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255
1672 * dml_ceil(
1673 mode_lib->vba.DPPCLK_calculated[k] * 255
1674 / mode_lib->vba.GlobalDPPCLK,
1675 1);
1676 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]);
1677 }
1678
1679 // Urgent and NB P-State/DRAM Clock Change Watermark
1680 DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK);
1681 DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN);
1682 DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW);
1683
1684 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1685 bool MainPlaneDoesODMCombine = false;
1686
1687 if (mode_lib->vba.SourceScan[k] == dm_horz)
1688 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k];
1689 else
1690 locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k];
1691
1692 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1693 MainPlaneDoesODMCombine = true;
1694 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
1695 if (mode_lib->vba.BlendingAndTiming[k] == j
1696 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
1697 MainPlaneDoesODMCombine = true;
1698
1699 if (MainPlaneDoesODMCombine == true)
1700 locals->SwathWidthY[k] = dml_min(
1701 (double) locals->SwathWidthSingleDPPY[k],
1702 dml_round(
1703 mode_lib->vba.HActive[k] / 2.0
1704 * mode_lib->vba.HRatio[k]));
1705 else
1706 locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k]
1707 / mode_lib->vba.DPPPerPlane[k];
1708 }
1709
1710 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1711 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
1712 locals->BytePerPixelDETY[k] = 8;
1713 locals->BytePerPixelDETC[k] = 0;
1714 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
1715 locals->BytePerPixelDETY[k] = 4;
1716 locals->BytePerPixelDETC[k] = 0;
1717 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
1718 locals->BytePerPixelDETY[k] = 2;
1719 locals->BytePerPixelDETC[k] = 0;
1720 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
1721 locals->BytePerPixelDETY[k] = 1;
1722 locals->BytePerPixelDETC[k] = 0;
1723 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
1724 locals->BytePerPixelDETY[k] = 1;
1725 locals->BytePerPixelDETC[k] = 2;
1726 } else { // dm_420_10
1727 locals->BytePerPixelDETY[k] = 4.0 / 3.0;
1728 locals->BytePerPixelDETC[k] = 8.0 / 3.0;
1729 }
1730 }
1731
1732 mode_lib->vba.TotalDataReadBandwidth = 0.0;
1733 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1734 locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k]
1735 * dml_ceil(locals->BytePerPixelDETY[k], 1)
1736 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1737 * mode_lib->vba.VRatio[k];
1738 locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k]
1739 / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2)
1740 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k])
1741 * mode_lib->vba.VRatio[k] / 2;
1742 DTRACE(
1743 " read_bw[%i] = %fBps",
1744 k,
1745 locals->ReadBandwidthPlaneLuma[k]
1746 + locals->ReadBandwidthPlaneChroma[k]);
1747 mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k]
1748 + locals->ReadBandwidthPlaneChroma[k];
1749 }
1750
1751 // DCFCLK Deep Sleep
1752 CalculateDCFCLKDeepSleep(
1753 mode_lib,
1754 mode_lib->vba.NumberOfActivePlanes,
1755 locals->BytePerPixelDETY,
1756 locals->BytePerPixelDETC,
1757 mode_lib->vba.VRatio,
1758 locals->SwathWidthY,
1759 mode_lib->vba.DPPPerPlane,
1760 mode_lib->vba.HRatio,
1761 mode_lib->vba.PixelClock,
1762 locals->PSCL_THROUGHPUT_LUMA,
1763 locals->PSCL_THROUGHPUT_CHROMA,
1764 locals->DPPCLK,
1765 &mode_lib->vba.DCFCLKDeepSleep);
1766
1767 // DSCCLK
1768 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1769 if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) {
1770 locals->DSCCLK_calculated[k] = 0.0;
1771 } else {
1772 if (mode_lib->vba.OutputFormat[k] == dm_420
1773 || mode_lib->vba.OutputFormat[k] == dm_n422)
1774 mode_lib->vba.DSCFormatFactor = 2;
1775 else
1776 mode_lib->vba.DSCFormatFactor = 1;
1777 if (mode_lib->vba.ODMCombineEnabled[k])
1778 locals->DSCCLK_calculated[k] =
1779 mode_lib->vba.PixelClockBackEnd[k] / 6
1780 / mode_lib->vba.DSCFormatFactor
1781 / (1
1782 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1783 / 100);
1784 else
1785 locals->DSCCLK_calculated[k] =
1786 mode_lib->vba.PixelClockBackEnd[k] / 3
1787 / mode_lib->vba.DSCFormatFactor
1788 / (1
1789 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
1790 / 100);
1791 }
1792 }
1793
1794 // DSC Delay
1795 // TODO
1796 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1797 double bpp = mode_lib->vba.OutputBpp[k];
1798 unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k];
1799
1800 if (mode_lib->vba.DSCEnabled[k] && bpp != 0) {
1801 if (!mode_lib->vba.ODMCombineEnabled[k]) {
1802 locals->DSCDelay[k] =
1803 dscceComputeDelay(
1804 mode_lib->vba.DSCInputBitPerComponent[k],
1805 bpp,
1806 dml_ceil(
1807 (double) mode_lib->vba.HActive[k]
1808 / mode_lib->vba.NumberOfDSCSlices[k],
1809 1),
1810 slices,
1811 mode_lib->vba.OutputFormat[k])
1812 + dscComputeDelay(
1813 mode_lib->vba.OutputFormat[k]);
1814 } else {
1815 locals->DSCDelay[k] =
1816 2
1817 * (dscceComputeDelay(
1818 mode_lib->vba.DSCInputBitPerComponent[k],
1819 bpp,
1820 dml_ceil(
1821 (double) mode_lib->vba.HActive[k]
1822 / mode_lib->vba.NumberOfDSCSlices[k],
1823 1),
1824 slices / 2.0,
1825 mode_lib->vba.OutputFormat[k])
1826 + dscComputeDelay(
1827 mode_lib->vba.OutputFormat[k]));
1828 }
1829 locals->DSCDelay[k] = locals->DSCDelay[k]
1830 * mode_lib->vba.PixelClock[k]
1831 / mode_lib->vba.PixelClockBackEnd[k];
1832 } else {
1833 locals->DSCDelay[k] = 0;
1834 }
1835 }
1836
1837 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
1838 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes
1839 if (j != k && mode_lib->vba.BlendingAndTiming[k] == j
1840 && mode_lib->vba.DSCEnabled[j])
1841 locals->DSCDelay[k] = locals->DSCDelay[j];
1842
1843 // Prefetch
1844 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
1845 unsigned int PDEAndMetaPTEBytesFrameY;
1846 unsigned int PixelPTEBytesPerRowY;
1847 unsigned int MetaRowByteY;
1848 unsigned int MetaRowByteC;
1849 unsigned int PDEAndMetaPTEBytesFrameC;
1850 unsigned int PixelPTEBytesPerRowC;
1851 bool PTEBufferSizeNotExceededY;
1852 bool PTEBufferSizeNotExceededC;
1853
1854 Calculate256BBlockSizes(
1855 mode_lib->vba.SourcePixelFormat[k],
1856 mode_lib->vba.SurfaceTiling[k],
1857 dml_ceil(locals->BytePerPixelDETY[k], 1),
1858 dml_ceil(locals->BytePerPixelDETC[k], 2),
1859 &locals->BlockHeight256BytesY[k],
1860 &locals->BlockHeight256BytesC[k],
1861 &locals->BlockWidth256BytesY[k],
1862 &locals->BlockWidth256BytesC[k]);
1863
1864 locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
1865 mode_lib,
1866 mode_lib->vba.VRatio[k],
1867 mode_lib->vba.vtaps[k],
1868 mode_lib->vba.Interlace[k],
1869 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1870 mode_lib->vba.SwathHeightY[k],
1871 mode_lib->vba.ViewportYStartY[k],
1872 &locals->VInitPreFillY[k],
1873 &locals->MaxNumSwathY[k]);
1874
1875 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
1876 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
1877 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
1878 && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) {
1879 PDEAndMetaPTEBytesFrameC =
1880 CalculateVMAndRowBytes(
1881 mode_lib,
1882 mode_lib->vba.DCCEnable[k],
1883 locals->BlockHeight256BytesC[k],
1884 locals->BlockWidth256BytesC[k],
1885 mode_lib->vba.SourcePixelFormat[k],
1886 mode_lib->vba.SurfaceTiling[k],
1887 dml_ceil(
1888 locals->BytePerPixelDETC[k],
1889 2),
1890 mode_lib->vba.SourceScan[k],
1891 mode_lib->vba.ViewportWidth[k] / 2,
1892 mode_lib->vba.ViewportHeight[k] / 2,
1893 locals->SwathWidthY[k] / 2,
1894 mode_lib->vba.GPUVMEnable,
1895 mode_lib->vba.HostVMEnable,
1896 mode_lib->vba.HostVMMaxPageTableLevels,
1897 mode_lib->vba.HostVMCachedPageTableLevels,
1898 mode_lib->vba.VMMPageSize,
1899 mode_lib->vba.PTEBufferSizeInRequestsChroma,
1900 mode_lib->vba.PitchC[k],
1901 mode_lib->vba.DCCMetaPitchC[k],
1902 &locals->MacroTileWidthC[k],
1903 &MetaRowByteC,
1904 &PixelPTEBytesPerRowC,
1905 &PTEBufferSizeNotExceededC,
1906 &locals->dpte_row_width_chroma_ub[k],
1907 &locals->dpte_row_height_chroma[k],
1908 &locals->meta_req_width_chroma[k],
1909 &locals->meta_req_height_chroma[k],
1910 &locals->meta_row_width_chroma[k],
1911 &locals->meta_row_height_chroma[k],
1912 &locals->vm_group_bytes_chroma,
1913 &locals->dpte_group_bytes_chroma,
1914 &locals->PixelPTEReqWidthC[k],
1915 &locals->PixelPTEReqHeightC[k],
1916 &locals->PTERequestSizeC[k],
1917 &locals->dpde0_bytes_per_frame_ub_c[k],
1918 &locals->meta_pte_bytes_per_frame_ub_c[k]);
1919
1920 locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
1921 mode_lib,
1922 mode_lib->vba.VRatio[k] / 2,
1923 mode_lib->vba.VTAPsChroma[k],
1924 mode_lib->vba.Interlace[k],
1925 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
1926 mode_lib->vba.SwathHeightC[k],
1927 mode_lib->vba.ViewportYStartC[k],
1928 &locals->VInitPreFillC[k],
1929 &locals->MaxNumSwathC[k]);
1930 } else {
1931 PixelPTEBytesPerRowC = 0;
1932 PDEAndMetaPTEBytesFrameC = 0;
1933 MetaRowByteC = 0;
1934 locals->MaxNumSwathC[k] = 0;
1935 locals->PrefetchSourceLinesC[k] = 0;
1936 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
1937 }
1938
1939 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
1940 mode_lib,
1941 mode_lib->vba.DCCEnable[k],
1942 locals->BlockHeight256BytesY[k],
1943 locals->BlockWidth256BytesY[k],
1944 mode_lib->vba.SourcePixelFormat[k],
1945 mode_lib->vba.SurfaceTiling[k],
1946 dml_ceil(locals->BytePerPixelDETY[k], 1),
1947 mode_lib->vba.SourceScan[k],
1948 mode_lib->vba.ViewportWidth[k],
1949 mode_lib->vba.ViewportHeight[k],
1950 locals->SwathWidthY[k],
1951 mode_lib->vba.GPUVMEnable,
1952 mode_lib->vba.HostVMEnable,
1953 mode_lib->vba.HostVMMaxPageTableLevels,
1954 mode_lib->vba.HostVMCachedPageTableLevels,
1955 mode_lib->vba.VMMPageSize,
1956 locals->PTEBufferSizeInRequestsForLuma,
1957 mode_lib->vba.PitchY[k],
1958 mode_lib->vba.DCCMetaPitchY[k],
1959 &locals->MacroTileWidthY[k],
1960 &MetaRowByteY,
1961 &PixelPTEBytesPerRowY,
1962 &PTEBufferSizeNotExceededY,
1963 &locals->dpte_row_width_luma_ub[k],
1964 &locals->dpte_row_height[k],
1965 &locals->meta_req_width[k],
1966 &locals->meta_req_height[k],
1967 &locals->meta_row_width[k],
1968 &locals->meta_row_height[k],
1969 &locals->vm_group_bytes[k],
1970 &locals->dpte_group_bytes[k],
1971 &locals->PixelPTEReqWidthY[k],
1972 &locals->PixelPTEReqHeightY[k],
1973 &locals->PTERequestSizeY[k],
1974 &locals->dpde0_bytes_per_frame_ub_l[k],
1975 &locals->meta_pte_bytes_per_frame_ub_l[k]);
1976
1977 locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
1978 locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
1979 + PDEAndMetaPTEBytesFrameC;
1980 locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
1981
1982 CalculateActiveRowBandwidth(
1983 mode_lib->vba.GPUVMEnable,
1984 mode_lib->vba.SourcePixelFormat[k],
1985 mode_lib->vba.VRatio[k],
1986 mode_lib->vba.DCCEnable[k],
1987 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
1988 MetaRowByteY,
1989 MetaRowByteC,
1990 locals->meta_row_height[k],
1991 locals->meta_row_height_chroma[k],
1992 PixelPTEBytesPerRowY,
1993 PixelPTEBytesPerRowC,
1994 locals->dpte_row_height[k],
1995 locals->dpte_row_height_chroma[k],
1996 &locals->meta_row_bw[k],
1997 &locals->dpte_row_bw[k]);
1998 }
1999
2000 mode_lib->vba.TotalDCCActiveDPP = 0;
2001 mode_lib->vba.TotalActiveDPP = 0;
2002 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2003 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP
2004 + mode_lib->vba.DPPPerPlane[k];
2005 if (mode_lib->vba.DCCEnable[k])
2006 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP
2007 + mode_lib->vba.DPPPerPlane[k];
2008 }
2009
2010 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
2011 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2012 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2013 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
2014
2015 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency =
2016 (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK
2017 + mode_lib->vba.UrgentOutOfOrderReturnPerChannel
2018 * mode_lib->vba.NumberOfChannels
2019 / mode_lib->vba.ReturnBW;
2020
2021 mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency(
2022 mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency,
2023 mode_lib->vba.TotalActiveDPP,
2024 mode_lib->vba.PixelChunkSizeInKByte,
2025 mode_lib->vba.TotalDCCActiveDPP,
2026 mode_lib->vba.MetaChunkSize,
2027 mode_lib->vba.ReturnBW,
2028 mode_lib->vba.GPUVMEnable,
2029 mode_lib->vba.HostVMEnable,
2030 mode_lib->vba.NumberOfActivePlanes,
2031 mode_lib->vba.DPPPerPlane,
2032 locals->dpte_group_bytes,
2033 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2034 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2035 mode_lib->vba.HostVMMaxPageTableLevels,
2036 mode_lib->vba.HostVMCachedPageTableLevels);
2037
2038
2039 mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep;
2040
2041 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2042 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2043 if (mode_lib->vba.WritebackEnable[k] == true) {
2044 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2045 mode_lib->vba.WritebackLatency
2046 + CalculateWriteBackDelay(
2047 mode_lib->vba.WritebackPixelFormat[k],
2048 mode_lib->vba.WritebackHRatio[k],
2049 mode_lib->vba.WritebackVRatio[k],
2050 mode_lib->vba.WritebackLumaHTaps[k],
2051 mode_lib->vba.WritebackLumaVTaps[k],
2052 mode_lib->vba.WritebackChromaHTaps[k],
2053 mode_lib->vba.WritebackChromaVTaps[k],
2054 mode_lib->vba.WritebackDestinationWidth[k])
2055 / mode_lib->vba.DISPCLK;
2056 } else
2057 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0;
2058 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2059 if (mode_lib->vba.BlendingAndTiming[j] == k
2060 && mode_lib->vba.WritebackEnable[j] == true) {
2061 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2062 dml_max(
2063 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k],
2064 mode_lib->vba.WritebackLatency
2065 + CalculateWriteBackDelay(
2066 mode_lib->vba.WritebackPixelFormat[j],
2067 mode_lib->vba.WritebackHRatio[j],
2068 mode_lib->vba.WritebackVRatio[j],
2069 mode_lib->vba.WritebackLumaHTaps[j],
2070 mode_lib->vba.WritebackLumaVTaps[j],
2071 mode_lib->vba.WritebackChromaHTaps[j],
2072 mode_lib->vba.WritebackChromaVTaps[j],
2073 mode_lib->vba.WritebackDestinationWidth[j])
2074 / mode_lib->vba.DISPCLK);
2075 }
2076 }
2077 }
2078 }
2079
2080 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2081 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j)
2082 if (mode_lib->vba.BlendingAndTiming[k] == j)
2083 locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] =
2084 locals->WritebackDelay[mode_lib->vba.VoltageLevel][j];
2085
2086 mode_lib->vba.VStartupLines = 13;
2087 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2088 locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1));
2089 }
2090
2091 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k)
2092 locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]);
2093
2094 // We don't really care to iterate between the various prefetch modes
2095 //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode);
2096 mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly);
2097
2098 do {
2099 double MaxTotalRDBandwidth = 0;
2100 double MaxTotalRDBandwidthNoUrgentBurst = 0;
2101 bool DestinationLineTimesForPrefetchLessThan2 = false;
2102 bool VRatioPrefetchMoreThan4 = false;
2103 double TWait = CalculateTWait(
2104 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2105 mode_lib->vba.DRAMClockChangeLatency,
2106 mode_lib->vba.UrgentLatency,
2107 mode_lib->vba.SREnterPlusExitTime);
2108
2109 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2110 Pipe myPipe;
2111 HostVM myHostVM;
2112
2113 if (mode_lib->vba.XFCEnabled[k] == true) {
2114 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
2115 CalculateRemoteSurfaceFlipDelay(
2116 mode_lib,
2117 mode_lib->vba.VRatio[k],
2118 locals->SwathWidthY[k],
2119 dml_ceil(
2120 locals->BytePerPixelDETY[k],
2121 1),
2122 mode_lib->vba.HTotal[k]
2123 / mode_lib->vba.PixelClock[k],
2124 mode_lib->vba.XFCTSlvVupdateOffset,
2125 mode_lib->vba.XFCTSlvVupdateWidth,
2126 mode_lib->vba.XFCTSlvVreadyOffset,
2127 mode_lib->vba.XFCXBUFLatencyTolerance,
2128 mode_lib->vba.XFCFillBWOverhead,
2129 mode_lib->vba.XFCSlvChunkSize,
2130 mode_lib->vba.XFCBusTransportTime,
2131 mode_lib->vba.TCalc,
2132 TWait,
2133 &mode_lib->vba.SrcActiveDrainRate,
2134 &mode_lib->vba.TInitXFill,
2135 &mode_lib->vba.TslvChk);
2136 } else {
2137 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0;
2138 }
2139
2140 myPipe.DPPCLK = locals->DPPCLK[k];
2141 myPipe.DISPCLK = mode_lib->vba.DISPCLK;
2142 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
2143 myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep;
2144 myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k];
2145 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
2146 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
2147 myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k];
2148 myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k];
2149 myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k];
2150 myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k];
2151 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
2152 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
2153 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
2154 myPipe.HTotal = mode_lib->vba.HTotal[k];
2155
2156
2157 myHostVM.Enable = mode_lib->vba.HostVMEnable;
2158 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
2159 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
2160
2161 mode_lib->vba.ErrorResult[k] =
2162 CalculatePrefetchSchedule(
2163 mode_lib,
2164 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2165 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2166 &myPipe,
2167 locals->DSCDelay[k],
2168 mode_lib->vba.DPPCLKDelaySubtotal,
2169 mode_lib->vba.DPPCLKDelaySCL,
2170 mode_lib->vba.DPPCLKDelaySCLLBOnly,
2171 mode_lib->vba.DPPCLKDelayCNVCFormater,
2172 mode_lib->vba.DPPCLKDelayCNVCCursor,
2173 mode_lib->vba.DISPCLKDelaySubtotal,
2174 (unsigned int) (locals->SwathWidthY[k]
2175 / mode_lib->vba.HRatio[k]),
2176 mode_lib->vba.OutputFormat[k],
2177 mode_lib->vba.MaxInterDCNTileRepeaters,
2178 dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]),
2179 locals->MaxVStartupLines[k],
2180 mode_lib->vba.GPUVMMaxPageTableLevels,
2181 mode_lib->vba.GPUVMEnable,
2182 &myHostVM,
2183 mode_lib->vba.DynamicMetadataEnable[k],
2184 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
2185 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
2186 mode_lib->vba.DCCEnable[k],
2187 mode_lib->vba.UrgentLatency,
2188 mode_lib->vba.UrgentExtraLatency,
2189 mode_lib->vba.TCalc,
2190 locals->PDEAndMetaPTEBytesFrame[k],
2191 locals->MetaRowByte[k],
2192 locals->PixelPTEBytesPerRow[k],
2193 locals->PrefetchSourceLinesY[k],
2194 locals->SwathWidthY[k],
2195 locals->BytePerPixelDETY[k],
2196 locals->VInitPreFillY[k],
2197 locals->MaxNumSwathY[k],
2198 locals->PrefetchSourceLinesC[k],
2199 locals->BytePerPixelDETC[k],
2200 locals->VInitPreFillC[k],
2201 locals->MaxNumSwathC[k],
2202 mode_lib->vba.SwathHeightY[k],
2203 mode_lib->vba.SwathHeightC[k],
2204 TWait,
2205 mode_lib->vba.XFCEnabled[k],
2206 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
2207 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
2208 &locals->DSTXAfterScaler[k],
2209 &locals->DSTYAfterScaler[k],
2210 &locals->DestinationLinesForPrefetch[k],
2211 &locals->PrefetchBandwidth[k],
2212 &locals->DestinationLinesToRequestVMInVBlank[k],
2213 &locals->DestinationLinesToRequestRowInVBlank[k],
2214 &locals->VRatioPrefetchY[k],
2215 &locals->VRatioPrefetchC[k],
2216 &locals->RequiredPrefetchPixDataBWLuma[k],
2217 &locals->RequiredPrefetchPixDataBWChroma[k],
2218 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
2219 &locals->Tno_bw[k],
2220 &locals->prefetch_vmrow_bw[k],
2221 &locals->swath_width_luma_ub[k],
2222 &locals->swath_width_chroma_ub[k],
2223 &mode_lib->vba.VUpdateOffsetPix[k],
2224 &mode_lib->vba.VUpdateWidthPix[k],
2225 &mode_lib->vba.VReadyOffsetPix[k]);
2226 if (mode_lib->vba.BlendingAndTiming[k] == k) {
2227 locals->VStartup[k] = dml_min(
2228 mode_lib->vba.VStartupLines,
2229 locals->MaxVStartupLines[k]);
2230 if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata
2231 != 0) {
2232 locals->VStartup[k] =
2233 locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata;
2234 }
2235 } else {
2236 locals->VStartup[k] =
2237 dml_min(
2238 mode_lib->vba.VStartupLines,
2239 locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]);
2240 }
2241 }
2242
2243 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2244 unsigned int m;
2245
2246 locals->cursor_bw[k] = 0;
2247 locals->cursor_bw_pre[k] = 0;
2248 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
2249 locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
2250 locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k];
2251 }
2252
2253 CalculateUrgentBurstFactor(
2254 mode_lib->vba.DETBufferSizeInKByte[0],
2255 mode_lib->vba.SwathHeightY[k],
2256 mode_lib->vba.SwathHeightC[k],
2257 locals->SwathWidthY[k],
2258 mode_lib->vba.HTotal[k] /
2259 mode_lib->vba.PixelClock[k],
2260 mode_lib->vba.UrgentLatency,
2261 mode_lib->vba.CursorBufferSize,
2262 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
2263 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
2264 mode_lib->vba.VRatio[k],
2265 locals->VRatioPrefetchY[k],
2266 locals->VRatioPrefetchC[k],
2267 locals->BytePerPixelDETY[k],
2268 locals->BytePerPixelDETC[k],
2269 &locals->UrgentBurstFactorCursor[k],
2270 &locals->UrgentBurstFactorCursorPre[k],
2271 &locals->UrgentBurstFactorLuma[k],
2272 &locals->UrgentBurstFactorLumaPre[k],
2273 &locals->UrgentBurstFactorChroma[k],
2274 &locals->UrgentBurstFactorChromaPre[k],
2275 &locals->NotEnoughUrgentLatencyHiding[0][0],
2276 &locals->NotEnoughUrgentLatencyHidingPre);
2277
2278 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
2279 locals->UrgentBurstFactorLuma[k] = 1;
2280 locals->UrgentBurstFactorChroma[k] = 1;
2281 locals->UrgentBurstFactorCursor[k] = 1;
2282 locals->UrgentBurstFactorLumaPre[k] = 1;
2283 locals->UrgentBurstFactorChromaPre[k] = 1;
2284 locals->UrgentBurstFactorCursorPre[k] = 1;
2285 }
2286
2287 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2288 dml_max3(locals->prefetch_vmrow_bw[k],
2289 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2290 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k]
2291 * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2292 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k]
2293 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2294
2295 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2296 dml_max3(locals->prefetch_vmrow_bw[k],
2297 locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k]
2298 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
2299 locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2300
2301 if (locals->DestinationLinesForPrefetch[k] < 2)
2302 DestinationLineTimesForPrefetchLessThan2 = true;
2303 if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4)
2304 VRatioPrefetchMoreThan4 = true;
2305 }
2306 mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW;
2307
2308 if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding[0][0] == 0 &&
2309 locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2310 && !DestinationLineTimesForPrefetchLessThan2)
2311 mode_lib->vba.PrefetchModeSupported = true;
2312 else {
2313 mode_lib->vba.PrefetchModeSupported = false;
2314 dml_print(
2315 "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2316 }
2317
2318 if (mode_lib->vba.PrefetchModeSupported == true) {
2319 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW;
2320 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2321 mode_lib->vba.BandwidthAvailableForImmediateFlip =
2322 mode_lib->vba.BandwidthAvailableForImmediateFlip
2323 - dml_max(
2324 locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k]
2325 + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k]
2326 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2327 locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] +
2328 locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] +
2329 locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2330 }
2331
2332 mode_lib->vba.TotImmediateFlipBytes = 0;
2333 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2334 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k];
2335 }
2336 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2337 CalculateFlipSchedule(
2338 mode_lib,
2339 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2340 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2341 mode_lib->vba.UrgentExtraLatency,
2342 mode_lib->vba.UrgentLatency,
2343 mode_lib->vba.GPUVMMaxPageTableLevels,
2344 mode_lib->vba.HostVMEnable,
2345 mode_lib->vba.HostVMMaxPageTableLevels,
2346 mode_lib->vba.HostVMCachedPageTableLevels,
2347 mode_lib->vba.GPUVMEnable,
2348 locals->PDEAndMetaPTEBytesFrame[k],
2349 locals->MetaRowByte[k],
2350 locals->PixelPTEBytesPerRow[k],
2351 mode_lib->vba.BandwidthAvailableForImmediateFlip,
2352 mode_lib->vba.TotImmediateFlipBytes,
2353 mode_lib->vba.SourcePixelFormat[k],
2354 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2355 mode_lib->vba.VRatio[k],
2356 locals->Tno_bw[k],
2357 mode_lib->vba.DCCEnable[k],
2358 locals->dpte_row_height[k],
2359 locals->meta_row_height[k],
2360 locals->dpte_row_height_chroma[k],
2361 locals->meta_row_height_chroma[k],
2362 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
2363 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
2364 &locals->final_flip_bw[k],
2365 &locals->ImmediateFlipSupportedForPipe[k]);
2366 }
2367 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
2368 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2369 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2370 mode_lib->vba.total_dcn_read_bw_with_flip =
2371 mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
2372 locals->prefetch_vmrow_bw[k],
2373 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
2374 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
2375 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k]
2376 + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k]
2377 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
2378 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst =
2379 mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst +
2380 dml_max3(locals->prefetch_vmrow_bw[k],
2381 locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k],
2382 locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]);
2383
2384 }
2385 mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW;
2386
2387 mode_lib->vba.ImmediateFlipSupported = true;
2388 if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) {
2389 mode_lib->vba.ImmediateFlipSupported = false;
2390 }
2391 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2392 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
2393 mode_lib->vba.ImmediateFlipSupported = false;
2394 }
2395 }
2396 } else {
2397 mode_lib->vba.ImmediateFlipSupported = false;
2398 }
2399
2400 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2401 if (mode_lib->vba.ErrorResult[k]) {
2402 mode_lib->vba.PrefetchModeSupported = false;
2403 dml_print(
2404 "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2405 }
2406 }
2407
2408 mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1;
2409 } while (!((mode_lib->vba.PrefetchModeSupported
2410 && ((!mode_lib->vba.ImmediateFlipSupport && !mode_lib->vba.HostVMEnable)
2411 || mode_lib->vba.ImmediateFlipSupported))
2412 || locals->MaximumMaxVStartupLines < mode_lib->vba.VStartupLines));
2413
2414 //Watermarks and NB P-State/DRAM Clock Change Support
2415 {
2416 enum clock_change_support DRAMClockChangeSupport; // dummy
2417 CalculateWatermarksAndDRAMSpeedChangeSupport(
2418 mode_lib,
2419 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2420 mode_lib->vba.NumberOfActivePlanes,
2421 mode_lib->vba.MaxLineBufferLines,
2422 mode_lib->vba.LineBufferSize,
2423 mode_lib->vba.DPPOutputBufferPixels,
2424 mode_lib->vba.DETBufferSizeInKByte[0],
2425 mode_lib->vba.WritebackInterfaceLumaBufferSize,
2426 mode_lib->vba.WritebackInterfaceChromaBufferSize,
2427 mode_lib->vba.DCFCLK,
2428 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
2429 mode_lib->vba.ReturnBW,
2430 mode_lib->vba.GPUVMEnable,
2431 locals->dpte_group_bytes,
2432 mode_lib->vba.MetaChunkSize,
2433 mode_lib->vba.UrgentLatency,
2434 mode_lib->vba.UrgentExtraLatency,
2435 mode_lib->vba.WritebackLatency,
2436 mode_lib->vba.WritebackChunkSize,
2437 mode_lib->vba.SOCCLK,
2438 mode_lib->vba.DRAMClockChangeLatency,
2439 mode_lib->vba.SRExitTime,
2440 mode_lib->vba.SREnterPlusExitTime,
2441 mode_lib->vba.DCFCLKDeepSleep,
2442 mode_lib->vba.DPPPerPlane,
2443 mode_lib->vba.DCCEnable,
2444 locals->DPPCLK,
2445 locals->SwathWidthSingleDPPY,
2446 mode_lib->vba.SwathHeightY,
2447 locals->ReadBandwidthPlaneLuma,
2448 mode_lib->vba.SwathHeightC,
2449 locals->ReadBandwidthPlaneChroma,
2450 mode_lib->vba.LBBitPerPixel,
2451 locals->SwathWidthY,
2452 mode_lib->vba.HRatio,
2453 mode_lib->vba.vtaps,
2454 mode_lib->vba.VTAPsChroma,
2455 mode_lib->vba.VRatio,
2456 mode_lib->vba.HTotal,
2457 mode_lib->vba.PixelClock,
2458 mode_lib->vba.BlendingAndTiming,
2459 locals->BytePerPixelDETY,
2460 locals->BytePerPixelDETC,
2461 mode_lib->vba.WritebackEnable,
2462 mode_lib->vba.WritebackPixelFormat,
2463 mode_lib->vba.WritebackDestinationWidth,
2464 mode_lib->vba.WritebackDestinationHeight,
2465 mode_lib->vba.WritebackSourceHeight,
2466 &DRAMClockChangeSupport,
2467 &mode_lib->vba.UrgentWatermark,
2468 &mode_lib->vba.WritebackUrgentWatermark,
2469 &mode_lib->vba.DRAMClockChangeWatermark,
2470 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
2471 &mode_lib->vba.StutterExitWatermark,
2472 &mode_lib->vba.StutterEnterPlusExitWatermark,
2473 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
2474 }
2475
2476
2477 //Display Pipeline Delivery Time in Prefetch, Groups
2478 CalculatePixelDeliveryTimes(
2479 mode_lib->vba.NumberOfActivePlanes,
2480 mode_lib->vba.VRatio,
2481 locals->VRatioPrefetchY,
2482 locals->VRatioPrefetchC,
2483 locals->swath_width_luma_ub,
2484 locals->swath_width_chroma_ub,
2485 mode_lib->vba.DPPPerPlane,
2486 mode_lib->vba.HRatio,
2487 mode_lib->vba.PixelClock,
2488 locals->PSCL_THROUGHPUT_LUMA,
2489 locals->PSCL_THROUGHPUT_CHROMA,
2490 locals->DPPCLK,
2491 locals->BytePerPixelDETC,
2492 mode_lib->vba.SourceScan,
2493 locals->BlockWidth256BytesY,
2494 locals->BlockHeight256BytesY,
2495 locals->BlockWidth256BytesC,
2496 locals->BlockHeight256BytesC,
2497 locals->DisplayPipeLineDeliveryTimeLuma,
2498 locals->DisplayPipeLineDeliveryTimeChroma,
2499 locals->DisplayPipeLineDeliveryTimeLumaPrefetch,
2500 locals->DisplayPipeLineDeliveryTimeChromaPrefetch,
2501 locals->DisplayPipeRequestDeliveryTimeLuma,
2502 locals->DisplayPipeRequestDeliveryTimeChroma,
2503 locals->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2504 locals->DisplayPipeRequestDeliveryTimeChromaPrefetch);
2505
2506 CalculateMetaAndPTETimes(
2507 mode_lib->vba.NumberOfActivePlanes,
2508 mode_lib->vba.GPUVMEnable,
2509 mode_lib->vba.MetaChunkSize,
2510 mode_lib->vba.MinMetaChunkSizeBytes,
2511 mode_lib->vba.GPUVMMaxPageTableLevels,
2512 mode_lib->vba.HTotal,
2513 mode_lib->vba.VRatio,
2514 locals->VRatioPrefetchY,
2515 locals->VRatioPrefetchC,
2516 locals->DestinationLinesToRequestRowInVBlank,
2517 locals->DestinationLinesToRequestRowInImmediateFlip,
2518 locals->DestinationLinesToRequestVMInVBlank,
2519 locals->DestinationLinesToRequestVMInImmediateFlip,
2520 mode_lib->vba.DCCEnable,
2521 mode_lib->vba.PixelClock,
2522 locals->BytePerPixelDETY,
2523 locals->BytePerPixelDETC,
2524 mode_lib->vba.SourceScan,
2525 locals->dpte_row_height,
2526 locals->dpte_row_height_chroma,
2527 locals->meta_row_width,
2528 locals->meta_row_height,
2529 locals->meta_req_width,
2530 locals->meta_req_height,
2531 locals->dpte_group_bytes,
2532 locals->PTERequestSizeY,
2533 locals->PTERequestSizeC,
2534 locals->PixelPTEReqWidthY,
2535 locals->PixelPTEReqHeightY,
2536 locals->PixelPTEReqWidthC,
2537 locals->PixelPTEReqHeightC,
2538 locals->dpte_row_width_luma_ub,
2539 locals->dpte_row_width_chroma_ub,
2540 locals->vm_group_bytes,
2541 locals->dpde0_bytes_per_frame_ub_l,
2542 locals->dpde0_bytes_per_frame_ub_c,
2543 locals->meta_pte_bytes_per_frame_ub_l,
2544 locals->meta_pte_bytes_per_frame_ub_c,
2545 locals->DST_Y_PER_PTE_ROW_NOM_L,
2546 locals->DST_Y_PER_PTE_ROW_NOM_C,
2547 locals->DST_Y_PER_META_ROW_NOM_L,
2548 locals->TimePerMetaChunkNominal,
2549 locals->TimePerMetaChunkVBlank,
2550 locals->TimePerMetaChunkFlip,
2551 locals->time_per_pte_group_nom_luma,
2552 locals->time_per_pte_group_vblank_luma,
2553 locals->time_per_pte_group_flip_luma,
2554 locals->time_per_pte_group_nom_chroma,
2555 locals->time_per_pte_group_vblank_chroma,
2556 locals->time_per_pte_group_flip_chroma,
2557 locals->TimePerVMGroupVBlank,
2558 locals->TimePerVMGroupFlip,
2559 locals->TimePerVMRequestVBlank,
2560 locals->TimePerVMRequestFlip);
2561
2562
2563 // Min TTUVBlank
2564 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2565 if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
2566 locals->AllowDRAMClockChangeDuringVBlank[k] = true;
2567 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2568 locals->MinTTUVBlank[k] = dml_max(
2569 mode_lib->vba.DRAMClockChangeWatermark,
2570 dml_max(
2571 mode_lib->vba.StutterEnterPlusExitWatermark,
2572 mode_lib->vba.UrgentWatermark));
2573 } else if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 1) {
2574 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2575 locals->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2576 locals->MinTTUVBlank[k] = dml_max(
2577 mode_lib->vba.StutterEnterPlusExitWatermark,
2578 mode_lib->vba.UrgentWatermark);
2579 } else {
2580 locals->AllowDRAMClockChangeDuringVBlank[k] = false;
2581 locals->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2582 locals->MinTTUVBlank[k] = mode_lib->vba.UrgentWatermark;
2583 }
2584 if (!mode_lib->vba.DynamicMetadataEnable[k])
2585 locals->MinTTUVBlank[k] = mode_lib->vba.TCalc
2586 + locals->MinTTUVBlank[k];
2587 }
2588
2589 // DCC Configuration
2590 mode_lib->vba.ActiveDPPs = 0;
2591 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2592 locals->MaximumDCCCompressionYSurface[k] = CalculateDCCConfiguration(
2593 mode_lib->vba.DCCEnable[k],
2594 false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2595 mode_lib->vba.ViewportWidth[k],
2596 mode_lib->vba.ViewportHeight[k],
2597 mode_lib->vba.DETBufferSizeInKByte[0] * 1024,
2598 locals->BlockHeight256BytesY[k],
2599 mode_lib->vba.SwathHeightY[k],
2600 mode_lib->vba.SurfaceTiling[k],
2601 locals->BytePerPixelDETY[k],
2602 mode_lib->vba.SourceScan[k],
2603 &locals->DCCYMaxUncompressedBlock[k],
2604 &locals->DCCYMaxCompressedBlock[k],
2605 &locals->DCCYIndependent64ByteBlock[k]);
2606 }
2607
2608 //XFC Parameters:
2609 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2610 if (mode_lib->vba.XFCEnabled[k] == true) {
2611 double TWait;
2612
2613 locals->XFCSlaveVUpdateOffset[k] = mode_lib->vba.XFCTSlvVupdateOffset;
2614 locals->XFCSlaveVupdateWidth[k] = mode_lib->vba.XFCTSlvVupdateWidth;
2615 locals->XFCSlaveVReadyOffset[k] = mode_lib->vba.XFCTSlvVreadyOffset;
2616 TWait = CalculateTWait(
2617 mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb],
2618 mode_lib->vba.DRAMClockChangeLatency,
2619 mode_lib->vba.UrgentLatency,
2620 mode_lib->vba.SREnterPlusExitTime);
2621 mode_lib->vba.XFCRemoteSurfaceFlipDelay = CalculateRemoteSurfaceFlipDelay(
2622 mode_lib,
2623 mode_lib->vba.VRatio[k],
2624 locals->SwathWidthY[k],
2625 dml_ceil(locals->BytePerPixelDETY[k], 1),
2626 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
2627 mode_lib->vba.XFCTSlvVupdateOffset,
2628 mode_lib->vba.XFCTSlvVupdateWidth,
2629 mode_lib->vba.XFCTSlvVreadyOffset,
2630 mode_lib->vba.XFCXBUFLatencyTolerance,
2631 mode_lib->vba.XFCFillBWOverhead,
2632 mode_lib->vba.XFCSlvChunkSize,
2633 mode_lib->vba.XFCBusTransportTime,
2634 mode_lib->vba.TCalc,
2635 TWait,
2636 &mode_lib->vba.SrcActiveDrainRate,
2637 &mode_lib->vba.TInitXFill,
2638 &mode_lib->vba.TslvChk);
2639 locals->XFCRemoteSurfaceFlipLatency[k] =
2640 dml_floor(
2641 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2642 / (mode_lib->vba.HTotal[k]
2643 / mode_lib->vba.PixelClock[k]),
2644 1);
2645 locals->XFCTransferDelay[k] =
2646 dml_ceil(
2647 mode_lib->vba.XFCBusTransportTime
2648 / (mode_lib->vba.HTotal[k]
2649 / mode_lib->vba.PixelClock[k]),
2650 1);
2651 locals->XFCPrechargeDelay[k] =
2652 dml_ceil(
2653 (mode_lib->vba.XFCBusTransportTime
2654 + mode_lib->vba.TInitXFill
2655 + mode_lib->vba.TslvChk)
2656 / (mode_lib->vba.HTotal[k]
2657 / mode_lib->vba.PixelClock[k]),
2658 1);
2659 mode_lib->vba.InitFillLevel = mode_lib->vba.XFCXBUFLatencyTolerance
2660 * mode_lib->vba.SrcActiveDrainRate;
2661 mode_lib->vba.FinalFillMargin =
2662 (locals->DestinationLinesToRequestVMInVBlank[k]
2663 + locals->DestinationLinesToRequestRowInVBlank[k])
2664 * mode_lib->vba.HTotal[k]
2665 / mode_lib->vba.PixelClock[k]
2666 * mode_lib->vba.SrcActiveDrainRate
2667 + mode_lib->vba.XFCFillConstant;
2668 mode_lib->vba.FinalFillLevel = mode_lib->vba.XFCRemoteSurfaceFlipDelay
2669 * mode_lib->vba.SrcActiveDrainRate
2670 + mode_lib->vba.FinalFillMargin;
2671 mode_lib->vba.RemainingFillLevel = dml_max(
2672 0.0,
2673 mode_lib->vba.FinalFillLevel - mode_lib->vba.InitFillLevel);
2674 mode_lib->vba.TFinalxFill = mode_lib->vba.RemainingFillLevel
2675 / (mode_lib->vba.SrcActiveDrainRate
2676 * mode_lib->vba.XFCFillBWOverhead / 100);
2677 locals->XFCPrefetchMargin[k] =
2678 mode_lib->vba.XFCRemoteSurfaceFlipDelay
2679 + mode_lib->vba.TFinalxFill
2680 + (locals->DestinationLinesToRequestVMInVBlank[k]
2681 + locals->DestinationLinesToRequestRowInVBlank[k])
2682 * mode_lib->vba.HTotal[k]
2683 / mode_lib->vba.PixelClock[k];
2684 } else {
2685 locals->XFCSlaveVUpdateOffset[k] = 0;
2686 locals->XFCSlaveVupdateWidth[k] = 0;
2687 locals->XFCSlaveVReadyOffset[k] = 0;
2688 locals->XFCRemoteSurfaceFlipLatency[k] = 0;
2689 locals->XFCPrechargeDelay[k] = 0;
2690 locals->XFCTransferDelay[k] = 0;
2691 locals->XFCPrefetchMargin[k] = 0;
2692 }
2693 }
2694
2695 // Stutter Efficiency
2696 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2697 CalculateDETBufferSize(
2698 mode_lib->vba.DETBufferSizeInKByte[0],
2699 mode_lib->vba.SwathHeightY[k],
2700 mode_lib->vba.SwathHeightC[k],
2701 &locals->DETBufferSizeY[k],
2702 &locals->DETBufferSizeC[k]);
2703
2704 locals->LinesInDETY[k] = (double)locals->DETBufferSizeY[k]
2705 / locals->BytePerPixelDETY[k] / locals->SwathWidthY[k];
2706 locals->LinesInDETYRoundedDownToSwath[k] = dml_floor(
2707 locals->LinesInDETY[k],
2708 mode_lib->vba.SwathHeightY[k]);
2709 locals->FullDETBufferingTimeY[k] =
2710 locals->LinesInDETYRoundedDownToSwath[k]
2711 * (mode_lib->vba.HTotal[k]
2712 / mode_lib->vba.PixelClock[k])
2713 / mode_lib->vba.VRatio[k];
2714 }
2715
2716 mode_lib->vba.StutterPeriod = 999999.0;
2717 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2718 if (locals->FullDETBufferingTimeY[k] < mode_lib->vba.StutterPeriod) {
2719 mode_lib->vba.StutterPeriod = locals->FullDETBufferingTimeY[k];
2720 mode_lib->vba.FrameTimeForMinFullDETBufferingTime =
2721 (double) mode_lib->vba.VTotal[k] * mode_lib->vba.HTotal[k]
2722 / mode_lib->vba.PixelClock[k];
2723 locals->BytePerPixelYCriticalPlane = dml_ceil(locals->BytePerPixelDETY[k], 1);
2724 locals->SwathWidthYCriticalPlane = locals->SwathWidthY[k];
2725 locals->LinesToFinishSwathTransferStutterCriticalPlane =
2726 mode_lib->vba.SwathHeightY[k] - (locals->LinesInDETY[k] - locals->LinesInDETYRoundedDownToSwath[k]);
2727 }
2728 }
2729
2730 mode_lib->vba.AverageReadBandwidth = 0.0;
2731 mode_lib->vba.TotalRowReadBandwidth = 0.0;
2732 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2733 unsigned int DCCRateLimit;
2734
2735 if (mode_lib->vba.DCCEnable[k]) {
2736 if (locals->DCCYMaxCompressedBlock[k] == 256)
2737 DCCRateLimit = 4;
2738 else
2739 DCCRateLimit = 2;
2740
2741 mode_lib->vba.AverageReadBandwidth =
2742 mode_lib->vba.AverageReadBandwidth
2743 + (locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k]) /
2744 dml_min(mode_lib->vba.DCCRate[k], DCCRateLimit);
2745 } else {
2746 mode_lib->vba.AverageReadBandwidth =
2747 mode_lib->vba.AverageReadBandwidth
2748 + locals->ReadBandwidthPlaneLuma[k]
2749 + locals->ReadBandwidthPlaneChroma[k];
2750 }
2751 mode_lib->vba.TotalRowReadBandwidth = mode_lib->vba.TotalRowReadBandwidth +
2752 locals->meta_row_bw[k] + locals->dpte_row_bw[k];
2753 }
2754
2755 mode_lib->vba.AverageDCCCompressionRate = mode_lib->vba.TotalDataReadBandwidth / mode_lib->vba.AverageReadBandwidth;
2756
2757 mode_lib->vba.PartOfBurstThatFitsInROB =
2758 dml_min(
2759 mode_lib->vba.StutterPeriod
2760 * mode_lib->vba.TotalDataReadBandwidth,
2761 mode_lib->vba.ROBBufferSizeInKByte * 1024
2762 * mode_lib->vba.AverageDCCCompressionRate);
2763 mode_lib->vba.StutterBurstTime = mode_lib->vba.PartOfBurstThatFitsInROB
2764 / mode_lib->vba.AverageDCCCompressionRate / mode_lib->vba.ReturnBW
2765 + (mode_lib->vba.StutterPeriod * mode_lib->vba.TotalDataReadBandwidth
2766 - mode_lib->vba.PartOfBurstThatFitsInROB)
2767 / (mode_lib->vba.DCFCLK * 64)
2768 + mode_lib->vba.StutterPeriod * mode_lib->vba.TotalRowReadBandwidth / mode_lib->vba.ReturnBW;
2769 mode_lib->vba.StutterBurstTime = dml_max(
2770 mode_lib->vba.StutterBurstTime,
2771 (locals->LinesToFinishSwathTransferStutterCriticalPlane * locals->BytePerPixelYCriticalPlane *
2772 locals->SwathWidthYCriticalPlane / mode_lib->vba.ReturnBW)
2773 );
2774
2775 mode_lib->vba.TotalActiveWriteback = 0;
2776 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2777 if (mode_lib->vba.WritebackEnable[k] == true) {
2778 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
2779 }
2780 }
2781
2782 if (mode_lib->vba.TotalActiveWriteback == 0) {
2783 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = (1
2784 - (mode_lib->vba.SRExitTime + mode_lib->vba.StutterBurstTime)
2785 / mode_lib->vba.StutterPeriod) * 100;
2786 } else {
2787 mode_lib->vba.StutterEfficiencyNotIncludingVBlank = 0;
2788 }
2789
2790 mode_lib->vba.SmallestVBlank = 999999;
2791 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2792 if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
2793 mode_lib->vba.VBlankTime = (double) (mode_lib->vba.VTotal[k]
2794 - mode_lib->vba.VActive[k]) * mode_lib->vba.HTotal[k]
2795 / mode_lib->vba.PixelClock[k];
2796 } else {
2797 mode_lib->vba.VBlankTime = 0;
2798 }
2799 mode_lib->vba.SmallestVBlank = dml_min(
2800 mode_lib->vba.SmallestVBlank,
2801 mode_lib->vba.VBlankTime);
2802 }
2803
2804 mode_lib->vba.StutterEfficiency = (mode_lib->vba.StutterEfficiencyNotIncludingVBlank / 100
2805 * (mode_lib->vba.FrameTimeForMinFullDETBufferingTime
2806 - mode_lib->vba.SmallestVBlank)
2807 + mode_lib->vba.SmallestVBlank)
2808 / mode_lib->vba.FrameTimeForMinFullDETBufferingTime * 100;
2809 }
2810
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)2811 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
2812 {
2813 // Display Pipe Configuration
2814 double BytePerPixDETY;
2815 double BytePerPixDETC;
2816 double Read256BytesBlockHeightY;
2817 double Read256BytesBlockHeightC;
2818 double Read256BytesBlockWidthY;
2819 double Read256BytesBlockWidthC;
2820 double MaximumSwathHeightY;
2821 double MaximumSwathHeightC;
2822 double MinimumSwathHeightY;
2823 double MinimumSwathHeightC;
2824 double SwathWidth;
2825 double SwathWidthGranularityY;
2826 double SwathWidthGranularityC;
2827 double RoundedUpMaxSwathSizeBytesY;
2828 double RoundedUpMaxSwathSizeBytesC;
2829 unsigned int j, k;
2830
2831 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
2832 bool MainPlaneDoesODMCombine = false;
2833
2834 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2835 BytePerPixDETY = 8;
2836 BytePerPixDETC = 0;
2837 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
2838 BytePerPixDETY = 4;
2839 BytePerPixDETC = 0;
2840 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2841 BytePerPixDETY = 2;
2842 BytePerPixDETC = 0;
2843 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8) {
2844 BytePerPixDETY = 1;
2845 BytePerPixDETC = 0;
2846 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2847 BytePerPixDETY = 1;
2848 BytePerPixDETC = 2;
2849 } else {
2850 BytePerPixDETY = 4.0 / 3.0;
2851 BytePerPixDETC = 8.0 / 3.0;
2852 }
2853
2854 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2855 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2856 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2857 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2858 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2859 Read256BytesBlockHeightY = 1;
2860 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
2861 Read256BytesBlockHeightY = 4;
2862 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2863 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16) {
2864 Read256BytesBlockHeightY = 8;
2865 } else {
2866 Read256BytesBlockHeightY = 16;
2867 }
2868 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2869 / Read256BytesBlockHeightY;
2870 Read256BytesBlockHeightC = 0;
2871 Read256BytesBlockWidthC = 0;
2872 } else {
2873 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2874 Read256BytesBlockHeightY = 1;
2875 Read256BytesBlockHeightC = 1;
2876 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
2877 Read256BytesBlockHeightY = 16;
2878 Read256BytesBlockHeightC = 8;
2879 } else {
2880 Read256BytesBlockHeightY = 8;
2881 Read256BytesBlockHeightC = 8;
2882 }
2883 Read256BytesBlockWidthY = 256 / dml_ceil(BytePerPixDETY, 1)
2884 / Read256BytesBlockHeightY;
2885 Read256BytesBlockWidthC = 256 / dml_ceil(BytePerPixDETC, 2)
2886 / Read256BytesBlockHeightC;
2887 }
2888
2889 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2890 MaximumSwathHeightY = Read256BytesBlockHeightY;
2891 MaximumSwathHeightC = Read256BytesBlockHeightC;
2892 } else {
2893 MaximumSwathHeightY = Read256BytesBlockWidthY;
2894 MaximumSwathHeightC = Read256BytesBlockWidthC;
2895 }
2896
2897 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2898 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
2899 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
2900 || mode_lib->vba.SourcePixelFormat[k] == dm_444_8)) {
2901 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
2902 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
2903 && (mode_lib->vba.SurfaceTiling[k]
2904 == dm_sw_4kb_s
2905 || mode_lib->vba.SurfaceTiling[k]
2906 == dm_sw_4kb_s_x
2907 || mode_lib->vba.SurfaceTiling[k]
2908 == dm_sw_64kb_s
2909 || mode_lib->vba.SurfaceTiling[k]
2910 == dm_sw_64kb_s_t
2911 || mode_lib->vba.SurfaceTiling[k]
2912 == dm_sw_64kb_s_x
2913 || mode_lib->vba.SurfaceTiling[k]
2914 == dm_sw_var_s
2915 || mode_lib->vba.SurfaceTiling[k]
2916 == dm_sw_var_s_x)
2917 && mode_lib->vba.SourceScan[k] == dm_horz)) {
2918 MinimumSwathHeightY = MaximumSwathHeightY;
2919 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8
2920 && mode_lib->vba.SourceScan[k] != dm_horz) {
2921 MinimumSwathHeightY = MaximumSwathHeightY;
2922 } else {
2923 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2924 }
2925 MinimumSwathHeightC = MaximumSwathHeightC;
2926 } else {
2927 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
2928 MinimumSwathHeightY = MaximumSwathHeightY;
2929 MinimumSwathHeightC = MaximumSwathHeightC;
2930 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
2931 && mode_lib->vba.SourceScan[k] == dm_horz) {
2932 MinimumSwathHeightY = MaximumSwathHeightY / 2.0;
2933 MinimumSwathHeightC = MaximumSwathHeightC;
2934 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
2935 && mode_lib->vba.SourceScan[k] == dm_horz) {
2936 MinimumSwathHeightC = MaximumSwathHeightC / 2.0;
2937 MinimumSwathHeightY = MaximumSwathHeightY;
2938 } else {
2939 MinimumSwathHeightY = MaximumSwathHeightY;
2940 MinimumSwathHeightC = MaximumSwathHeightC;
2941 }
2942 }
2943
2944 if (mode_lib->vba.SourceScan[k] == dm_horz) {
2945 SwathWidth = mode_lib->vba.ViewportWidth[k];
2946 } else {
2947 SwathWidth = mode_lib->vba.ViewportHeight[k];
2948 }
2949
2950 if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2951 MainPlaneDoesODMCombine = true;
2952 }
2953 for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) {
2954 if (mode_lib->vba.BlendingAndTiming[k] == j
2955 && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2956 MainPlaneDoesODMCombine = true;
2957 }
2958 }
2959
2960 if (MainPlaneDoesODMCombine == true) {
2961 SwathWidth = dml_min(
2962 SwathWidth,
2963 mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]);
2964 } else {
2965 SwathWidth = SwathWidth / mode_lib->vba.DPPPerPlane[k];
2966 }
2967
2968 SwathWidthGranularityY = 256 / dml_ceil(BytePerPixDETY, 1) / MaximumSwathHeightY;
2969 RoundedUpMaxSwathSizeBytesY = (dml_ceil(
2970 (double) (SwathWidth - 1),
2971 SwathWidthGranularityY) + SwathWidthGranularityY) * BytePerPixDETY
2972 * MaximumSwathHeightY;
2973 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2974 RoundedUpMaxSwathSizeBytesY = dml_ceil(RoundedUpMaxSwathSizeBytesY, 256)
2975 + 256;
2976 }
2977 if (MaximumSwathHeightC > 0) {
2978 SwathWidthGranularityC = 256.0 / dml_ceil(BytePerPixDETC, 2)
2979 / MaximumSwathHeightC;
2980 RoundedUpMaxSwathSizeBytesC = (dml_ceil(
2981 (double) (SwathWidth / 2.0 - 1),
2982 SwathWidthGranularityC) + SwathWidthGranularityC)
2983 * BytePerPixDETC * MaximumSwathHeightC;
2984 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
2985 RoundedUpMaxSwathSizeBytesC = dml_ceil(
2986 RoundedUpMaxSwathSizeBytesC,
2987 256) + 256;
2988 }
2989 } else
2990 RoundedUpMaxSwathSizeBytesC = 0.0;
2991
2992 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
2993 <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
2994 mode_lib->vba.SwathHeightY[k] = MaximumSwathHeightY;
2995 mode_lib->vba.SwathHeightC[k] = MaximumSwathHeightC;
2996 } else {
2997 mode_lib->vba.SwathHeightY[k] = MinimumSwathHeightY;
2998 mode_lib->vba.SwathHeightC[k] = MinimumSwathHeightC;
2999 }
3000
3001 CalculateDETBufferSize(
3002 mode_lib->vba.DETBufferSizeInKByte[0],
3003 mode_lib->vba.SwathHeightY[k],
3004 mode_lib->vba.SwathHeightC[k],
3005 &mode_lib->vba.DETBufferSizeY[k],
3006 &mode_lib->vba.DETBufferSizeC[k]);
3007 }
3008 }
3009
/*
 * Select the wait time that corresponds to PrefetchMode:
 *   0      - dml_max() of (DRAMClockChangeLatency + UrgentLatency) and the
 *            mode-1 value,
 *   1      - dml_max() of SREnterPlusExitTime and UrgentLatency,
 *   other  - UrgentLatency unchanged.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(
				DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3026
/*
 * Compute the XFC remote surface flip delay and three by-products.
 *
 * Outputs written through pointers:
 *   *SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime
 *   *TInitXFill         = XFCXBUFLatencyTolerance / (1 + XFCFillBWOverhead / 100)
 *   *TslvChk            = XFCSlvChunkSize / (drain rate * (1 + XFCFillBWOverhead / 100))
 *
 * Returns twice the bus transport time plus the slave setup time
 * (Vupdate offset + Vupdate width + Vready offset), TCalc, TWait, *TslvChk
 * and *TInitXFill.  Each intermediate value is reported through dml_print().
 *
 * mode_lib is not referenced directly by the statements below.
 */
static double CalculateRemoteSurfaceFlipDelay(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double SwathWidth,
		double Bpp,
		double LineTime,
		double XFCTSlvVupdateOffset,
		double XFCTSlvVupdateWidth,
		double XFCTSlvVreadyOffset,
		double XFCXBUFLatencyTolerance,
		double XFCFillBWOverhead,
		double XFCSlvChunkSize,
		double XFCBusTransportTime,
		double TCalc,
		double TWait,
		double *SrcActiveDrainRate,
		double *TInitXFill,
		double *TslvChk)
{
	/* XFCFillBWOverhead is a percentage, hence the division by 100. */
	const double FillOverheadFactor = 1 + XFCFillBWOverhead / 100;
	const double SlaveSetupTime =
		XFCTSlvVupdateOffset + XFCTSlvVupdateWidth + XFCTSlvVreadyOffset;
	double FillRate;
	double FlipDelay;

	*SrcActiveDrainRate = VRatio * SwathWidth * Bpp / LineTime;
	*TInitXFill = XFCXBUFLatencyTolerance / FillOverheadFactor;
	FillRate = *SrcActiveDrainRate * FillOverheadFactor;
	*TslvChk = XFCSlvChunkSize / FillRate;

	dml_print(
			"DML::CalculateRemoteSurfaceFlipDelay: SrcActiveDrainRate: %f\n",
			*SrcActiveDrainRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TSlvSetup: %f\n", SlaveSetupTime);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TInitXFill: %f\n", *TInitXFill);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: AvgfillRate: %f\n", FillRate);
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: TslvChk: %f\n", *TslvChk);

	// TODO: This doesn't seem to match programming guide
	FlipDelay = 2 * XFCBusTransportTime + SlaveSetupTime + TCalc + TWait + *TslvChk + *TInitXFill;
	dml_print("DML::CalculateRemoteSurfaceFlipDelay: RemoteSurfaceFlipDelay: %f\n", FlipDelay);

	return FlipDelay;
}
3064
CalculateWriteBackDelay(enum source_format_class WritebackPixelFormat,double WritebackHRatio,double WritebackVRatio,unsigned int WritebackLumaHTaps,unsigned int WritebackLumaVTaps,unsigned int WritebackChromaHTaps,unsigned int WritebackChromaVTaps,unsigned int WritebackDestinationWidth)3065 static double CalculateWriteBackDelay(
3066 enum source_format_class WritebackPixelFormat,
3067 double WritebackHRatio,
3068 double WritebackVRatio,
3069 unsigned int WritebackLumaHTaps,
3070 unsigned int WritebackLumaVTaps,
3071 unsigned int WritebackChromaHTaps,
3072 unsigned int WritebackChromaVTaps,
3073 unsigned int WritebackDestinationWidth)
3074 {
3075 double CalculateWriteBackDelay =
3076 dml_max(
3077 dml_ceil(WritebackLumaHTaps / 4.0, 1) / WritebackHRatio,
3078 WritebackLumaVTaps * dml_ceil(1.0 / WritebackVRatio, 1)
3079 * dml_ceil(
3080 WritebackDestinationWidth
3081 / 4.0,
3082 1)
3083 + dml_ceil(1.0 / WritebackVRatio, 1)
3084 * (dml_ceil(
3085 WritebackLumaVTaps
3086 / 4.0,
3087 1) + 4));
3088
3089 if (WritebackPixelFormat != dm_444_32) {
3090 CalculateWriteBackDelay =
3091 dml_max(
3092 CalculateWriteBackDelay,
3093 dml_max(
3094 dml_ceil(
3095 WritebackChromaHTaps
3096 / 2.0,
3097 1)
3098 / (2
3099 * WritebackHRatio),
3100 WritebackChromaVTaps
3101 * dml_ceil(
3102 1
3103 / (2
3104 * WritebackVRatio),
3105 1)
3106 * dml_ceil(
3107 WritebackDestinationWidth
3108 / 2.0
3109 / 2.0,
3110 1)
3111 + dml_ceil(
3112 1
3113 / (2
3114 * WritebackVRatio),
3115 1)
3116 * (dml_ceil(
3117 WritebackChromaVTaps
3118 / 4.0,
3119 1)
3120 + 4)));
3121 }
3122 return CalculateWriteBackDelay;
3123 }
3124
CalculateActiveRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3125 static void CalculateActiveRowBandwidth(
3126 bool GPUVMEnable,
3127 enum source_format_class SourcePixelFormat,
3128 double VRatio,
3129 bool DCCEnable,
3130 double LineTime,
3131 unsigned int MetaRowByteLuma,
3132 unsigned int MetaRowByteChroma,
3133 unsigned int meta_row_height_luma,
3134 unsigned int meta_row_height_chroma,
3135 unsigned int PixelPTEBytesPerRowLuma,
3136 unsigned int PixelPTEBytesPerRowChroma,
3137 unsigned int dpte_row_height_luma,
3138 unsigned int dpte_row_height_chroma,
3139 double *meta_row_bw,
3140 double *dpte_row_bw)
3141 {
3142 if (DCCEnable != true) {
3143 *meta_row_bw = 0;
3144 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3145 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3146 + VRatio / 2 * MetaRowByteChroma
3147 / (meta_row_height_chroma * LineTime);
3148 } else {
3149 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3150 }
3151
3152 if (GPUVMEnable != true) {
3153 *dpte_row_bw = 0;
3154 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3155 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3156 + VRatio / 2 * PixelPTEBytesPerRowChroma
3157 / (dpte_row_height_chroma * LineTime);
3158 } else {
3159 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3160 }
3161 }
3162
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxPageTableLevels,unsigned int HostVMCachedPageTableLevels,bool GPUVMEnable,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3163 static void CalculateFlipSchedule(
3164 struct display_mode_lib *mode_lib,
3165 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3166 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3167 double UrgentExtraLatency,
3168 double UrgentLatency,
3169 unsigned int GPUVMMaxPageTableLevels,
3170 bool HostVMEnable,
3171 unsigned int HostVMMaxPageTableLevels,
3172 unsigned int HostVMCachedPageTableLevels,
3173 bool GPUVMEnable,
3174 double PDEAndMetaPTEBytesPerFrame,
3175 double MetaRowBytes,
3176 double DPTEBytesPerRow,
3177 double BandwidthAvailableForImmediateFlip,
3178 unsigned int TotImmediateFlipBytes,
3179 enum source_format_class SourcePixelFormat,
3180 double LineTime,
3181 double VRatio,
3182 double Tno_bw,
3183 bool DCCEnable,
3184 unsigned int dpte_row_height,
3185 unsigned int meta_row_height,
3186 unsigned int dpte_row_height_chroma,
3187 unsigned int meta_row_height_chroma,
3188 double *DestinationLinesToRequestVMInImmediateFlip,
3189 double *DestinationLinesToRequestRowInImmediateFlip,
3190 double *final_flip_bw,
3191 bool *ImmediateFlipSupportedForPipe)
3192 {
3193 double min_row_time = 0.0;
3194 unsigned int HostVMDynamicLevels;
3195 double TimeForFetchingMetaPTEImmediateFlip;
3196 double TimeForFetchingRowInVBlankImmediateFlip;
3197 double ImmediateFlipBW;
3198 double HostVMInefficiencyFactor;
3199 double VRatioClamped;
3200
3201 if (GPUVMEnable == true && HostVMEnable == true) {
3202 HostVMInefficiencyFactor =
3203 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
3204 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3205 HostVMDynamicLevels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
3206 } else {
3207 HostVMInefficiencyFactor = 1;
3208 HostVMDynamicLevels = 0;
3209 }
3210
3211 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow)
3212 * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3213
3214 if (GPUVMEnable == true) {
3215 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3216 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3217 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevels + 1) - 1),
3218 LineTime / 4.0);
3219 } else {
3220 TimeForFetchingMetaPTEImmediateFlip = 0;
3221 }
3222
3223 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3224 if ((GPUVMEnable == true || DCCEnable == true)) {
3225 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / ImmediateFlipBW, UrgentLatency * (HostVMDynamicLevels + 1), LineTime / 4);
3226 } else {
3227 TimeForFetchingRowInVBlankImmediateFlip = 0;
3228 }
3229
3230 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3231 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), (MetaRowBytes + DPTEBytesPerRow) * HostVMInefficiencyFactor / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3232 VRatioClamped = (VRatio < 1.0) ? 1.0 : VRatio;
3233 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10) {
3234 if (GPUVMEnable == true && DCCEnable != true) {
3235 min_row_time = dml_min(
3236 dpte_row_height * LineTime / VRatioClamped,
3237 dpte_row_height_chroma * LineTime / (VRatioClamped / 2));
3238 } else if (GPUVMEnable != true && DCCEnable == true) {
3239 min_row_time = dml_min(
3240 meta_row_height * LineTime / VRatioClamped,
3241 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3242 } else {
3243 min_row_time = dml_min4(
3244 dpte_row_height * LineTime / VRatioClamped,
3245 meta_row_height * LineTime / VRatioClamped,
3246 dpte_row_height_chroma * LineTime / (VRatioClamped / 2),
3247 meta_row_height_chroma * LineTime / (VRatioClamped / 2));
3248 }
3249 } else {
3250 if (GPUVMEnable == true && DCCEnable != true) {
3251 min_row_time = dpte_row_height * LineTime / VRatioClamped;
3252 } else if (GPUVMEnable != true && DCCEnable == true) {
3253 min_row_time = meta_row_height * LineTime / VRatioClamped;
3254 } else {
3255 min_row_time = dml_min(
3256 dpte_row_height * LineTime / VRatioClamped,
3257 meta_row_height * LineTime / VRatioClamped);
3258 }
3259 }
3260
3261 if (*DestinationLinesToRequestVMInImmediateFlip >= 32
3262 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3263 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3264 *ImmediateFlipSupportedForPipe = false;
3265 } else {
3266 *ImmediateFlipSupportedForPipe = true;
3267 }
3268 }
3269
TruncToValidBPP(double DecimalBPP,double DesiredBPP,bool DSCEnabled,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent)3270 static unsigned int TruncToValidBPP(
3271 double DecimalBPP,
3272 double DesiredBPP,
3273 bool DSCEnabled,
3274 enum output_encoder_class Output,
3275 enum output_format_class Format,
3276 unsigned int DSCInputBitPerComponent)
3277 {
3278 if (Output == dm_hdmi) {
3279 if (Format == dm_420) {
3280 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3281 return 18;
3282 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3283 return 15;
3284 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3285 return 12;
3286 else
3287 return BPP_INVALID;
3288 } else if (Format == dm_444) {
3289 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3290 return 36;
3291 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3292 return 30;
3293 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3294 return 24;
3295 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3296 return 18;
3297 else
3298 return BPP_INVALID;
3299 } else {
3300 if (DecimalBPP / 1.5 >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3301 return 24;
3302 else if (DecimalBPP / 1.5 >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3303 return 20;
3304 else if (DecimalBPP / 1.5 >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3305 return 16;
3306 else
3307 return BPP_INVALID;
3308 }
3309 } else {
3310 if (DSCEnabled) {
3311 if (Format == dm_420) {
3312 if (DesiredBPP == 0) {
3313 if (DecimalBPP < 6)
3314 return BPP_INVALID;
3315 else if (DecimalBPP >= 1.5 * DSCInputBitPerComponent - 1.0 / 16.0)
3316 return 1.5 * DSCInputBitPerComponent - 1.0 / 16.0;
3317 else
3318 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3319 } else {
3320 if (DecimalBPP < 6
3321 || DesiredBPP < 6
3322 || DesiredBPP > 1.5 * DSCInputBitPerComponent - 1.0 / 16.0
3323 || DecimalBPP < DesiredBPP) {
3324 return BPP_INVALID;
3325 } else {
3326 return DesiredBPP;
3327 }
3328 }
3329 } else if (Format == dm_n422) {
3330 if (DesiredBPP == 0) {
3331 if (DecimalBPP < 7)
3332 return BPP_INVALID;
3333 else if (DecimalBPP >= 2 * DSCInputBitPerComponent - 1.0 / 16.0)
3334 return 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3335 else
3336 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3337 } else {
3338 if (DecimalBPP < 7
3339 || DesiredBPP < 7
3340 || DesiredBPP > 2 * DSCInputBitPerComponent - 1.0 / 16.0
3341 || DecimalBPP < DesiredBPP) {
3342 return BPP_INVALID;
3343 } else {
3344 return DesiredBPP;
3345 }
3346 }
3347 } else {
3348 if (DesiredBPP == 0) {
3349 if (DecimalBPP < 8)
3350 return BPP_INVALID;
3351 else if (DecimalBPP >= 3 * DSCInputBitPerComponent - 1.0 / 16.0)
3352 return 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3353 else
3354 return dml_floor(16 * DecimalBPP, 1) / 16.0;
3355 } else {
3356 if (DecimalBPP < 8
3357 || DesiredBPP < 8
3358 || DesiredBPP > 3 * DSCInputBitPerComponent - 1.0 / 16.0
3359 || DecimalBPP < DesiredBPP) {
3360 return BPP_INVALID;
3361 } else {
3362 return DesiredBPP;
3363 }
3364 }
3365 }
3366 } else if (Format == dm_420) {
3367 if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3368 return 18;
3369 else if (DecimalBPP >= 15 && (DesiredBPP == 0 || DesiredBPP == 15))
3370 return 15;
3371 else if (DecimalBPP >= 12 && (DesiredBPP == 0 || DesiredBPP == 12))
3372 return 12;
3373 else
3374 return BPP_INVALID;
3375 } else if (Format == dm_s422 || Format == dm_n422) {
3376 if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3377 return 24;
3378 else if (DecimalBPP >= 20 && (DesiredBPP == 0 || DesiredBPP == 20))
3379 return 20;
3380 else if (DecimalBPP >= 16 && (DesiredBPP == 0 || DesiredBPP == 16))
3381 return 16;
3382 else
3383 return BPP_INVALID;
3384 } else {
3385 if (DecimalBPP >= 36 && (DesiredBPP == 0 || DesiredBPP == 36))
3386 return 36;
3387 else if (DecimalBPP >= 30 && (DesiredBPP == 0 || DesiredBPP == 30))
3388 return 30;
3389 else if (DecimalBPP >= 24 && (DesiredBPP == 0 || DesiredBPP == 24))
3390 return 24;
3391 else if (DecimalBPP >= 18 && (DesiredBPP == 0 || DesiredBPP == 18))
3392 return 18;
3393 else
3394 return BPP_INVALID;
3395 }
3396 }
3397 }
3398
3399
CalculatePrefetchSchedulePerPlane(struct display_mode_lib * mode_lib,int i,unsigned j,unsigned k)3400 static noinline void CalculatePrefetchSchedulePerPlane(
3401 struct display_mode_lib *mode_lib,
3402 int i,
3403 unsigned j,
3404 unsigned k)
3405 {
3406 struct vba_vars_st *locals = &mode_lib->vba;
3407 Pipe myPipe;
3408 HostVM myHostVM;
3409
3410 if (mode_lib->vba.XFCEnabled[k] == true) {
3411 mode_lib->vba.XFCRemoteSurfaceFlipDelay =
3412 CalculateRemoteSurfaceFlipDelay(
3413 mode_lib,
3414 mode_lib->vba.VRatio[k],
3415 locals->SwathWidthYThisState[k],
3416 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
3417 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
3418 mode_lib->vba.XFCTSlvVupdateOffset,
3419 mode_lib->vba.XFCTSlvVupdateWidth,
3420 mode_lib->vba.XFCTSlvVreadyOffset,
3421 mode_lib->vba.XFCXBUFLatencyTolerance,
3422 mode_lib->vba.XFCFillBWOverhead,
3423 mode_lib->vba.XFCSlvChunkSize,
3424 mode_lib->vba.XFCBusTransportTime,
3425 mode_lib->vba.TimeCalc,
3426 mode_lib->vba.TWait,
3427 &mode_lib->vba.SrcActiveDrainRate,
3428 &mode_lib->vba.TInitXFill,
3429 &mode_lib->vba.TslvChk);
3430 } else {
3431 mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
3432 }
3433
3434 myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
3435 myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
3436 myPipe.PixelClock = mode_lib->vba.PixelClock[k];
3437 myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
3438 myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
3439 myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
3440 myPipe.SourceScan = mode_lib->vba.SourceScan[k];
3441 myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
3442 myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
3443 myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
3444 myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
3445 myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
3446 myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
3447 myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
3448 myPipe.HTotal = mode_lib->vba.HTotal[k];
3449
3450
3451 myHostVM.Enable = mode_lib->vba.HostVMEnable;
3452 myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
3453 myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
3454
3455
3456 mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
3457 mode_lib,
3458 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3459 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3460 &myPipe,
3461 locals->DSCDelayPerState[i][k],
3462 mode_lib->vba.DPPCLKDelaySubtotal,
3463 mode_lib->vba.DPPCLKDelaySCL,
3464 mode_lib->vba.DPPCLKDelaySCLLBOnly,
3465 mode_lib->vba.DPPCLKDelayCNVCFormater,
3466 mode_lib->vba.DPPCLKDelayCNVCCursor,
3467 mode_lib->vba.DISPCLKDelaySubtotal,
3468 locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
3469 mode_lib->vba.OutputFormat[k],
3470 mode_lib->vba.MaxInterDCNTileRepeaters,
3471 dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
3472 locals->MaximumVStartup[0][0][k],
3473 mode_lib->vba.GPUVMMaxPageTableLevels,
3474 mode_lib->vba.GPUVMEnable,
3475 &myHostVM,
3476 mode_lib->vba.DynamicMetadataEnable[k],
3477 mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
3478 mode_lib->vba.DynamicMetadataTransmittedBytes[k],
3479 mode_lib->vba.DCCEnable[k],
3480 mode_lib->vba.UrgentLatency,
3481 mode_lib->vba.ExtraLatency,
3482 mode_lib->vba.TimeCalc,
3483 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
3484 locals->MetaRowBytes[0][0][k],
3485 locals->DPTEBytesPerRow[0][0][k],
3486 locals->PrefetchLinesY[0][0][k],
3487 locals->SwathWidthYThisState[k],
3488 locals->BytePerPixelInDETY[k],
3489 locals->PrefillY[k],
3490 locals->MaxNumSwY[k],
3491 locals->PrefetchLinesC[0][0][k],
3492 locals->BytePerPixelInDETC[k],
3493 locals->PrefillC[k],
3494 locals->MaxNumSwC[k],
3495 locals->SwathHeightYThisState[k],
3496 locals->SwathHeightCThisState[k],
3497 mode_lib->vba.TWait,
3498 mode_lib->vba.XFCEnabled[k],
3499 mode_lib->vba.XFCRemoteSurfaceFlipDelay,
3500 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
3501 &locals->dst_x_after_scaler,
3502 &locals->dst_y_after_scaler,
3503 &locals->LineTimesForPrefetch[k],
3504 &locals->PrefetchBW[k],
3505 &locals->LinesForMetaPTE[k],
3506 &locals->LinesForMetaAndDPTERow[k],
3507 &locals->VRatioPreY[i][j][k],
3508 &locals->VRatioPreC[i][j][k],
3509 &locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
3510 &locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
3511 &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
3512 &locals->Tno_bw[k],
3513 &locals->prefetch_vmrow_bw[k],
3514 locals->swath_width_luma_ub,
3515 locals->swath_width_chroma_ub,
3516 &mode_lib->vba.VUpdateOffsetPix[k],
3517 &mode_lib->vba.VUpdateWidthPix[k],
3518 &mode_lib->vba.VReadyOffsetPix[k]);
3519 }
dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3520 void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3521 {
3522 struct vba_vars_st *locals = &mode_lib->vba;
3523
3524 int i;
3525 unsigned int j, k, m;
3526
3527 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3528
3529 /*Scale Ratio, taps Support Check*/
3530
3531 mode_lib->vba.ScaleRatioAndTapsSupport = true;
3532 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3533 if (mode_lib->vba.ScalerEnabled[k] == false
3534 && ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3535 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3536 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3537 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3538 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)
3539 || mode_lib->vba.HRatio[k] != 1.0
3540 || mode_lib->vba.htaps[k] != 1.0
3541 || mode_lib->vba.VRatio[k] != 1.0
3542 || mode_lib->vba.vtaps[k] != 1.0)) {
3543 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3544 } else if (mode_lib->vba.vtaps[k] < 1.0 || mode_lib->vba.vtaps[k] > 8.0
3545 || mode_lib->vba.htaps[k] < 1.0 || mode_lib->vba.htaps[k] > 8.0
3546 || (mode_lib->vba.htaps[k] > 1.0
3547 && (mode_lib->vba.htaps[k] % 2) == 1)
3548 || mode_lib->vba.HRatio[k] > mode_lib->vba.MaxHSCLRatio
3549 || mode_lib->vba.VRatio[k] > mode_lib->vba.MaxVSCLRatio
3550 || mode_lib->vba.HRatio[k] > mode_lib->vba.htaps[k]
3551 || mode_lib->vba.VRatio[k] > mode_lib->vba.vtaps[k]
3552 || (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
3553 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
3554 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
3555 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
3556 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8
3557 && (mode_lib->vba.HRatio[k] / 2.0
3558 > mode_lib->vba.HTAPsChroma[k]
3559 || mode_lib->vba.VRatio[k] / 2.0
3560 > mode_lib->vba.VTAPsChroma[k]))) {
3561 mode_lib->vba.ScaleRatioAndTapsSupport = false;
3562 }
3563 }
3564 /*Source Format, Pixel Format and Scan Support Check*/
3565
3566 mode_lib->vba.SourceFormatPixelAndScanSupport = true;
3567 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3568 if ((mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3569 && mode_lib->vba.SourceScan[k] != dm_horz)
3570 || ((mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d
3571 || mode_lib->vba.SurfaceTiling[k] == dm_sw_4kb_d_x
3572 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d
3573 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_t
3574 || mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_d_x
3575 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d
3576 || mode_lib->vba.SurfaceTiling[k] == dm_sw_var_d_x)
3577 && mode_lib->vba.SourcePixelFormat[k] != dm_444_64)
3578 || (mode_lib->vba.SurfaceTiling[k] == dm_sw_64kb_r_x
3579 && (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8
3580 || mode_lib->vba.SourcePixelFormat[k]
3581 == dm_420_8
3582 || mode_lib->vba.SourcePixelFormat[k]
3583 == dm_420_10))
3584 || (((mode_lib->vba.SurfaceTiling[k] == dm_sw_gfx7_2d_thin_gl
3585 || mode_lib->vba.SurfaceTiling[k]
3586 == dm_sw_gfx7_2d_thin_l_vp)
3587 && !((mode_lib->vba.SourcePixelFormat[k]
3588 == dm_444_64
3589 || mode_lib->vba.SourcePixelFormat[k]
3590 == dm_444_32)
3591 && mode_lib->vba.SourceScan[k]
3592 == dm_horz
3593 && mode_lib->vba.SupportGFX7CompatibleTilingIn32bppAnd64bpp
3594 == true
3595 && mode_lib->vba.DCCEnable[k]
3596 == false))
3597 || (mode_lib->vba.DCCEnable[k] == true
3598 && (mode_lib->vba.SurfaceTiling[k]
3599 == dm_sw_linear
3600 || mode_lib->vba.SourcePixelFormat[k]
3601 == dm_420_8
3602 || mode_lib->vba.SourcePixelFormat[k]
3603 == dm_420_10)))) {
3604 mode_lib->vba.SourceFormatPixelAndScanSupport = false;
3605 }
3606 }
3607 /*Bandwidth Support Check*/
3608
3609 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3610 if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) {
3611 locals->BytePerPixelInDETY[k] = 8.0;
3612 locals->BytePerPixelInDETC[k] = 0.0;
3613 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) {
3614 locals->BytePerPixelInDETY[k] = 4.0;
3615 locals->BytePerPixelInDETC[k] = 0.0;
3616 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3617 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) {
3618 locals->BytePerPixelInDETY[k] = 2.0;
3619 locals->BytePerPixelInDETC[k] = 0.0;
3620 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) {
3621 locals->BytePerPixelInDETY[k] = 1.0;
3622 locals->BytePerPixelInDETC[k] = 0.0;
3623 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) {
3624 locals->BytePerPixelInDETY[k] = 1.0;
3625 locals->BytePerPixelInDETC[k] = 2.0;
3626 } else {
3627 locals->BytePerPixelInDETY[k] = 4.0 / 3;
3628 locals->BytePerPixelInDETC[k] = 8.0 / 3;
3629 }
3630 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3631 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportWidth[k];
3632 } else {
3633 locals->SwathWidthYSingleDPP[k] = mode_lib->vba.ViewportHeight[k];
3634 }
3635 }
3636 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3637 locals->ReadBandwidthLuma[k] = locals->SwathWidthYSingleDPP[k] * dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
3638 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
3639 locals->ReadBandwidthChroma[k] = locals->SwathWidthYSingleDPP[k] / 2 * dml_ceil(locals->BytePerPixelInDETC[k], 2.0)
3640 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k] / 2.0;
3641 locals->ReadBandwidth[k] = locals->ReadBandwidthLuma[k] + locals->ReadBandwidthChroma[k];
3642 }
3643 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3644 if (mode_lib->vba.WritebackEnable[k] == true
3645 && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3646 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3647 * mode_lib->vba.WritebackDestinationHeight[k]
3648 / (mode_lib->vba.WritebackSourceHeight[k]
3649 * mode_lib->vba.HTotal[k]
3650 / mode_lib->vba.PixelClock[k]) * 4.0;
3651 } else if (mode_lib->vba.WritebackEnable[k] == true
3652 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3653 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3654 * mode_lib->vba.WritebackDestinationHeight[k]
3655 / (mode_lib->vba.WritebackSourceHeight[k]
3656 * mode_lib->vba.HTotal[k]
3657 / mode_lib->vba.PixelClock[k]) * 3.0;
3658 } else if (mode_lib->vba.WritebackEnable[k] == true) {
3659 locals->WriteBandwidth[k] = mode_lib->vba.WritebackDestinationWidth[k]
3660 * mode_lib->vba.WritebackDestinationHeight[k]
3661 / (mode_lib->vba.WritebackSourceHeight[k]
3662 * mode_lib->vba.HTotal[k]
3663 / mode_lib->vba.PixelClock[k]) * 1.5;
3664 } else {
3665 locals->WriteBandwidth[k] = 0.0;
3666 }
3667 }
3668 mode_lib->vba.DCCEnabledInAnyPlane = false;
3669 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3670 if (mode_lib->vba.DCCEnable[k] == true) {
3671 mode_lib->vba.DCCEnabledInAnyPlane = true;
3672 }
3673 }
3674 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3675 locals->IdealSDPPortBandwidthPerState[i][0] = dml_min3(
3676 mode_lib->vba.ReturnBusWidth * mode_lib->vba.DCFCLKPerState[i],
3677 mode_lib->vba.DRAMSpeedPerState[i] * mode_lib->vba.NumberOfChannels
3678 * mode_lib->vba.DRAMChannelWidth,
3679 mode_lib->vba.FabricClockPerState[i]
3680 * mode_lib->vba.FabricDatapathToDCNDataReturn);
3681 if (mode_lib->vba.HostVMEnable == false) {
3682 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3683 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100.0;
3684 } else {
3685 locals->ReturnBWPerState[i][0] = locals->IdealSDPPortBandwidthPerState[i][0]
3686 * mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0;
3687 }
3688 }
3689 /*Writeback Latency support check*/
3690
3691 mode_lib->vba.WritebackLatencySupport = true;
3692 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3693 if (mode_lib->vba.WritebackEnable[k] == true) {
3694 if (mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) {
3695 if (locals->WriteBandwidth[k]
3696 > (mode_lib->vba.WritebackInterfaceLumaBufferSize
3697 + mode_lib->vba.WritebackInterfaceChromaBufferSize)
3698 / mode_lib->vba.WritebackLatency) {
3699 mode_lib->vba.WritebackLatencySupport = false;
3700 }
3701 } else {
3702 if (locals->WriteBandwidth[k]
3703 > 1.5
3704 * dml_min(
3705 mode_lib->vba.WritebackInterfaceLumaBufferSize,
3706 2.0
3707 * mode_lib->vba.WritebackInterfaceChromaBufferSize)
3708 / mode_lib->vba.WritebackLatency) {
3709 mode_lib->vba.WritebackLatencySupport = false;
3710 }
3711 }
3712 }
3713 }
3714 /*Re-ordering Buffer Support Check*/
3715
3716 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
3717 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i] =
3718 (mode_lib->vba.RoundTripPingLatencyCycles + 32.0) / mode_lib->vba.DCFCLKPerState[i]
3719 + dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
3720 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
3721 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly)
3722 * mode_lib->vba.NumberOfChannels / locals->ReturnBWPerState[i][0];
3723 if ((mode_lib->vba.ROBBufferSizeInKByte - mode_lib->vba.PixelChunkSizeInKByte) * 1024.0 / locals->ReturnBWPerState[i][0]
3724 > locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i]) {
3725 locals->ROBSupport[i][0] = true;
3726 } else {
3727 locals->ROBSupport[i][0] = false;
3728 }
3729 }
3730 /*Writeback Mode Support Check*/
3731
3732 mode_lib->vba.TotalNumberOfActiveWriteback = 0;
3733 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3734 if (mode_lib->vba.WritebackEnable[k] == true) {
3735 if (mode_lib->vba.ActiveWritebacksPerPlane[k] == 0)
3736 mode_lib->vba.ActiveWritebacksPerPlane[k] = 1;
3737 mode_lib->vba.TotalNumberOfActiveWriteback =
3738 mode_lib->vba.TotalNumberOfActiveWriteback
3739 + mode_lib->vba.ActiveWritebacksPerPlane[k];
3740 }
3741 }
3742 mode_lib->vba.WritebackModeSupport = true;
3743 if (mode_lib->vba.TotalNumberOfActiveWriteback > mode_lib->vba.MaxNumWriteback) {
3744 mode_lib->vba.WritebackModeSupport = false;
3745 }
3746 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3747 if (mode_lib->vba.WritebackEnable[k] == true
3748 && mode_lib->vba.Writeback10bpc420Supported != true
3749 && mode_lib->vba.WritebackPixelFormat[k] == dm_420_10) {
3750 mode_lib->vba.WritebackModeSupport = false;
3751 }
3752 }
3753 /*Writeback Scale Ratio and Taps Support Check*/
3754
3755 mode_lib->vba.WritebackScaleRatioAndTapsSupport = true;
3756 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3757 if (mode_lib->vba.WritebackEnable[k] == true) {
3758 if (mode_lib->vba.WritebackLumaAndChromaScalingSupported == false
3759 && (mode_lib->vba.WritebackHRatio[k] != 1.0
3760 || mode_lib->vba.WritebackVRatio[k] != 1.0)) {
3761 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3762 }
3763 if (mode_lib->vba.WritebackHRatio[k] > mode_lib->vba.WritebackMaxHSCLRatio
3764 || mode_lib->vba.WritebackVRatio[k]
3765 > mode_lib->vba.WritebackMaxVSCLRatio
3766 || mode_lib->vba.WritebackHRatio[k]
3767 < mode_lib->vba.WritebackMinHSCLRatio
3768 || mode_lib->vba.WritebackVRatio[k]
3769 < mode_lib->vba.WritebackMinVSCLRatio
3770 || mode_lib->vba.WritebackLumaHTaps[k]
3771 > mode_lib->vba.WritebackMaxHSCLTaps
3772 || mode_lib->vba.WritebackLumaVTaps[k]
3773 > mode_lib->vba.WritebackMaxVSCLTaps
3774 || mode_lib->vba.WritebackHRatio[k]
3775 > mode_lib->vba.WritebackLumaHTaps[k]
3776 || mode_lib->vba.WritebackVRatio[k]
3777 > mode_lib->vba.WritebackLumaVTaps[k]
3778 || (mode_lib->vba.WritebackLumaHTaps[k] > 2.0
3779 && ((mode_lib->vba.WritebackLumaHTaps[k] % 2)
3780 == 1))
3781 || (mode_lib->vba.WritebackPixelFormat[k] != dm_444_32
3782 && (mode_lib->vba.WritebackChromaHTaps[k]
3783 > mode_lib->vba.WritebackMaxHSCLTaps
3784 || mode_lib->vba.WritebackChromaVTaps[k]
3785 > mode_lib->vba.WritebackMaxVSCLTaps
3786 || 2.0
3787 * mode_lib->vba.WritebackHRatio[k]
3788 > mode_lib->vba.WritebackChromaHTaps[k]
3789 || 2.0
3790 * mode_lib->vba.WritebackVRatio[k]
3791 > mode_lib->vba.WritebackChromaVTaps[k]
3792 || (mode_lib->vba.WritebackChromaHTaps[k] > 2.0
3793 && ((mode_lib->vba.WritebackChromaHTaps[k] % 2) == 1))))) {
3794 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3795 }
3796 if (mode_lib->vba.WritebackVRatio[k] < 1.0) {
3797 mode_lib->vba.WritebackLumaVExtra =
3798 dml_max(1.0 - 2.0 / dml_ceil(1.0 / mode_lib->vba.WritebackVRatio[k], 1.0), 0.0);
3799 } else {
3800 mode_lib->vba.WritebackLumaVExtra = -1;
3801 }
3802 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_444_32
3803 && mode_lib->vba.WritebackLumaVTaps[k]
3804 > (mode_lib->vba.WritebackLineBufferLumaBufferSize
3805 + mode_lib->vba.WritebackLineBufferChromaBufferSize)
3806 / 3.0
3807 / mode_lib->vba.WritebackDestinationWidth[k]
3808 - mode_lib->vba.WritebackLumaVExtra)
3809 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3810 && mode_lib->vba.WritebackLumaVTaps[k]
3811 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3812 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3813 - mode_lib->vba.WritebackLumaVExtra)
3814 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3815 && mode_lib->vba.WritebackLumaVTaps[k]
3816 > mode_lib->vba.WritebackLineBufferLumaBufferSize
3817 * 8.0 / 10.0
3818 / mode_lib->vba.WritebackDestinationWidth[k]
3819 - mode_lib->vba.WritebackLumaVExtra)) {
3820 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3821 }
3822 if (2.0 * mode_lib->vba.WritebackVRatio[k] < 1) {
3823 mode_lib->vba.WritebackChromaVExtra = 0.0;
3824 } else {
3825 mode_lib->vba.WritebackChromaVExtra = -1;
3826 }
3827 if ((mode_lib->vba.WritebackPixelFormat[k] == dm_420_8
3828 && mode_lib->vba.WritebackChromaVTaps[k]
3829 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3830 * 8.0 / 10.0 / mode_lib->vba.WritebackDestinationWidth[k]
3831 - mode_lib->vba.WritebackChromaVExtra)
3832 || (mode_lib->vba.WritebackPixelFormat[k] == dm_420_10
3833 && mode_lib->vba.WritebackChromaVTaps[k]
3834 > mode_lib->vba.WritebackLineBufferChromaBufferSize
3835 * 8.0 / 10.0
3836 / mode_lib->vba.WritebackDestinationWidth[k]
3837 - mode_lib->vba.WritebackChromaVExtra)) {
3838 mode_lib->vba.WritebackScaleRatioAndTapsSupport = false;
3839 }
3840 }
3841 }
3842 /*Maximum DISPCLK/DPPCLK Support check*/
3843
3844 mode_lib->vba.WritebackRequiredDISPCLK = 0.0;
3845 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3846 if (mode_lib->vba.WritebackEnable[k] == true) {
3847 mode_lib->vba.WritebackRequiredDISPCLK =
3848 dml_max(
3849 mode_lib->vba.WritebackRequiredDISPCLK,
3850 CalculateWriteBackDISPCLK(
3851 mode_lib->vba.WritebackPixelFormat[k],
3852 mode_lib->vba.PixelClock[k],
3853 mode_lib->vba.WritebackHRatio[k],
3854 mode_lib->vba.WritebackVRatio[k],
3855 mode_lib->vba.WritebackLumaHTaps[k],
3856 mode_lib->vba.WritebackLumaVTaps[k],
3857 mode_lib->vba.WritebackChromaHTaps[k],
3858 mode_lib->vba.WritebackChromaVTaps[k],
3859 mode_lib->vba.WritebackDestinationWidth[k],
3860 mode_lib->vba.HTotal[k],
3861 mode_lib->vba.WritebackChromaLineBufferWidth));
3862 }
3863 }
3864 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3865 if (mode_lib->vba.HRatio[k] > 1.0) {
3866 locals->PSCL_FACTOR[k] = dml_min(
3867 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3868 mode_lib->vba.MaxPSCLToLBThroughput
3869 * mode_lib->vba.HRatio[k]
3870 / dml_ceil(
3871 mode_lib->vba.htaps[k]
3872 / 6.0,
3873 1.0));
3874 } else {
3875 locals->PSCL_FACTOR[k] = dml_min(
3876 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3877 mode_lib->vba.MaxPSCLToLBThroughput);
3878 }
3879 if (locals->BytePerPixelInDETC[k] == 0.0) {
3880 locals->PSCL_FACTOR_CHROMA[k] = 0.0;
3881 locals->MinDPPCLKUsingSingleDPP[k] =
3882 mode_lib->vba.PixelClock[k]
3883 * dml_max3(
3884 mode_lib->vba.vtaps[k] / 6.0
3885 * dml_min(
3886 1.0,
3887 mode_lib->vba.HRatio[k]),
3888 mode_lib->vba.HRatio[k]
3889 * mode_lib->vba.VRatio[k]
3890 / locals->PSCL_FACTOR[k],
3891 1.0);
3892 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0)
3893 && locals->MinDPPCLKUsingSingleDPP[k]
3894 < 2.0 * mode_lib->vba.PixelClock[k]) {
3895 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3896 * mode_lib->vba.PixelClock[k];
3897 }
3898 } else {
3899 if (mode_lib->vba.HRatio[k] / 2.0 > 1.0) {
3900 locals->PSCL_FACTOR_CHROMA[k] =
3901 dml_min(
3902 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3903 mode_lib->vba.MaxPSCLToLBThroughput
3904 * mode_lib->vba.HRatio[k]
3905 / 2.0
3906 / dml_ceil(
3907 mode_lib->vba.HTAPsChroma[k]
3908 / 6.0,
3909 1.0));
3910 } else {
3911 locals->PSCL_FACTOR_CHROMA[k] = dml_min(
3912 mode_lib->vba.MaxDCHUBToPSCLThroughput,
3913 mode_lib->vba.MaxPSCLToLBThroughput);
3914 }
3915 locals->MinDPPCLKUsingSingleDPP[k] =
3916 mode_lib->vba.PixelClock[k]
3917 * dml_max5(
3918 mode_lib->vba.vtaps[k] / 6.0
3919 * dml_min(
3920 1.0,
3921 mode_lib->vba.HRatio[k]),
3922 mode_lib->vba.HRatio[k]
3923 * mode_lib->vba.VRatio[k]
3924 / locals->PSCL_FACTOR[k],
3925 mode_lib->vba.VTAPsChroma[k]
3926 / 6.0
3927 * dml_min(
3928 1.0,
3929 mode_lib->vba.HRatio[k]
3930 / 2.0),
3931 mode_lib->vba.HRatio[k]
3932 * mode_lib->vba.VRatio[k]
3933 / 4.0
3934 / locals->PSCL_FACTOR_CHROMA[k],
3935 1.0);
3936 if ((mode_lib->vba.htaps[k] > 6.0 || mode_lib->vba.vtaps[k] > 6.0
3937 || mode_lib->vba.HTAPsChroma[k] > 6.0
3938 || mode_lib->vba.VTAPsChroma[k] > 6.0)
3939 && locals->MinDPPCLKUsingSingleDPP[k]
3940 < 2.0 * mode_lib->vba.PixelClock[k]) {
3941 locals->MinDPPCLKUsingSingleDPP[k] = 2.0
3942 * mode_lib->vba.PixelClock[k];
3943 }
3944 }
3945 }
3946 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
3947 Calculate256BBlockSizes(
3948 mode_lib->vba.SourcePixelFormat[k],
3949 mode_lib->vba.SurfaceTiling[k],
3950 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
3951 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
3952 &locals->Read256BlockHeightY[k],
3953 &locals->Read256BlockHeightC[k],
3954 &locals->Read256BlockWidthY[k],
3955 &locals->Read256BlockWidthC[k]);
3956 if (mode_lib->vba.SourceScan[k] == dm_horz) {
3957 locals->MaxSwathHeightY[k] = locals->Read256BlockHeightY[k];
3958 locals->MaxSwathHeightC[k] = locals->Read256BlockHeightC[k];
3959 } else {
3960 locals->MaxSwathHeightY[k] = locals->Read256BlockWidthY[k];
3961 locals->MaxSwathHeightC[k] = locals->Read256BlockWidthC[k];
3962 }
3963 if ((mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3964 || mode_lib->vba.SourcePixelFormat[k] == dm_444_32
3965 || mode_lib->vba.SourcePixelFormat[k] == dm_444_16
3966 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16
3967 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8)) {
3968 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
3969 || (mode_lib->vba.SourcePixelFormat[k] == dm_444_64
3970 && (mode_lib->vba.SurfaceTiling[k]
3971 == dm_sw_4kb_s
3972 || mode_lib->vba.SurfaceTiling[k]
3973 == dm_sw_4kb_s_x
3974 || mode_lib->vba.SurfaceTiling[k]
3975 == dm_sw_64kb_s
3976 || mode_lib->vba.SurfaceTiling[k]
3977 == dm_sw_64kb_s_t
3978 || mode_lib->vba.SurfaceTiling[k]
3979 == dm_sw_64kb_s_x
3980 || mode_lib->vba.SurfaceTiling[k]
3981 == dm_sw_var_s
3982 || mode_lib->vba.SurfaceTiling[k]
3983 == dm_sw_var_s_x)
3984 && mode_lib->vba.SourceScan[k] == dm_horz)) {
3985 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3986 } else {
3987 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3988 / 2.0;
3989 }
3990 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3991 } else {
3992 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
3993 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
3994 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
3995 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8
3996 && mode_lib->vba.SourceScan[k] == dm_horz) {
3997 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k]
3998 / 2.0;
3999 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
4000 } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10
4001 && mode_lib->vba.SourceScan[k] == dm_horz) {
4002 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k]
4003 / 2.0;
4004 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
4005 } else {
4006 locals->MinSwathHeightY[k] = locals->MaxSwathHeightY[k];
4007 locals->MinSwathHeightC[k] = locals->MaxSwathHeightC[k];
4008 }
4009 }
4010 if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear) {
4011 mode_lib->vba.MaximumSwathWidthSupport = 8192.0;
4012 } else {
4013 mode_lib->vba.MaximumSwathWidthSupport = 5120.0;
4014 }
4015 mode_lib->vba.MaximumSwathWidthInDETBuffer =
4016 dml_min(
4017 mode_lib->vba.MaximumSwathWidthSupport,
4018 mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0
4019 / (locals->BytePerPixelInDETY[k]
4020 * locals->MinSwathHeightY[k]
4021 + locals->BytePerPixelInDETC[k]
4022 / 2.0
4023 * locals->MinSwathHeightC[k]));
4024 if (locals->BytePerPixelInDETC[k] == 0.0) {
4025 mode_lib->vba.MaximumSwathWidthInLineBuffer =
4026 mode_lib->vba.LineBufferSize
4027 * dml_max(mode_lib->vba.HRatio[k], 1.0)
4028 / mode_lib->vba.LBBitPerPixel[k]
4029 / (mode_lib->vba.vtaps[k]
4030 + dml_max(
4031 dml_ceil(
4032 mode_lib->vba.VRatio[k],
4033 1.0)
4034 - 2,
4035 0.0));
4036 } else {
4037 mode_lib->vba.MaximumSwathWidthInLineBuffer =
4038 dml_min(
4039 mode_lib->vba.LineBufferSize
4040 * dml_max(
4041 mode_lib->vba.HRatio[k],
4042 1.0)
4043 / mode_lib->vba.LBBitPerPixel[k]
4044 / (mode_lib->vba.vtaps[k]
4045 + dml_max(
4046 dml_ceil(
4047 mode_lib->vba.VRatio[k],
4048 1.0)
4049 - 2,
4050 0.0)),
4051 2.0 * mode_lib->vba.LineBufferSize
4052 * dml_max(
4053 mode_lib->vba.HRatio[k]
4054 / 2.0,
4055 1.0)
4056 / mode_lib->vba.LBBitPerPixel[k]
4057 / (mode_lib->vba.VTAPsChroma[k]
4058 + dml_max(
4059 dml_ceil(
4060 mode_lib->vba.VRatio[k]
4061 / 2.0,
4062 1.0)
4063 - 2,
4064 0.0)));
4065 }
4066 locals->MaximumSwathWidth[k] = dml_min(
4067 mode_lib->vba.MaximumSwathWidthInDETBuffer,
4068 mode_lib->vba.MaximumSwathWidthInLineBuffer);
4069 }
4070 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4071 double MaxMaxDispclkRoundedDown = RoundToDFSGranularityDown(
4072 mode_lib->vba.MaxDispclk[mode_lib->vba.soc.num_states],
4073 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
4074
4075 for (j = 0; j < 2; j++) {
4076 mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
4077 mode_lib->vba.MaxDispclk[i],
4078 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
4079 mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(
4080 mode_lib->vba.MaxDppclk[i],
4081 mode_lib->vba.DISPCLKDPPCLKVCOSpeed);
4082 locals->RequiredDISPCLK[i][j] = 0.0;
4083 locals->DISPCLK_DPPCLK_Support[i][j] = true;
4084 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4085 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine =
4086 mode_lib->vba.PixelClock[k]
4087 * (1.0
4088 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading
4089 / 100.0)
4090 * (1.0
4091 + mode_lib->vba.DISPCLKRampingMargin
4092 / 100.0);
4093 if (mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine >= mode_lib->vba.MaxDispclk[i]
4094 && i == mode_lib->vba.soc.num_states)
4095 mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine = mode_lib->vba.PixelClock[k]
4096 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4097
4098 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
4099 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
4100 if (mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine >= mode_lib->vba.MaxDispclk[i]
4101 && i == mode_lib->vba.soc.num_states)
4102 mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine = mode_lib->vba.PixelClock[k] / 2
4103 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4104
4105 locals->ODMCombineEnablePerState[i][k] = false;
4106 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithoutODMCombine;
4107 if (mode_lib->vba.ODMCapability) {
4108 if (locals->PlaneRequiredDISPCLKWithoutODMCombine > MaxMaxDispclkRoundedDown) {
4109 locals->ODMCombineEnablePerState[i][k] = true;
4110 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
4111 } else if (locals->DSCEnabled[k] && (locals->HActive[k] > DCN21_MAX_DSC_IMAGE_WIDTH)) {
4112 locals->ODMCombineEnablePerState[i][k] = true;
4113 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
4114 } else if (locals->HActive[k] > DCN21_MAX_420_IMAGE_WIDTH && locals->OutputFormat[k] == dm_420) {
4115 locals->ODMCombineEnablePerState[i][k] = true;
4116 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PlaneRequiredDISPCLKWithODMCombine;
4117 }
4118 }
4119
4120 if (locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
4121 && locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]
4122 && locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4123 locals->NoOfDPP[i][j][k] = 1;
4124 locals->RequiredDPPCLK[i][j][k] =
4125 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4126 } else {
4127 locals->NoOfDPP[i][j][k] = 2;
4128 locals->RequiredDPPCLK[i][j][k] =
4129 locals->MinDPPCLKUsingSingleDPP[k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4130 }
4131 locals->RequiredDISPCLK[i][j] = dml_max(
4132 locals->RequiredDISPCLK[i][j],
4133 mode_lib->vba.PlaneRequiredDISPCLK);
4134 if ((locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4135 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity)
4136 || (mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)) {
4137 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4138 }
4139 }
4140 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4141 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4142 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4143 if (j == 1) {
4144 while (locals->TotalNumberOfActiveDPP[i][j] < mode_lib->vba.MaxNumDPP
4145 && locals->TotalNumberOfActiveDPP[i][j] < 2 * mode_lib->vba.NumberOfActivePlanes) {
4146 double BWOfNonSplitPlaneOfMaximumBandwidth;
4147 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4148
4149 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4150 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4151 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4152 if (locals->ReadBandwidth[k] > BWOfNonSplitPlaneOfMaximumBandwidth && locals->NoOfDPP[i][j][k] == 1) {
4153 BWOfNonSplitPlaneOfMaximumBandwidth = locals->ReadBandwidth[k];
4154 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4155 }
4156 }
4157 locals->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4158 locals->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4159 locals->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4160 * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4161 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + 1;
4162 }
4163 }
4164 if (locals->TotalNumberOfActiveDPP[i][j] > mode_lib->vba.MaxNumDPP) {
4165 locals->RequiredDISPCLK[i][j] = 0.0;
4166 locals->DISPCLK_DPPCLK_Support[i][j] = true;
4167 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4168 locals->ODMCombineEnablePerState[i][k] = false;
4169 if (locals->SwathWidthYSingleDPP[k] <= locals->MaximumSwathWidth[k]) {
4170 locals->NoOfDPP[i][j][k] = 1;
4171 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4172 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4173 } else {
4174 locals->NoOfDPP[i][j][k] = 2;
4175 locals->RequiredDPPCLK[i][j][k] = locals->MinDPPCLKUsingSingleDPP[k]
4176 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4177 }
4178 if (i != mode_lib->vba.soc.num_states) {
4179 mode_lib->vba.PlaneRequiredDISPCLK =
4180 mode_lib->vba.PixelClock[k]
4181 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4182 * (1.0 + mode_lib->vba.DISPCLKRampingMargin / 100.0);
4183 } else {
4184 mode_lib->vba.PlaneRequiredDISPCLK = mode_lib->vba.PixelClock[k]
4185 * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4186 }
4187 locals->RequiredDISPCLK[i][j] = dml_max(
4188 locals->RequiredDISPCLK[i][j],
4189 mode_lib->vba.PlaneRequiredDISPCLK);
4190 if (locals->MinDPPCLKUsingSingleDPP[k] / locals->NoOfDPP[i][j][k] * (1.0 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4191 > mode_lib->vba.MaxDppclkRoundedDownToDFSGranularity
4192 || mode_lib->vba.PlaneRequiredDISPCLK > mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity)
4193 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4194 }
4195 locals->TotalNumberOfActiveDPP[i][j] = 0.0;
4196 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4197 locals->TotalNumberOfActiveDPP[i][j] = locals->TotalNumberOfActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4198 }
4199 locals->RequiredDISPCLK[i][j] = dml_max(
4200 locals->RequiredDISPCLK[i][j],
4201 mode_lib->vba.WritebackRequiredDISPCLK);
4202 if (mode_lib->vba.MaxDispclkRoundedDownToDFSGranularity
4203 < mode_lib->vba.WritebackRequiredDISPCLK) {
4204 locals->DISPCLK_DPPCLK_Support[i][j] = false;
4205 }
4206 }
4207 }
4208 /*Viewport Size Check*/
4209
4210 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4211 locals->ViewportSizeSupport[i][0] = true;
4212 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4213 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4214 if (dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]))
4215 > locals->MaximumSwathWidth[k]) {
4216 locals->ViewportSizeSupport[i][0] = false;
4217 }
4218 } else {
4219 if (locals->SwathWidthYSingleDPP[k] / 2.0 > locals->MaximumSwathWidth[k]) {
4220 locals->ViewportSizeSupport[i][0] = false;
4221 }
4222 }
4223 }
4224 }
4225 /*Total Available Pipes Support Check*/
4226
4227 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4228 for (j = 0; j < 2; j++) {
4229 if (locals->TotalNumberOfActiveDPP[i][j] <= mode_lib->vba.MaxNumDPP)
4230 locals->TotalAvailablePipesSupport[i][j] = true;
4231 else
4232 locals->TotalAvailablePipesSupport[i][j] = false;
4233 }
4234 }
4235 /*Total Available OTG Support Check*/
4236
4237 mode_lib->vba.TotalNumberOfActiveOTG = 0.0;
4238 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4239 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4240 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG
4241 + 1.0;
4242 }
4243 }
4244 if (mode_lib->vba.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG) {
4245 mode_lib->vba.NumberOfOTGSupport = true;
4246 } else {
4247 mode_lib->vba.NumberOfOTGSupport = false;
4248 }
4249 /*Display IO and DSC Support Check*/
4250
4251 mode_lib->vba.NonsupportedDSCInputBPC = false;
4252 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4253 if (!(mode_lib->vba.DSCInputBitPerComponent[k] == 12.0
4254 || mode_lib->vba.DSCInputBitPerComponent[k] == 10.0
4255 || mode_lib->vba.DSCInputBitPerComponent[k] == 8.0)) {
4256 mode_lib->vba.NonsupportedDSCInputBPC = true;
4257 }
4258 }
4259 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4260 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4261 locals->RequiresDSC[i][k] = false;
4262 locals->RequiresFEC[i][k] = 0;
4263 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4264 if (mode_lib->vba.Output[k] == dm_hdmi) {
4265 locals->RequiresDSC[i][k] = false;
4266 locals->RequiresFEC[i][k] = 0;
4267 locals->OutputBppPerState[i][k] = TruncToValidBPP(
4268 dml_min(600.0, mode_lib->vba.PHYCLKPerState[i]) / mode_lib->vba.PixelClockBackEnd[k] * 24,
4269 mode_lib->vba.ForcedOutputLinkBPP[k],
4270 false,
4271 mode_lib->vba.Output[k],
4272 mode_lib->vba.OutputFormat[k],
4273 mode_lib->vba.DSCInputBitPerComponent[k]);
4274 } else if (mode_lib->vba.Output[k] == dm_dp
4275 || mode_lib->vba.Output[k] == dm_edp) {
4276 if (mode_lib->vba.Output[k] == dm_edp) {
4277 mode_lib->vba.EffectiveFECOverhead = 0.0;
4278 } else {
4279 mode_lib->vba.EffectiveFECOverhead =
4280 mode_lib->vba.FECOverhead;
4281 }
4282 if (mode_lib->vba.PHYCLKPerState[i] >= 270.0) {
4283 mode_lib->vba.Outbpp = TruncToValidBPP(
4284 (1.0 - mode_lib->vba.Downspreading / 100.0) * 270.0
4285 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4286 mode_lib->vba.ForcedOutputLinkBPP[k],
4287 false,
4288 mode_lib->vba.Output[k],
4289 mode_lib->vba.OutputFormat[k],
4290 mode_lib->vba.DSCInputBitPerComponent[k]);
4291 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4292 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 270.0
4293 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4294 mode_lib->vba.ForcedOutputLinkBPP[k],
4295 true,
4296 mode_lib->vba.Output[k],
4297 mode_lib->vba.OutputFormat[k],
4298 mode_lib->vba.DSCInputBitPerComponent[k]);
4299 if (mode_lib->vba.DSCEnabled[k] == true) {
4300 locals->RequiresDSC[i][k] = true;
4301 if (mode_lib->vba.Output[k] == dm_dp) {
4302 locals->RequiresFEC[i][k] = true;
4303 } else {
4304 locals->RequiresFEC[i][k] = false;
4305 }
4306 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4307 } else {
4308 locals->RequiresDSC[i][k] = false;
4309 locals->RequiresFEC[i][k] = false;
4310 }
4311 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4312 }
4313 if (mode_lib->vba.Outbpp == BPP_INVALID && mode_lib->vba.PHYCLKPerState[i] >= 540.0) {
4314 mode_lib->vba.Outbpp = TruncToValidBPP(
4315 (1.0 - mode_lib->vba.Downspreading / 100.0) * 540.0
4316 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4317 mode_lib->vba.ForcedOutputLinkBPP[k],
4318 false,
4319 mode_lib->vba.Output[k],
4320 mode_lib->vba.OutputFormat[k],
4321 mode_lib->vba.DSCInputBitPerComponent[k]);
4322 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4323 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 540.0
4324 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4325 mode_lib->vba.ForcedOutputLinkBPP[k],
4326 true,
4327 mode_lib->vba.Output[k],
4328 mode_lib->vba.OutputFormat[k],
4329 mode_lib->vba.DSCInputBitPerComponent[k]);
4330 if (mode_lib->vba.DSCEnabled[k] == true) {
4331 locals->RequiresDSC[i][k] = true;
4332 if (mode_lib->vba.Output[k] == dm_dp) {
4333 locals->RequiresFEC[i][k] = true;
4334 } else {
4335 locals->RequiresFEC[i][k] = false;
4336 }
4337 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4338 } else {
4339 locals->RequiresDSC[i][k] = false;
4340 locals->RequiresFEC[i][k] = false;
4341 }
4342 locals->OutputBppPerState[i][k] = mode_lib->vba.Outbpp;
4343 }
4344 if (mode_lib->vba.Outbpp == BPP_INVALID
4345 && mode_lib->vba.PHYCLKPerState[i]
4346 >= 810.0) {
4347 mode_lib->vba.Outbpp = TruncToValidBPP(
4348 (1.0 - mode_lib->vba.Downspreading / 100.0) * 810.0
4349 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4350 mode_lib->vba.ForcedOutputLinkBPP[k],
4351 false,
4352 mode_lib->vba.Output[k],
4353 mode_lib->vba.OutputFormat[k],
4354 mode_lib->vba.DSCInputBitPerComponent[k]);
4355 mode_lib->vba.OutbppDSC = TruncToValidBPP(
4356 (1.0 - mode_lib->vba.Downspreading / 100.0) * (1.0 - mode_lib->vba.EffectiveFECOverhead / 100.0) * 810.0
4357 * mode_lib->vba.OutputLinkDPLanes[k] / mode_lib->vba.PixelClockBackEnd[k] * 8.0,
4358 mode_lib->vba.ForcedOutputLinkBPP[k],
4359 true,
4360 mode_lib->vba.Output[k],
4361 mode_lib->vba.OutputFormat[k],
4362 mode_lib->vba.DSCInputBitPerComponent[k]);
4363 if (mode_lib->vba.DSCEnabled[k] == true || mode_lib->vba.Outbpp == BPP_INVALID) {
4364 locals->RequiresDSC[i][k] = true;
4365 if (mode_lib->vba.Output[k] == dm_dp) {
4366 locals->RequiresFEC[i][k] = true;
4367 } else {
4368 locals->RequiresFEC[i][k] = false;
4369 }
4370 mode_lib->vba.Outbpp = mode_lib->vba.OutbppDSC;
4371 } else {
4372 locals->RequiresDSC[i][k] = false;
4373 locals->RequiresFEC[i][k] = false;
4374 }
4375 locals->OutputBppPerState[i][k] =
4376 mode_lib->vba.Outbpp;
4377 }
4378 }
4379 } else {
4380 locals->OutputBppPerState[i][k] = BPP_BLENDED_PIPE;
4381 }
4382 }
4383 }
4384 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4385 locals->DIOSupport[i] = true;
4386 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4387 if (!mode_lib->vba.skip_dio_check[k]
4388 && (locals->OutputBppPerState[i][k] == BPP_INVALID
4389 || (mode_lib->vba.OutputFormat[k] == dm_420
4390 && mode_lib->vba.Interlace[k] == true
4391 && mode_lib->vba.ProgressiveToInterlaceUnitInOPP == true))) {
4392 locals->DIOSupport[i] = false;
4393 }
4394 }
4395 }
4396 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4397 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4398 locals->DSCCLKRequiredMoreThanSupported[i] = false;
4399 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4400 if ((mode_lib->vba.Output[k] == dm_dp
4401 || mode_lib->vba.Output[k] == dm_edp)) {
4402 if (mode_lib->vba.OutputFormat[k] == dm_420
4403 || mode_lib->vba.OutputFormat[k]
4404 == dm_n422) {
4405 mode_lib->vba.DSCFormatFactor = 2;
4406 } else {
4407 mode_lib->vba.DSCFormatFactor = 1;
4408 }
4409 if (locals->RequiresDSC[i][k] == true) {
4410 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4411 if (mode_lib->vba.PixelClockBackEnd[k] / 6.0 / mode_lib->vba.DSCFormatFactor
4412 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4413 locals->DSCCLKRequiredMoreThanSupported[i] =
4414 true;
4415 }
4416 } else {
4417 if (mode_lib->vba.PixelClockBackEnd[k] / 3.0 / mode_lib->vba.DSCFormatFactor
4418 > (1.0 - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * mode_lib->vba.MaxDSCCLK[i]) {
4419 locals->DSCCLKRequiredMoreThanSupported[i] =
4420 true;
4421 }
4422 }
4423 }
4424 }
4425 }
4426 }
4427 }
4428 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4429 locals->NotEnoughDSCUnits[i] = false;
4430 mode_lib->vba.TotalDSCUnitsRequired = 0.0;
4431 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4432 if (locals->RequiresDSC[i][k] == true) {
4433 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4434 mode_lib->vba.TotalDSCUnitsRequired =
4435 mode_lib->vba.TotalDSCUnitsRequired + 2.0;
4436 } else {
4437 mode_lib->vba.TotalDSCUnitsRequired =
4438 mode_lib->vba.TotalDSCUnitsRequired + 1.0;
4439 }
4440 }
4441 }
4442 if (mode_lib->vba.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) {
4443 locals->NotEnoughDSCUnits[i] = true;
4444 }
4445 }
4446 /*DSC Delay per state*/
4447
4448 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4449 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4450 if (mode_lib->vba.BlendingAndTiming[k] != k) {
4451 mode_lib->vba.slices = 0;
4452 } else if (locals->RequiresDSC[i][k] == 0
4453 || locals->RequiresDSC[i][k] == false) {
4454 mode_lib->vba.slices = 0;
4455 } else if (mode_lib->vba.PixelClockBackEnd[k] > 3200.0) {
4456 mode_lib->vba.slices = dml_ceil(
4457 mode_lib->vba.PixelClockBackEnd[k] / 400.0,
4458 4.0);
4459 } else if (mode_lib->vba.PixelClockBackEnd[k] > 1360.0) {
4460 mode_lib->vba.slices = 8.0;
4461 } else if (mode_lib->vba.PixelClockBackEnd[k] > 680.0) {
4462 mode_lib->vba.slices = 4.0;
4463 } else if (mode_lib->vba.PixelClockBackEnd[k] > 340.0) {
4464 mode_lib->vba.slices = 2.0;
4465 } else {
4466 mode_lib->vba.slices = 1.0;
4467 }
4468 if (locals->OutputBppPerState[i][k] == BPP_BLENDED_PIPE
4469 || locals->OutputBppPerState[i][k] == BPP_INVALID) {
4470 mode_lib->vba.bpp = 0.0;
4471 } else {
4472 mode_lib->vba.bpp = locals->OutputBppPerState[i][k];
4473 }
4474 if (locals->RequiresDSC[i][k] == true && mode_lib->vba.bpp != 0.0) {
4475 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4476 locals->DSCDelayPerState[i][k] =
4477 dscceComputeDelay(
4478 mode_lib->vba.DSCInputBitPerComponent[k],
4479 mode_lib->vba.bpp,
4480 dml_ceil(
4481 mode_lib->vba.HActive[k]
4482 / mode_lib->vba.slices,
4483 1.0),
4484 mode_lib->vba.slices,
4485 mode_lib->vba.OutputFormat[k])
4486 + dscComputeDelay(
4487 mode_lib->vba.OutputFormat[k]);
4488 } else {
4489 locals->DSCDelayPerState[i][k] =
4490 2.0 * (dscceComputeDelay(
4491 mode_lib->vba.DSCInputBitPerComponent[k],
4492 mode_lib->vba.bpp,
4493 dml_ceil(mode_lib->vba.HActive[k] / mode_lib->vba.slices, 1.0),
4494 mode_lib->vba.slices / 2,
4495 mode_lib->vba.OutputFormat[k])
4496 + dscComputeDelay(mode_lib->vba.OutputFormat[k]));
4497 }
4498 locals->DSCDelayPerState[i][k] =
4499 locals->DSCDelayPerState[i][k] * mode_lib->vba.PixelClock[k] / mode_lib->vba.PixelClockBackEnd[k];
4500 } else {
4501 locals->DSCDelayPerState[i][k] = 0.0;
4502 }
4503 }
4504 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4505 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4506 for (j = 0; j <= mode_lib->vba.NumberOfActivePlanes - 1; j++) {
4507 if (mode_lib->vba.BlendingAndTiming[k] == m && locals->RequiresDSC[i][m] == true)
4508 locals->DSCDelayPerState[i][k] = locals->DSCDelayPerState[i][m];
4509 }
4510 }
4511 }
4512 }
4513
4514 //Prefetch Check
4515 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
4516 for (j = 0; j <= 1; ++j) {
4517 locals->TotalNumberOfDCCActiveDPP[i][j] = 0;
4518 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4519 if (mode_lib->vba.DCCEnable[k] == true)
4520 locals->TotalNumberOfDCCActiveDPP[i][j] = locals->TotalNumberOfDCCActiveDPP[i][j] + locals->NoOfDPP[i][j][k];
4521 }
4522 }
4523 }
4524
4525 mode_lib->vba.UrgentLatency = dml_max3(
4526 mode_lib->vba.UrgentLatencyPixelDataOnly,
4527 mode_lib->vba.UrgentLatencyPixelMixedWithVMData,
4528 mode_lib->vba.UrgentLatencyVMDataOnly);
4529 mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(
4530 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
4531 &mode_lib->vba.MinPrefetchMode,
4532 &mode_lib->vba.MaxPrefetchMode);
4533
4534 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
4535 for (j = 0; j < 2; j++) {
4536 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4537 locals->RequiredDPPCLKThisState[k] = locals->RequiredDPPCLK[i][j][k];
4538 locals->NoOfDPPThisState[k] = locals->NoOfDPP[i][j][k];
4539 if (locals->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4540 locals->SwathWidthYThisState[k] =
4541 dml_min(locals->SwathWidthYSingleDPP[k], dml_round(mode_lib->vba.HActive[k] / 2.0 * mode_lib->vba.HRatio[k]));
4542 } else {
4543 locals->SwathWidthYThisState[k] = locals->SwathWidthYSingleDPP[k] / locals->NoOfDPP[i][j][k];
4544 }
4545 mode_lib->vba.SwathWidthGranularityY = 256.0
4546 / dml_ceil(locals->BytePerPixelInDETY[k], 1.0)
4547 / locals->MaxSwathHeightY[k];
4548 mode_lib->vba.RoundedUpMaxSwathSizeBytesY =
4549 (dml_ceil(locals->SwathWidthYThisState[k] - 1.0, mode_lib->vba.SwathWidthGranularityY)
4550 + mode_lib->vba.SwathWidthGranularityY) * locals->BytePerPixelInDETY[k] * locals->MaxSwathHeightY[k];
4551 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4552 mode_lib->vba.RoundedUpMaxSwathSizeBytesY = dml_ceil(
4553 mode_lib->vba.RoundedUpMaxSwathSizeBytesY,
4554 256.0) + 256;
4555 }
4556 if (locals->MaxSwathHeightC[k] > 0.0) {
4557 mode_lib->vba.SwathWidthGranularityC = 256.0 / dml_ceil(locals->BytePerPixelInDETC[k], 2.0) / locals->MaxSwathHeightC[k];
4558 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = (dml_ceil(locals->SwathWidthYThisState[k] / 2.0 - 1.0, mode_lib->vba.SwathWidthGranularityC)
4559 + mode_lib->vba.SwathWidthGranularityC) * locals->BytePerPixelInDETC[k] * locals->MaxSwathHeightC[k];
4560 if (mode_lib->vba.SourcePixelFormat[k] == dm_420_10) {
4561 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = dml_ceil(mode_lib->vba.RoundedUpMaxSwathSizeBytesC, 256.0) + 256;
4562 }
4563 } else {
4564 mode_lib->vba.RoundedUpMaxSwathSizeBytesC = 0.0;
4565 }
4566 if (mode_lib->vba.RoundedUpMaxSwathSizeBytesY + mode_lib->vba.RoundedUpMaxSwathSizeBytesC
4567 <= mode_lib->vba.DETBufferSizeInKByte[0] * 1024.0 / 2.0) {
4568 locals->SwathHeightYThisState[k] = locals->MaxSwathHeightY[k];
4569 locals->SwathHeightCThisState[k] = locals->MaxSwathHeightC[k];
4570 } else {
4571 locals->SwathHeightYThisState[k] =
4572 locals->MinSwathHeightY[k];
4573 locals->SwathHeightCThisState[k] =
4574 locals->MinSwathHeightC[k];
4575 }
4576 }
4577
4578 CalculateDCFCLKDeepSleep(
4579 mode_lib,
4580 mode_lib->vba.NumberOfActivePlanes,
4581 locals->BytePerPixelInDETY,
4582 locals->BytePerPixelInDETC,
4583 mode_lib->vba.VRatio,
4584 locals->SwathWidthYThisState,
4585 locals->NoOfDPPThisState,
4586 mode_lib->vba.HRatio,
4587 mode_lib->vba.PixelClock,
4588 locals->PSCL_FACTOR,
4589 locals->PSCL_FACTOR_CHROMA,
4590 locals->RequiredDPPCLKThisState,
4591 &mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0]);
4592
4593 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4594 if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64
4595 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
4596 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
4597 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
4598 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8)) {
4599 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4600 mode_lib,
4601 mode_lib->vba.DCCEnable[k],
4602 locals->Read256BlockHeightC[k],
4603 locals->Read256BlockWidthC[k],
4604 mode_lib->vba.SourcePixelFormat[k],
4605 mode_lib->vba.SurfaceTiling[k],
4606 dml_ceil(locals->BytePerPixelInDETC[k], 2.0),
4607 mode_lib->vba.SourceScan[k],
4608 mode_lib->vba.ViewportWidth[k] / 2.0,
4609 mode_lib->vba.ViewportHeight[k] / 2.0,
4610 locals->SwathWidthYThisState[k] / 2.0,
4611 mode_lib->vba.GPUVMEnable,
4612 mode_lib->vba.HostVMEnable,
4613 mode_lib->vba.HostVMMaxPageTableLevels,
4614 mode_lib->vba.HostVMCachedPageTableLevels,
4615 mode_lib->vba.VMMPageSize,
4616 mode_lib->vba.PTEBufferSizeInRequestsChroma,
4617 mode_lib->vba.PitchC[k],
4618 0.0,
4619 &locals->MacroTileWidthC[k],
4620 &mode_lib->vba.MetaRowBytesC,
4621 &mode_lib->vba.DPTEBytesPerRowC,
4622 &locals->PTEBufferSizeNotExceededC[i][j][k],
4623 locals->dpte_row_width_chroma_ub,
4624 &locals->dpte_row_height_chroma[k],
4625 &locals->meta_req_width_chroma[k],
4626 &locals->meta_req_height_chroma[k],
4627 &locals->meta_row_width_chroma[k],
4628 &locals->meta_row_height_chroma[k],
4629 &locals->vm_group_bytes_chroma,
4630 &locals->dpte_group_bytes_chroma,
4631 locals->PixelPTEReqWidthC,
4632 locals->PixelPTEReqHeightC,
4633 locals->PTERequestSizeC,
4634 locals->dpde0_bytes_per_frame_ub_c,
4635 locals->meta_pte_bytes_per_frame_ub_c);
4636 locals->PrefetchLinesC[0][0][k] = CalculatePrefetchSourceLines(
4637 mode_lib,
4638 mode_lib->vba.VRatio[k]/2,
4639 mode_lib->vba.VTAPsChroma[k],
4640 mode_lib->vba.Interlace[k],
4641 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4642 locals->SwathHeightCThisState[k],
4643 mode_lib->vba.ViewportYStartC[k],
4644 &locals->PrefillC[k],
4645 &locals->MaxNumSwC[k]);
4646 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma;
4647 } else {
4648 mode_lib->vba.PDEAndMetaPTEBytesPerFrameC = 0.0;
4649 mode_lib->vba.MetaRowBytesC = 0.0;
4650 mode_lib->vba.DPTEBytesPerRowC = 0.0;
4651 locals->PrefetchLinesC[0][0][k] = 0.0;
4652 locals->PTEBufferSizeNotExceededC[i][j][k] = true;
4653 locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma;
4654 }
4655 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4656 mode_lib,
4657 mode_lib->vba.DCCEnable[k],
4658 locals->Read256BlockHeightY[k],
4659 locals->Read256BlockWidthY[k],
4660 mode_lib->vba.SourcePixelFormat[k],
4661 mode_lib->vba.SurfaceTiling[k],
4662 dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
4663 mode_lib->vba.SourceScan[k],
4664 mode_lib->vba.ViewportWidth[k],
4665 mode_lib->vba.ViewportHeight[k],
4666 locals->SwathWidthYThisState[k],
4667 mode_lib->vba.GPUVMEnable,
4668 mode_lib->vba.HostVMEnable,
4669 mode_lib->vba.HostVMMaxPageTableLevels,
4670 mode_lib->vba.HostVMCachedPageTableLevels,
4671 mode_lib->vba.VMMPageSize,
4672 locals->PTEBufferSizeInRequestsForLuma,
4673 mode_lib->vba.PitchY[k],
4674 mode_lib->vba.DCCMetaPitchY[k],
4675 &locals->MacroTileWidthY[k],
4676 &mode_lib->vba.MetaRowBytesY,
4677 &mode_lib->vba.DPTEBytesPerRowY,
4678 &locals->PTEBufferSizeNotExceededY[i][j][k],
4679 locals->dpte_row_width_luma_ub,
4680 &locals->dpte_row_height[k],
4681 &locals->meta_req_width[k],
4682 &locals->meta_req_height[k],
4683 &locals->meta_row_width[k],
4684 &locals->meta_row_height[k],
4685 &locals->vm_group_bytes[k],
4686 &locals->dpte_group_bytes[k],
4687 locals->PixelPTEReqWidthY,
4688 locals->PixelPTEReqHeightY,
4689 locals->PTERequestSizeY,
4690 locals->dpde0_bytes_per_frame_ub_l,
4691 locals->meta_pte_bytes_per_frame_ub_l);
4692 locals->PrefetchLinesY[0][0][k] = CalculatePrefetchSourceLines(
4693 mode_lib,
4694 mode_lib->vba.VRatio[k],
4695 mode_lib->vba.vtaps[k],
4696 mode_lib->vba.Interlace[k],
4697 mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
4698 locals->SwathHeightYThisState[k],
4699 mode_lib->vba.ViewportYStartY[k],
4700 &locals->PrefillY[k],
4701 &locals->MaxNumSwY[k]);
4702 locals->PDEAndMetaPTEBytesPerFrame[0][0][k] =
4703 mode_lib->vba.PDEAndMetaPTEBytesPerFrameY + mode_lib->vba.PDEAndMetaPTEBytesPerFrameC;
4704 locals->MetaRowBytes[0][0][k] = mode_lib->vba.MetaRowBytesY + mode_lib->vba.MetaRowBytesC;
4705 locals->DPTEBytesPerRow[0][0][k] = mode_lib->vba.DPTEBytesPerRowY + mode_lib->vba.DPTEBytesPerRowC;
4706
4707 CalculateActiveRowBandwidth(
4708 mode_lib->vba.GPUVMEnable,
4709 mode_lib->vba.SourcePixelFormat[k],
4710 mode_lib->vba.VRatio[k],
4711 mode_lib->vba.DCCEnable[k],
4712 mode_lib->vba.HTotal[k] /
4713 mode_lib->vba.PixelClock[k],
4714 mode_lib->vba.MetaRowBytesY,
4715 mode_lib->vba.MetaRowBytesC,
4716 locals->meta_row_height[k],
4717 locals->meta_row_height_chroma[k],
4718 mode_lib->vba.DPTEBytesPerRowY,
4719 mode_lib->vba.DPTEBytesPerRowC,
4720 locals->dpte_row_height[k],
4721 locals->dpte_row_height_chroma[k],
4722 &locals->meta_row_bw[k],
4723 &locals->dpte_row_bw[k]);
4724 }
4725 mode_lib->vba.ExtraLatency = CalculateExtraLatency(
4726 locals->UrgentRoundTripAndOutOfOrderLatencyPerState[i],
4727 locals->TotalNumberOfActiveDPP[i][j],
4728 mode_lib->vba.PixelChunkSizeInKByte,
4729 locals->TotalNumberOfDCCActiveDPP[i][j],
4730 mode_lib->vba.MetaChunkSize,
4731 locals->ReturnBWPerState[i][0],
4732 mode_lib->vba.GPUVMEnable,
4733 mode_lib->vba.HostVMEnable,
4734 mode_lib->vba.NumberOfActivePlanes,
4735 locals->NoOfDPPThisState,
4736 locals->dpte_group_bytes,
4737 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4738 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4739 mode_lib->vba.HostVMMaxPageTableLevels,
4740 mode_lib->vba.HostVMCachedPageTableLevels);
4741
4742 mode_lib->vba.TimeCalc = 24.0 / mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
4743 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4744 if (mode_lib->vba.BlendingAndTiming[k] == k) {
4745 if (mode_lib->vba.WritebackEnable[k] == true) {
4746 locals->WritebackDelay[i][k] = mode_lib->vba.WritebackLatency
4747 + CalculateWriteBackDelay(
4748 mode_lib->vba.WritebackPixelFormat[k],
4749 mode_lib->vba.WritebackHRatio[k],
4750 mode_lib->vba.WritebackVRatio[k],
4751 mode_lib->vba.WritebackLumaHTaps[k],
4752 mode_lib->vba.WritebackLumaVTaps[k],
4753 mode_lib->vba.WritebackChromaHTaps[k],
4754 mode_lib->vba.WritebackChromaVTaps[k],
4755 mode_lib->vba.WritebackDestinationWidth[k]) / locals->RequiredDISPCLK[i][j];
4756 } else {
4757 locals->WritebackDelay[i][k] = 0.0;
4758 }
4759 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4760 if (mode_lib->vba.BlendingAndTiming[m] == k
4761 && mode_lib->vba.WritebackEnable[m]
4762 == true) {
4763 locals->WritebackDelay[i][k] = dml_max(locals->WritebackDelay[i][k],
4764 mode_lib->vba.WritebackLatency + CalculateWriteBackDelay(
4765 mode_lib->vba.WritebackPixelFormat[m],
4766 mode_lib->vba.WritebackHRatio[m],
4767 mode_lib->vba.WritebackVRatio[m],
4768 mode_lib->vba.WritebackLumaHTaps[m],
4769 mode_lib->vba.WritebackLumaVTaps[m],
4770 mode_lib->vba.WritebackChromaHTaps[m],
4771 mode_lib->vba.WritebackChromaVTaps[m],
4772 mode_lib->vba.WritebackDestinationWidth[m]) / locals->RequiredDISPCLK[i][j]);
4773 }
4774 }
4775 }
4776 }
4777 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4778 for (m = 0; m <= mode_lib->vba.NumberOfActivePlanes - 1; m++) {
4779 if (mode_lib->vba.BlendingAndTiming[k] == m) {
4780 locals->WritebackDelay[i][k] = locals->WritebackDelay[i][m];
4781 }
4782 }
4783 }
4784 mode_lib->vba.MaxMaxVStartup[0][0] = 0;
4785 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4786 locals->MaximumVStartup[0][0][k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]
4787 - dml_max(1.0, dml_ceil(locals->WritebackDelay[i][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1.0));
4788 mode_lib->vba.MaxMaxVStartup[0][0] = dml_max(mode_lib->vba.MaxMaxVStartup[0][0], locals->MaximumVStartup[0][0][k]);
4789 }
4790
4791 mode_lib->vba.NextPrefetchMode = mode_lib->vba.MinPrefetchMode;
4792 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4793 do {
4794 mode_lib->vba.PrefetchMode[i][j] = mode_lib->vba.NextPrefetchMode;
4795 mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup;
4796
4797 mode_lib->vba.TWait = CalculateTWait(
4798 mode_lib->vba.PrefetchMode[i][j],
4799 mode_lib->vba.DRAMClockChangeLatency,
4800 mode_lib->vba.UrgentLatency,
4801 mode_lib->vba.SREnterPlusExitTime);
4802 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
4803 CalculatePrefetchSchedulePerPlane(mode_lib, i, j, k);
4804
4805 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
4806 mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
4807 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4808 unsigned int m;
4809
4810 locals->cursor_bw[k] = 0;
4811 locals->cursor_bw_pre[k] = 0;
4812 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
4813 locals->cursor_bw[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4814 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
4815 locals->cursor_bw_pre[k] = mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m]
4816 / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPreY[i][j][k];
4817 }
4818
4819 CalculateUrgentBurstFactor(
4820 mode_lib->vba.DETBufferSizeInKByte[0],
4821 locals->SwathHeightYThisState[k],
4822 locals->SwathHeightCThisState[k],
4823 locals->SwathWidthYThisState[k],
4824 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4825 mode_lib->vba.UrgentLatency,
4826 mode_lib->vba.CursorBufferSize,
4827 mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1],
4828 dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]),
4829 mode_lib->vba.VRatio[k],
4830 locals->VRatioPreY[i][j][k],
4831 locals->VRatioPreC[i][j][k],
4832 locals->BytePerPixelInDETY[k],
4833 locals->BytePerPixelInDETC[k],
4834 &locals->UrgentBurstFactorCursor[k],
4835 &locals->UrgentBurstFactorCursorPre[k],
4836 &locals->UrgentBurstFactorLuma[k],
4837 &locals->UrgentBurstFactorLumaPre[k],
4838 &locals->UrgentBurstFactorChroma[k],
4839 &locals->UrgentBurstFactorChromaPre[k],
4840 &locals->NotEnoughUrgentLatencyHiding[0][0],
4841 &locals->NotEnoughUrgentLatencyHidingPre);
4842
4843 if (mode_lib->vba.UseUrgentBurstBandwidth == false) {
4844 locals->UrgentBurstFactorCursor[k] = 1;
4845 locals->UrgentBurstFactorCursorPre[k] = 1;
4846 locals->UrgentBurstFactorLuma[k] = 1;
4847 locals->UrgentBurstFactorLumaPre[k] = 1;
4848 locals->UrgentBurstFactorChroma[k] = 1;
4849 locals->UrgentBurstFactorChromaPre[k] = 1;
4850 }
4851
4852 mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = mode_lib->vba.MaximumReadBandwidthWithoutPrefetch
4853 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k] + locals->ReadBandwidthLuma[k]
4854 * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4855 * locals->UrgentBurstFactorChroma[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k];
4856 mode_lib->vba.MaximumReadBandwidthWithPrefetch = mode_lib->vba.MaximumReadBandwidthWithPrefetch
4857 + dml_max3(locals->prefetch_vmrow_bw[k],
4858 locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] + locals->ReadBandwidthChroma[k]
4859 * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k]
4860 + locals->meta_row_bw[k] + locals->dpte_row_bw[k],
4861 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4862 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4863 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4864 }
4865 locals->BandwidthWithoutPrefetchSupported[i][0] = true;
4866 if (mode_lib->vba.MaximumReadBandwidthWithoutPrefetch > locals->ReturnBWPerState[i][0]
4867 || locals->NotEnoughUrgentLatencyHiding[0][0] == 1) {
4868 locals->BandwidthWithoutPrefetchSupported[i][0] = false;
4869 }
4870
4871 locals->PrefetchSupported[i][j] = true;
4872 if (mode_lib->vba.MaximumReadBandwidthWithPrefetch > locals->ReturnBWPerState[i][0]
4873 || locals->NotEnoughUrgentLatencyHiding[0][0] == 1
4874 || locals->NotEnoughUrgentLatencyHidingPre == 1) {
4875 locals->PrefetchSupported[i][j] = false;
4876 }
4877 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4878 if (locals->LineTimesForPrefetch[k] < 2.0
4879 || locals->LinesForMetaPTE[k] >= 32.0
4880 || locals->LinesForMetaAndDPTERow[k] >= 16.0
4881 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4882 locals->PrefetchSupported[i][j] = false;
4883 }
4884 }
4885 locals->VRatioInPrefetchSupported[i][j] = true;
4886 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4887 if (locals->VRatioPreY[i][j][k] > 4.0
4888 || locals->VRatioPreC[i][j][k] > 4.0
4889 || mode_lib->vba.IsErrorResult[i][j][k] == true) {
4890 locals->VRatioInPrefetchSupported[i][j] = false;
4891 }
4892 }
4893 mode_lib->vba.AnyLinesForVMOrRowTooLarge = false;
4894 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
4895 if (locals->LinesForMetaAndDPTERow[k] >= 16 || locals->LinesForMetaPTE[k] >= 32) {
4896 mode_lib->vba.AnyLinesForVMOrRowTooLarge = true;
4897 }
4898 }
4899
4900 if (mode_lib->vba.MaxVStartup <= 13 || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) {
4901 mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[0][0];
4902 mode_lib->vba.NextPrefetchMode = mode_lib->vba.NextPrefetchMode + 1;
4903 } else {
4904 mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1;
4905 }
4906 } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
4907 && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
4908 || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode));
4909
4910 if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
4911 mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
4912 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4913 mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.BandwidthAvailableForImmediateFlip
4914 - dml_max(locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4915 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4916 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4917 locals->RequiredPrefetchPixelDataBWLuma[i][j][k] * locals->UrgentBurstFactorLumaPre[k]
4918 + locals->RequiredPrefetchPixelDataBWChroma[i][j][k] * locals->UrgentBurstFactorChromaPre[k]
4919 + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]);
4920 }
4921 mode_lib->vba.TotImmediateFlipBytes = 0.0;
4922 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4923 mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes
4924 + locals->PDEAndMetaPTEBytesPerFrame[0][0][k] + locals->MetaRowBytes[0][0][k] + locals->DPTEBytesPerRow[0][0][k];
4925 }
4926
4927 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4928 CalculateFlipSchedule(
4929 mode_lib,
4930 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4931 mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4932 mode_lib->vba.ExtraLatency,
4933 mode_lib->vba.UrgentLatency,
4934 mode_lib->vba.GPUVMMaxPageTableLevels,
4935 mode_lib->vba.HostVMEnable,
4936 mode_lib->vba.HostVMMaxPageTableLevels,
4937 mode_lib->vba.HostVMCachedPageTableLevels,
4938 mode_lib->vba.GPUVMEnable,
4939 locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
4940 locals->MetaRowBytes[0][0][k],
4941 locals->DPTEBytesPerRow[0][0][k],
4942 mode_lib->vba.BandwidthAvailableForImmediateFlip,
4943 mode_lib->vba.TotImmediateFlipBytes,
4944 mode_lib->vba.SourcePixelFormat[k],
4945 mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
4946 mode_lib->vba.VRatio[k],
4947 locals->Tno_bw[k],
4948 mode_lib->vba.DCCEnable[k],
4949 locals->dpte_row_height[k],
4950 locals->meta_row_height[k],
4951 locals->dpte_row_height_chroma[k],
4952 locals->meta_row_height_chroma[k],
4953 &locals->DestinationLinesToRequestVMInImmediateFlip[k],
4954 &locals->DestinationLinesToRequestRowInImmediateFlip[k],
4955 &locals->final_flip_bw[k],
4956 &locals->ImmediateFlipSupportedForPipe[k]);
4957 }
4958 mode_lib->vba.total_dcn_read_bw_with_flip = 0.0;
4959 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4960 mode_lib->vba.total_dcn_read_bw_with_flip = mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3(
4961 locals->prefetch_vmrow_bw[k],
4962 locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k]
4963 + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k]
4964 + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k],
4965 locals->final_flip_bw[k] + locals->RequiredPrefetchPixelDataBWLuma[i][j][k]
4966 * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixelDataBWChroma[i][j][k]
4967 * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k]
4968 * locals->UrgentBurstFactorCursorPre[k]);
4969 }
4970 locals->ImmediateFlipSupportedForState[i][j] = true;
4971 if (mode_lib->vba.total_dcn_read_bw_with_flip
4972 > locals->ReturnBWPerState[i][0]) {
4973 locals->ImmediateFlipSupportedForState[i][j] = false;
4974 }
4975 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
4976 if (locals->ImmediateFlipSupportedForPipe[k] == false) {
4977 locals->ImmediateFlipSupportedForState[i][j] = false;
4978 }
4979 }
4980 } else {
4981 locals->ImmediateFlipSupportedForState[i][j] = false;
4982 }
4983 mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3(
4984 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4985 mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4986 mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly);
4987 CalculateWatermarksAndDRAMSpeedChangeSupport(
4988 mode_lib,
4989 mode_lib->vba.PrefetchMode[i][j],
4990 mode_lib->vba.NumberOfActivePlanes,
4991 mode_lib->vba.MaxLineBufferLines,
4992 mode_lib->vba.LineBufferSize,
4993 mode_lib->vba.DPPOutputBufferPixels,
4994 mode_lib->vba.DETBufferSizeInKByte[0],
4995 mode_lib->vba.WritebackInterfaceLumaBufferSize,
4996 mode_lib->vba.WritebackInterfaceChromaBufferSize,
4997 mode_lib->vba.DCFCLKPerState[i],
4998 mode_lib->vba.UrgentOutOfOrderReturnPerChannel * mode_lib->vba.NumberOfChannels,
4999 locals->ReturnBWPerState[i][0],
5000 mode_lib->vba.GPUVMEnable,
5001 locals->dpte_group_bytes,
5002 mode_lib->vba.MetaChunkSize,
5003 mode_lib->vba.UrgentLatency,
5004 mode_lib->vba.ExtraLatency,
5005 mode_lib->vba.WritebackLatency,
5006 mode_lib->vba.WritebackChunkSize,
5007 mode_lib->vba.SOCCLKPerState[i],
5008 mode_lib->vba.DRAMClockChangeLatency,
5009 mode_lib->vba.SRExitTime,
5010 mode_lib->vba.SREnterPlusExitTime,
5011 mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0],
5012 locals->NoOfDPPThisState,
5013 mode_lib->vba.DCCEnable,
5014 locals->RequiredDPPCLKThisState,
5015 locals->SwathWidthYSingleDPP,
5016 locals->SwathHeightYThisState,
5017 locals->ReadBandwidthLuma,
5018 locals->SwathHeightCThisState,
5019 locals->ReadBandwidthChroma,
5020 mode_lib->vba.LBBitPerPixel,
5021 locals->SwathWidthYThisState,
5022 mode_lib->vba.HRatio,
5023 mode_lib->vba.vtaps,
5024 mode_lib->vba.VTAPsChroma,
5025 mode_lib->vba.VRatio,
5026 mode_lib->vba.HTotal,
5027 mode_lib->vba.PixelClock,
5028 mode_lib->vba.BlendingAndTiming,
5029 locals->BytePerPixelInDETY,
5030 locals->BytePerPixelInDETC,
5031 mode_lib->vba.WritebackEnable,
5032 mode_lib->vba.WritebackPixelFormat,
5033 mode_lib->vba.WritebackDestinationWidth,
5034 mode_lib->vba.WritebackDestinationHeight,
5035 mode_lib->vba.WritebackSourceHeight,
5036 &locals->DRAMClockChangeSupport[i][j],
5037 &mode_lib->vba.UrgentWatermark,
5038 &mode_lib->vba.WritebackUrgentWatermark,
5039 &mode_lib->vba.DRAMClockChangeWatermark,
5040 &mode_lib->vba.WritebackDRAMClockChangeWatermark,
5041 &mode_lib->vba.StutterExitWatermark,
5042 &mode_lib->vba.StutterEnterPlusExitWatermark,
5043 &mode_lib->vba.MinActiveDRAMClockChangeLatencySupported);
5044 }
5045 }
5046
5047 /*Vertical Active BW support*/
5048 {
5049 double MaxTotalVActiveRDBandwidth = 0.0;
5050 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
5051 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + locals->ReadBandwidth[k];
5052 }
5053 for (i = 0; i <= mode_lib->vba.soc.num_states; ++i) {
5054 locals->MaxTotalVerticalActiveAvailableBandwidth[i][0] = dml_min(
5055 locals->IdealSDPPortBandwidthPerState[i][0] *
5056 mode_lib->vba.MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation
5057 / 100.0, mode_lib->vba.DRAMSpeedPerState[i] *
5058 mode_lib->vba.NumberOfChannels *
5059 mode_lib->vba.DRAMChannelWidth *
5060 mode_lib->vba.MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
5061 / 100.0);
5062
5063 if (MaxTotalVActiveRDBandwidth <= locals->MaxTotalVerticalActiveAvailableBandwidth[i][0]) {
5064 locals->TotalVerticalActiveBandwidthSupport[i][0] = true;
5065 } else {
5066 locals->TotalVerticalActiveBandwidthSupport[i][0] = false;
5067 }
5068 }
5069 }
5070
5071 /*PTE Buffer Size Check*/
5072
5073 for (i = 0; i <= mode_lib->vba.soc.num_states; i++) {
5074 for (j = 0; j < 2; j++) {
5075 locals->PTEBufferSizeNotExceeded[i][j] = true;
5076 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5077 if (locals->PTEBufferSizeNotExceededY[i][j][k] == false
5078 || locals->PTEBufferSizeNotExceededC[i][j][k] == false) {
5079 locals->PTEBufferSizeNotExceeded[i][j] = false;
5080 }
5081 }
5082 }
5083 }
5084 /*Cursor Support Check*/
5085
5086 mode_lib->vba.CursorSupport = true;
5087 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5088 if (mode_lib->vba.CursorWidth[k][0] > 0.0) {
5089 for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) {
5090 if (mode_lib->vba.CursorBPP[k][m] == 64 && mode_lib->vba.Cursor64BppSupport == false) {
5091 mode_lib->vba.CursorSupport = false;
5092 }
5093 }
5094 }
5095 }
5096 /*Valid Pitch Check*/
5097
5098 mode_lib->vba.PitchSupport = true;
5099 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5100 locals->AlignedYPitch[k] = dml_ceil(
5101 dml_max(mode_lib->vba.PitchY[k], mode_lib->vba.ViewportWidth[k]),
5102 locals->MacroTileWidthY[k]);
5103 if (locals->AlignedYPitch[k] > mode_lib->vba.PitchY[k]) {
5104 mode_lib->vba.PitchSupport = false;
5105 }
5106 if (mode_lib->vba.DCCEnable[k] == true) {
5107 locals->AlignedDCCMetaPitch[k] = dml_ceil(
5108 dml_max(
5109 mode_lib->vba.DCCMetaPitchY[k],
5110 mode_lib->vba.ViewportWidth[k]),
5111 64.0 * locals->Read256BlockWidthY[k]);
5112 } else {
5113 locals->AlignedDCCMetaPitch[k] = mode_lib->vba.DCCMetaPitchY[k];
5114 }
5115 if (locals->AlignedDCCMetaPitch[k] > mode_lib->vba.DCCMetaPitchY[k]) {
5116 mode_lib->vba.PitchSupport = false;
5117 }
5118 if (mode_lib->vba.SourcePixelFormat[k] != dm_444_64
5119 && mode_lib->vba.SourcePixelFormat[k] != dm_444_32
5120 && mode_lib->vba.SourcePixelFormat[k] != dm_444_16
5121 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_16
5122 && mode_lib->vba.SourcePixelFormat[k] != dm_mono_8) {
5123 locals->AlignedCPitch[k] = dml_ceil(
5124 dml_max(
5125 mode_lib->vba.PitchC[k],
5126 mode_lib->vba.ViewportWidth[k] / 2.0),
5127 locals->MacroTileWidthC[k]);
5128 } else {
5129 locals->AlignedCPitch[k] = mode_lib->vba.PitchC[k];
5130 }
5131 if (locals->AlignedCPitch[k] > mode_lib->vba.PitchC[k]) {
5132 mode_lib->vba.PitchSupport = false;
5133 }
5134 }
5135 /*Mode Support, Voltage State and SOC Configuration*/
5136
5137 for (i = mode_lib->vba.soc.num_states; i >= 0; i--) {
5138 for (j = 0; j < 2; j++) {
5139 enum dm_validation_status status = DML_VALIDATION_OK;
5140
5141 if (!mode_lib->vba.ScaleRatioAndTapsSupport) {
5142 status = DML_FAIL_SCALE_RATIO_TAP;
5143 } else if (!mode_lib->vba.SourceFormatPixelAndScanSupport) {
5144 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5145 } else if (!locals->ViewportSizeSupport[i][0]) {
5146 status = DML_FAIL_VIEWPORT_SIZE;
5147 } else if (!locals->DIOSupport[i]) {
5148 status = DML_FAIL_DIO_SUPPORT;
5149 } else if (locals->NotEnoughDSCUnits[i]) {
5150 status = DML_FAIL_NOT_ENOUGH_DSC;
5151 } else if (locals->DSCCLKRequiredMoreThanSupported[i]) {
5152 status = DML_FAIL_DSC_CLK_REQUIRED;
5153 } else if (!locals->ROBSupport[i][0]) {
5154 status = DML_FAIL_REORDERING_BUFFER;
5155 } else if (!locals->DISPCLK_DPPCLK_Support[i][j]) {
5156 status = DML_FAIL_DISPCLK_DPPCLK;
5157 } else if (!locals->TotalAvailablePipesSupport[i][j]) {
5158 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5159 } else if (!mode_lib->vba.NumberOfOTGSupport) {
5160 status = DML_FAIL_NUM_OTG;
5161 } else if (!mode_lib->vba.WritebackModeSupport) {
5162 status = DML_FAIL_WRITEBACK_MODE;
5163 } else if (!mode_lib->vba.WritebackLatencySupport) {
5164 status = DML_FAIL_WRITEBACK_LATENCY;
5165 } else if (!mode_lib->vba.WritebackScaleRatioAndTapsSupport) {
5166 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5167 } else if (!mode_lib->vba.CursorSupport) {
5168 status = DML_FAIL_CURSOR_SUPPORT;
5169 } else if (!mode_lib->vba.PitchSupport) {
5170 status = DML_FAIL_PITCH_SUPPORT;
5171 } else if (!locals->TotalVerticalActiveBandwidthSupport[i][0]) {
5172 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5173 } else if (!locals->PTEBufferSizeNotExceeded[i][j]) {
5174 status = DML_FAIL_PTE_BUFFER_SIZE;
5175 } else if (mode_lib->vba.NonsupportedDSCInputBPC) {
5176 status = DML_FAIL_DSC_INPUT_BPC;
5177 } else if ((mode_lib->vba.HostVMEnable
5178 && !locals->ImmediateFlipSupportedForState[i][j])) {
5179 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5180 } else if (!locals->PrefetchSupported[i][j]) {
5181 status = DML_FAIL_PREFETCH_SUPPORT;
5182 } else if (!locals->VRatioInPrefetchSupported[i][j]) {
5183 status = DML_FAIL_V_RATIO_PREFETCH;
5184 }
5185
5186 if (status == DML_VALIDATION_OK) {
5187 locals->ModeSupport[i][j] = true;
5188 } else {
5189 locals->ModeSupport[i][j] = false;
5190 }
5191 locals->ValidationStatus[i] = status;
5192 }
5193 }
5194 {
5195 unsigned int MaximumMPCCombine = 0;
5196 mode_lib->vba.VoltageLevel = mode_lib->vba.soc.num_states + 1;
5197 for (i = mode_lib->vba.VoltageOverrideLevel; i <= mode_lib->vba.soc.num_states; i++) {
5198 if (locals->ModeSupport[i][0] == true || locals->ModeSupport[i][1] == true) {
5199 mode_lib->vba.VoltageLevel = i;
5200 if (locals->ModeSupport[i][1] == true && (locals->ModeSupport[i][0] == false
5201 || mode_lib->vba.WhenToDoMPCCombine == dm_mpc_always_when_possible
5202 || (mode_lib->vba.WhenToDoMPCCombine == dm_mpc_reduce_voltage_and_clocks
5203 && ((locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vactive
5204 && locals->DRAMClockChangeSupport[i][0] != dm_dram_clock_change_vactive)
5205 || (locals->DRAMClockChangeSupport[i][1] == dm_dram_clock_change_vblank
5206 && locals->DRAMClockChangeSupport[i][0] == dm_dram_clock_change_unsupported))))) {
5207 MaximumMPCCombine = 1;
5208 } else {
5209 MaximumMPCCombine = 0;
5210 }
5211 break;
5212 }
5213 }
5214 mode_lib->vba.ImmediateFlipSupport =
5215 locals->ImmediateFlipSupportedForState[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5216 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5217 mode_lib->vba.DPPPerPlane[k] = locals->NoOfDPP[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5218 locals->DPPCLK[k] = locals->RequiredDPPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine][k];
5219 }
5220 mode_lib->vba.DISPCLK = locals->RequiredDISPCLK[mode_lib->vba.VoltageLevel][MaximumMPCCombine];
5221 mode_lib->vba.maxMpcComb = MaximumMPCCombine;
5222 }
5223 mode_lib->vba.DCFCLK = mode_lib->vba.DCFCLKPerState[mode_lib->vba.VoltageLevel];
5224 mode_lib->vba.DRAMSpeed = mode_lib->vba.DRAMSpeedPerState[mode_lib->vba.VoltageLevel];
5225 mode_lib->vba.FabricClock = mode_lib->vba.FabricClockPerState[mode_lib->vba.VoltageLevel];
5226 mode_lib->vba.SOCCLK = mode_lib->vba.SOCCLKPerState[mode_lib->vba.VoltageLevel];
5227 mode_lib->vba.ReturnBW = locals->ReturnBWPerState[mode_lib->vba.VoltageLevel][0];
5228 for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
5229 if (mode_lib->vba.BlendingAndTiming[k] == k) {
5230 mode_lib->vba.ODMCombineEnabled[k] =
5231 locals->ODMCombineEnablePerState[mode_lib->vba.VoltageLevel][k];
5232 } else {
5233 mode_lib->vba.ODMCombineEnabled[k] = false;
5234 }
5235 mode_lib->vba.DSCEnabled[k] =
5236 locals->RequiresDSC[mode_lib->vba.VoltageLevel][k];
5237 mode_lib->vba.OutputBpp[k] =
5238 locals->OutputBppPerState[mode_lib->vba.VoltageLevel][k];
5239 }
5240 }
5241
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceLumaBufferSize,unsigned int WritebackInterfaceChromaBufferSize,double DCFCLK,double UrgentOutOfOrderReturn,double ReturnBW,bool GPUVMEnable,int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],double SwathWidthSingleDPPY[],unsigned int SwathHeightY[],double ReadBandwidthPlaneLuma[],unsigned int SwathHeightC[],double ReadBandwidthPlaneChroma[],unsigned int LBBitPerPixel[],double SwathWidthY[],double HRatio[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5242 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5243 struct display_mode_lib *mode_lib,
5244 unsigned int PrefetchMode,
5245 unsigned int NumberOfActivePlanes,
5246 unsigned int MaxLineBufferLines,
5247 unsigned int LineBufferSize,
5248 unsigned int DPPOutputBufferPixels,
5249 unsigned int DETBufferSizeInKByte,
5250 unsigned int WritebackInterfaceLumaBufferSize,
5251 unsigned int WritebackInterfaceChromaBufferSize,
5252 double DCFCLK,
5253 double UrgentOutOfOrderReturn,
5254 double ReturnBW,
5255 bool GPUVMEnable,
5256 int dpte_group_bytes[],
5257 unsigned int MetaChunkSize,
5258 double UrgentLatency,
5259 double ExtraLatency,
5260 double WritebackLatency,
5261 double WritebackChunkSize,
5262 double SOCCLK,
5263 double DRAMClockChangeLatency,
5264 double SRExitTime,
5265 double SREnterPlusExitTime,
5266 double DCFCLKDeepSleep,
5267 int DPPPerPlane[],
5268 bool DCCEnable[],
5269 double DPPCLK[],
5270 double SwathWidthSingleDPPY[],
5271 unsigned int SwathHeightY[],
5272 double ReadBandwidthPlaneLuma[],
5273 unsigned int SwathHeightC[],
5274 double ReadBandwidthPlaneChroma[],
5275 unsigned int LBBitPerPixel[],
5276 double SwathWidthY[],
5277 double HRatio[],
5278 unsigned int vtaps[],
5279 unsigned int VTAPsChroma[],
5280 double VRatio[],
5281 unsigned int HTotal[],
5282 double PixelClock[],
5283 unsigned int BlendingAndTiming[],
5284 double BytePerPixelDETY[],
5285 double BytePerPixelDETC[],
5286 bool WritebackEnable[],
5287 enum source_format_class WritebackPixelFormat[],
5288 double WritebackDestinationWidth[],
5289 double WritebackDestinationHeight[],
5290 double WritebackSourceHeight[],
5291 enum clock_change_support *DRAMClockChangeSupport,
5292 double *UrgentWatermark,
5293 double *WritebackUrgentWatermark,
5294 double *DRAMClockChangeWatermark,
5295 double *WritebackDRAMClockChangeWatermark,
5296 double *StutterExitWatermark,
5297 double *StutterEnterPlusExitWatermark,
5298 double *MinActiveDRAMClockChangeLatencySupported)
5299 {
5300 double EffectiveLBLatencyHidingY;
5301 double EffectiveLBLatencyHidingC;
5302 double DPPOutputBufferLinesY;
5303 double DPPOutputBufferLinesC;
5304 unsigned int DETBufferSizeY;
5305 unsigned int DETBufferSizeC;
5306 double LinesInDETY[DC__NUM_DPP__MAX];
5307 double LinesInDETC;
5308 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5309 unsigned int LinesInDETCRoundedDownToSwath;
5310 double FullDETBufferingTimeY[DC__NUM_DPP__MAX];
5311 double FullDETBufferingTimeC;
5312 double ActiveDRAMClockChangeLatencyMarginY;
5313 double ActiveDRAMClockChangeLatencyMarginC;
5314 double WritebackDRAMClockChangeLatencyMargin;
5315 double PlaneWithMinActiveDRAMClockChangeMargin;
5316 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5317 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5318 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5319 unsigned int k, j;
5320
5321 mode_lib->vba.TotalActiveDPP = 0;
5322 mode_lib->vba.TotalDCCActiveDPP = 0;
5323 for (k = 0; k < NumberOfActivePlanes; ++k) {
5324 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5325 if (DCCEnable[k] == true) {
5326 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5327 }
5328 }
5329
5330 mode_lib->vba.TotalDataReadBandwidth = 0;
5331 for (k = 0; k < NumberOfActivePlanes; ++k) {
5332 mode_lib->vba.TotalDataReadBandwidth = mode_lib->vba.TotalDataReadBandwidth
5333 + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
5334 }
5335
5336 *UrgentWatermark = UrgentLatency + ExtraLatency;
5337
5338 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5339
5340 mode_lib->vba.TotalActiveWriteback = 0;
5341 for (k = 0; k < NumberOfActivePlanes; ++k) {
5342 if (WritebackEnable[k] == true) {
5343 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5344 }
5345 }
5346
5347 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5348 *WritebackUrgentWatermark = WritebackLatency;
5349 } else {
5350 *WritebackUrgentWatermark = WritebackLatency
5351 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5352 }
5353
5354 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5355 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5356 } else {
5357 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency
5358 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5359 }
5360
5361 for (k = 0; k < NumberOfActivePlanes; ++k) {
5362
5363 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines,
5364 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1))
5365 - (vtaps[k] - 1);
5366
5367 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines,
5368 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / 2 / dml_max(HRatio[k] / 2, 1.0)), 1))
5369 - (VTAPsChroma[k] - 1);
5370
5371 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k]
5372 * (HTotal[k] / PixelClock[k]);
5373
5374 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC
5375 / (VRatio[k] / 2) * (HTotal[k] / PixelClock[k]);
5376
5377 if (SwathWidthY[k] > 2 * DPPOutputBufferPixels) {
5378 DPPOutputBufferLinesY = (double) DPPOutputBufferPixels / SwathWidthY[k];
5379 } else if (SwathWidthY[k] > DPPOutputBufferPixels) {
5380 DPPOutputBufferLinesY = 0.5;
5381 } else {
5382 DPPOutputBufferLinesY = 1;
5383 }
5384
5385 if (SwathWidthY[k] / 2.0 > 2 * DPPOutputBufferPixels) {
5386 DPPOutputBufferLinesC = (double) DPPOutputBufferPixels
5387 / (SwathWidthY[k] / 2.0);
5388 } else if (SwathWidthY[k] / 2.0 > DPPOutputBufferPixels) {
5389 DPPOutputBufferLinesC = 0.5;
5390 } else {
5391 DPPOutputBufferLinesC = 1;
5392 }
5393
5394 CalculateDETBufferSize(
5395 DETBufferSizeInKByte,
5396 SwathHeightY[k],
5397 SwathHeightC[k],
5398 &DETBufferSizeY,
5399 &DETBufferSizeC);
5400
5401 LinesInDETY[k] = (double)DETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5402 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5403 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k]
5404 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5405 if (BytePerPixelDETC[k] > 0) {
5406 LinesInDETC = (double)DETBufferSizeC / BytePerPixelDETC[k] / (SwathWidthY[k] / 2.0);
5407 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5408 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath
5409 * (HTotal[k] / PixelClock[k]) / (VRatio[k] / 2);
5410 } else {
5411 LinesInDETC = 0;
5412 FullDETBufferingTimeC = 999999;
5413 }
5414
5415 ActiveDRAMClockChangeLatencyMarginY = HTotal[k] / PixelClock[k]
5416 * DPPOutputBufferLinesY + EffectiveLBLatencyHidingY
5417 + FullDETBufferingTimeY[k] - *DRAMClockChangeWatermark;
5418
5419 if (NumberOfActivePlanes > 1) {
5420 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5421 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5422 }
5423
5424 if (BytePerPixelDETC[k] > 0) {
5425 ActiveDRAMClockChangeLatencyMarginC = HTotal[k] / PixelClock[k]
5426 * DPPOutputBufferLinesC + EffectiveLBLatencyHidingC
5427 + FullDETBufferingTimeC - *DRAMClockChangeWatermark;
5428 if (NumberOfActivePlanes > 1) {
5429 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5430 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / (VRatio[k] / 2);
5431 }
5432 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5433 ActiveDRAMClockChangeLatencyMarginY,
5434 ActiveDRAMClockChangeLatencyMarginC);
5435 } else {
5436 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5437 }
5438
5439 if (WritebackEnable[k] == true) {
5440 if (WritebackPixelFormat[k] == dm_444_32) {
5441 WritebackDRAMClockChangeLatencyMargin = (WritebackInterfaceLumaBufferSize
5442 + WritebackInterfaceChromaBufferSize) / (WritebackDestinationWidth[k]
5443 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k]
5444 / PixelClock[k]) * 4) - *WritebackDRAMClockChangeWatermark;
5445 } else {
5446 WritebackDRAMClockChangeLatencyMargin = dml_min(
5447 WritebackInterfaceLumaBufferSize * 8.0 / 10,
5448 2 * WritebackInterfaceChromaBufferSize * 8.0 / 10) / (WritebackDestinationWidth[k]
5449 * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]))
5450 - *WritebackDRAMClockChangeWatermark;
5451 }
5452 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(
5453 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k],
5454 WritebackDRAMClockChangeLatencyMargin);
5455 }
5456 }
5457
5458 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5459 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5460 for (k = 0; k < NumberOfActivePlanes; ++k) {
5461 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5462 < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5463 mode_lib->vba.MinActiveDRAMClockChangeMargin =
5464 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5465 if (BlendingAndTiming[k] == k) {
5466 PlaneWithMinActiveDRAMClockChangeMargin = k;
5467 } else {
5468 for (j = 0; j < NumberOfActivePlanes; ++j) {
5469 if (BlendingAndTiming[k] == j) {
5470 PlaneWithMinActiveDRAMClockChangeMargin = j;
5471 }
5472 }
5473 }
5474 }
5475 }
5476
5477 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5478
5479 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5480 for (k = 0; k < NumberOfActivePlanes; ++k) {
5481 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k))
5482 && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5483 && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k]
5484 < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5485 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank =
5486 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5487 }
5488 }
5489
5490 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5491 for (k = 0; k < NumberOfActivePlanes; ++k) {
5492 if (BlendingAndTiming[k] == k) {
5493 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5494 }
5495 }
5496
5497 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5498 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5499 } else if (((mode_lib->vba.SynchronizedVBlank == true
5500 || mode_lib->vba.TotalNumberOfActiveOTG == 1
5501 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0)
5502 && PrefetchMode == 0)) {
5503 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5504 } else {
5505 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5506 }
5507
5508 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5509 for (k = 0; k < NumberOfActivePlanes; ++k) {
5510 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5511 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k]
5512 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]))
5513 * (HTotal[k] / PixelClock[k]) / VRatio[k];
5514 }
5515 }
5516
5517 *StutterExitWatermark = SRExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5518 + ExtraLatency + 10 / DCFCLKDeepSleep;
5519 *StutterEnterPlusExitWatermark = dml_max(
5520 SREnterPlusExitTime + mode_lib->vba.LastPixelOfLineExtraWatermark
5521 + ExtraLatency + 10 / DCFCLKDeepSleep,
5522 TimeToFinishSwathTransferStutterCriticalPlane);
5523
5524 }
5525
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,double BytePerPixelDETY[],double BytePerPixelDETC[],double VRatio[],double SwathWidthY[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double * DCFCLKDeepSleep)5526 static void CalculateDCFCLKDeepSleep(
5527 struct display_mode_lib *mode_lib,
5528 unsigned int NumberOfActivePlanes,
5529 double BytePerPixelDETY[],
5530 double BytePerPixelDETC[],
5531 double VRatio[],
5532 double SwathWidthY[],
5533 int DPPPerPlane[],
5534 double HRatio[],
5535 double PixelClock[],
5536 double PSCL_THROUGHPUT[],
5537 double PSCL_THROUGHPUT_CHROMA[],
5538 double DPPCLK[],
5539 double *DCFCLKDeepSleep)
5540 {
5541 unsigned int k;
5542 double DisplayPipeLineDeliveryTimeLuma;
5543 double DisplayPipeLineDeliveryTimeChroma;
5544 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5545
5546 for (k = 0; k < NumberOfActivePlanes; ++k) {
5547 if (VRatio[k] <= 1) {
5548 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k]
5549 / HRatio[k] / PixelClock[k];
5550 } else {
5551 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k]
5552 / DPPCLK[k];
5553 }
5554 if (BytePerPixelDETC[k] == 0) {
5555 DisplayPipeLineDeliveryTimeChroma = 0;
5556 } else {
5557 if (VRatio[k] / 2 <= 1) {
5558 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5559 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5560 } else {
5561 DisplayPipeLineDeliveryTimeChroma = SwathWidthY[k] / 2.0
5562 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5563 }
5564 }
5565
5566 if (BytePerPixelDETC[k] > 0) {
5567 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5568 1.1 * SwathWidthY[k] * dml_ceil(BytePerPixelDETY[k], 1)
5569 / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5570 1.1 * SwathWidthY[k] / 2.0
5571 * dml_ceil(BytePerPixelDETC[k], 2) / 32.0
5572 / DisplayPipeLineDeliveryTimeChroma);
5573 } else {
5574 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k]
5575 * dml_ceil(BytePerPixelDETY[k], 1) / 64.0
5576 / DisplayPipeLineDeliveryTimeLuma;
5577 }
5578 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(
5579 mode_lib->vba.DCFCLKDeepSleepPerPlane[k],
5580 PixelClock[k] / 16);
5581
5582 }
5583
5584 *DCFCLKDeepSleep = 8;
5585 for (k = 0; k < NumberOfActivePlanes; ++k) {
5586 *DCFCLKDeepSleep = dml_max(
5587 *DCFCLKDeepSleep,
5588 mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5589 }
5590 }
5591
/*
 * Split the DET buffer between luma and chroma.
 *
 * The total is DETBufferSizeInKByte * 1024.
 *  - SwathHeightC == 0:            everything goes to luma, chroma gets 0.
 *  - SwathHeightY <= SwathHeightC: the total is split in two equal halves.
 *  - otherwise:                    luma gets 2/3 and chroma 1/3.
 * All divisions are unsigned integer divisions (results are truncated).
 */
static void CalculateDETBufferSize(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int *DETBufferSizeY,
		unsigned int *DETBufferSizeC)
{
	const unsigned int total = DETBufferSizeInKByte * 1024;

	if (SwathHeightC == 0) {
		*DETBufferSizeY = total;
		*DETBufferSizeC = 0;
		return;
	}

	if (SwathHeightY <= SwathHeightC) {
		*DETBufferSizeY = total / 2;
		*DETBufferSizeC = total / 2;
		return;
	}

	*DETBufferSizeY = total * 2 / 3;
	*DETBufferSizeC = total / 3;
}
5610
/*
 * Turn a buffering time into an urgent burst factor.
 *
 * When BufferSizeInTime does not exceed UrgentLatency the factor is set to 0
 * and *NotEnoughLatencyHiding is set to 1; otherwise the factor is
 * BufferSizeInTime / (BufferSizeInTime - UrgentLatency) and the flag is left
 * untouched.
 */
static void UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		double *BurstFactor,
		unsigned int *NotEnoughLatencyHiding)
{
	if (BufferSizeInTime - UrgentLatency <= 0) {
		*NotEnoughLatencyHiding = 1;
		*BurstFactor = 0;
	} else {
		*BurstFactor = BufferSizeInTime / (BufferSizeInTime - UrgentLatency);
	}
}

/*
 * Compute the urgent burst factors for the cursor, luma and chroma buffers,
 * both for the active VRatio and for the prefetch ratios.
 *
 * Both NotEnoughUrgentLatencyHiding outputs start at 0 and are set to 1 as
 * soon as one of the corresponding buffering times does not exceed
 * UrgentLatency (the matching factor is then 0).
 *
 * Outputs that are left unwritten:
 *  - the two cursor factors when CursorWidth == 0;
 *  - the two chroma factors when BytePerPixelInDETC is not > 0.
 * A prefetch factor is set to 1 when its prefetch ratio is not > 0.
 */
static void CalculateUrgentBurstFactor(
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		unsigned int SwathWidthY,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioPreY,
		double VRatioPreC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorCursorPre,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorLumaPre,
		double *UrgentBurstFactorChroma,
		double *UrgentBurstFactorChromaPre,
		unsigned int *NotEnoughUrgentLatencyHiding,
		unsigned int *NotEnoughUrgentLatencyHidingPre)
{
	unsigned int cursor_lines;
	unsigned int DETBufferSizeY;
	unsigned int DETBufferSizeC;
	double det_lines;
	double det_lines_whole_swaths;

	*NotEnoughUrgentLatencyHiding = 0;
	*NotEnoughUrgentLatencyHidingPre = 0;

	/* Cursor buffer. */
	if (CursorWidth > 0) {
		/* Largest power of two of cursor lines that fits in the cursor buffer. */
		cursor_lines = 1 << (unsigned int) dml_floor(
				dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		UrgentBurstFactorFromBufferTime(
				cursor_lines * LineTime / VRatio,
				UrgentLatency,
				UrgentBurstFactorCursor,
				NotEnoughUrgentLatencyHiding);

		if (VRatioPreY > 0) {
			UrgentBurstFactorFromBufferTime(
					cursor_lines * LineTime / VRatioPreY,
					UrgentLatency,
					UrgentBurstFactorCursorPre,
					NotEnoughUrgentLatencyHidingPre);
		} else {
			*UrgentBurstFactorCursorPre = 1;
		}
	}

	CalculateDETBufferSize(
			DETBufferSizeInKByte,
			SwathHeightY,
			SwathHeightC,
			&DETBufferSizeY,
			&DETBufferSizeC);

	/* Luma: lines held in the DET, rounded down to a multiple of the swath height. */
	det_lines = (double)DETBufferSizeY / BytePerPixelInDETY / SwathWidthY;
	det_lines_whole_swaths = dml_floor(det_lines, SwathHeightY);

	UrgentBurstFactorFromBufferTime(
			det_lines_whole_swaths * LineTime / VRatio,
			UrgentLatency,
			UrgentBurstFactorLuma,
			NotEnoughUrgentLatencyHiding);

	if (VRatioPreY > 0) {
		UrgentBurstFactorFromBufferTime(
				det_lines_whole_swaths * LineTime / VRatioPreY,
				UrgentLatency,
				UrgentBurstFactorLumaPre,
				NotEnoughUrgentLatencyHidingPre);
	} else {
		*UrgentBurstFactorLumaPre = 1;
	}

	if (BytePerPixelInDETC > 0) {
		/*
		 * Chroma: half the luma width and half VRatio.
		 * SwathWidthY / 2 is an unsigned integer division here.
		 */
		det_lines = (double)DETBufferSizeC / BytePerPixelInDETC / (SwathWidthY / 2);
		det_lines_whole_swaths = dml_floor(det_lines, SwathHeightC);

		UrgentBurstFactorFromBufferTime(
				det_lines_whole_swaths * LineTime / (VRatio / 2),
				UrgentLatency,
				UrgentBurstFactorChroma,
				NotEnoughUrgentLatencyHiding);

		if (VRatioPreC > 0) {
			UrgentBurstFactorFromBufferTime(
					det_lines_whole_swaths * LineTime / VRatioPreC,
					UrgentLatency,
					UrgentBurstFactorChromaPre,
					NotEnoughUrgentLatencyHidingPre);
		} else {
			*UrgentBurstFactorChromaPre = 1;
		}
	}
}
5731
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],int DPPPerPlane[],double HRatio[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])5732 static void CalculatePixelDeliveryTimes(
5733 unsigned int NumberOfActivePlanes,
5734 double VRatio[],
5735 double VRatioPrefetchY[],
5736 double VRatioPrefetchC[],
5737 unsigned int swath_width_luma_ub[],
5738 unsigned int swath_width_chroma_ub[],
5739 int DPPPerPlane[],
5740 double HRatio[],
5741 double PixelClock[],
5742 double PSCL_THROUGHPUT[],
5743 double PSCL_THROUGHPUT_CHROMA[],
5744 double DPPCLK[],
5745 double BytePerPixelDETC[],
5746 enum scan_direction_class SourceScan[],
5747 unsigned int BlockWidth256BytesY[],
5748 unsigned int BlockHeight256BytesY[],
5749 unsigned int BlockWidth256BytesC[],
5750 unsigned int BlockHeight256BytesC[],
5751 double DisplayPipeLineDeliveryTimeLuma[],
5752 double DisplayPipeLineDeliveryTimeChroma[],
5753 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5754 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5755 double DisplayPipeRequestDeliveryTimeLuma[],
5756 double DisplayPipeRequestDeliveryTimeChroma[],
5757 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5758 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
5759 {
5760 double req_per_swath_ub;
5761 unsigned int k;
5762
5763 for (k = 0; k < NumberOfActivePlanes; ++k) {
5764 if (VRatio[k] <= 1) {
5765 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k]
5766 / HRatio[k] / PixelClock[k];
5767 } else {
5768 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k]
5769 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5770 }
5771
5772 if (BytePerPixelDETC[k] == 0) {
5773 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5774 } else {
5775 if (VRatio[k] / 2 <= 1) {
5776 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5777 * DPPPerPlane[k] / (HRatio[k] / 2) / PixelClock[k];
5778 } else {
5779 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k]
5780 / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5781 }
5782 }
5783
5784 if (VRatioPrefetchY[k] <= 1) {
5785 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5786 * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5787 } else {
5788 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k]
5789 / PSCL_THROUGHPUT[k] / DPPCLK[k];
5790 }
5791
5792 if (BytePerPixelDETC[k] == 0) {
5793 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5794 } else {
5795 if (VRatioPrefetchC[k] <= 1) {
5796 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5797 swath_width_chroma_ub[k] * DPPPerPlane[k]
5798 / (HRatio[k] / 2) / PixelClock[k];
5799 } else {
5800 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
5801 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5802 }
5803 }
5804 }
5805
5806 for (k = 0; k < NumberOfActivePlanes; ++k) {
5807 if (SourceScan[k] == dm_horz) {
5808 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5809 } else {
5810 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5811 }
5812 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k]
5813 / req_per_swath_ub;
5814 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
5815 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5816 if (BytePerPixelDETC[k] == 0) {
5817 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5818 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5819 } else {
5820 if (SourceScan[k] == dm_horz) {
5821 req_per_swath_ub = swath_width_chroma_ub[k]
5822 / BlockWidth256BytesC[k];
5823 } else {
5824 req_per_swath_ub = swath_width_chroma_ub[k]
5825 / BlockHeight256BytesC[k];
5826 }
5827 DisplayPipeRequestDeliveryTimeChroma[k] =
5828 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5829 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
5830 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5831 }
5832 }
5833 }
5834
CalculateMetaAndPTETimes(unsigned int NumberOfActivePlanes,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int GPUVMMaxPageTableLevels,unsigned int HTotal[],double VRatio[],double VRatioPrefetchY[],double VRatioPrefetchC[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],double DestinationLinesToRequestVMInVBlank[],double DestinationLinesToRequestVMInImmediateFlip[],bool DCCEnable[],double PixelClock[],double BytePerPixelDETY[],double BytePerPixelDETC[],enum scan_direction_class SourceScan[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_height[],unsigned int meta_req_width[],unsigned int meta_req_height[],int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double TimePerMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[],double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])5835 static void CalculateMetaAndPTETimes(
5836 unsigned int NumberOfActivePlanes,
5837 bool GPUVMEnable,
5838 unsigned int MetaChunkSize,
5839 unsigned int MinMetaChunkSizeBytes,
5840 unsigned int GPUVMMaxPageTableLevels,
5841 unsigned int HTotal[],
5842 double VRatio[],
5843 double VRatioPrefetchY[],
5844 double VRatioPrefetchC[],
5845 double DestinationLinesToRequestRowInVBlank[],
5846 double DestinationLinesToRequestRowInImmediateFlip[],
5847 double DestinationLinesToRequestVMInVBlank[],
5848 double DestinationLinesToRequestVMInImmediateFlip[],
5849 bool DCCEnable[],
5850 double PixelClock[],
5851 double BytePerPixelDETY[],
5852 double BytePerPixelDETC[],
5853 enum scan_direction_class SourceScan[],
5854 unsigned int dpte_row_height[],
5855 unsigned int dpte_row_height_chroma[],
5856 unsigned int meta_row_width[],
5857 unsigned int meta_row_height[],
5858 unsigned int meta_req_width[],
5859 unsigned int meta_req_height[],
5860 int dpte_group_bytes[],
5861 unsigned int PTERequestSizeY[],
5862 unsigned int PTERequestSizeC[],
5863 unsigned int PixelPTEReqWidthY[],
5864 unsigned int PixelPTEReqHeightY[],
5865 unsigned int PixelPTEReqWidthC[],
5866 unsigned int PixelPTEReqHeightC[],
5867 unsigned int dpte_row_width_luma_ub[],
5868 unsigned int dpte_row_width_chroma_ub[],
5869 unsigned int vm_group_bytes[],
5870 unsigned int dpde0_bytes_per_frame_ub_l[],
5871 unsigned int dpde0_bytes_per_frame_ub_c[],
5872 unsigned int meta_pte_bytes_per_frame_ub_l[],
5873 unsigned int meta_pte_bytes_per_frame_ub_c[],
5874 double DST_Y_PER_PTE_ROW_NOM_L[],
5875 double DST_Y_PER_PTE_ROW_NOM_C[],
5876 double DST_Y_PER_META_ROW_NOM_L[],
5877 double TimePerMetaChunkNominal[],
5878 double TimePerMetaChunkVBlank[],
5879 double TimePerMetaChunkFlip[],
5880 double time_per_pte_group_nom_luma[],
5881 double time_per_pte_group_vblank_luma[],
5882 double time_per_pte_group_flip_luma[],
5883 double time_per_pte_group_nom_chroma[],
5884 double time_per_pte_group_vblank_chroma[],
5885 double time_per_pte_group_flip_chroma[],
5886 double TimePerVMGroupVBlank[],
5887 double TimePerVMGroupFlip[],
5888 double TimePerVMRequestVBlank[],
5889 double TimePerVMRequestFlip[])
5890 {
5891 unsigned int meta_chunk_width;
5892 unsigned int min_meta_chunk_width;
5893 unsigned int meta_chunk_per_row_int;
5894 unsigned int meta_row_remainder;
5895 unsigned int meta_chunk_threshold;
5896 unsigned int meta_chunks_per_row_ub;
5897 unsigned int dpte_group_width_luma;
5898 unsigned int dpte_group_width_chroma;
5899 unsigned int dpte_groups_per_row_luma_ub;
5900 unsigned int dpte_groups_per_row_chroma_ub;
5901 unsigned int num_group_per_lower_vm_stage;
5902 unsigned int num_req_per_lower_vm_stage;
5903 unsigned int k;
5904
5905 for (k = 0; k < NumberOfActivePlanes; ++k) {
5906 if (GPUVMEnable == true) {
5907 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5908 if (BytePerPixelDETC[k] == 0) {
5909 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5910 } else {
5911 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / (VRatio[k] / 2);
5912 }
5913 } else {
5914 DST_Y_PER_PTE_ROW_NOM_L[k] = 0;
5915 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5916 }
5917 if (DCCEnable[k] == true) {
5918 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5919 } else {
5920 DST_Y_PER_META_ROW_NOM_L[k] = 0;
5921 }
5922 }
5923
5924 for (k = 0; k < NumberOfActivePlanes; ++k) {
5925 if (DCCEnable[k] == true) {
5926 meta_chunk_width = MetaChunkSize * 1024 * 256
5927 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5928 min_meta_chunk_width = MinMetaChunkSizeBytes * 256
5929 / dml_ceil(BytePerPixelDETY[k], 1) / meta_row_height[k];
5930 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5931 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5932 if (SourceScan[k] == dm_horz) {
5933 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5934 } else {
5935 meta_chunk_threshold = 2 * min_meta_chunk_width
5936 - meta_req_height[k];
5937 }
5938 if (meta_row_remainder <= meta_chunk_threshold) {
5939 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5940 } else {
5941 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5942 }
5943 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k]
5944 / PixelClock[k] / meta_chunks_per_row_ub;
5945 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k]
5946 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5947 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k]
5948 * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5949 } else {
5950 TimePerMetaChunkNominal[k] = 0;
5951 TimePerMetaChunkVBlank[k] = 0;
5952 TimePerMetaChunkFlip[k] = 0;
5953 }
5954 }
5955
5956 for (k = 0; k < NumberOfActivePlanes; ++k) {
5957 if (GPUVMEnable == true) {
5958 if (SourceScan[k] == dm_horz) {
5959 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5960 * PixelPTEReqWidthY[k];
5961 } else {
5962 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k]
5963 * PixelPTEReqHeightY[k];
5964 }
5965 dpte_groups_per_row_luma_ub = dml_ceil(
5966 (float) dpte_row_width_luma_ub[k] / dpte_group_width_luma,
5967 1);
5968 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k]
5969 / PixelClock[k] / dpte_groups_per_row_luma_ub;
5970 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k]
5971 * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5972 time_per_pte_group_flip_luma[k] =
5973 DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k]
5974 / PixelClock[k]
5975 / dpte_groups_per_row_luma_ub;
5976 if (BytePerPixelDETC[k] == 0) {
5977 time_per_pte_group_nom_chroma[k] = 0;
5978 time_per_pte_group_vblank_chroma[k] = 0;
5979 time_per_pte_group_flip_chroma[k] = 0;
5980 } else {
5981 if (SourceScan[k] == dm_horz) {
5982 dpte_group_width_chroma = dpte_group_bytes[k]
5983 / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5984 } else {
5985 dpte_group_width_chroma = dpte_group_bytes[k]
5986 / PTERequestSizeC[k]
5987 * PixelPTEReqHeightC[k];
5988 }
5989 dpte_groups_per_row_chroma_ub = dml_ceil(
5990 (float) dpte_row_width_chroma_ub[k]
5991 / dpte_group_width_chroma,
5992 1);
5993 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k]
5994 * HTotal[k] / PixelClock[k]
5995 / dpte_groups_per_row_chroma_ub;
5996 time_per_pte_group_vblank_chroma[k] =
5997 DestinationLinesToRequestRowInVBlank[k] * HTotal[k]
5998 / PixelClock[k]
5999 / dpte_groups_per_row_chroma_ub;
6000 time_per_pte_group_flip_chroma[k] =
6001 DestinationLinesToRequestRowInImmediateFlip[k]
6002 * HTotal[k] / PixelClock[k]
6003 / dpte_groups_per_row_chroma_ub;
6004 }
6005 } else {
6006 time_per_pte_group_nom_luma[k] = 0;
6007 time_per_pte_group_vblank_luma[k] = 0;
6008 time_per_pte_group_flip_luma[k] = 0;
6009 time_per_pte_group_nom_chroma[k] = 0;
6010 time_per_pte_group_vblank_chroma[k] = 0;
6011 time_per_pte_group_flip_chroma[k] = 0;
6012 }
6013 }
6014
6015 for (k = 0; k < NumberOfActivePlanes; ++k) {
6016 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
6017 if (DCCEnable[k] == false) {
6018 if (BytePerPixelDETC[k] > 0) {
6019 num_group_per_lower_vm_stage =
6020 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6021 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6022 } else {
6023 num_group_per_lower_vm_stage =
6024 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6025 }
6026 } else {
6027 if (GPUVMMaxPageTableLevels == 1) {
6028 if (BytePerPixelDETC[k] > 0) {
6029 num_group_per_lower_vm_stage =
6030 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6031 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6032 } else {
6033 num_group_per_lower_vm_stage =
6034 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6035 }
6036 } else {
6037 if (BytePerPixelDETC[k] > 0) {
6038 num_group_per_lower_vm_stage =
6039 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6040 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6041 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6042 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6043 } else {
6044 num_group_per_lower_vm_stage =
6045 dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6046 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6047 }
6048 }
6049 }
6050
6051 if (DCCEnable[k] == false) {
6052 if (BytePerPixelDETC[k] > 0) {
6053 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6054 / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6055 } else {
6056 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k]
6057 / 64;
6058 }
6059 } else {
6060 if (GPUVMMaxPageTableLevels == 1) {
6061 if (BytePerPixelDETC[k] > 0) {
6062 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64
6063 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6064 } else {
6065 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6066 }
6067 } else {
6068 if (BytePerPixelDETC[k] > 0) {
6069 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6070 + dpde0_bytes_per_frame_ub_c[k] / 64
6071 + meta_pte_bytes_per_frame_ub_l[k] / 64
6072 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6073 } else {
6074 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64
6075 + meta_pte_bytes_per_frame_ub_l[k] / 64;
6076 }
6077 }
6078 }
6079
6080 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k]
6081 / PixelClock[k] / num_group_per_lower_vm_stage;
6082 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6083 * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6084 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k]
6085 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6086 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k]
6087 * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6088
6089 if (GPUVMMaxPageTableLevels > 2) {
6090 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6091 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6092 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6093 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
6094 }
6095
6096 } else {
6097 TimePerVMGroupVBlank[k] = 0;
6098 TimePerVMGroupFlip[k] = 0;
6099 TimePerVMRequestVBlank[k] = 0;
6100 TimePerVMRequestFlip[k] = 0;
6101 }
6102 }
6103 }
6104
/*
 * Return the extra latency on top of UrgentRoundTripAndOutOfOrderLatency.
 *
 * The result is the sum of:
 *  - UrgentRoundTripAndOutOfOrderLatency;
 *  - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *     + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024 / ReturnBW;
 *  - only when GPUVMEnable is set, for each active plane:
 *    NumberOfDPP * dpte_group_bytes * (1 + 8 * dynamic host VM levels)
 *    * host VM inefficiency factor / ReturnBW.
 *
 * With both GPUVMEnable and HostVMEnable set, the inefficiency factor is the
 * ratio of the two PercentOfIdeal... arguments and the dynamic level count is
 * HostVMMaxPageTableLevels - HostVMCachedPageTableLevels; otherwise they are
 * 1 and 0.
 */
static double CalculateExtraLatency(
		double UrgentRoundTripAndOutOfOrderLatency,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		int HostVMMaxPageTableLevels,
		int HostVMCachedPageTableLevels)
{
	double latency;
	double vm_inefficiency = 1;
	int dynamic_levels = 0;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		vm_inefficiency =
				PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		dynamic_levels = HostVMMaxPageTableLevels - HostVMCachedPageTableLevels;
	}

	/* Pixel and meta chunk data returned at ReturnBW. */
	latency = UrgentRoundTripAndOutOfOrderLatency
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
					+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
					/ ReturnBW;

	if (!GPUVMEnable) {
		return latency;
	}

	/* Per-plane PTE group contribution, only counted with GPUVM enabled. */
	for (plane = 0; plane < NumberOfActivePlanes; plane++) {
		latency += NumberOfDPP[plane] * dpte_group_bytes[plane]
				* (1 + 8 * dynamic_levels)
				* vm_inefficiency / ReturnBW;
	}

	return latency;
}
6153
6154