1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #define UNIT_TEST 0
28 #if !UNIT_TEST
29 #include "dc.h"
30 #include "dc_link.h"
31 #endif
32 #include "../display_mode_lib.h"
33 #include "display_mode_vba_314.h"
34 #include "../dml_inline_defs.h"
35
36 /*
37 * NOTE:
38 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
39 *
40 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
41 * ways. Unless there is something clearly wrong with it the code should
42 * remain as-is as it provides us with a guarantee from HW that it is correct.
43 */
44
45 #define BPP_INVALID 0
46 #define BPP_BLENDED_PIPE 0xffffffff
47 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184
48 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
49
// For DML-C changes that haven't been propagated to VBA yet
51 //#define __DML_VBA_ALLOW_DELTA__
52
53 // Move these to ip parameters/constant
54
// The vstartup value at which DML starts trying to see if the mode can be supported
56 #define __DML_VBA_MIN_VSTARTUP__ 9
57
58 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
59 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
60
// fudge factor for min dcfclk calculation
62 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
63
/*
 * Bundle of per-pipe parameters. CalculatePrefetchSchedule() receives a
 * pointer to one of these (its myPipe argument) instead of taking each value
 * as a separate parameter.
 */
typedef struct {
	/*
	 * The clock fields below, together with HTotal, VBlank, InterlaceEnable
	 * and ProgressiveToInterlaceUnitInOPP, are forwarded by
	 * CalculatePrefetchSchedule() to
	 * CalculateVupdateAndDynamicMetadataParameters().
	 */
	double DPPCLK;
	double DISPCLK;
	double PixelClock;	// divisor of HTotal when CalculatePrefetchSchedule() derives LineTime
	double DCFCLKDeepSleep;
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;	// LineTime = HTotal / PixelClock in CalculatePrefetchSchedule()
	unsigned int DCCEnable;
	bool ODMCombineIsEnabled;
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
89
// NOTE(review): these two macros repeat, with identical values, the
// BPP_INVALID and BPP_BLENDED_PIPE definitions that appear earlier in this
// file, ahead of the Pipe typedef.
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
92
93 static bool CalculateBytePerPixelAnd256BBlockSizes(
94 enum source_format_class SourcePixelFormat,
95 enum dm_swizzle_mode SurfaceTiling,
96 unsigned int *BytePerPixelY,
97 unsigned int *BytePerPixelC,
98 double *BytePerPixelDETY,
99 double *BytePerPixelDETC,
100 unsigned int *BlockHeight256BytesY,
101 unsigned int *BlockHeight256BytesC,
102 unsigned int *BlockWidth256BytesY,
103 unsigned int *BlockWidth256BytesC);
104 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
105 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
106 static unsigned int dscceComputeDelay(
107 unsigned int bpc,
108 double BPP,
109 unsigned int sliceWidth,
110 unsigned int numSlices,
111 enum output_format_class pixelFormat,
112 enum output_encoder_class Output);
113 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
114 static bool CalculatePrefetchSchedule(
115 struct display_mode_lib *mode_lib,
116 double HostVMInefficiencyFactor,
117 Pipe *myPipe,
118 unsigned int DSCDelay,
119 double DPPCLKDelaySubtotalPlusCNVCFormater,
120 double DPPCLKDelaySCL,
121 double DPPCLKDelaySCLLBOnly,
122 double DPPCLKDelayCNVCCursor,
123 double DISPCLKDelaySubtotal,
124 unsigned int DPP_RECOUT_WIDTH,
125 enum output_format_class OutputFormat,
126 unsigned int MaxInterDCNTileRepeaters,
127 unsigned int VStartup,
128 unsigned int MaxVStartup,
129 unsigned int GPUVMPageTableLevels,
130 bool GPUVMEnable,
131 bool HostVMEnable,
132 unsigned int HostVMMaxNonCachedPageTableLevels,
133 double HostVMMinPageSize,
134 bool DynamicMetadataEnable,
135 bool DynamicMetadataVMEnabled,
136 int DynamicMetadataLinesBeforeActiveRequired,
137 unsigned int DynamicMetadataTransmittedBytes,
138 double UrgentLatency,
139 double UrgentExtraLatency,
140 double TCalc,
141 unsigned int PDEAndMetaPTEBytesFrame,
142 unsigned int MetaRowByte,
143 unsigned int PixelPTEBytesPerRow,
144 double PrefetchSourceLinesY,
145 unsigned int SwathWidthY,
146 double VInitPreFillY,
147 unsigned int MaxNumSwathY,
148 double PrefetchSourceLinesC,
149 unsigned int SwathWidthC,
150 double VInitPreFillC,
151 unsigned int MaxNumSwathC,
152 int swath_width_luma_ub,
153 int swath_width_chroma_ub,
154 unsigned int SwathHeightY,
155 unsigned int SwathHeightC,
156 double TWait,
157 double *DSTXAfterScaler,
158 double *DSTYAfterScaler,
159 double *DestinationLinesForPrefetch,
160 double *PrefetchBandwidth,
161 double *DestinationLinesToRequestVMInVBlank,
162 double *DestinationLinesToRequestRowInVBlank,
163 double *VRatioPrefetchY,
164 double *VRatioPrefetchC,
165 double *RequiredPrefetchPixDataBWLuma,
166 double *RequiredPrefetchPixDataBWChroma,
167 bool *NotEnoughTimeForDynamicMetadata,
168 double *Tno_bw,
169 double *prefetch_vmrow_bw,
170 double *Tdmdl_vm,
171 double *Tdmdl,
172 double *TSetup,
173 int *VUpdateOffsetPix,
174 double *VUpdateWidthPix,
175 double *VReadyOffsetPix);
176 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
177 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
178 static void CalculateDCCConfiguration(
179 bool DCCEnabled,
180 bool DCCProgrammingAssumesScanDirectionUnknown,
181 enum source_format_class SourcePixelFormat,
182 unsigned int SurfaceWidthLuma,
183 unsigned int SurfaceWidthChroma,
184 unsigned int SurfaceHeightLuma,
185 unsigned int SurfaceHeightChroma,
186 double DETBufferSize,
187 unsigned int RequestHeight256ByteLuma,
188 unsigned int RequestHeight256ByteChroma,
189 enum dm_swizzle_mode TilingFormat,
190 unsigned int BytePerPixelY,
191 unsigned int BytePerPixelC,
192 double BytePerPixelDETY,
193 double BytePerPixelDETC,
194 enum scan_direction_class ScanOrientation,
195 unsigned int *MaxUncompressedBlockLuma,
196 unsigned int *MaxUncompressedBlockChroma,
197 unsigned int *MaxCompressedBlockLuma,
198 unsigned int *MaxCompressedBlockChroma,
199 unsigned int *IndependentBlockLuma,
200 unsigned int *IndependentBlockChroma);
201 static double CalculatePrefetchSourceLines(
202 struct display_mode_lib *mode_lib,
203 double VRatio,
204 double vtaps,
205 bool Interlace,
206 bool ProgressiveToInterlaceUnitInOPP,
207 unsigned int SwathHeight,
208 unsigned int ViewportYStart,
209 double *VInitPreFill,
210 unsigned int *MaxNumSwath);
211 static unsigned int CalculateVMAndRowBytes(
212 struct display_mode_lib *mode_lib,
213 bool DCCEnable,
214 unsigned int BlockHeight256Bytes,
215 unsigned int BlockWidth256Bytes,
216 enum source_format_class SourcePixelFormat,
217 unsigned int SurfaceTiling,
218 unsigned int BytePerPixel,
219 enum scan_direction_class ScanDirection,
220 unsigned int SwathWidth,
221 unsigned int ViewportHeight,
222 bool GPUVMEnable,
223 bool HostVMEnable,
224 unsigned int HostVMMaxNonCachedPageTableLevels,
225 unsigned int GPUVMMinPageSize,
226 unsigned int HostVMMinPageSize,
227 unsigned int PTEBufferSizeInRequests,
228 unsigned int Pitch,
229 unsigned int DCCMetaPitch,
230 unsigned int *MacroTileWidth,
231 unsigned int *MetaRowByte,
232 unsigned int *PixelPTEBytesPerRow,
233 bool *PTEBufferSizeNotExceeded,
234 int *dpte_row_width_ub,
235 unsigned int *dpte_row_height,
236 unsigned int *MetaRequestWidth,
237 unsigned int *MetaRequestHeight,
238 unsigned int *meta_row_width,
239 unsigned int *meta_row_height,
240 int *vm_group_bytes,
241 unsigned int *dpte_group_bytes,
242 unsigned int *PixelPTEReqWidth,
243 unsigned int *PixelPTEReqHeight,
244 unsigned int *PTERequestSize,
245 int *DPDE0BytesFrame,
246 int *MetaPTEBytesFrame);
247 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
248 static void CalculateRowBandwidth(
249 bool GPUVMEnable,
250 enum source_format_class SourcePixelFormat,
251 double VRatio,
252 double VRatioChroma,
253 bool DCCEnable,
254 double LineTime,
255 unsigned int MetaRowByteLuma,
256 unsigned int MetaRowByteChroma,
257 unsigned int meta_row_height_luma,
258 unsigned int meta_row_height_chroma,
259 unsigned int PixelPTEBytesPerRowLuma,
260 unsigned int PixelPTEBytesPerRowChroma,
261 unsigned int dpte_row_height_luma,
262 unsigned int dpte_row_height_chroma,
263 double *meta_row_bw,
264 double *dpte_row_bw);
265
266 static void CalculateFlipSchedule(
267 struct display_mode_lib *mode_lib,
268 unsigned int k,
269 double HostVMInefficiencyFactor,
270 double UrgentExtraLatency,
271 double UrgentLatency,
272 double PDEAndMetaPTEBytesPerFrame,
273 double MetaRowBytes,
274 double DPTEBytesPerRow);
275 static double CalculateWriteBackDelay(
276 enum source_format_class WritebackPixelFormat,
277 double WritebackHRatio,
278 double WritebackVRatio,
279 unsigned int WritebackVTaps,
280 int WritebackDestinationWidth,
281 int WritebackDestinationHeight,
282 int WritebackSourceHeight,
283 unsigned int HTotal);
284
285 static void CalculateVupdateAndDynamicMetadataParameters(
286 int MaxInterDCNTileRepeaters,
287 double DPPCLK,
288 double DISPCLK,
289 double DCFClkDeepSleep,
290 double PixelClock,
291 int HTotal,
292 int VBlank,
293 int DynamicMetadataTransmittedBytes,
294 int DynamicMetadataLinesBeforeActiveRequired,
295 int InterlaceEnable,
296 bool ProgressiveToInterlaceUnitInOPP,
297 double *TSetup,
298 double *Tdmbf,
299 double *Tdmec,
300 double *Tdmsks,
301 int *VUpdateOffsetPix,
302 double *VUpdateWidthPix,
303 double *VReadyOffsetPix);
304
305 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
306 struct display_mode_lib *mode_lib,
307 unsigned int PrefetchMode,
308 double DCFCLK,
309 double ReturnBW,
310 double UrgentLatency,
311 double ExtraLatency,
312 double SOCCLK,
313 double DCFCLKDeepSleep,
314 unsigned int DETBufferSizeY[],
315 unsigned int DETBufferSizeC[],
316 unsigned int SwathHeightY[],
317 unsigned int SwathHeightC[],
318 double SwathWidthY[],
319 double SwathWidthC[],
320 unsigned int DPPPerPlane[],
321 double BytePerPixelDETY[],
322 double BytePerPixelDETC[],
323 bool UnboundedRequestEnabled,
324 unsigned int CompressedBufferSizeInkByte,
325 enum clock_change_support *DRAMClockChangeSupport,
326 double *StutterExitWatermark,
327 double *StutterEnterPlusExitWatermark,
328 double *Z8StutterExitWatermark,
329 double *Z8StutterEnterPlusExitWatermark);
330
331 static void CalculateDCFCLKDeepSleep(
332 struct display_mode_lib *mode_lib,
333 unsigned int NumberOfActivePlanes,
334 int BytePerPixelY[],
335 int BytePerPixelC[],
336 double VRatio[],
337 double VRatioChroma[],
338 double SwathWidthY[],
339 double SwathWidthC[],
340 unsigned int DPPPerPlane[],
341 double HRatio[],
342 double HRatioChroma[],
343 double PixelClock[],
344 double PSCL_THROUGHPUT[],
345 double PSCL_THROUGHPUT_CHROMA[],
346 double DPPCLK[],
347 double ReadBandwidthLuma[],
348 double ReadBandwidthChroma[],
349 int ReturnBusWidth,
350 double *DCFCLKDeepSleep);
351
352 static void CalculateUrgentBurstFactor(
353 int swath_width_luma_ub,
354 int swath_width_chroma_ub,
355 unsigned int SwathHeightY,
356 unsigned int SwathHeightC,
357 double LineTime,
358 double UrgentLatency,
359 double CursorBufferSize,
360 unsigned int CursorWidth,
361 unsigned int CursorBPP,
362 double VRatio,
363 double VRatioC,
364 double BytePerPixelInDETY,
365 double BytePerPixelInDETC,
366 double DETBufferSizeY,
367 double DETBufferSizeC,
368 double *UrgentBurstFactorCursor,
369 double *UrgentBurstFactorLuma,
370 double *UrgentBurstFactorChroma,
371 bool *NotEnoughUrgentLatencyHiding);
372
373 static void UseMinimumDCFCLK(
374 struct display_mode_lib *mode_lib,
375 int MaxPrefetchMode,
376 int ReorderingBytes);
377
378 static void CalculatePixelDeliveryTimes(
379 unsigned int NumberOfActivePlanes,
380 double VRatio[],
381 double VRatioChroma[],
382 double VRatioPrefetchY[],
383 double VRatioPrefetchC[],
384 unsigned int swath_width_luma_ub[],
385 unsigned int swath_width_chroma_ub[],
386 unsigned int DPPPerPlane[],
387 double HRatio[],
388 double HRatioChroma[],
389 double PixelClock[],
390 double PSCL_THROUGHPUT[],
391 double PSCL_THROUGHPUT_CHROMA[],
392 double DPPCLK[],
393 int BytePerPixelC[],
394 enum scan_direction_class SourceScan[],
395 unsigned int NumberOfCursors[],
396 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
397 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
398 unsigned int BlockWidth256BytesY[],
399 unsigned int BlockHeight256BytesY[],
400 unsigned int BlockWidth256BytesC[],
401 unsigned int BlockHeight256BytesC[],
402 double DisplayPipeLineDeliveryTimeLuma[],
403 double DisplayPipeLineDeliveryTimeChroma[],
404 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
405 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
406 double DisplayPipeRequestDeliveryTimeLuma[],
407 double DisplayPipeRequestDeliveryTimeChroma[],
408 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
409 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
410 double CursorRequestDeliveryTime[],
411 double CursorRequestDeliveryTimePrefetch[]);
412
413 static void CalculateMetaAndPTETimes(
414 int NumberOfActivePlanes,
415 bool GPUVMEnable,
416 int MetaChunkSize,
417 int MinMetaChunkSizeBytes,
418 int HTotal[],
419 double VRatio[],
420 double VRatioChroma[],
421 double DestinationLinesToRequestRowInVBlank[],
422 double DestinationLinesToRequestRowInImmediateFlip[],
423 bool DCCEnable[],
424 double PixelClock[],
425 int BytePerPixelY[],
426 int BytePerPixelC[],
427 enum scan_direction_class SourceScan[],
428 int dpte_row_height[],
429 int dpte_row_height_chroma[],
430 int meta_row_width[],
431 int meta_row_width_chroma[],
432 int meta_row_height[],
433 int meta_row_height_chroma[],
434 int meta_req_width[],
435 int meta_req_width_chroma[],
436 int meta_req_height[],
437 int meta_req_height_chroma[],
438 int dpte_group_bytes[],
439 int PTERequestSizeY[],
440 int PTERequestSizeC[],
441 int PixelPTEReqWidthY[],
442 int PixelPTEReqHeightY[],
443 int PixelPTEReqWidthC[],
444 int PixelPTEReqHeightC[],
445 int dpte_row_width_luma_ub[],
446 int dpte_row_width_chroma_ub[],
447 double DST_Y_PER_PTE_ROW_NOM_L[],
448 double DST_Y_PER_PTE_ROW_NOM_C[],
449 double DST_Y_PER_META_ROW_NOM_L[],
450 double DST_Y_PER_META_ROW_NOM_C[],
451 double TimePerMetaChunkNominal[],
452 double TimePerChromaMetaChunkNominal[],
453 double TimePerMetaChunkVBlank[],
454 double TimePerChromaMetaChunkVBlank[],
455 double TimePerMetaChunkFlip[],
456 double TimePerChromaMetaChunkFlip[],
457 double time_per_pte_group_nom_luma[],
458 double time_per_pte_group_vblank_luma[],
459 double time_per_pte_group_flip_luma[],
460 double time_per_pte_group_nom_chroma[],
461 double time_per_pte_group_vblank_chroma[],
462 double time_per_pte_group_flip_chroma[]);
463
464 static void CalculateVMGroupAndRequestTimes(
465 unsigned int NumberOfActivePlanes,
466 bool GPUVMEnable,
467 unsigned int GPUVMMaxPageTableLevels,
468 unsigned int HTotal[],
469 int BytePerPixelC[],
470 double DestinationLinesToRequestVMInVBlank[],
471 double DestinationLinesToRequestVMInImmediateFlip[],
472 bool DCCEnable[],
473 double PixelClock[],
474 int dpte_row_width_luma_ub[],
475 int dpte_row_width_chroma_ub[],
476 int vm_group_bytes[],
477 unsigned int dpde0_bytes_per_frame_ub_l[],
478 unsigned int dpde0_bytes_per_frame_ub_c[],
479 int meta_pte_bytes_per_frame_ub_l[],
480 int meta_pte_bytes_per_frame_ub_c[],
481 double TimePerVMGroupVBlank[],
482 double TimePerVMGroupFlip[],
483 double TimePerVMRequestVBlank[],
484 double TimePerVMRequestFlip[]);
485
486 static void CalculateStutterEfficiency(
487 struct display_mode_lib *mode_lib,
488 int CompressedBufferSizeInkByte,
489 bool UnboundedRequestEnabled,
490 int ConfigReturnBufferSizeInKByte,
491 int MetaFIFOSizeInKEntries,
492 int ZeroSizeBufferEntries,
493 int NumberOfActivePlanes,
494 int ROBBufferSizeInKByte,
495 double TotalDataReadBandwidth,
496 double DCFCLK,
497 double ReturnBW,
498 double COMPBUF_RESERVED_SPACE_64B,
499 double COMPBUF_RESERVED_SPACE_ZS,
500 double SRExitTime,
501 double SRExitZ8Time,
502 bool SynchronizedVBlank,
503 double Z8StutterEnterPlusExitWatermark,
504 double StutterEnterPlusExitWatermark,
505 bool ProgressiveToInterlaceUnitInOPP,
506 bool Interlace[],
507 double MinTTUVBlank[],
508 int DPPPerPlane[],
509 unsigned int DETBufferSizeY[],
510 int BytePerPixelY[],
511 double BytePerPixelDETY[],
512 double SwathWidthY[],
513 int SwathHeightY[],
514 int SwathHeightC[],
515 double NetDCCRateLuma[],
516 double NetDCCRateChroma[],
517 double DCCFractionOfZeroSizeRequestsLuma[],
518 double DCCFractionOfZeroSizeRequestsChroma[],
519 int HTotal[],
520 int VTotal[],
521 double PixelClock[],
522 double VRatio[],
523 enum scan_direction_class SourceScan[],
524 int BlockHeight256BytesY[],
525 int BlockWidth256BytesY[],
526 int BlockHeight256BytesC[],
527 int BlockWidth256BytesC[],
528 int DCCYMaxUncompressedBlock[],
529 int DCCCMaxUncompressedBlock[],
530 int VActive[],
531 bool DCCEnable[],
532 bool WritebackEnable[],
533 double ReadBandwidthPlaneLuma[],
534 double ReadBandwidthPlaneChroma[],
535 double meta_row_bw[],
536 double dpte_row_bw[],
537 double *StutterEfficiencyNotIncludingVBlank,
538 double *StutterEfficiency,
539 int *NumberOfStutterBurstsPerFrame,
540 double *Z8StutterEfficiencyNotIncludingVBlank,
541 double *Z8StutterEfficiency,
542 int *Z8NumberOfStutterBurstsPerFrame,
543 double *StutterPeriod);
544
545 static void CalculateSwathAndDETConfiguration(
546 bool ForceSingleDPP,
547 int NumberOfActivePlanes,
548 unsigned int DETBufferSizeInKByte,
549 double MaximumSwathWidthLuma[],
550 double MaximumSwathWidthChroma[],
551 enum scan_direction_class SourceScan[],
552 enum source_format_class SourcePixelFormat[],
553 enum dm_swizzle_mode SurfaceTiling[],
554 int ViewportWidth[],
555 int ViewportHeight[],
556 int SurfaceWidthY[],
557 int SurfaceWidthC[],
558 int SurfaceHeightY[],
559 int SurfaceHeightC[],
560 int Read256BytesBlockHeightY[],
561 int Read256BytesBlockHeightC[],
562 int Read256BytesBlockWidthY[],
563 int Read256BytesBlockWidthC[],
564 enum odm_combine_mode ODMCombineEnabled[],
565 int BlendingAndTiming[],
566 int BytePerPixY[],
567 int BytePerPixC[],
568 double BytePerPixDETY[],
569 double BytePerPixDETC[],
570 int HActive[],
571 double HRatio[],
572 double HRatioChroma[],
573 int DPPPerPlane[],
574 int swath_width_luma_ub[],
575 int swath_width_chroma_ub[],
576 double SwathWidth[],
577 double SwathWidthChroma[],
578 int SwathHeightY[],
579 int SwathHeightC[],
580 unsigned int DETBufferSizeY[],
581 unsigned int DETBufferSizeC[],
582 bool ViewportSizeSupportPerPlane[],
583 bool *ViewportSizeSupport);
584 static void CalculateSwathWidth(
585 bool ForceSingleDPP,
586 int NumberOfActivePlanes,
587 enum source_format_class SourcePixelFormat[],
588 enum scan_direction_class SourceScan[],
589 int ViewportWidth[],
590 int ViewportHeight[],
591 int SurfaceWidthY[],
592 int SurfaceWidthC[],
593 int SurfaceHeightY[],
594 int SurfaceHeightC[],
595 enum odm_combine_mode ODMCombineEnabled[],
596 int BytePerPixY[],
597 int BytePerPixC[],
598 int Read256BytesBlockHeightY[],
599 int Read256BytesBlockHeightC[],
600 int Read256BytesBlockWidthY[],
601 int Read256BytesBlockWidthC[],
602 int BlendingAndTiming[],
603 int HActive[],
604 double HRatio[],
605 int DPPPerPlane[],
606 double SwathWidthSingleDPPY[],
607 double SwathWidthSingleDPPC[],
608 double SwathWidthY[],
609 double SwathWidthC[],
610 int MaximumSwathHeightY[],
611 int MaximumSwathHeightC[],
612 int swath_width_luma_ub[],
613 int swath_width_chroma_ub[]);
614
615 static double CalculateExtraLatency(
616 int RoundTripPingLatencyCycles,
617 int ReorderingBytes,
618 double DCFCLK,
619 int TotalNumberOfActiveDPP,
620 int PixelChunkSizeInKByte,
621 int TotalNumberOfDCCActiveDPP,
622 int MetaChunkSize,
623 double ReturnBW,
624 bool GPUVMEnable,
625 bool HostVMEnable,
626 int NumberOfActivePlanes,
627 int NumberOfDPP[],
628 int dpte_group_bytes[],
629 double HostVMInefficiencyFactor,
630 double HostVMMinPageSize,
631 int HostVMMaxNonCachedPageTableLevels);
632
633 static double CalculateExtraLatencyBytes(
634 int ReorderingBytes,
635 int TotalNumberOfActiveDPP,
636 int PixelChunkSizeInKByte,
637 int TotalNumberOfDCCActiveDPP,
638 int MetaChunkSize,
639 bool GPUVMEnable,
640 bool HostVMEnable,
641 int NumberOfActivePlanes,
642 int NumberOfDPP[],
643 int dpte_group_bytes[],
644 double HostVMInefficiencyFactor,
645 double HostVMMinPageSize,
646 int HostVMMaxNonCachedPageTableLevels);
647
648 static double CalculateUrgentLatency(
649 double UrgentLatencyPixelDataOnly,
650 double UrgentLatencyPixelMixedWithVMData,
651 double UrgentLatencyVMDataOnly,
652 bool DoUrgentLatencyAdjustment,
653 double UrgentLatencyAdjustmentFabricClockComponent,
654 double UrgentLatencyAdjustmentFabricClockReference,
655 double FabricClockSingle);
656
657 static void CalculateUnboundedRequestAndCompressedBufferSize(
658 unsigned int DETBufferSizeInKByte,
659 int ConfigReturnBufferSizeInKByte,
660 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
661 int TotalActiveDPP,
662 bool NoChromaPlanes,
663 int MaxNumDPP,
664 int CompressedBufferSegmentSizeInkByteFinal,
665 enum output_encoder_class *Output,
666 bool *UnboundedRequestEnabled,
667 int *CompressedBufferSizeInkByte);
668
669 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
670 static unsigned int CalculateMaxVStartup(
671 unsigned int VTotal,
672 unsigned int VActive,
673 unsigned int VBlankNom,
674 unsigned int HTotal,
675 double PixelClock,
676 bool ProgressiveTointerlaceUnitinOPP,
677 bool Interlace,
678 unsigned int VBlankNomDefaultUS,
679 double WritebackDelayTime);
680
/*
 * Recalculation entry point for this DML revision.
 *
 * Invokes, in this fixed order and all on the same mode_lib:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined a trace line is printed before step 4.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
691
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)692 static unsigned int dscceComputeDelay(
693 unsigned int bpc,
694 double BPP,
695 unsigned int sliceWidth,
696 unsigned int numSlices,
697 enum output_format_class pixelFormat,
698 enum output_encoder_class Output)
699 {
700 // valid bpc = source bits per component in the set of {8, 10, 12}
701 // valid bpp = increments of 1/16 of a bit
702 // min = 6/7/8 in N420/N422/444, respectively
703 // max = such that compression is 1:1
704 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
705 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
706 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
707
708 // fixed value
709 unsigned int rcModelSize = 8192;
710
711 // N422/N420 operate at 2 pixels per clock
712 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
713
714 if (pixelFormat == dm_420)
715 pixelsPerClock = 2;
716 else if (pixelFormat == dm_444)
717 pixelsPerClock = 1;
718 else if (pixelFormat == dm_n422)
719 pixelsPerClock = 2;
720 // #all other modes operate at 1 pixel per clock
721 else
722 pixelsPerClock = 1;
723
724 //initial transmit delay as per PPS
725 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
726
727 //compute ssm delay
728 if (bpc == 8)
729 D = 81;
730 else if (bpc == 10)
731 D = 89;
732 else
733 D = 113;
734
735 //divide by pixel per cycle to compute slice width as seen by DSC
736 w = sliceWidth / pixelsPerClock;
737
738 //422 mode has an additional cycle of delay
739 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
740 s = 0;
741 else
742 s = 1;
743
744 //main calculation for the dscce
745 ix = initalXmitDelay + 45;
746 wx = (w + 2) / 3;
747 P = 3 * wx - w;
748 l0 = ix / w;
749 a = ix + P * l0;
750 ax = (a + 2) / 3 + D + 6 + 1;
751 L = (ax + wx - 1) / wx;
752 if ((ix % w) == 0 && P != 0)
753 lstall = 1;
754 else
755 lstall = 0;
756 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
757
758 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
759 pixels = Delay * 3 * pixelsPerClock;
760 return pixels;
761 }
762
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)763 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
764 {
765 unsigned int Delay = 0;
766
767 if (pixelFormat == dm_420) {
768 // sfr
769 Delay = Delay + 2;
770 // dsccif
771 Delay = Delay + 0;
772 // dscc - input deserializer
773 Delay = Delay + 3;
774 // dscc gets pixels every other cycle
775 Delay = Delay + 2;
776 // dscc - input cdc fifo
777 Delay = Delay + 12;
778 // dscc gets pixels every other cycle
779 Delay = Delay + 13;
780 // dscc - cdc uncertainty
781 Delay = Delay + 2;
782 // dscc - output cdc fifo
783 Delay = Delay + 7;
784 // dscc gets pixels every other cycle
785 Delay = Delay + 3;
786 // dscc - cdc uncertainty
787 Delay = Delay + 2;
788 // dscc - output serializer
789 Delay = Delay + 1;
790 // sft
791 Delay = Delay + 1;
792 } else if (pixelFormat == dm_n422) {
793 // sfr
794 Delay = Delay + 2;
795 // dsccif
796 Delay = Delay + 1;
797 // dscc - input deserializer
798 Delay = Delay + 5;
799 // dscc - input cdc fifo
800 Delay = Delay + 25;
801 // dscc - cdc uncertainty
802 Delay = Delay + 2;
803 // dscc - output cdc fifo
804 Delay = Delay + 10;
805 // dscc - cdc uncertainty
806 Delay = Delay + 2;
807 // dscc - output serializer
808 Delay = Delay + 1;
809 // sft
810 Delay = Delay + 1;
811 } else {
812 // sfr
813 Delay = Delay + 2;
814 // dsccif
815 Delay = Delay + 0;
816 // dscc - input deserializer
817 Delay = Delay + 3;
818 // dscc - input cdc fifo
819 Delay = Delay + 12;
820 // dscc - cdc uncertainty
821 Delay = Delay + 2;
822 // dscc - output cdc fifo
823 Delay = Delay + 7;
824 // dscc - output serializer
825 Delay = Delay + 1;
826 // dscc - cdc uncertainty
827 Delay = Delay + 2;
828 // sft
829 Delay = Delay + 1;
830 }
831
832 return Delay;
833 }
834
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)835 static bool CalculatePrefetchSchedule(
836 struct display_mode_lib *mode_lib,
837 double HostVMInefficiencyFactor,
838 Pipe *myPipe,
839 unsigned int DSCDelay,
840 double DPPCLKDelaySubtotalPlusCNVCFormater,
841 double DPPCLKDelaySCL,
842 double DPPCLKDelaySCLLBOnly,
843 double DPPCLKDelayCNVCCursor,
844 double DISPCLKDelaySubtotal,
845 unsigned int DPP_RECOUT_WIDTH,
846 enum output_format_class OutputFormat,
847 unsigned int MaxInterDCNTileRepeaters,
848 unsigned int VStartup,
849 unsigned int MaxVStartup,
850 unsigned int GPUVMPageTableLevels,
851 bool GPUVMEnable,
852 bool HostVMEnable,
853 unsigned int HostVMMaxNonCachedPageTableLevels,
854 double HostVMMinPageSize,
855 bool DynamicMetadataEnable,
856 bool DynamicMetadataVMEnabled,
857 int DynamicMetadataLinesBeforeActiveRequired,
858 unsigned int DynamicMetadataTransmittedBytes,
859 double UrgentLatency,
860 double UrgentExtraLatency,
861 double TCalc,
862 unsigned int PDEAndMetaPTEBytesFrame,
863 unsigned int MetaRowByte,
864 unsigned int PixelPTEBytesPerRow,
865 double PrefetchSourceLinesY,
866 unsigned int SwathWidthY,
867 double VInitPreFillY,
868 unsigned int MaxNumSwathY,
869 double PrefetchSourceLinesC,
870 unsigned int SwathWidthC,
871 double VInitPreFillC,
872 unsigned int MaxNumSwathC,
873 int swath_width_luma_ub,
874 int swath_width_chroma_ub,
875 unsigned int SwathHeightY,
876 unsigned int SwathHeightC,
877 double TWait,
878 double *DSTXAfterScaler,
879 double *DSTYAfterScaler,
880 double *DestinationLinesForPrefetch,
881 double *PrefetchBandwidth,
882 double *DestinationLinesToRequestVMInVBlank,
883 double *DestinationLinesToRequestRowInVBlank,
884 double *VRatioPrefetchY,
885 double *VRatioPrefetchC,
886 double *RequiredPrefetchPixDataBWLuma,
887 double *RequiredPrefetchPixDataBWChroma,
888 bool *NotEnoughTimeForDynamicMetadata,
889 double *Tno_bw,
890 double *prefetch_vmrow_bw,
891 double *Tdmdl_vm,
892 double *Tdmdl,
893 double *TSetup,
894 int *VUpdateOffsetPix,
895 double *VUpdateWidthPix,
896 double *VReadyOffsetPix)
897 {
898 bool MyError = false;
899 unsigned int DPPCycles, DISPCLKCycles;
900 double DSTTotalPixelsAfterScaler;
901 double LineTime;
902 double dst_y_prefetch_equ;
903 double Tsw_oto;
904 double prefetch_bw_oto;
905 double prefetch_bw_pr;
906 double Tvm_oto;
907 double Tr0_oto;
908 double Tvm_oto_lines;
909 double Tr0_oto_lines;
910 double dst_y_prefetch_oto;
911 double TimeForFetchingMetaPTE = 0;
912 double TimeForFetchingRowInVBlank = 0;
913 double LinesToRequestPrefetchPixelData = 0;
914 unsigned int HostVMDynamicLevelsTrips;
915 double trip_to_mem;
916 double Tvm_trips;
917 double Tr0_trips;
918 double Tvm_trips_rounded;
919 double Tr0_trips_rounded;
920 double Lsw_oto;
921 double Tpre_rounded;
922 double prefetch_bw_equ;
923 double Tvm_equ;
924 double Tr0_equ;
925 double Tdmbf;
926 double Tdmec;
927 double Tdmsks;
928 double prefetch_sw_bytes;
929 double bytes_pp;
930 double dep_bytes;
931 int max_vratio_pre = 4;
932 double min_Lsw;
933 double Tsw_est1 = 0;
934 double Tsw_est3 = 0;
935 double max_Tsw = 0;
936
937 if (GPUVMEnable == true && HostVMEnable == true) {
938 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
939 } else {
940 HostVMDynamicLevelsTrips = 0;
941 }
942 #ifdef __DML_VBA_DEBUG__
943 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
944 #endif
945 CalculateVupdateAndDynamicMetadataParameters(
946 MaxInterDCNTileRepeaters,
947 myPipe->DPPCLK,
948 myPipe->DISPCLK,
949 myPipe->DCFCLKDeepSleep,
950 myPipe->PixelClock,
951 myPipe->HTotal,
952 myPipe->VBlank,
953 DynamicMetadataTransmittedBytes,
954 DynamicMetadataLinesBeforeActiveRequired,
955 myPipe->InterlaceEnable,
956 myPipe->ProgressiveToInterlaceUnitInOPP,
957 TSetup,
958 &Tdmbf,
959 &Tdmec,
960 &Tdmsks,
961 VUpdateOffsetPix,
962 VUpdateWidthPix,
963 VReadyOffsetPix);
964
965 LineTime = myPipe->HTotal / myPipe->PixelClock;
966 trip_to_mem = UrgentLatency;
967 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
968
969 #ifdef __DML_VBA_ALLOW_DELTA__
970 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
971 #else
972 if (DynamicMetadataVMEnabled == true) {
973 #endif
974 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
975 } else {
976 *Tdmdl = TWait + UrgentExtraLatency;
977 }
978
979 #ifdef __DML_VBA_ALLOW_DELTA__
980 if (DynamicMetadataEnable == false) {
981 *Tdmdl = 0.0;
982 }
983 #endif
984
985 if (DynamicMetadataEnable == true) {
986 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
987 *NotEnoughTimeForDynamicMetadata = true;
988 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
989 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
990 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
991 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
992 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
993 } else {
994 *NotEnoughTimeForDynamicMetadata = false;
995 }
996 } else {
997 *NotEnoughTimeForDynamicMetadata = false;
998 }
999
1000 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1001
1002 if (myPipe->ScalerEnabled)
1003 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1004 else
1005 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1006
1007 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1008
1009 DISPCLKCycles = DISPCLKDelaySubtotal;
1010
1011 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1012 return true;
1013
1014 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1015
1016 #ifdef __DML_VBA_DEBUG__
1017 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1018 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1019 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1020 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1021 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1022 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1023 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1024 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1025 #endif
1026
1027 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1028
1029 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1030 *DSTYAfterScaler = 1;
1031 else
1032 *DSTYAfterScaler = 0;
1033
1034 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1035 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1036 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1037
1038 #ifdef __DML_VBA_DEBUG__
1039 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1040 #endif
1041
1042 MyError = false;
1043
1044 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1045 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1046 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1047
1048 #ifdef __DML_VBA_ALLOW_DELTA__
1049 if (!myPipe->DCCEnable) {
1050 Tr0_trips = 0.0;
1051 Tr0_trips_rounded = 0.0;
1052 }
1053 #endif
1054
1055 if (!GPUVMEnable) {
1056 Tvm_trips = 0.0;
1057 Tvm_trips_rounded = 0.0;
1058 }
1059
1060 if (GPUVMEnable) {
1061 if (GPUVMPageTableLevels >= 3) {
1062 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1063 } else {
1064 *Tno_bw = 0;
1065 }
1066 } else if (!myPipe->DCCEnable) {
1067 *Tno_bw = LineTime;
1068 } else {
1069 *Tno_bw = LineTime / 4;
1070 }
1071
1072 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1073 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1074 else
1075 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1076 /*rev 99*/
1077 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1078 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1079 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1080 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1081 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1082
1083 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1084 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1085 Tsw_oto = Lsw_oto * LineTime;
1086
1087 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1088
1089 #ifdef __DML_VBA_DEBUG__
1090 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1091 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1092 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1093 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1094 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1095 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1096 #endif
1097
1098 if (GPUVMEnable == true)
1099 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1100 else
1101 Tvm_oto = LineTime / 4.0;
1102
1103 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1104 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1105 LineTime - Tvm_oto,
1106 LineTime / 4);
1107 } else {
1108 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1109 }
1110
1111 #ifdef __DML_VBA_DEBUG__
1112 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1113 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1114 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1115 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1116 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1117 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1118 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1119 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1120 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1121 #endif
1122
1123 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1124 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1125 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1126 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1127 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1128 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1129
1130 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1131
1132 if (prefetch_sw_bytes < dep_bytes)
1133 prefetch_sw_bytes = 2 * dep_bytes;
1134
1135 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1136 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1137 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1138 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1139 dml_print("DML: LineTime: %f\n", LineTime);
1140 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1141
1142 dml_print("DML: LineTime: %f\n", LineTime);
1143 dml_print("DML: VStartup: %d\n", VStartup);
1144 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1145 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1146 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1147 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1148 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1149 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1150 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1151 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1152 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1153 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1154 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1155
1156 *PrefetchBandwidth = 0;
1157 *DestinationLinesToRequestVMInVBlank = 0;
1158 *DestinationLinesToRequestRowInVBlank = 0;
1159 *VRatioPrefetchY = 0;
1160 *VRatioPrefetchC = 0;
1161 *RequiredPrefetchPixDataBWLuma = 0;
1162 if (dst_y_prefetch_equ > 1) {
1163 double PrefetchBandwidth1;
1164 double PrefetchBandwidth2;
1165 double PrefetchBandwidth3;
1166 double PrefetchBandwidth4;
1167
1168 if (Tpre_rounded - *Tno_bw > 0) {
1169 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1170 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1171 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1172 } else {
1173 PrefetchBandwidth1 = 0;
1174 }
1175
1176 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1177 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1178 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1179 }
1180
1181 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1182 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1183 else
1184 PrefetchBandwidth2 = 0;
1185
1186 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1187 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1188 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1189 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1190 } else {
1191 PrefetchBandwidth3 = 0;
1192 }
1193
1194 #ifdef __DML_VBA_DEBUG__
1195 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1196 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1197 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1198 #endif
1199 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1200 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1201 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1202 }
1203
1204 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1205 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1206 else
1207 PrefetchBandwidth4 = 0;
1208
1209 {
1210 bool Case1OK;
1211 bool Case2OK;
1212 bool Case3OK;
1213
1214 if (PrefetchBandwidth1 > 0) {
1215 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1216 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1217 Case1OK = true;
1218 } else {
1219 Case1OK = false;
1220 }
1221 } else {
1222 Case1OK = false;
1223 }
1224
1225 if (PrefetchBandwidth2 > 0) {
1226 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1227 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1228 Case2OK = true;
1229 } else {
1230 Case2OK = false;
1231 }
1232 } else {
1233 Case2OK = false;
1234 }
1235
1236 if (PrefetchBandwidth3 > 0) {
1237 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1238 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1239 Case3OK = true;
1240 } else {
1241 Case3OK = false;
1242 }
1243 } else {
1244 Case3OK = false;
1245 }
1246
1247 if (Case1OK) {
1248 prefetch_bw_equ = PrefetchBandwidth1;
1249 } else if (Case2OK) {
1250 prefetch_bw_equ = PrefetchBandwidth2;
1251 } else if (Case3OK) {
1252 prefetch_bw_equ = PrefetchBandwidth3;
1253 } else {
1254 prefetch_bw_equ = PrefetchBandwidth4;
1255 }
1256
1257 #ifdef __DML_VBA_DEBUG__
1258 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1259 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1260 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1261 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1262 #endif
1263
1264 if (prefetch_bw_equ > 0) {
1265 if (GPUVMEnable == true) {
1266 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1267 } else {
1268 Tvm_equ = LineTime / 4;
1269 }
1270
1271 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1272 Tr0_equ = dml_max4(
1273 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1274 Tr0_trips,
1275 (LineTime - Tvm_equ) / 2,
1276 LineTime / 4);
1277 } else {
1278 Tr0_equ = (LineTime - Tvm_equ) / 2;
1279 }
1280 } else {
1281 Tvm_equ = 0;
1282 Tr0_equ = 0;
1283 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1284 }
1285 }
1286
1287 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1288 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1289 TimeForFetchingMetaPTE = Tvm_oto;
1290 TimeForFetchingRowInVBlank = Tr0_oto;
1291 *PrefetchBandwidth = prefetch_bw_oto;
1292 } else {
1293 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1294 TimeForFetchingMetaPTE = Tvm_equ;
1295 TimeForFetchingRowInVBlank = Tr0_equ;
1296 *PrefetchBandwidth = prefetch_bw_equ;
1297 }
1298
1299 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1300
1301 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1302
1303 #ifdef __DML_VBA_ALLOW_DELTA__
1304 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1305 // See note above dated 5/30/2018
1306 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1307 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1308 #else
1309 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1310 #endif
1311
1312 #ifdef __DML_VBA_DEBUG__
1313 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1314 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1315 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1316 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1317 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1318 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1319 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1320 #endif
1321
1322 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1323
1324 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1325 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1326 #ifdef __DML_VBA_DEBUG__
1327 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1328 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1329 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1330 #endif
1331 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1332 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1333 *VRatioPrefetchY = dml_max(
1334 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1335 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1336 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1337 } else {
1338 MyError = true;
1339 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1340 *VRatioPrefetchY = 0;
1341 }
1342 #ifdef __DML_VBA_DEBUG__
1343 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1344 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1345 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1346 #endif
1347 }
1348
1349 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1350 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1351
1352 #ifdef __DML_VBA_DEBUG__
1353 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1354 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1355 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1356 #endif
1357 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1358 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1359 *VRatioPrefetchC = dml_max(
1360 *VRatioPrefetchC,
1361 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1362 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1363 } else {
1364 MyError = true;
1365 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1366 *VRatioPrefetchC = 0;
1367 }
1368 #ifdef __DML_VBA_DEBUG__
1369 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1370 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1371 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1372 #endif
1373 }
1374
1375 #ifdef __DML_VBA_DEBUG__
1376 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1377 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1378 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1379 #endif
1380
1381 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1382
1383 #ifdef __DML_VBA_DEBUG__
1384 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1385 #endif
1386
1387 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1388 / LineTime;
1389 } else {
1390 MyError = true;
1391 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1392 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1393 *VRatioPrefetchY = 0;
1394 *VRatioPrefetchC = 0;
1395 *RequiredPrefetchPixDataBWLuma = 0;
1396 *RequiredPrefetchPixDataBWChroma = 0;
1397 }
1398
1399 dml_print(
1400 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1401 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1402 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1403 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1404 dml_print(
1405 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1406 (double) LinesToRequestPrefetchPixelData * LineTime);
1407 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1408 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1409 dml_print(
1410 "DML: Tslack(pre): %fus - time left over in schedule\n",
1411 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1412 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1413 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1414
1415 } else {
1416 MyError = true;
1417 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1418 }
1419
1420 {
1421 double prefetch_vm_bw;
1422 double prefetch_row_bw;
1423
1424 if (PDEAndMetaPTEBytesFrame == 0) {
1425 prefetch_vm_bw = 0;
1426 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1427 #ifdef __DML_VBA_DEBUG__
1428 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1429 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1430 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1431 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1432 #endif
1433 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1434 #ifdef __DML_VBA_DEBUG__
1435 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1436 #endif
1437 } else {
1438 prefetch_vm_bw = 0;
1439 MyError = true;
1440 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1441 }
1442
1443 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1444 prefetch_row_bw = 0;
1445 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1446 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1447
1448 #ifdef __DML_VBA_DEBUG__
1449 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1450 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1451 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1452 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1453 #endif
1454 } else {
1455 prefetch_row_bw = 0;
1456 MyError = true;
1457 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1458 }
1459
1460 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1461 }
1462
1463 if (MyError) {
1464 *PrefetchBandwidth = 0;
1465 TimeForFetchingMetaPTE = 0;
1466 TimeForFetchingRowInVBlank = 0;
1467 *DestinationLinesToRequestVMInVBlank = 0;
1468 *DestinationLinesToRequestRowInVBlank = 0;
1469 *DestinationLinesForPrefetch = 0;
1470 LinesToRequestPrefetchPixelData = 0;
1471 *VRatioPrefetchY = 0;
1472 *VRatioPrefetchC = 0;
1473 *RequiredPrefetchPixDataBWLuma = 0;
1474 *RequiredPrefetchPixDataBWChroma = 0;
1475 }
1476
1477 return MyError;
1478 }
1479
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / N, picking the grid
 * point that is not below Clock.
 *
 * The divider N is obtained by flooring (4 * VCOSpeed) / Clock with a
 * granularity of 1 (presumably the largest whole number not above the
 * ratio - confirm against dml_floor), so the quotient returned is >= Clock
 * whenever N is positive.
 *
 * NOTE(review): there is no guard for N == 0 (e.g. Clock larger than
 * 4 * VCOSpeed) or for Clock == 0; callers are assumed to pass sane
 * clocks - confirm.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_floor(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1484
/*
 * Snap Clock onto the grid of values (4 * VCOSpeed) / N, picking the grid
 * point that is not above Clock.
 *
 * The divider N is obtained by taking the ceiling of (4 * VCOSpeed) / Clock
 * with a granularity of 1 (presumably the smallest whole number not below
 * the ratio - confirm against dml_ceil), so the quotient returned is
 * <= Clock whenever N is positive.
 *
 * NOTE(review): Clock == 0 is not guarded against here; callers are
 * assumed to pass a non-zero clock - confirm.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double Divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	const double ScaledVCO = VCOSpeed * 4;

	return ScaledVCO / Divider;
}
1489
1490 static void CalculateDCCConfiguration(
1491 bool DCCEnabled,
1492 bool DCCProgrammingAssumesScanDirectionUnknown,
1493 enum source_format_class SourcePixelFormat,
1494 unsigned int SurfaceWidthLuma,
1495 unsigned int SurfaceWidthChroma,
1496 unsigned int SurfaceHeightLuma,
1497 unsigned int SurfaceHeightChroma,
1498 double DETBufferSize,
1499 unsigned int RequestHeight256ByteLuma,
1500 unsigned int RequestHeight256ByteChroma,
1501 enum dm_swizzle_mode TilingFormat,
1502 unsigned int BytePerPixelY,
1503 unsigned int BytePerPixelC,
1504 double BytePerPixelDETY,
1505 double BytePerPixelDETC,
1506 enum scan_direction_class ScanOrientation,
1507 unsigned int *MaxUncompressedBlockLuma,
1508 unsigned int *MaxUncompressedBlockChroma,
1509 unsigned int *MaxCompressedBlockLuma,
1510 unsigned int *MaxCompressedBlockChroma,
1511 unsigned int *IndependentBlockLuma,
1512 unsigned int *IndependentBlockChroma)
1513 {
1514 int yuv420;
1515 int horz_div_l;
1516 int horz_div_c;
1517 int vert_div_l;
1518 int vert_div_c;
1519
1520 int swath_buf_size;
1521 double detile_buf_vp_horz_limit;
1522 double detile_buf_vp_vert_limit;
1523
1524 int MAS_vp_horz_limit;
1525 int MAS_vp_vert_limit;
1526 int max_vp_horz_width;
1527 int max_vp_vert_height;
1528 int eff_surf_width_l;
1529 int eff_surf_width_c;
1530 int eff_surf_height_l;
1531 int eff_surf_height_c;
1532
1533 int full_swath_bytes_horz_wc_l;
1534 int full_swath_bytes_horz_wc_c;
1535 int full_swath_bytes_vert_wc_l;
1536 int full_swath_bytes_vert_wc_c;
1537 int req128_horz_wc_l;
1538 int req128_horz_wc_c;
1539 int req128_vert_wc_l;
1540 int req128_vert_wc_c;
1541 int segment_order_horz_contiguous_luma;
1542 int segment_order_horz_contiguous_chroma;
1543 int segment_order_vert_contiguous_luma;
1544 int segment_order_vert_contiguous_chroma;
1545
1546 typedef enum {
1547 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1548 } RequestType;
1549 RequestType RequestLuma;
1550 RequestType RequestChroma;
1551
1552 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1553 horz_div_l = 1;
1554 horz_div_c = 1;
1555 vert_div_l = 1;
1556 vert_div_c = 1;
1557
1558 if (BytePerPixelY == 1)
1559 vert_div_l = 0;
1560 if (BytePerPixelC == 1)
1561 vert_div_c = 0;
1562 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1563 horz_div_l = 0;
1564 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1565 horz_div_c = 0;
1566
1567 if (BytePerPixelC == 0) {
1568 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1569 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1570 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1571 } else {
1572 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1573 detile_buf_vp_horz_limit = (double) swath_buf_size
1574 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1575 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1576 detile_buf_vp_vert_limit = (double) swath_buf_size
1577 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1578 }
1579
1580 if (SourcePixelFormat == dm_420_10) {
1581 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1582 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1583 }
1584
1585 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1586 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1587
1588 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1589 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1590 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1591 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1592 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1593 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1594 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1595 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1596
1597 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1598 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1599 if (BytePerPixelC > 0) {
1600 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1601 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1602 } else {
1603 full_swath_bytes_horz_wc_c = 0;
1604 full_swath_bytes_vert_wc_c = 0;
1605 }
1606
1607 if (SourcePixelFormat == dm_420_10) {
1608 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1609 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1610 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1611 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1612 }
1613
1614 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1615 req128_horz_wc_l = 0;
1616 req128_horz_wc_c = 0;
1617 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1618 req128_horz_wc_l = 0;
1619 req128_horz_wc_c = 1;
1620 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1621 req128_horz_wc_l = 1;
1622 req128_horz_wc_c = 0;
1623 } else {
1624 req128_horz_wc_l = 1;
1625 req128_horz_wc_c = 1;
1626 }
1627
1628 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1629 req128_vert_wc_l = 0;
1630 req128_vert_wc_c = 0;
1631 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1632 req128_vert_wc_l = 0;
1633 req128_vert_wc_c = 1;
1634 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1635 req128_vert_wc_l = 1;
1636 req128_vert_wc_c = 0;
1637 } else {
1638 req128_vert_wc_l = 1;
1639 req128_vert_wc_c = 1;
1640 }
1641
1642 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1643 segment_order_horz_contiguous_luma = 0;
1644 } else {
1645 segment_order_horz_contiguous_luma = 1;
1646 }
1647 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1648 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1649 segment_order_vert_contiguous_luma = 0;
1650 } else {
1651 segment_order_vert_contiguous_luma = 1;
1652 }
1653 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1654 segment_order_horz_contiguous_chroma = 0;
1655 } else {
1656 segment_order_horz_contiguous_chroma = 1;
1657 }
1658 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1659 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1660 segment_order_vert_contiguous_chroma = 0;
1661 } else {
1662 segment_order_vert_contiguous_chroma = 1;
1663 }
1664
1665 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1666 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1667 RequestLuma = REQ_256Bytes;
1668 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1669 RequestLuma = REQ_128BytesNonContiguous;
1670 } else {
1671 RequestLuma = REQ_128BytesContiguous;
1672 }
1673 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1674 RequestChroma = REQ_256Bytes;
1675 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1676 RequestChroma = REQ_128BytesNonContiguous;
1677 } else {
1678 RequestChroma = REQ_128BytesContiguous;
1679 }
1680 } else if (ScanOrientation != dm_vert) {
1681 if (req128_horz_wc_l == 0) {
1682 RequestLuma = REQ_256Bytes;
1683 } else if (segment_order_horz_contiguous_luma == 0) {
1684 RequestLuma = REQ_128BytesNonContiguous;
1685 } else {
1686 RequestLuma = REQ_128BytesContiguous;
1687 }
1688 if (req128_horz_wc_c == 0) {
1689 RequestChroma = REQ_256Bytes;
1690 } else if (segment_order_horz_contiguous_chroma == 0) {
1691 RequestChroma = REQ_128BytesNonContiguous;
1692 } else {
1693 RequestChroma = REQ_128BytesContiguous;
1694 }
1695 } else {
1696 if (req128_vert_wc_l == 0) {
1697 RequestLuma = REQ_256Bytes;
1698 } else if (segment_order_vert_contiguous_luma == 0) {
1699 RequestLuma = REQ_128BytesNonContiguous;
1700 } else {
1701 RequestLuma = REQ_128BytesContiguous;
1702 }
1703 if (req128_vert_wc_c == 0) {
1704 RequestChroma = REQ_256Bytes;
1705 } else if (segment_order_vert_contiguous_chroma == 0) {
1706 RequestChroma = REQ_128BytesNonContiguous;
1707 } else {
1708 RequestChroma = REQ_128BytesContiguous;
1709 }
1710 }
1711
1712 if (RequestLuma == REQ_256Bytes) {
1713 *MaxUncompressedBlockLuma = 256;
1714 *MaxCompressedBlockLuma = 256;
1715 *IndependentBlockLuma = 0;
1716 } else if (RequestLuma == REQ_128BytesContiguous) {
1717 *MaxUncompressedBlockLuma = 256;
1718 *MaxCompressedBlockLuma = 128;
1719 *IndependentBlockLuma = 128;
1720 } else {
1721 *MaxUncompressedBlockLuma = 256;
1722 *MaxCompressedBlockLuma = 64;
1723 *IndependentBlockLuma = 64;
1724 }
1725
1726 if (RequestChroma == REQ_256Bytes) {
1727 *MaxUncompressedBlockChroma = 256;
1728 *MaxCompressedBlockChroma = 256;
1729 *IndependentBlockChroma = 0;
1730 } else if (RequestChroma == REQ_128BytesContiguous) {
1731 *MaxUncompressedBlockChroma = 256;
1732 *MaxCompressedBlockChroma = 128;
1733 *IndependentBlockChroma = 128;
1734 } else {
1735 *MaxUncompressedBlockChroma = 256;
1736 *MaxCompressedBlockChroma = 64;
1737 *IndependentBlockChroma = 64;
1738 }
1739
1740 if (DCCEnabled != true || BytePerPixelC == 0) {
1741 *MaxUncompressedBlockChroma = 0;
1742 *MaxCompressedBlockChroma = 0;
1743 *IndependentBlockChroma = 0;
1744 }
1745
1746 if (DCCEnabled != true) {
1747 *MaxUncompressedBlockLuma = 0;
1748 *MaxCompressedBlockLuma = 0;
1749 *IndependentBlockLuma = 0;
1750 }
1751 }
1752
1753 static double CalculatePrefetchSourceLines(
1754 struct display_mode_lib *mode_lib,
1755 double VRatio,
1756 double vtaps,
1757 bool Interlace,
1758 bool ProgressiveToInterlaceUnitInOPP,
1759 unsigned int SwathHeight,
1760 unsigned int ViewportYStart,
1761 double *VInitPreFill,
1762 unsigned int *MaxNumSwath)
1763 {
1764 struct vba_vars_st *v = &mode_lib->vba;
1765 unsigned int MaxPartialSwath;
1766
1767 if (ProgressiveToInterlaceUnitInOPP)
1768 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1769 else
1770 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1771
1772 if (!v->IgnoreViewportPositioning) {
1773
1774 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1775
1776 if (*VInitPreFill > 1.0)
1777 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1778 else
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1780 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1781
1782 } else {
1783
1784 if (ViewportYStart != 0)
1785 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1786
1787 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1788
1789 if (*VInitPreFill > 1.0)
1790 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1791 else
1792 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1793 }
1794
1795 #ifdef __DML_VBA_DEBUG__
1796 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1797 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1798 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1799 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1800 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1801 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1802 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1803 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1804 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1805 #endif
1806 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1807 }
1808
1809 static unsigned int CalculateVMAndRowBytes(
1810 struct display_mode_lib *mode_lib,
1811 bool DCCEnable,
1812 unsigned int BlockHeight256Bytes,
1813 unsigned int BlockWidth256Bytes,
1814 enum source_format_class SourcePixelFormat,
1815 unsigned int SurfaceTiling,
1816 unsigned int BytePerPixel,
1817 enum scan_direction_class ScanDirection,
1818 unsigned int SwathWidth,
1819 unsigned int ViewportHeight,
1820 bool GPUVMEnable,
1821 bool HostVMEnable,
1822 unsigned int HostVMMaxNonCachedPageTableLevels,
1823 unsigned int GPUVMMinPageSize,
1824 unsigned int HostVMMinPageSize,
1825 unsigned int PTEBufferSizeInRequests,
1826 unsigned int Pitch,
1827 unsigned int DCCMetaPitch,
1828 unsigned int *MacroTileWidth,
1829 unsigned int *MetaRowByte,
1830 unsigned int *PixelPTEBytesPerRow,
1831 bool *PTEBufferSizeNotExceeded,
1832 int *dpte_row_width_ub,
1833 unsigned int *dpte_row_height,
1834 unsigned int *MetaRequestWidth,
1835 unsigned int *MetaRequestHeight,
1836 unsigned int *meta_row_width,
1837 unsigned int *meta_row_height,
1838 int *vm_group_bytes,
1839 unsigned int *dpte_group_bytes,
1840 unsigned int *PixelPTEReqWidth,
1841 unsigned int *PixelPTEReqHeight,
1842 unsigned int *PTERequestSize,
1843 int *DPDE0BytesFrame,
1844 int *MetaPTEBytesFrame)
1845 {
1846 struct vba_vars_st *v = &mode_lib->vba;
1847 unsigned int MPDEBytesFrame;
1848 unsigned int DCCMetaSurfaceBytes;
1849 unsigned int MacroTileSizeBytes;
1850 unsigned int MacroTileHeight;
1851 unsigned int ExtraDPDEBytesFrame;
1852 unsigned int PDEAndMetaPTEBytesFrame;
1853 unsigned int PixelPTEReqHeightPTEs = 0;
1854 unsigned int HostVMDynamicLevels = 0;
1855 double FractionOfPTEReturnDrop;
1856
1857 if (GPUVMEnable == true && HostVMEnable == true) {
1858 if (HostVMMinPageSize < 2048) {
1859 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1860 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1861 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1862 } else {
1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1864 }
1865 }
1866
1867 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1868 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1869 if (ScanDirection != dm_vert) {
1870 *meta_row_height = *MetaRequestHeight;
1871 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1872 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1873 } else {
1874 *meta_row_height = *MetaRequestWidth;
1875 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1876 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1877 }
1878 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1879 if (GPUVMEnable == true) {
1880 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1881 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1882 } else {
1883 *MetaPTEBytesFrame = 0;
1884 MPDEBytesFrame = 0;
1885 }
1886
1887 if (DCCEnable != true) {
1888 *MetaPTEBytesFrame = 0;
1889 MPDEBytesFrame = 0;
1890 *MetaRowByte = 0;
1891 }
1892
1893 if (SurfaceTiling == dm_sw_linear) {
1894 MacroTileSizeBytes = 256;
1895 MacroTileHeight = BlockHeight256Bytes;
1896 } else {
1897 MacroTileSizeBytes = 65536;
1898 MacroTileHeight = 16 * BlockHeight256Bytes;
1899 }
1900 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1901
1902 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1903 if (ScanDirection != dm_vert) {
1904 *DPDE0BytesFrame = 64
1905 * (dml_ceil(
1906 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1907 / (8 * 2097152),
1908 1) + 1);
1909 } else {
1910 *DPDE0BytesFrame = 64
1911 * (dml_ceil(
1912 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1913 / (8 * 2097152),
1914 1) + 1);
1915 }
1916 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1917 } else {
1918 *DPDE0BytesFrame = 0;
1919 ExtraDPDEBytesFrame = 0;
1920 }
1921
1922 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1923
1924 #ifdef __DML_VBA_DEBUG__
1925 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1926 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1927 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1928 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1929 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1930 #endif
1931
1932 if (HostVMEnable == true) {
1933 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1934 }
1935 #ifdef __DML_VBA_DEBUG__
1936 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1937 #endif
1938
1939 if (SurfaceTiling == dm_sw_linear) {
1940 PixelPTEReqHeightPTEs = 1;
1941 *PixelPTEReqHeight = 1;
1942 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1943 *PTERequestSize = 64;
1944 FractionOfPTEReturnDrop = 0;
1945 } else if (MacroTileSizeBytes == 4096) {
1946 PixelPTEReqHeightPTEs = 1;
1947 *PixelPTEReqHeight = MacroTileHeight;
1948 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1949 *PTERequestSize = 64;
1950 if (ScanDirection != dm_vert)
1951 FractionOfPTEReturnDrop = 0;
1952 else
1953 FractionOfPTEReturnDrop = 7 / 8;
1954 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1955 PixelPTEReqHeightPTEs = 16;
1956 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1957 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1958 *PTERequestSize = 128;
1959 FractionOfPTEReturnDrop = 0;
1960 } else {
1961 PixelPTEReqHeightPTEs = 1;
1962 *PixelPTEReqHeight = MacroTileHeight;
1963 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1964 *PTERequestSize = 64;
1965 FractionOfPTEReturnDrop = 0;
1966 }
1967
1968 if (SurfaceTiling == dm_sw_linear) {
1969 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1970 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1971 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1972 } else if (ScanDirection != dm_vert) {
1973 *dpte_row_height = *PixelPTEReqHeight;
1974 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1975 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1976 } else {
1977 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1978 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1979 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1980 }
1981
1982 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1983 *PTEBufferSizeNotExceeded = true;
1984 } else {
1985 *PTEBufferSizeNotExceeded = false;
1986 }
1987
1988 if (GPUVMEnable != true) {
1989 *PixelPTEBytesPerRow = 0;
1990 *PTEBufferSizeNotExceeded = true;
1991 }
1992
1993 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1994
1995 if (HostVMEnable == true) {
1996 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1997 }
1998
1999 if (HostVMEnable == true) {
2000 *vm_group_bytes = 512;
2001 *dpte_group_bytes = 512;
2002 } else if (GPUVMEnable == true) {
2003 *vm_group_bytes = 2048;
2004 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2005 *dpte_group_bytes = 512;
2006 } else {
2007 *dpte_group_bytes = 2048;
2008 }
2009 } else {
2010 *vm_group_bytes = 0;
2011 *dpte_group_bytes = 0;
2012 }
2013 return PDEAndMetaPTEBytesFrame;
2014 }
2015
2016 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2017 {
2018 struct vba_vars_st *v = &mode_lib->vba;
2019 unsigned int j, k;
2020 double HostVMInefficiencyFactor = 1.0;
2021 bool NoChromaPlanes = true;
2022 int ReorderBytes;
2023 double VMDataOnlyReturnBW;
2024 double MaxTotalRDBandwidth = 0;
2025 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2026
2027 v->WritebackDISPCLK = 0.0;
2028 v->DISPCLKWithRamping = 0;
2029 v->DISPCLKWithoutRamping = 0;
2030 v->GlobalDPPCLK = 0.0;
2031 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2032 {
2033 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2034 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2035 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2036 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2037
2038 if (v->HostVMEnable != true) {
2039 v->ReturnBW = dml_min(
2040 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2041 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2042 } else {
2043 v->ReturnBW = dml_min(
2044 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2045 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2046 }
2047 }
2048 /* End DAL custom code */
2049
2050 // DISPCLK and DPPCLK Calculation
2051 //
2052 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2053 if (v->WritebackEnable[k]) {
2054 v->WritebackDISPCLK = dml_max(
2055 v->WritebackDISPCLK,
2056 dml314_CalculateWriteBackDISPCLK(
2057 v->WritebackPixelFormat[k],
2058 v->PixelClock[k],
2059 v->WritebackHRatio[k],
2060 v->WritebackVRatio[k],
2061 v->WritebackHTaps[k],
2062 v->WritebackVTaps[k],
2063 v->WritebackSourceWidth[k],
2064 v->WritebackDestinationWidth[k],
2065 v->HTotal[k],
2066 v->WritebackLineBufferSize));
2067 }
2068 }
2069
2070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2071 if (v->HRatio[k] > 1) {
2072 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2073 v->MaxDCHUBToPSCLThroughput,
2074 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2075 } else {
2076 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2077 }
2078
2079 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2080 * dml_max(
2081 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2082 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2083
2084 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2085 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2086 }
2087
2088 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2089 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2090 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2091 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2092 } else {
2093 if (v->HRatioChroma[k] > 1) {
2094 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2095 v->MaxDCHUBToPSCLThroughput,
2096 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2097 } else {
2098 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2099 }
2100 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2101 * dml_max3(
2102 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2103 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2104 1.0);
2105
2106 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2107 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2108 }
2109
2110 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2111 }
2112 }
2113
2114 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2115 if (v->BlendingAndTiming[k] != k)
2116 continue;
2117 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2118 v->DISPCLKWithRamping = dml_max(
2119 v->DISPCLKWithRamping,
2120 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2121 * (1 + v->DISPCLKRampingMargin / 100));
2122 v->DISPCLKWithoutRamping = dml_max(
2123 v->DISPCLKWithoutRamping,
2124 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2125 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2126 v->DISPCLKWithRamping = dml_max(
2127 v->DISPCLKWithRamping,
2128 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2129 * (1 + v->DISPCLKRampingMargin / 100));
2130 v->DISPCLKWithoutRamping = dml_max(
2131 v->DISPCLKWithoutRamping,
2132 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2133 } else {
2134 v->DISPCLKWithRamping = dml_max(
2135 v->DISPCLKWithRamping,
2136 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2137 v->DISPCLKWithoutRamping = dml_max(
2138 v->DISPCLKWithoutRamping,
2139 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2140 }
2141 }
2142
2143 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2144 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2145
2146 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2147 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2148 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2149 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2150 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2151 v->DISPCLKDPPCLKVCOSpeed);
2152 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2153 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2154 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2155 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2156 } else {
2157 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2158 }
2159 v->DISPCLK = v->DISPCLK_calculated;
2160 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2161
2162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2163 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2164 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2165 }
2166 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2167 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2168 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2169 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2170 }
2171
2172 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2173 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2174 }
2175
2176 // Urgent and B P-State/DRAM Clock Change Watermark
2177 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2178 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2179
2180 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2181 CalculateBytePerPixelAnd256BBlockSizes(
2182 v->SourcePixelFormat[k],
2183 v->SurfaceTiling[k],
2184 &v->BytePerPixelY[k],
2185 &v->BytePerPixelC[k],
2186 &v->BytePerPixelDETY[k],
2187 &v->BytePerPixelDETC[k],
2188 &v->BlockHeight256BytesY[k],
2189 &v->BlockHeight256BytesC[k],
2190 &v->BlockWidth256BytesY[k],
2191 &v->BlockWidth256BytesC[k]);
2192 }
2193
2194 CalculateSwathWidth(
2195 false,
2196 v->NumberOfActivePlanes,
2197 v->SourcePixelFormat,
2198 v->SourceScan,
2199 v->ViewportWidth,
2200 v->ViewportHeight,
2201 v->SurfaceWidthY,
2202 v->SurfaceWidthC,
2203 v->SurfaceHeightY,
2204 v->SurfaceHeightC,
2205 v->ODMCombineEnabled,
2206 v->BytePerPixelY,
2207 v->BytePerPixelC,
2208 v->BlockHeight256BytesY,
2209 v->BlockHeight256BytesC,
2210 v->BlockWidth256BytesY,
2211 v->BlockWidth256BytesC,
2212 v->BlendingAndTiming,
2213 v->HActive,
2214 v->HRatio,
2215 v->DPPPerPlane,
2216 v->SwathWidthSingleDPPY,
2217 v->SwathWidthSingleDPPC,
2218 v->SwathWidthY,
2219 v->SwathWidthC,
2220 v->dummyinteger3,
2221 v->dummyinteger4,
2222 v->swath_width_luma_ub,
2223 v->swath_width_chroma_ub);
2224
2225 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2226 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2227 * v->VRatio[k];
2228 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2229 * v->VRatioChroma[k];
2230 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2231 }
2232
2233 // DCFCLK Deep Sleep
2234 CalculateDCFCLKDeepSleep(
2235 mode_lib,
2236 v->NumberOfActivePlanes,
2237 v->BytePerPixelY,
2238 v->BytePerPixelC,
2239 v->VRatio,
2240 v->VRatioChroma,
2241 v->SwathWidthY,
2242 v->SwathWidthC,
2243 v->DPPPerPlane,
2244 v->HRatio,
2245 v->HRatioChroma,
2246 v->PixelClock,
2247 v->PSCL_THROUGHPUT_LUMA,
2248 v->PSCL_THROUGHPUT_CHROMA,
2249 v->DPPCLK,
2250 v->ReadBandwidthPlaneLuma,
2251 v->ReadBandwidthPlaneChroma,
2252 v->ReturnBusWidth,
2253 &v->DCFCLKDeepSleep);
2254
2255 // DSCCLK
2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2257 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2258 v->DSCCLK_calculated[k] = 0.0;
2259 } else {
2260 if (v->OutputFormat[k] == dm_420)
2261 v->DSCFormatFactor = 2;
2262 else if (v->OutputFormat[k] == dm_444)
2263 v->DSCFormatFactor = 1;
2264 else if (v->OutputFormat[k] == dm_n422)
2265 v->DSCFormatFactor = 2;
2266 else
2267 v->DSCFormatFactor = 1;
2268 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2269 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2270 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2271 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2272 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2273 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2274 else
2275 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2276 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2277 }
2278 }
2279
2280 // DSC Delay
2281 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2282 double BPP = v->OutputBpp[k];
2283
2284 if (v->DSCEnabled[k] && BPP != 0) {
2285 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2286 v->DSCDelay[k] = dscceComputeDelay(
2287 v->DSCInputBitPerComponent[k],
2288 BPP,
2289 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2290 v->NumberOfDSCSlices[k],
2291 v->OutputFormat[k],
2292 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2293 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2294 v->DSCDelay[k] = 2
2295 * (dscceComputeDelay(
2296 v->DSCInputBitPerComponent[k],
2297 BPP,
2298 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2299 v->NumberOfDSCSlices[k] / 2.0,
2300 v->OutputFormat[k],
2301 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2302 } else {
2303 v->DSCDelay[k] = 4
2304 * (dscceComputeDelay(
2305 v->DSCInputBitPerComponent[k],
2306 BPP,
2307 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2308 v->NumberOfDSCSlices[k] / 4.0,
2309 v->OutputFormat[k],
2310 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2311 }
2312 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2313 } else {
2314 v->DSCDelay[k] = 0;
2315 }
2316 }
2317
2318 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2319 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2320 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2321 v->DSCDelay[k] = v->DSCDelay[j];
2322
2323 // Prefetch
2324 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2325 unsigned int PDEAndMetaPTEBytesFrameY;
2326 unsigned int PixelPTEBytesPerRowY;
2327 unsigned int MetaRowByteY;
2328 unsigned int MetaRowByteC;
2329 unsigned int PDEAndMetaPTEBytesFrameC;
2330 unsigned int PixelPTEBytesPerRowC;
2331 bool PTEBufferSizeNotExceededY;
2332 bool PTEBufferSizeNotExceededC;
2333
2334 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2335 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2336 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2337 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2338 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2339 } else {
2340 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2341 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2342 }
2343
2344 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2345 mode_lib,
2346 v->DCCEnable[k],
2347 v->BlockHeight256BytesC[k],
2348 v->BlockWidth256BytesC[k],
2349 v->SourcePixelFormat[k],
2350 v->SurfaceTiling[k],
2351 v->BytePerPixelC[k],
2352 v->SourceScan[k],
2353 v->SwathWidthC[k],
2354 v->ViewportHeightChroma[k],
2355 v->GPUVMEnable,
2356 v->HostVMEnable,
2357 v->HostVMMaxNonCachedPageTableLevels,
2358 v->GPUVMMinPageSize,
2359 v->HostVMMinPageSize,
2360 v->PTEBufferSizeInRequestsForChroma,
2361 v->PitchC[k],
2362 v->DCCMetaPitchC[k],
2363 &v->MacroTileWidthC[k],
2364 &MetaRowByteC,
2365 &PixelPTEBytesPerRowC,
2366 &PTEBufferSizeNotExceededC,
2367 &v->dpte_row_width_chroma_ub[k],
2368 &v->dpte_row_height_chroma[k],
2369 &v->meta_req_width_chroma[k],
2370 &v->meta_req_height_chroma[k],
2371 &v->meta_row_width_chroma[k],
2372 &v->meta_row_height_chroma[k],
2373 &v->dummyinteger1,
2374 &v->dummyinteger2,
2375 &v->PixelPTEReqWidthC[k],
2376 &v->PixelPTEReqHeightC[k],
2377 &v->PTERequestSizeC[k],
2378 &v->dpde0_bytes_per_frame_ub_c[k],
2379 &v->meta_pte_bytes_per_frame_ub_c[k]);
2380
2381 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2382 mode_lib,
2383 v->VRatioChroma[k],
2384 v->VTAPsChroma[k],
2385 v->Interlace[k],
2386 v->ProgressiveToInterlaceUnitInOPP,
2387 v->SwathHeightC[k],
2388 v->ViewportYStartC[k],
2389 &v->VInitPreFillC[k],
2390 &v->MaxNumSwathC[k]);
2391 } else {
2392 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2393 v->PTEBufferSizeInRequestsForChroma = 0;
2394 PixelPTEBytesPerRowC = 0;
2395 PDEAndMetaPTEBytesFrameC = 0;
2396 MetaRowByteC = 0;
2397 v->MaxNumSwathC[k] = 0;
2398 v->PrefetchSourceLinesC[k] = 0;
2399 }
2400
2401 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2402 mode_lib,
2403 v->DCCEnable[k],
2404 v->BlockHeight256BytesY[k],
2405 v->BlockWidth256BytesY[k],
2406 v->SourcePixelFormat[k],
2407 v->SurfaceTiling[k],
2408 v->BytePerPixelY[k],
2409 v->SourceScan[k],
2410 v->SwathWidthY[k],
2411 v->ViewportHeight[k],
2412 v->GPUVMEnable,
2413 v->HostVMEnable,
2414 v->HostVMMaxNonCachedPageTableLevels,
2415 v->GPUVMMinPageSize,
2416 v->HostVMMinPageSize,
2417 v->PTEBufferSizeInRequestsForLuma,
2418 v->PitchY[k],
2419 v->DCCMetaPitchY[k],
2420 &v->MacroTileWidthY[k],
2421 &MetaRowByteY,
2422 &PixelPTEBytesPerRowY,
2423 &PTEBufferSizeNotExceededY,
2424 &v->dpte_row_width_luma_ub[k],
2425 &v->dpte_row_height[k],
2426 &v->meta_req_width[k],
2427 &v->meta_req_height[k],
2428 &v->meta_row_width[k],
2429 &v->meta_row_height[k],
2430 &v->vm_group_bytes[k],
2431 &v->dpte_group_bytes[k],
2432 &v->PixelPTEReqWidthY[k],
2433 &v->PixelPTEReqHeightY[k],
2434 &v->PTERequestSizeY[k],
2435 &v->dpde0_bytes_per_frame_ub_l[k],
2436 &v->meta_pte_bytes_per_frame_ub_l[k]);
2437
2438 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2439 mode_lib,
2440 v->VRatio[k],
2441 v->vtaps[k],
2442 v->Interlace[k],
2443 v->ProgressiveToInterlaceUnitInOPP,
2444 v->SwathHeightY[k],
2445 v->ViewportYStartY[k],
2446 &v->VInitPreFillY[k],
2447 &v->MaxNumSwathY[k]);
2448 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2449 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2450 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2451
2452 CalculateRowBandwidth(
2453 v->GPUVMEnable,
2454 v->SourcePixelFormat[k],
2455 v->VRatio[k],
2456 v->VRatioChroma[k],
2457 v->DCCEnable[k],
2458 v->HTotal[k] / v->PixelClock[k],
2459 MetaRowByteY,
2460 MetaRowByteC,
2461 v->meta_row_height[k],
2462 v->meta_row_height_chroma[k],
2463 PixelPTEBytesPerRowY,
2464 PixelPTEBytesPerRowC,
2465 v->dpte_row_height[k],
2466 v->dpte_row_height_chroma[k],
2467 &v->meta_row_bw[k],
2468 &v->dpte_row_bw[k]);
2469 }
2470
2471 v->TotalDCCActiveDPP = 0;
2472 v->TotalActiveDPP = 0;
2473 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2474 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2475 if (v->DCCEnable[k])
2476 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2477 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2478 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2479 NoChromaPlanes = false;
2480 }
2481
2482 ReorderBytes = v->NumberOfChannels
2483 * dml_max3(
2484 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2485 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2486 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2487
2488 VMDataOnlyReturnBW = dml_min(
2489 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2490 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2491 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2492 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2493
2494 #ifdef __DML_VBA_DEBUG__
2495 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2496 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2497 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2498 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2499 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2500 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2501 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2502 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2503 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2504 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2505 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2506 #endif
2507
2508 if (v->GPUVMEnable && v->HostVMEnable)
2509 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2510
2511 v->UrgentExtraLatency = CalculateExtraLatency(
2512 v->RoundTripPingLatencyCycles,
2513 ReorderBytes,
2514 v->DCFCLK,
2515 v->TotalActiveDPP,
2516 v->PixelChunkSizeInKByte,
2517 v->TotalDCCActiveDPP,
2518 v->MetaChunkSize,
2519 v->ReturnBW,
2520 v->GPUVMEnable,
2521 v->HostVMEnable,
2522 v->NumberOfActivePlanes,
2523 v->DPPPerPlane,
2524 v->dpte_group_bytes,
2525 HostVMInefficiencyFactor,
2526 v->HostVMMinPageSize,
2527 v->HostVMMaxNonCachedPageTableLevels);
2528
2529 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2530
2531 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2532 if (v->BlendingAndTiming[k] == k) {
2533 if (v->WritebackEnable[k] == true) {
2534 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2535 + CalculateWriteBackDelay(
2536 v->WritebackPixelFormat[k],
2537 v->WritebackHRatio[k],
2538 v->WritebackVRatio[k],
2539 v->WritebackVTaps[k],
2540 v->WritebackDestinationWidth[k],
2541 v->WritebackDestinationHeight[k],
2542 v->WritebackSourceHeight[k],
2543 v->HTotal[k]) / v->DISPCLK;
2544 } else
2545 v->WritebackDelay[v->VoltageLevel][k] = 0;
2546 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2547 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2548 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2549 v->WritebackDelay[v->VoltageLevel][k],
2550 v->WritebackLatency
2551 + CalculateWriteBackDelay(
2552 v->WritebackPixelFormat[j],
2553 v->WritebackHRatio[j],
2554 v->WritebackVRatio[j],
2555 v->WritebackVTaps[j],
2556 v->WritebackDestinationWidth[j],
2557 v->WritebackDestinationHeight[j],
2558 v->WritebackSourceHeight[j],
2559 v->HTotal[k]) / v->DISPCLK);
2560 }
2561 }
2562 }
2563 }
2564
2565 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2566 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2567 if (v->BlendingAndTiming[k] == j)
2568 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2569
2570 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2571 v->MaxVStartupLines[k] =
2572 CalculateMaxVStartup(
2573 v->VTotal[k],
2574 v->VActive[k],
2575 v->VBlankNom[k],
2576 v->HTotal[k],
2577 v->PixelClock[k],
2578 v->ProgressiveToInterlaceUnitInOPP,
2579 v->Interlace[k],
2580 v->ip.VBlankNomDefaultUS,
2581 v->WritebackDelay[v->VoltageLevel][k]);
2582
2583 #ifdef __DML_VBA_DEBUG__
2584 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2585 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2586 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2587 #endif
2588 }
2589
2590 v->MaximumMaxVStartupLines = 0;
2591 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2592 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2593
2594 // VBA_DELTA
2595 // We don't really care to iterate between the various prefetch modes
2596 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2597
2598 v->UrgentLatency = CalculateUrgentLatency(
2599 v->UrgentLatencyPixelDataOnly,
2600 v->UrgentLatencyPixelMixedWithVMData,
2601 v->UrgentLatencyVMDataOnly,
2602 v->DoUrgentLatencyAdjustment,
2603 v->UrgentLatencyAdjustmentFabricClockComponent,
2604 v->UrgentLatencyAdjustmentFabricClockReference,
2605 v->FabricClock);
2606
2607 v->FractionOfUrgentBandwidth = 0.0;
2608 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2609
2610 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2611
2612 do {
2613 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2614 bool DestinationLineTimesForPrefetchLessThan2 = false;
2615 bool VRatioPrefetchMoreThan4 = false;
2616 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2617
2618 MaxTotalRDBandwidth = 0;
2619
2620 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2621
2622 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2623 Pipe myPipe;
2624
2625 myPipe.DPPCLK = v->DPPCLK[k];
2626 myPipe.DISPCLK = v->DISPCLK;
2627 myPipe.PixelClock = v->PixelClock[k];
2628 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2629 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2630 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2631 myPipe.VRatio = v->VRatio[k];
2632 myPipe.VRatioChroma = v->VRatioChroma[k];
2633 myPipe.SourceScan = v->SourceScan[k];
2634 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2635 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2636 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2637 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2638 myPipe.InterlaceEnable = v->Interlace[k];
2639 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2640 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2641 myPipe.HTotal = v->HTotal[k];
2642 myPipe.DCCEnable = v->DCCEnable[k];
2643 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2644 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2645 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2646 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2647 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2648 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2649 v->ErrorResult[k] = CalculatePrefetchSchedule(
2650 mode_lib,
2651 HostVMInefficiencyFactor,
2652 &myPipe,
2653 v->DSCDelay[k],
2654 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2655 v->DPPCLKDelaySCL,
2656 v->DPPCLKDelaySCLLBOnly,
2657 v->DPPCLKDelayCNVCCursor,
2658 v->DISPCLKDelaySubtotal,
2659 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2660 v->OutputFormat[k],
2661 v->MaxInterDCNTileRepeaters,
2662 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2663 v->MaxVStartupLines[k],
2664 v->GPUVMMaxPageTableLevels,
2665 v->GPUVMEnable,
2666 v->HostVMEnable,
2667 v->HostVMMaxNonCachedPageTableLevels,
2668 v->HostVMMinPageSize,
2669 v->DynamicMetadataEnable[k],
2670 v->DynamicMetadataVMEnabled,
2671 v->DynamicMetadataLinesBeforeActiveRequired[k],
2672 v->DynamicMetadataTransmittedBytes[k],
2673 v->UrgentLatency,
2674 v->UrgentExtraLatency,
2675 v->TCalc,
2676 v->PDEAndMetaPTEBytesFrame[k],
2677 v->MetaRowByte[k],
2678 v->PixelPTEBytesPerRow[k],
2679 v->PrefetchSourceLinesY[k],
2680 v->SwathWidthY[k],
2681 v->VInitPreFillY[k],
2682 v->MaxNumSwathY[k],
2683 v->PrefetchSourceLinesC[k],
2684 v->SwathWidthC[k],
2685 v->VInitPreFillC[k],
2686 v->MaxNumSwathC[k],
2687 v->swath_width_luma_ub[k],
2688 v->swath_width_chroma_ub[k],
2689 v->SwathHeightY[k],
2690 v->SwathHeightC[k],
2691 TWait,
2692 &v->DSTXAfterScaler[k],
2693 &v->DSTYAfterScaler[k],
2694 &v->DestinationLinesForPrefetch[k],
2695 &v->PrefetchBandwidth[k],
2696 &v->DestinationLinesToRequestVMInVBlank[k],
2697 &v->DestinationLinesToRequestRowInVBlank[k],
2698 &v->VRatioPrefetchY[k],
2699 &v->VRatioPrefetchC[k],
2700 &v->RequiredPrefetchPixDataBWLuma[k],
2701 &v->RequiredPrefetchPixDataBWChroma[k],
2702 &v->NotEnoughTimeForDynamicMetadata[k],
2703 &v->Tno_bw[k],
2704 &v->prefetch_vmrow_bw[k],
2705 &v->Tdmdl_vm[k],
2706 &v->Tdmdl[k],
2707 &v->TSetup[k],
2708 &v->VUpdateOffsetPix[k],
2709 &v->VUpdateWidthPix[k],
2710 &v->VReadyOffsetPix[k]);
2711
2712 #ifdef __DML_VBA_DEBUG__
2713 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2714 #endif
2715 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2716 }
2717
2718 v->NoEnoughUrgentLatencyHiding = false;
2719 v->NoEnoughUrgentLatencyHidingPre = false;
2720
2721 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2722 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2723 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2724 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2725 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2726
2727 CalculateUrgentBurstFactor(
2728 v->swath_width_luma_ub[k],
2729 v->swath_width_chroma_ub[k],
2730 v->SwathHeightY[k],
2731 v->SwathHeightC[k],
2732 v->HTotal[k] / v->PixelClock[k],
2733 v->UrgentLatency,
2734 v->CursorBufferSize,
2735 v->CursorWidth[k][0],
2736 v->CursorBPP[k][0],
2737 v->VRatio[k],
2738 v->VRatioChroma[k],
2739 v->BytePerPixelDETY[k],
2740 v->BytePerPixelDETC[k],
2741 v->DETBufferSizeY[k],
2742 v->DETBufferSizeC[k],
2743 &v->UrgBurstFactorCursor[k],
2744 &v->UrgBurstFactorLuma[k],
2745 &v->UrgBurstFactorChroma[k],
2746 &v->NoUrgentLatencyHiding[k]);
2747
2748 CalculateUrgentBurstFactor(
2749 v->swath_width_luma_ub[k],
2750 v->swath_width_chroma_ub[k],
2751 v->SwathHeightY[k],
2752 v->SwathHeightC[k],
2753 v->HTotal[k] / v->PixelClock[k],
2754 v->UrgentLatency,
2755 v->CursorBufferSize,
2756 v->CursorWidth[k][0],
2757 v->CursorBPP[k][0],
2758 v->VRatioPrefetchY[k],
2759 v->VRatioPrefetchC[k],
2760 v->BytePerPixelDETY[k],
2761 v->BytePerPixelDETC[k],
2762 v->DETBufferSizeY[k],
2763 v->DETBufferSizeC[k],
2764 &v->UrgBurstFactorCursorPre[k],
2765 &v->UrgBurstFactorLumaPre[k],
2766 &v->UrgBurstFactorChromaPre[k],
2767 &v->NoUrgentLatencyHidingPre[k]);
2768
2769 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2770 + dml_max3(
2771 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2772 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2773 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2774 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2775 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2776 v->DPPPerPlane[k]
2777 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2778 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2779 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2780
2781 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2782 + dml_max3(
2783 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2784 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2785 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2786 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2787 + v->cursor_bw_pre[k]);
2788
2789 #ifdef __DML_VBA_DEBUG__
2790 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2791 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2792 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2793 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2794 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2795
2796 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2797 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2798
2799 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2800 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2801 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2802 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2803 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2804 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2805 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2806 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2807 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2808 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2809 #endif
2810
2811 if (v->DestinationLinesForPrefetch[k] < 2)
2812 DestinationLineTimesForPrefetchLessThan2 = true;
2813
2814 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2815 VRatioPrefetchMoreThan4 = true;
2816
2817 if (v->NoUrgentLatencyHiding[k] == true)
2818 v->NoEnoughUrgentLatencyHiding = true;
2819
2820 if (v->NoUrgentLatencyHidingPre[k] == true)
2821 v->NoEnoughUrgentLatencyHidingPre = true;
2822 }
2823
2824 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2825
2826 #ifdef __DML_VBA_DEBUG__
2827 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2828 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2829 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2830 #endif
2831
2832 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2833 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2834 v->PrefetchModeSupported = true;
2835 else {
2836 v->PrefetchModeSupported = false;
2837 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2838 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2839 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2840 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2841 }
2842
2843 // PREVIOUS_ERROR
2844 // This error result check was done after the PrefetchModeSupported. So we will
2845 // still try to calculate flip schedule even prefetch mode not supported
2846 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2847 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2848 v->PrefetchModeSupported = false;
2849 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2850 }
2851 }
2852
2853 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2854 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2855 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2856 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2857 - dml_max(
2858 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2859 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2860 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2861 v->DPPPerPlane[k]
2862 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2863 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2864 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2865 }
2866
2867 v->TotImmediateFlipBytes = 0;
2868 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2869 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2870 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2871 }
2872 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2873 CalculateFlipSchedule(
2874 mode_lib,
2875 k,
2876 HostVMInefficiencyFactor,
2877 v->UrgentExtraLatency,
2878 v->UrgentLatency,
2879 v->PDEAndMetaPTEBytesFrame[k],
2880 v->MetaRowByte[k],
2881 v->PixelPTEBytesPerRow[k]);
2882 }
2883
2884 v->total_dcn_read_bw_with_flip = 0.0;
2885 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2886 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2887 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2888 + dml_max3(
2889 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2890 v->DPPPerPlane[k] * v->final_flip_bw[k]
2891 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2892 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2893 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2894 v->DPPPerPlane[k]
2895 * (v->final_flip_bw[k]
2896 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2897 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2898 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2899 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2900 + dml_max3(
2901 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2902 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2903 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2904 v->DPPPerPlane[k]
2905 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2906 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2907 }
2908 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2909
2910 v->ImmediateFlipSupported = true;
2911 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2912 #ifdef __DML_VBA_DEBUG__
2913 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2914 #endif
2915 v->ImmediateFlipSupported = false;
2916 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2917 }
2918 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2919 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2920 #ifdef __DML_VBA_DEBUG__
2921 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2922 #endif
2923 v->ImmediateFlipSupported = false;
2924 }
2925 }
2926 } else {
2927 v->ImmediateFlipSupported = false;
2928 }
2929
2930 v->PrefetchAndImmediateFlipSupported =
2931 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2932 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2933 v->ImmediateFlipSupported)) ? true : false;
2934 #ifdef __DML_VBA_DEBUG__
2935 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2936 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2937 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2938 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2939 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2940 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2941 #endif
2942 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2943
2944 v->VStartupLines = v->VStartupLines + 1;
2945 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2946 ASSERT(v->PrefetchAndImmediateFlipSupported);
2947
2948 // Unbounded Request Enabled
2949 CalculateUnboundedRequestAndCompressedBufferSize(
2950 v->DETBufferSizeInKByte[0],
2951 v->ConfigReturnBufferSizeInKByte,
2952 v->UseUnboundedRequesting,
2953 v->TotalActiveDPP,
2954 NoChromaPlanes,
2955 v->MaxNumDPP,
2956 v->CompressedBufferSegmentSizeInkByte,
2957 v->Output,
2958 &v->UnboundedRequestEnabled,
2959 &v->CompressedBufferSizeInkByte);
2960
2961 //Watermarks and NB P-State/DRAM Clock Change Support
2962 {
2963 enum clock_change_support DRAMClockChangeSupport; // dummy
2964
2965 CalculateWatermarksAndDRAMSpeedChangeSupport(
2966 mode_lib,
2967 PrefetchMode,
2968 v->DCFCLK,
2969 v->ReturnBW,
2970 v->UrgentLatency,
2971 v->UrgentExtraLatency,
2972 v->SOCCLK,
2973 v->DCFCLKDeepSleep,
2974 v->DETBufferSizeY,
2975 v->DETBufferSizeC,
2976 v->SwathHeightY,
2977 v->SwathHeightC,
2978 v->SwathWidthY,
2979 v->SwathWidthC,
2980 v->DPPPerPlane,
2981 v->BytePerPixelDETY,
2982 v->BytePerPixelDETC,
2983 v->UnboundedRequestEnabled,
2984 v->CompressedBufferSizeInkByte,
2985 &DRAMClockChangeSupport,
2986 &v->StutterExitWatermark,
2987 &v->StutterEnterPlusExitWatermark,
2988 &v->Z8StutterExitWatermark,
2989 &v->Z8StutterEnterPlusExitWatermark);
2990
2991 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2992 if (v->WritebackEnable[k] == true) {
2993 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2994 0,
2995 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2996 } else {
2997 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2998 }
2999 }
3000 }
3001
3002 //Display Pipeline Delivery Time in Prefetch, Groups
3003 CalculatePixelDeliveryTimes(
3004 v->NumberOfActivePlanes,
3005 v->VRatio,
3006 v->VRatioChroma,
3007 v->VRatioPrefetchY,
3008 v->VRatioPrefetchC,
3009 v->swath_width_luma_ub,
3010 v->swath_width_chroma_ub,
3011 v->DPPPerPlane,
3012 v->HRatio,
3013 v->HRatioChroma,
3014 v->PixelClock,
3015 v->PSCL_THROUGHPUT_LUMA,
3016 v->PSCL_THROUGHPUT_CHROMA,
3017 v->DPPCLK,
3018 v->BytePerPixelC,
3019 v->SourceScan,
3020 v->NumberOfCursors,
3021 v->CursorWidth,
3022 v->CursorBPP,
3023 v->BlockWidth256BytesY,
3024 v->BlockHeight256BytesY,
3025 v->BlockWidth256BytesC,
3026 v->BlockHeight256BytesC,
3027 v->DisplayPipeLineDeliveryTimeLuma,
3028 v->DisplayPipeLineDeliveryTimeChroma,
3029 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3030 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3031 v->DisplayPipeRequestDeliveryTimeLuma,
3032 v->DisplayPipeRequestDeliveryTimeChroma,
3033 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3034 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3035 v->CursorRequestDeliveryTime,
3036 v->CursorRequestDeliveryTimePrefetch);
3037
3038 CalculateMetaAndPTETimes(
3039 v->NumberOfActivePlanes,
3040 v->GPUVMEnable,
3041 v->MetaChunkSize,
3042 v->MinMetaChunkSizeBytes,
3043 v->HTotal,
3044 v->VRatio,
3045 v->VRatioChroma,
3046 v->DestinationLinesToRequestRowInVBlank,
3047 v->DestinationLinesToRequestRowInImmediateFlip,
3048 v->DCCEnable,
3049 v->PixelClock,
3050 v->BytePerPixelY,
3051 v->BytePerPixelC,
3052 v->SourceScan,
3053 v->dpte_row_height,
3054 v->dpte_row_height_chroma,
3055 v->meta_row_width,
3056 v->meta_row_width_chroma,
3057 v->meta_row_height,
3058 v->meta_row_height_chroma,
3059 v->meta_req_width,
3060 v->meta_req_width_chroma,
3061 v->meta_req_height,
3062 v->meta_req_height_chroma,
3063 v->dpte_group_bytes,
3064 v->PTERequestSizeY,
3065 v->PTERequestSizeC,
3066 v->PixelPTEReqWidthY,
3067 v->PixelPTEReqHeightY,
3068 v->PixelPTEReqWidthC,
3069 v->PixelPTEReqHeightC,
3070 v->dpte_row_width_luma_ub,
3071 v->dpte_row_width_chroma_ub,
3072 v->DST_Y_PER_PTE_ROW_NOM_L,
3073 v->DST_Y_PER_PTE_ROW_NOM_C,
3074 v->DST_Y_PER_META_ROW_NOM_L,
3075 v->DST_Y_PER_META_ROW_NOM_C,
3076 v->TimePerMetaChunkNominal,
3077 v->TimePerChromaMetaChunkNominal,
3078 v->TimePerMetaChunkVBlank,
3079 v->TimePerChromaMetaChunkVBlank,
3080 v->TimePerMetaChunkFlip,
3081 v->TimePerChromaMetaChunkFlip,
3082 v->time_per_pte_group_nom_luma,
3083 v->time_per_pte_group_vblank_luma,
3084 v->time_per_pte_group_flip_luma,
3085 v->time_per_pte_group_nom_chroma,
3086 v->time_per_pte_group_vblank_chroma,
3087 v->time_per_pte_group_flip_chroma);
3088
3089 CalculateVMGroupAndRequestTimes(
3090 v->NumberOfActivePlanes,
3091 v->GPUVMEnable,
3092 v->GPUVMMaxPageTableLevels,
3093 v->HTotal,
3094 v->BytePerPixelC,
3095 v->DestinationLinesToRequestVMInVBlank,
3096 v->DestinationLinesToRequestVMInImmediateFlip,
3097 v->DCCEnable,
3098 v->PixelClock,
3099 v->dpte_row_width_luma_ub,
3100 v->dpte_row_width_chroma_ub,
3101 v->vm_group_bytes,
3102 v->dpde0_bytes_per_frame_ub_l,
3103 v->dpde0_bytes_per_frame_ub_c,
3104 v->meta_pte_bytes_per_frame_ub_l,
3105 v->meta_pte_bytes_per_frame_ub_c,
3106 v->TimePerVMGroupVBlank,
3107 v->TimePerVMGroupFlip,
3108 v->TimePerVMRequestVBlank,
3109 v->TimePerVMRequestFlip);
3110
3111 // Min TTUVBlank
3112 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3113 if (PrefetchMode == 0) {
3114 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3115 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3116 v->MinTTUVBlank[k] = dml_max(
3117 v->DRAMClockChangeWatermark,
3118 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3119 } else if (PrefetchMode == 1) {
3120 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3121 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3122 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3123 } else {
3124 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3125 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3126 v->MinTTUVBlank[k] = v->UrgentWatermark;
3127 }
3128 if (!v->DynamicMetadataEnable[k])
3129 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3130 }
3131
3132 // DCC Configuration
3133 v->ActiveDPPs = 0;
3134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3135 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3136 v->SourcePixelFormat[k],
3137 v->SurfaceWidthY[k],
3138 v->SurfaceWidthC[k],
3139 v->SurfaceHeightY[k],
3140 v->SurfaceHeightC[k],
3141 v->DETBufferSizeInKByte[0] * 1024,
3142 v->BlockHeight256BytesY[k],
3143 v->BlockHeight256BytesC[k],
3144 v->SurfaceTiling[k],
3145 v->BytePerPixelY[k],
3146 v->BytePerPixelC[k],
3147 v->BytePerPixelDETY[k],
3148 v->BytePerPixelDETC[k],
3149 v->SourceScan[k],
3150 &v->DCCYMaxUncompressedBlock[k],
3151 &v->DCCCMaxUncompressedBlock[k],
3152 &v->DCCYMaxCompressedBlock[k],
3153 &v->DCCCMaxCompressedBlock[k],
3154 &v->DCCYIndependentBlock[k],
3155 &v->DCCCIndependentBlock[k]);
3156 }
3157
3158 // VStartup Adjustment
3159 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3160 bool isInterlaceTiming;
3161 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3162 #ifdef __DML_VBA_DEBUG__
3163 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3164 #endif
3165
3166 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3167
3168 #ifdef __DML_VBA_DEBUG__
3169 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3170 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3171 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3172 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3173 #endif
3174
3175 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3176 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3177 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3178 }
3179
3180 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3181 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3182 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3183 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0);
3184 } else {
3185 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3186 }
3187 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / (double)v->HTotal[k] / v->PixelClock[k], 1.0) / 4.0;
3188 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3189 <= (isInterlaceTiming ?
3190 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3191 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3192 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3193 } else {
3194 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3195 }
3196 #ifdef __DML_VBA_DEBUG__
3197 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3198 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3199 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3200 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3201 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3202 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3203 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3204 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3205 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3206 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3207 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3208 #endif
3209 }
3210
3211 {
3212 //Maximum Bandwidth Used
3213 double TotalWRBandwidth = 0;
3214 double MaxPerPlaneVActiveWRBandwidth = 0;
3215 double WRBandwidth = 0;
3216
3217 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3218 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3219 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3220 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3221 } else if (v->WritebackEnable[k] == true) {
3222 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3223 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3224 }
3225 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3226 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3227 }
3228
3229 v->TotalDataReadBandwidth = 0;
3230 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3231 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3232 }
3233 }
3234 // Stutter Efficiency
3235 CalculateStutterEfficiency(
3236 mode_lib,
3237 v->CompressedBufferSizeInkByte,
3238 v->UnboundedRequestEnabled,
3239 v->ConfigReturnBufferSizeInKByte,
3240 v->MetaFIFOSizeInKEntries,
3241 v->ZeroSizeBufferEntries,
3242 v->NumberOfActivePlanes,
3243 v->ROBBufferSizeInKByte,
3244 v->TotalDataReadBandwidth,
3245 v->DCFCLK,
3246 v->ReturnBW,
3247 v->COMPBUF_RESERVED_SPACE_64B,
3248 v->COMPBUF_RESERVED_SPACE_ZS,
3249 v->SRExitTime,
3250 v->SRExitZ8Time,
3251 v->SynchronizedVBlank,
3252 v->StutterEnterPlusExitWatermark,
3253 v->Z8StutterEnterPlusExitWatermark,
3254 v->ProgressiveToInterlaceUnitInOPP,
3255 v->Interlace,
3256 v->MinTTUVBlank,
3257 v->DPPPerPlane,
3258 v->DETBufferSizeY,
3259 v->BytePerPixelY,
3260 v->BytePerPixelDETY,
3261 v->SwathWidthY,
3262 v->SwathHeightY,
3263 v->SwathHeightC,
3264 v->DCCRateLuma,
3265 v->DCCRateChroma,
3266 v->DCCFractionOfZeroSizeRequestsLuma,
3267 v->DCCFractionOfZeroSizeRequestsChroma,
3268 v->HTotal,
3269 v->VTotal,
3270 v->PixelClock,
3271 v->VRatio,
3272 v->SourceScan,
3273 v->BlockHeight256BytesY,
3274 v->BlockWidth256BytesY,
3275 v->BlockHeight256BytesC,
3276 v->BlockWidth256BytesC,
3277 v->DCCYMaxUncompressedBlock,
3278 v->DCCCMaxUncompressedBlock,
3279 v->VActive,
3280 v->DCCEnable,
3281 v->WritebackEnable,
3282 v->ReadBandwidthPlaneLuma,
3283 v->ReadBandwidthPlaneChroma,
3284 v->meta_row_bw,
3285 v->dpte_row_bw,
3286 &v->StutterEfficiencyNotIncludingVBlank,
3287 &v->StutterEfficiency,
3288 &v->NumberOfStutterBurstsPerFrame,
3289 &v->Z8StutterEfficiencyNotIncludingVBlank,
3290 &v->Z8StutterEfficiency,
3291 &v->Z8NumberOfStutterBurstsPerFrame,
3292 &v->StutterPeriod);
3293 }
3294
3295 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3296 {
3297 struct vba_vars_st *v = &mode_lib->vba;
3298 // Display Pipe Configuration
3299 double BytePerPixDETY[DC__NUM_DPP__MAX];
3300 double BytePerPixDETC[DC__NUM_DPP__MAX];
3301 int BytePerPixY[DC__NUM_DPP__MAX];
3302 int BytePerPixC[DC__NUM_DPP__MAX];
3303 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3304 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3305 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3306 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3307 double dummy1[DC__NUM_DPP__MAX];
3308 double dummy2[DC__NUM_DPP__MAX];
3309 double dummy3[DC__NUM_DPP__MAX];
3310 double dummy4[DC__NUM_DPP__MAX];
3311 int dummy5[DC__NUM_DPP__MAX];
3312 int dummy6[DC__NUM_DPP__MAX];
3313 bool dummy7[DC__NUM_DPP__MAX];
3314 bool dummysinglestring;
3315
3316 unsigned int k;
3317
3318 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3319
3320 CalculateBytePerPixelAnd256BBlockSizes(
3321 v->SourcePixelFormat[k],
3322 v->SurfaceTiling[k],
3323 &BytePerPixY[k],
3324 &BytePerPixC[k],
3325 &BytePerPixDETY[k],
3326 &BytePerPixDETC[k],
3327 &Read256BytesBlockHeightY[k],
3328 &Read256BytesBlockHeightC[k],
3329 &Read256BytesBlockWidthY[k],
3330 &Read256BytesBlockWidthC[k]);
3331 }
3332
3333 CalculateSwathAndDETConfiguration(
3334 false,
3335 v->NumberOfActivePlanes,
3336 v->DETBufferSizeInKByte[0],
3337 dummy1,
3338 dummy2,
3339 v->SourceScan,
3340 v->SourcePixelFormat,
3341 v->SurfaceTiling,
3342 v->ViewportWidth,
3343 v->ViewportHeight,
3344 v->SurfaceWidthY,
3345 v->SurfaceWidthC,
3346 v->SurfaceHeightY,
3347 v->SurfaceHeightC,
3348 Read256BytesBlockHeightY,
3349 Read256BytesBlockHeightC,
3350 Read256BytesBlockWidthY,
3351 Read256BytesBlockWidthC,
3352 v->ODMCombineEnabled,
3353 v->BlendingAndTiming,
3354 BytePerPixY,
3355 BytePerPixC,
3356 BytePerPixDETY,
3357 BytePerPixDETC,
3358 v->HActive,
3359 v->HRatio,
3360 v->HRatioChroma,
3361 v->DPPPerPlane,
3362 dummy5,
3363 dummy6,
3364 dummy3,
3365 dummy4,
3366 v->SwathHeightY,
3367 v->SwathHeightC,
3368 v->DETBufferSizeY,
3369 v->DETBufferSizeC,
3370 dummy7,
3371 &dummysinglestring);
3372 }
3373
3374 static bool CalculateBytePerPixelAnd256BBlockSizes(
3375 enum source_format_class SourcePixelFormat,
3376 enum dm_swizzle_mode SurfaceTiling,
3377 unsigned int *BytePerPixelY,
3378 unsigned int *BytePerPixelC,
3379 double *BytePerPixelDETY,
3380 double *BytePerPixelDETC,
3381 unsigned int *BlockHeight256BytesY,
3382 unsigned int *BlockHeight256BytesC,
3383 unsigned int *BlockWidth256BytesY,
3384 unsigned int *BlockWidth256BytesC)
3385 {
3386 if (SourcePixelFormat == dm_444_64) {
3387 *BytePerPixelDETY = 8;
3388 *BytePerPixelDETC = 0;
3389 *BytePerPixelY = 8;
3390 *BytePerPixelC = 0;
3391 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3392 *BytePerPixelDETY = 4;
3393 *BytePerPixelDETC = 0;
3394 *BytePerPixelY = 4;
3395 *BytePerPixelC = 0;
3396 } else if (SourcePixelFormat == dm_444_16) {
3397 *BytePerPixelDETY = 2;
3398 *BytePerPixelDETC = 0;
3399 *BytePerPixelY = 2;
3400 *BytePerPixelC = 0;
3401 } else if (SourcePixelFormat == dm_444_8) {
3402 *BytePerPixelDETY = 1;
3403 *BytePerPixelDETC = 0;
3404 *BytePerPixelY = 1;
3405 *BytePerPixelC = 0;
3406 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3407 *BytePerPixelDETY = 4;
3408 *BytePerPixelDETC = 1;
3409 *BytePerPixelY = 4;
3410 *BytePerPixelC = 1;
3411 } else if (SourcePixelFormat == dm_420_8) {
3412 *BytePerPixelDETY = 1;
3413 *BytePerPixelDETC = 2;
3414 *BytePerPixelY = 1;
3415 *BytePerPixelC = 2;
3416 } else if (SourcePixelFormat == dm_420_12) {
3417 *BytePerPixelDETY = 2;
3418 *BytePerPixelDETC = 4;
3419 *BytePerPixelY = 2;
3420 *BytePerPixelC = 4;
3421 } else {
3422 *BytePerPixelDETY = 4.0 / 3;
3423 *BytePerPixelDETC = 8.0 / 3;
3424 *BytePerPixelY = 2;
3425 *BytePerPixelC = 4;
3426 }
3427
3428 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3429 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3430 if (SurfaceTiling == dm_sw_linear) {
3431 *BlockHeight256BytesY = 1;
3432 } else if (SourcePixelFormat == dm_444_64) {
3433 *BlockHeight256BytesY = 4;
3434 } else if (SourcePixelFormat == dm_444_8) {
3435 *BlockHeight256BytesY = 16;
3436 } else {
3437 *BlockHeight256BytesY = 8;
3438 }
3439 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3440 *BlockHeight256BytesC = 0;
3441 *BlockWidth256BytesC = 0;
3442 } else {
3443 if (SurfaceTiling == dm_sw_linear) {
3444 *BlockHeight256BytesY = 1;
3445 *BlockHeight256BytesC = 1;
3446 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3447 *BlockHeight256BytesY = 8;
3448 *BlockHeight256BytesC = 16;
3449 } else if (SourcePixelFormat == dm_420_8) {
3450 *BlockHeight256BytesY = 16;
3451 *BlockHeight256BytesC = 8;
3452 } else {
3453 *BlockHeight256BytesY = 8;
3454 *BlockHeight256BytesC = 8;
3455 }
3456 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3457 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3458 }
3459 return true;
3460 }
3461
/*
 * CalculateTWait - wait time selected by prefetch mode.
 *
 *   mode 0:    max(DRAMClockChangeLatency + UrgentLatency,
 *                  SREnterPlusExitTime, UrgentLatency)
 *   mode 1:    max(SREnterPlusExitTime, UrgentLatency)
 *   otherwise: UrgentLatency
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3472
3473 double dml314_CalculateWriteBackDISPCLK(
3474 enum source_format_class WritebackPixelFormat,
3475 double PixelClock,
3476 double WritebackHRatio,
3477 double WritebackVRatio,
3478 unsigned int WritebackHTaps,
3479 unsigned int WritebackVTaps,
3480 long WritebackSourceWidth,
3481 long WritebackDestinationWidth,
3482 unsigned int HTotal,
3483 unsigned int WritebackLineBufferSize)
3484 {
3485 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3486
3487 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3488 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3489 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3490 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3491 }
3492
3493 static double CalculateWriteBackDelay(
3494 enum source_format_class WritebackPixelFormat,
3495 double WritebackHRatio,
3496 double WritebackVRatio,
3497 unsigned int WritebackVTaps,
3498 int WritebackDestinationWidth,
3499 int WritebackDestinationHeight,
3500 int WritebackSourceHeight,
3501 unsigned int HTotal)
3502 {
3503 double CalculateWriteBackDelay;
3504 double Line_length;
3505 double Output_lines_last_notclamped;
3506 double WritebackVInit;
3507
3508 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3509 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3510 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3511 if (Output_lines_last_notclamped < 0) {
3512 CalculateWriteBackDelay = 0;
3513 } else {
3514 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3515 }
3516 return CalculateWriteBackDelay;
3517 }
3518
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE geometry and
 * dynamic-metadata timing terms.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil of (14/DCFClkDeepSleep + 12/DPPCLK + repeater
 *                      delay) scaled by PixelClock
 *   VReadyOffsetPix  - ceil of max(150/DPPCLK, repeater delay +
 *                      20/DCFClkDeepSleep + 10/DPPCLK) scaled by PixelClock
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - sum of the three values above divided by PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - half of the vertical blank time when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that many line times; halved again when
 *                      InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *                      is false
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		/* VBlank * HTotal is an integer product before the division. */
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix point to doubles and therefore
	 * need %f; only VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3562
3563 static void CalculateRowBandwidth(
3564 bool GPUVMEnable,
3565 enum source_format_class SourcePixelFormat,
3566 double VRatio,
3567 double VRatioChroma,
3568 bool DCCEnable,
3569 double LineTime,
3570 unsigned int MetaRowByteLuma,
3571 unsigned int MetaRowByteChroma,
3572 unsigned int meta_row_height_luma,
3573 unsigned int meta_row_height_chroma,
3574 unsigned int PixelPTEBytesPerRowLuma,
3575 unsigned int PixelPTEBytesPerRowChroma,
3576 unsigned int dpte_row_height_luma,
3577 unsigned int dpte_row_height_chroma,
3578 double *meta_row_bw,
3579 double *dpte_row_bw)
3580 {
3581 if (DCCEnable != true) {
3582 *meta_row_bw = 0;
3583 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3584 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3585 } else {
3586 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3587 }
3588
3589 if (GPUVMEnable != true) {
3590 *dpte_row_bw = 0;
3591 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3592 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3593 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3594 } else {
3595 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3596 }
3597 }
3598
3599 static void CalculateFlipSchedule(
3600 struct display_mode_lib *mode_lib,
3601 unsigned int k,
3602 double HostVMInefficiencyFactor,
3603 double UrgentExtraLatency,
3604 double UrgentLatency,
3605 double PDEAndMetaPTEBytesPerFrame,
3606 double MetaRowBytes,
3607 double DPTEBytesPerRow)
3608 {
3609 struct vba_vars_st *v = &mode_lib->vba;
3610 double min_row_time = 0.0;
3611 unsigned int HostVMDynamicLevelsTrips;
3612 double TimeForFetchingMetaPTEImmediateFlip;
3613 double TimeForFetchingRowInVBlankImmediateFlip;
3614 double ImmediateFlipBW;
3615 double LineTime = v->HTotal[k] / v->PixelClock[k];
3616
3617 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3618 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3619 } else {
3620 HostVMDynamicLevelsTrips = 0;
3621 }
3622
3623 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3624 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3625 }
3626
3627 if (v->GPUVMEnable == true) {
3628 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3629 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3630 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3631 LineTime / 4.0);
3632 } else {
3633 TimeForFetchingMetaPTEImmediateFlip = 0;
3634 }
3635
3636 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3637 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3638 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3639 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3640 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3641 LineTime / 4);
3642 } else {
3643 TimeForFetchingRowInVBlankImmediateFlip = 0;
3644 }
3645
3646 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3647
3648 if (v->GPUVMEnable == true) {
3649 v->final_flip_bw[k] = dml_max(
3650 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3651 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3652 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3653 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3654 } else {
3655 v->final_flip_bw[k] = 0;
3656 }
3657
3658 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3659 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3660 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3661 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3662 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3663 } else {
3664 min_row_time = dml_min4(
3665 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3666 v->meta_row_height[k] * LineTime / v->VRatio[k],
3667 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3668 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3669 }
3670 } else {
3671 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3672 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3673 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3674 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3675 } else {
3676 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3677 }
3678 }
3679
3680 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3681 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3682 v->ImmediateFlipSupportedForPipe[k] = false;
3683 } else {
3684 v->ImmediateFlipSupportedForPipe[k] = true;
3685 }
3686
3687 #ifdef __DML_VBA_DEBUG__
3688 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3689 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3690 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3691 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3692 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3693 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3694 #endif
3695
3696 }
3697
3698 static double TruncToValidBPP(
3699 double LinkBitRate,
3700 int Lanes,
3701 int HTotal,
3702 int HActive,
3703 double PixelClock,
3704 double DesiredBPP,
3705 bool DSCEnable,
3706 enum output_encoder_class Output,
3707 enum output_format_class Format,
3708 unsigned int DSCInputBitPerComponent,
3709 int DSCSlices,
3710 int AudioRate,
3711 int AudioLayout,
3712 enum odm_combine_mode ODMCombine)
3713 {
3714 double MaxLinkBPP;
3715 int MinDSCBPP;
3716 double MaxDSCBPP;
3717 int NonDSCBPP0;
3718 int NonDSCBPP1;
3719 int NonDSCBPP2;
3720
3721 if (Format == dm_420) {
3722 NonDSCBPP0 = 12;
3723 NonDSCBPP1 = 15;
3724 NonDSCBPP2 = 18;
3725 MinDSCBPP = 6;
3726 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3727 } else if (Format == dm_444) {
3728 NonDSCBPP0 = 24;
3729 NonDSCBPP1 = 30;
3730 NonDSCBPP2 = 36;
3731 MinDSCBPP = 8;
3732 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3733 } else {
3734
3735 NonDSCBPP0 = 16;
3736 NonDSCBPP1 = 20;
3737 NonDSCBPP2 = 24;
3738
3739 if (Format == dm_n422) {
3740 MinDSCBPP = 7;
3741 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3742 } else {
3743 MinDSCBPP = 8;
3744 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3745 }
3746 }
3747
3748 if (DSCEnable && Output == dm_dp) {
3749 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3750 } else {
3751 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3752 }
3753
3754 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3755 MaxLinkBPP = 16;
3756 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3757 MaxLinkBPP = 32;
3758 }
3759
3760 if (DesiredBPP == 0) {
3761 if (DSCEnable) {
3762 if (MaxLinkBPP < MinDSCBPP) {
3763 return BPP_INVALID;
3764 } else if (MaxLinkBPP >= MaxDSCBPP) {
3765 return MaxDSCBPP;
3766 } else {
3767 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3768 }
3769 } else {
3770 if (MaxLinkBPP >= NonDSCBPP2) {
3771 return NonDSCBPP2;
3772 } else if (MaxLinkBPP >= NonDSCBPP1) {
3773 return NonDSCBPP1;
3774 } else if (MaxLinkBPP >= NonDSCBPP0) {
3775 return 16.0;
3776 } else {
3777 return BPP_INVALID;
3778 }
3779 }
3780 } else {
3781 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3782 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3783 return BPP_INVALID;
3784 } else {
3785 return DesiredBPP;
3786 }
3787 }
3788 return BPP_INVALID;
3789 }
3790
3791 static noinline void CalculatePrefetchSchedulePerPlane(
3792 struct display_mode_lib *mode_lib,
3793 double HostVMInefficiencyFactor,
3794 int i,
3795 unsigned int j,
3796 unsigned int k)
3797 {
3798 struct vba_vars_st *v = &mode_lib->vba;
3799 Pipe myPipe;
3800
3801 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3802 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3803 myPipe.PixelClock = v->PixelClock[k];
3804 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3805 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3806 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3807 myPipe.VRatio = mode_lib->vba.VRatio[k];
3808 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3809
3810 myPipe.SourceScan = v->SourceScan[k];
3811 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3812 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3813 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3814 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3815 myPipe.InterlaceEnable = v->Interlace[k];
3816 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3817 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3818 myPipe.HTotal = v->HTotal[k];
3819 myPipe.DCCEnable = v->DCCEnable[k];
3820 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3821 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3822 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3823 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3824 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3825 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3826 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3827 mode_lib,
3828 HostVMInefficiencyFactor,
3829 &myPipe,
3830 v->DSCDelayPerState[i][k],
3831 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3832 v->DPPCLKDelaySCL,
3833 v->DPPCLKDelaySCLLBOnly,
3834 v->DPPCLKDelayCNVCCursor,
3835 v->DISPCLKDelaySubtotal,
3836 v->SwathWidthYThisState[k] / v->HRatio[k],
3837 v->OutputFormat[k],
3838 v->MaxInterDCNTileRepeaters,
3839 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3840 v->MaximumVStartup[i][j][k],
3841 v->GPUVMMaxPageTableLevels,
3842 v->GPUVMEnable,
3843 v->HostVMEnable,
3844 v->HostVMMaxNonCachedPageTableLevels,
3845 v->HostVMMinPageSize,
3846 v->DynamicMetadataEnable[k],
3847 v->DynamicMetadataVMEnabled,
3848 v->DynamicMetadataLinesBeforeActiveRequired[k],
3849 v->DynamicMetadataTransmittedBytes[k],
3850 v->UrgLatency[i],
3851 v->ExtraLatency,
3852 v->TimeCalc,
3853 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3854 v->MetaRowBytes[i][j][k],
3855 v->DPTEBytesPerRow[i][j][k],
3856 v->PrefetchLinesY[i][j][k],
3857 v->SwathWidthYThisState[k],
3858 v->PrefillY[k],
3859 v->MaxNumSwY[k],
3860 v->PrefetchLinesC[i][j][k],
3861 v->SwathWidthCThisState[k],
3862 v->PrefillC[k],
3863 v->MaxNumSwC[k],
3864 v->swath_width_luma_ub_this_state[k],
3865 v->swath_width_chroma_ub_this_state[k],
3866 v->SwathHeightYThisState[k],
3867 v->SwathHeightCThisState[k],
3868 v->TWait,
3869 &v->DSTXAfterScaler[k],
3870 &v->DSTYAfterScaler[k],
3871 &v->LineTimesForPrefetch[k],
3872 &v->PrefetchBW[k],
3873 &v->LinesForMetaPTE[k],
3874 &v->LinesForMetaAndDPTERow[k],
3875 &v->VRatioPreY[i][j][k],
3876 &v->VRatioPreC[i][j][k],
3877 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3878 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3879 &v->NoTimeForDynamicMetadata[i][j][k],
3880 &v->Tno_bw[k],
3881 &v->prefetch_vmrow_bw[k],
3882 &v->dummy7[k],
3883 &v->dummy8[k],
3884 &v->dummy13[k],
3885 &v->VUpdateOffsetPix[k],
3886 &v->VUpdateWidthPix[k],
3887 &v->VReadyOffsetPix[k]);
3888 }
3889
3890 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3891 {
3892 struct vba_vars_st *v = &mode_lib->vba;
3893
3894 int i, j;
3895 unsigned int k, m;
3896 int ReorderingBytes;
3897 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3898 bool NoChroma = true;
3899 bool EnoughWritebackUnits = true;
3900 bool P2IWith420 = false;
3901 bool DSCOnlyIfNecessaryWithBPP = false;
3902 bool DSC422NativeNotSupported = false;
3903 double MaxTotalVActiveRDBandwidth;
3904 bool ViewportExceedsSurface = false;
3905 bool FMTBufferExceeded = false;
3906
3907 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3908
3909 CalculateMinAndMaxPrefetchMode(
3910 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3911 &MinPrefetchMode, &MaxPrefetchMode);
3912
3913 /*Scale Ratio, taps Support Check*/
3914
3915 v->ScaleRatioAndTapsSupport = true;
3916 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3917 if (v->ScalerEnabled[k] == false
3918 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3919 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3920 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3921 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3922 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3923 v->ScaleRatioAndTapsSupport = false;
3924 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3925 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3926 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3927 || v->VRatio[k] > v->vtaps[k]
3928 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3929 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3930 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3931 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3932 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3933 || v->HRatioChroma[k] > v->MaxHSCLRatio
3934 || v->VRatioChroma[k] > v->MaxVSCLRatio
3935 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3936 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3937 v->ScaleRatioAndTapsSupport = false;
3938 }
3939 }
3940 /*Source Format, Pixel Format and Scan Support Check*/
3941
3942 v->SourceFormatPixelAndScanSupport = true;
3943 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3944 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3945 v->SourceFormatPixelAndScanSupport = false;
3946 }
3947 }
3948 /*Bandwidth Support Check*/
3949
3950 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3951 CalculateBytePerPixelAnd256BBlockSizes(
3952 v->SourcePixelFormat[k],
3953 v->SurfaceTiling[k],
3954 &v->BytePerPixelY[k],
3955 &v->BytePerPixelC[k],
3956 &v->BytePerPixelInDETY[k],
3957 &v->BytePerPixelInDETC[k],
3958 &v->Read256BlockHeightY[k],
3959 &v->Read256BlockHeightC[k],
3960 &v->Read256BlockWidthY[k],
3961 &v->Read256BlockWidthC[k]);
3962 }
3963 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3964 if (v->SourceScan[k] != dm_vert) {
3965 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3966 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3967 } else {
3968 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3969 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3970 }
3971 }
3972 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3973 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3974 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3975 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3976 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3977 }
3978 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3979 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3980 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3981 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3982 } else if (v->WritebackEnable[k] == true) {
3983 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3984 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3985 } else {
3986 v->WriteBandwidth[k] = 0.0;
3987 }
3988 }
3989
3990 /*Writeback Latency support check*/
3991
3992 v->WritebackLatencySupport = true;
3993 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3994 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3995 v->WritebackLatencySupport = false;
3996 }
3997 }
3998
3999 /*Writeback Mode Support Check*/
4000
4001 v->TotalNumberOfActiveWriteback = 0;
4002 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4003 if (v->WritebackEnable[k] == true) {
4004 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4005 }
4006 }
4007
4008 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4009 EnoughWritebackUnits = false;
4010 }
4011
4012 /*Writeback Scale Ratio and Taps Support Check*/
4013
4014 v->WritebackScaleRatioAndTapsSupport = true;
4015 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4016 if (v->WritebackEnable[k] == true) {
4017 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4018 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4019 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4020 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4021 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4022 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4023 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4024 v->WritebackScaleRatioAndTapsSupport = false;
4025 }
4026 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4027 v->WritebackScaleRatioAndTapsSupport = false;
4028 }
4029 }
4030 }
4031 /*Maximum DISPCLK/DPPCLK Support check*/
4032
4033 v->WritebackRequiredDISPCLK = 0.0;
4034 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4035 if (v->WritebackEnable[k] == true) {
4036 v->WritebackRequiredDISPCLK = dml_max(
4037 v->WritebackRequiredDISPCLK,
4038 dml314_CalculateWriteBackDISPCLK(
4039 v->WritebackPixelFormat[k],
4040 v->PixelClock[k],
4041 v->WritebackHRatio[k],
4042 v->WritebackVRatio[k],
4043 v->WritebackHTaps[k],
4044 v->WritebackVTaps[k],
4045 v->WritebackSourceWidth[k],
4046 v->WritebackDestinationWidth[k],
4047 v->HTotal[k],
4048 v->WritebackLineBufferSize));
4049 }
4050 }
4051 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4052 if (v->HRatio[k] > 1.0) {
4053 v->PSCL_FACTOR[k] = dml_min(
4054 v->MaxDCHUBToPSCLThroughput,
4055 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4056 } else {
4057 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4058 }
4059 if (v->BytePerPixelC[k] == 0.0) {
4060 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4061 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4062 * dml_max3(
4063 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4064 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4065 1.0);
4066 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4067 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4068 }
4069 } else {
4070 if (v->HRatioChroma[k] > 1.0) {
4071 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4072 v->MaxDCHUBToPSCLThroughput,
4073 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4074 } else {
4075 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4076 }
4077 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4078 * dml_max5(
4079 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4080 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4081 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4082 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4083 1.0);
4084 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4085 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4086 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4087 }
4088 }
4089 }
4090 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4091 int MaximumSwathWidthSupportLuma;
4092 int MaximumSwathWidthSupportChroma;
4093
4094 if (v->SurfaceTiling[k] == dm_sw_linear) {
4095 MaximumSwathWidthSupportLuma = 8192.0;
4096 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4097 MaximumSwathWidthSupportLuma = 2880.0;
4098 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4099 MaximumSwathWidthSupportLuma = 3840.0;
4100 } else {
4101 MaximumSwathWidthSupportLuma = 5760.0;
4102 }
4103
4104 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4105 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4106 } else {
4107 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4108 }
4109 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4110 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4111 if (v->BytePerPixelC[k] == 0.0) {
4112 v->MaximumSwathWidthInLineBufferChroma = 0;
4113 } else {
4114 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4115 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4116 }
4117 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4118 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4119 }
4120
4121 CalculateSwathAndDETConfiguration(
4122 true,
4123 v->NumberOfActivePlanes,
4124 v->DETBufferSizeInKByte[0],
4125 v->MaximumSwathWidthLuma,
4126 v->MaximumSwathWidthChroma,
4127 v->SourceScan,
4128 v->SourcePixelFormat,
4129 v->SurfaceTiling,
4130 v->ViewportWidth,
4131 v->ViewportHeight,
4132 v->SurfaceWidthY,
4133 v->SurfaceWidthC,
4134 v->SurfaceHeightY,
4135 v->SurfaceHeightC,
4136 v->Read256BlockHeightY,
4137 v->Read256BlockHeightC,
4138 v->Read256BlockWidthY,
4139 v->Read256BlockWidthC,
4140 v->odm_combine_dummy,
4141 v->BlendingAndTiming,
4142 v->BytePerPixelY,
4143 v->BytePerPixelC,
4144 v->BytePerPixelInDETY,
4145 v->BytePerPixelInDETC,
4146 v->HActive,
4147 v->HRatio,
4148 v->HRatioChroma,
4149 v->NoOfDPPThisState,
4150 v->swath_width_luma_ub_this_state,
4151 v->swath_width_chroma_ub_this_state,
4152 v->SwathWidthYThisState,
4153 v->SwathWidthCThisState,
4154 v->SwathHeightYThisState,
4155 v->SwathHeightCThisState,
4156 v->DETBufferSizeYThisState,
4157 v->DETBufferSizeCThisState,
4158 v->SingleDPPViewportSizeSupportPerPlane,
4159 &v->ViewportSizeSupport[0][0]);
4160
4161 for (i = 0; i < v->soc.num_states; i++) {
4162 for (j = 0; j < 2; j++) {
4163 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4164 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4165 v->RequiredDISPCLK[i][j] = 0.0;
4166 v->DISPCLK_DPPCLK_Support[i][j] = true;
4167 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4168 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4169 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4170 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4171 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4172 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4173 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4174 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4175 }
4176 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4177 * (1 + v->DISPCLKRampingMargin / 100.0);
4178 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4179 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4180 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4181 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4182 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4183 }
4184 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4185 * (1 + v->DISPCLKRampingMargin / 100.0);
4186 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4187 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4188 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4189 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4190 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4191 }
4192
4193 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4194 || !(v->Output[k] == dm_dp ||
4195 v->Output[k] == dm_dp2p0 ||
4196 v->Output[k] == dm_edp)) {
4197 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4198 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4199
4200 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4201 FMTBufferExceeded = true;
4202 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4203 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4204 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4205 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4206 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4207 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4208 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4209 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4211 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4212 } else {
4213 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4214 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4215 }
4216 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4217 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4218 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4219 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4220 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4221 } else {
4222 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4223 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4224 }
4225 }
4226 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4227 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4228 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4229 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4230 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4231
4232 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4233 FMTBufferExceeded = true;
4234 } else {
4235 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4236 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4237 }
4238 }
4239 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4240 v->MPCCombine[i][j][k] = false;
4241 v->NoOfDPP[i][j][k] = 4;
4242 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4243 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4244 v->MPCCombine[i][j][k] = false;
4245 v->NoOfDPP[i][j][k] = 2;
4246 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4247 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4248 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4249 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4250 v->MPCCombine[i][j][k] = false;
4251 v->NoOfDPP[i][j][k] = 1;
4252 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4253 } else {
4254 v->MPCCombine[i][j][k] = true;
4255 v->NoOfDPP[i][j][k] = 2;
4256 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4257 }
4258 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4259 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4260 > v->MaxDppclkRoundedDownToDFSGranularity)
4261 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4262 v->DISPCLK_DPPCLK_Support[i][j] = false;
4263 }
4264 }
4265 v->TotalNumberOfActiveDPP[i][j] = 0;
4266 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4267 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4268 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4269 if (v->NoOfDPP[i][j][k] == 1)
4270 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4271 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4272 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4273 NoChroma = false;
4274 }
4275
4276 // UPTO
4277 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4278 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4279 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4280 double BWOfNonSplitPlaneOfMaximumBandwidth;
4281 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4282
4283 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4284 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4285 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4286 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4287 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4288 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4289 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4290 }
4291 }
4292 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4293 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4294 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4295 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4296 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4297 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4298 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4299 }
4300 }
4301 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4302 v->RequiredDISPCLK[i][j] = 0.0;
4303 v->DISPCLK_DPPCLK_Support[i][j] = true;
4304 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4305 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4306 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4307 v->MPCCombine[i][j][k] = true;
4308 v->NoOfDPP[i][j][k] = 2;
4309 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4310 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4311 } else {
4312 v->MPCCombine[i][j][k] = false;
4313 v->NoOfDPP[i][j][k] = 1;
4314 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4315 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4316 }
4317 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4318 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4319 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4320 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4321 } else {
4322 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4323 }
4324 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4325 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4326 > v->MaxDppclkRoundedDownToDFSGranularity)
4327 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4328 v->DISPCLK_DPPCLK_Support[i][j] = false;
4329 }
4330 }
4331 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4332 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4333 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4334 }
4335 }
4336 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4337 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4338 v->DISPCLK_DPPCLK_Support[i][j] = false;
4339 }
4340 }
4341 }
4342
4343 /*Total Available Pipes Support Check*/
4344
4345 for (i = 0; i < v->soc.num_states; i++) {
4346 for (j = 0; j < 2; j++) {
4347 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4348 v->TotalAvailablePipesSupport[i][j] = true;
4349 } else {
4350 v->TotalAvailablePipesSupport[i][j] = false;
4351 }
4352 }
4353 }
4354 /*Display IO and DSC Support Check*/
4355
4356 v->NonsupportedDSCInputBPC = false;
4357 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4358 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4359 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4360 v->NonsupportedDSCInputBPC = true;
4361 }
4362 }
4363
4364 /*Number Of DSC Slices*/
4365 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4366 if (v->BlendingAndTiming[k] == k) {
4367 if (v->PixelClockBackEnd[k] > 3200) {
4368 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4369 } else if (v->PixelClockBackEnd[k] > 1360) {
4370 v->NumberOfDSCSlices[k] = 8;
4371 } else if (v->PixelClockBackEnd[k] > 680) {
4372 v->NumberOfDSCSlices[k] = 4;
4373 } else if (v->PixelClockBackEnd[k] > 340) {
4374 v->NumberOfDSCSlices[k] = 2;
4375 } else {
4376 v->NumberOfDSCSlices[k] = 1;
4377 }
4378 } else {
4379 v->NumberOfDSCSlices[k] = 0;
4380 }
4381 }
4382
4383 for (i = 0; i < v->soc.num_states; i++) {
4384 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4385 v->RequiresDSC[i][k] = false;
4386 v->RequiresFEC[i][k] = false;
4387 if (v->BlendingAndTiming[k] == k) {
4388 if (v->Output[k] == dm_hdmi) {
4389 v->RequiresDSC[i][k] = false;
4390 v->RequiresFEC[i][k] = false;
4391 v->OutputBppPerState[i][k] = TruncToValidBPP(
4392 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4393 3,
4394 v->HTotal[k],
4395 v->HActive[k],
4396 v->PixelClockBackEnd[k],
4397 v->ForcedOutputLinkBPP[k],
4398 false,
4399 v->Output[k],
4400 v->OutputFormat[k],
4401 v->DSCInputBitPerComponent[k],
4402 v->NumberOfDSCSlices[k],
4403 v->AudioSampleRate[k],
4404 v->AudioSampleLayout[k],
4405 v->ODMCombineEnablePerState[i][k]);
4406 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4407 if (v->DSCEnable[k] == true) {
4408 v->RequiresDSC[i][k] = true;
4409 v->LinkDSCEnable = true;
4410 if (v->Output[k] == dm_dp) {
4411 v->RequiresFEC[i][k] = true;
4412 } else {
4413 v->RequiresFEC[i][k] = false;
4414 }
4415 } else {
4416 v->RequiresDSC[i][k] = false;
4417 v->LinkDSCEnable = false;
4418 v->RequiresFEC[i][k] = false;
4419 }
4420
4421 v->Outbpp = BPP_INVALID;
4422 if (v->PHYCLKPerState[i] >= 270.0) {
4423 v->Outbpp = TruncToValidBPP(
4424 (1.0 - v->Downspreading / 100.0) * 2700,
4425 v->OutputLinkDPLanes[k],
4426 v->HTotal[k],
4427 v->HActive[k],
4428 v->PixelClockBackEnd[k],
4429 v->ForcedOutputLinkBPP[k],
4430 v->LinkDSCEnable,
4431 v->Output[k],
4432 v->OutputFormat[k],
4433 v->DSCInputBitPerComponent[k],
4434 v->NumberOfDSCSlices[k],
4435 v->AudioSampleRate[k],
4436 v->AudioSampleLayout[k],
4437 v->ODMCombineEnablePerState[i][k]);
4438 v->OutputBppPerState[i][k] = v->Outbpp;
4439 // TODO: Need some other way to handle this nonsense
4440 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4441 }
4442 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4443 v->Outbpp = TruncToValidBPP(
4444 (1.0 - v->Downspreading / 100.0) * 5400,
4445 v->OutputLinkDPLanes[k],
4446 v->HTotal[k],
4447 v->HActive[k],
4448 v->PixelClockBackEnd[k],
4449 v->ForcedOutputLinkBPP[k],
4450 v->LinkDSCEnable,
4451 v->Output[k],
4452 v->OutputFormat[k],
4453 v->DSCInputBitPerComponent[k],
4454 v->NumberOfDSCSlices[k],
4455 v->AudioSampleRate[k],
4456 v->AudioSampleLayout[k],
4457 v->ODMCombineEnablePerState[i][k]);
4458 v->OutputBppPerState[i][k] = v->Outbpp;
4459 // TODO: Need some other way to handle this nonsense
4460 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4461 }
4462 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4463 v->Outbpp = TruncToValidBPP(
4464 (1.0 - v->Downspreading / 100.0) * 8100,
4465 v->OutputLinkDPLanes[k],
4466 v->HTotal[k],
4467 v->HActive[k],
4468 v->PixelClockBackEnd[k],
4469 v->ForcedOutputLinkBPP[k],
4470 v->LinkDSCEnable,
4471 v->Output[k],
4472 v->OutputFormat[k],
4473 v->DSCInputBitPerComponent[k],
4474 v->NumberOfDSCSlices[k],
4475 v->AudioSampleRate[k],
4476 v->AudioSampleLayout[k],
4477 v->ODMCombineEnablePerState[i][k]);
4478 v->OutputBppPerState[i][k] = v->Outbpp;
4479 // TODO: Need some other way to handle this nonsense
4480 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4481 }
4482 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4483 v->Outbpp = TruncToValidBPP(
4484 (1.0 - v->Downspreading / 100.0) * 10000,
4485 4,
4486 v->HTotal[k],
4487 v->HActive[k],
4488 v->PixelClockBackEnd[k],
4489 v->ForcedOutputLinkBPP[k],
4490 v->LinkDSCEnable,
4491 v->Output[k],
4492 v->OutputFormat[k],
4493 v->DSCInputBitPerComponent[k],
4494 v->NumberOfDSCSlices[k],
4495 v->AudioSampleRate[k],
4496 v->AudioSampleLayout[k],
4497 v->ODMCombineEnablePerState[i][k]);
4498 v->OutputBppPerState[i][k] = v->Outbpp;
4499 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4500 }
4501 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4502 v->Outbpp = TruncToValidBPP(
4503 12000,
4504 4,
4505 v->HTotal[k],
4506 v->HActive[k],
4507 v->PixelClockBackEnd[k],
4508 v->ForcedOutputLinkBPP[k],
4509 v->LinkDSCEnable,
4510 v->Output[k],
4511 v->OutputFormat[k],
4512 v->DSCInputBitPerComponent[k],
4513 v->NumberOfDSCSlices[k],
4514 v->AudioSampleRate[k],
4515 v->AudioSampleLayout[k],
4516 v->ODMCombineEnablePerState[i][k]);
4517 v->OutputBppPerState[i][k] = v->Outbpp;
4518 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4519 }
4520 }
4521 } else {
4522 v->OutputBppPerState[i][k] = 0;
4523 }
4524 }
4525 }
4526
4527 for (i = 0; i < v->soc.num_states; i++) {
4528 v->LinkCapacitySupport[i] = true;
4529 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4530 if (v->BlendingAndTiming[k] == k
4531 && (v->Output[k] == dm_dp ||
4532 v->Output[k] == dm_edp ||
4533 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4534 v->LinkCapacitySupport[i] = false;
4535 }
4536 }
4537 }
4538
4539 // UPTO 2172
4540 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4541 if (v->BlendingAndTiming[k] == k
4542 && (v->Output[k] == dm_dp ||
4543 v->Output[k] == dm_edp ||
4544 v->Output[k] == dm_hdmi)) {
4545 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4546 P2IWith420 = true;
4547 }
4548 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4549 && !v->DSC422NativeSupport) {
4550 DSC422NativeNotSupported = true;
4551 }
4552 }
4553 }
4554
4555
4556 for (i = 0; i < v->soc.num_states; ++i) {
4557 v->ODMCombine4To1SupportCheckOK[i] = true;
4558 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4559 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4560 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4561 || v->Output[k] == dm_hdmi)) {
4562 v->ODMCombine4To1SupportCheckOK[i] = false;
4563 }
4564 }
4565 }
4566
4567 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4568
4569 for (i = 0; i < v->soc.num_states; i++) {
4570 v->NotEnoughDSCUnits[i] = false;
4571 v->TotalDSCUnitsRequired = 0.0;
4572 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4573 if (v->RequiresDSC[i][k] == true) {
4574 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4575 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4576 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4577 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4578 } else {
4579 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4580 }
4581 }
4582 }
4583 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4584 v->NotEnoughDSCUnits[i] = true;
4585 }
4586 }
4587 /*DSC Delay per state*/
4588
4589 for (i = 0; i < v->soc.num_states; i++) {
4590 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4591 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4592 v->BPP = 0.0;
4593 } else {
4594 v->BPP = v->OutputBppPerState[i][k];
4595 }
4596 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4597 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4598 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4599 v->DSCInputBitPerComponent[k],
4600 v->BPP,
4601 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4602 v->NumberOfDSCSlices[k],
4603 v->OutputFormat[k],
4604 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4605 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4606 v->DSCDelayPerState[i][k] = 2.0
4607 * (dscceComputeDelay(
4608 v->DSCInputBitPerComponent[k],
4609 v->BPP,
4610 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4611 v->NumberOfDSCSlices[k] / 2,
4612 v->OutputFormat[k],
4613 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4614 } else {
4615 v->DSCDelayPerState[i][k] = 4.0
4616 * (dscceComputeDelay(
4617 v->DSCInputBitPerComponent[k],
4618 v->BPP,
4619 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4620 v->NumberOfDSCSlices[k] / 4,
4621 v->OutputFormat[k],
4622 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4623 }
4624 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4625 } else {
4626 v->DSCDelayPerState[i][k] = 0.0;
4627 }
4628 }
4629 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4630 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4631 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4632 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4633 }
4634 }
4635 }
4636 }
4637
4638 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4639 //
4640 for (i = 0; i < v->soc.num_states; ++i) {
4641 for (j = 0; j <= 1; ++j) {
4642 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4643 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4644 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4645 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4646 }
4647
4648 CalculateSwathAndDETConfiguration(
4649 false,
4650 v->NumberOfActivePlanes,
4651 v->DETBufferSizeInKByte[0],
4652 v->MaximumSwathWidthLuma,
4653 v->MaximumSwathWidthChroma,
4654 v->SourceScan,
4655 v->SourcePixelFormat,
4656 v->SurfaceTiling,
4657 v->ViewportWidth,
4658 v->ViewportHeight,
4659 v->SurfaceWidthY,
4660 v->SurfaceWidthC,
4661 v->SurfaceHeightY,
4662 v->SurfaceHeightC,
4663 v->Read256BlockHeightY,
4664 v->Read256BlockHeightC,
4665 v->Read256BlockWidthY,
4666 v->Read256BlockWidthC,
4667 v->ODMCombineEnableThisState,
4668 v->BlendingAndTiming,
4669 v->BytePerPixelY,
4670 v->BytePerPixelC,
4671 v->BytePerPixelInDETY,
4672 v->BytePerPixelInDETC,
4673 v->HActive,
4674 v->HRatio,
4675 v->HRatioChroma,
4676 v->NoOfDPPThisState,
4677 v->swath_width_luma_ub_this_state,
4678 v->swath_width_chroma_ub_this_state,
4679 v->SwathWidthYThisState,
4680 v->SwathWidthCThisState,
4681 v->SwathHeightYThisState,
4682 v->SwathHeightCThisState,
4683 v->DETBufferSizeYThisState,
4684 v->DETBufferSizeCThisState,
4685 v->dummystring,
4686 &v->ViewportSizeSupport[i][j]);
4687
4688 CalculateDCFCLKDeepSleep(
4689 mode_lib,
4690 v->NumberOfActivePlanes,
4691 v->BytePerPixelY,
4692 v->BytePerPixelC,
4693 v->VRatio,
4694 v->VRatioChroma,
4695 v->SwathWidthYThisState,
4696 v->SwathWidthCThisState,
4697 v->NoOfDPPThisState,
4698 v->HRatio,
4699 v->HRatioChroma,
4700 v->PixelClock,
4701 v->PSCL_FACTOR,
4702 v->PSCL_FACTOR_CHROMA,
4703 v->RequiredDPPCLKThisState,
4704 v->ReadBandwidthLuma,
4705 v->ReadBandwidthChroma,
4706 v->ReturnBusWidth,
4707 &v->ProjectedDCFCLKDeepSleep[i][j]);
4708
4709 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4710 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4711 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4712 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4713 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4714 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4715 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4716 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4717 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4718 }
4719 }
4720 }
4721
4722 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4723 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4724 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4725 }
4726
4727 for (i = 0; i < v->soc.num_states; i++) {
4728 for (j = 0; j < 2; j++) {
4729 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4730
4731 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4732 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4733 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4734 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4735 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4736 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4737 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4738 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4739 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4740 }
4741
4742 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4743 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4744 if (v->DCCEnable[k] == true) {
4745 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4746 }
4747 }
4748
4749 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4750 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4751 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4752
4753 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4754 && v->SourceScan[k] != dm_vert) {
4755 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4756 / 2;
4757 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4758 } else {
4759 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4760 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4761 }
4762
4763 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4764 mode_lib,
4765 v->DCCEnable[k],
4766 v->Read256BlockHeightC[k],
4767 v->Read256BlockWidthC[k],
4768 v->SourcePixelFormat[k],
4769 v->SurfaceTiling[k],
4770 v->BytePerPixelC[k],
4771 v->SourceScan[k],
4772 v->SwathWidthCThisState[k],
4773 v->ViewportHeightChroma[k],
4774 v->GPUVMEnable,
4775 v->HostVMEnable,
4776 v->HostVMMaxNonCachedPageTableLevels,
4777 v->GPUVMMinPageSize,
4778 v->HostVMMinPageSize,
4779 v->PTEBufferSizeInRequestsForChroma,
4780 v->PitchC[k],
4781 0.0,
4782 &v->MacroTileWidthC[k],
4783 &v->MetaRowBytesC,
4784 &v->DPTEBytesPerRowC,
4785 &v->PTEBufferSizeNotExceededC[i][j][k],
4786 &v->dummyinteger7,
4787 &v->dpte_row_height_chroma[k],
4788 &v->dummyinteger28,
4789 &v->dummyinteger26,
4790 &v->dummyinteger23,
4791 &v->meta_row_height_chroma[k],
4792 &v->dummyinteger8,
4793 &v->dummyinteger9,
4794 &v->dummyinteger19,
4795 &v->dummyinteger20,
4796 &v->dummyinteger17,
4797 &v->dummyinteger10,
4798 &v->dummyinteger11);
4799
4800 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4801 mode_lib,
4802 v->VRatioChroma[k],
4803 v->VTAPsChroma[k],
4804 v->Interlace[k],
4805 v->ProgressiveToInterlaceUnitInOPP,
4806 v->SwathHeightCThisState[k],
4807 v->ViewportYStartC[k],
4808 &v->PrefillC[k],
4809 &v->MaxNumSwC[k]);
4810 } else {
4811 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4812 v->PTEBufferSizeInRequestsForChroma = 0;
4813 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4814 v->MetaRowBytesC = 0.0;
4815 v->DPTEBytesPerRowC = 0.0;
4816 v->PrefetchLinesC[i][j][k] = 0.0;
4817 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4818 }
4819 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4820 mode_lib,
4821 v->DCCEnable[k],
4822 v->Read256BlockHeightY[k],
4823 v->Read256BlockWidthY[k],
4824 v->SourcePixelFormat[k],
4825 v->SurfaceTiling[k],
4826 v->BytePerPixelY[k],
4827 v->SourceScan[k],
4828 v->SwathWidthYThisState[k],
4829 v->ViewportHeight[k],
4830 v->GPUVMEnable,
4831 v->HostVMEnable,
4832 v->HostVMMaxNonCachedPageTableLevels,
4833 v->GPUVMMinPageSize,
4834 v->HostVMMinPageSize,
4835 v->PTEBufferSizeInRequestsForLuma,
4836 v->PitchY[k],
4837 v->DCCMetaPitchY[k],
4838 &v->MacroTileWidthY[k],
4839 &v->MetaRowBytesY,
4840 &v->DPTEBytesPerRowY,
4841 &v->PTEBufferSizeNotExceededY[i][j][k],
4842 &v->dummyinteger7,
4843 &v->dpte_row_height[k],
4844 &v->dummyinteger29,
4845 &v->dummyinteger27,
4846 &v->dummyinteger24,
4847 &v->meta_row_height[k],
4848 &v->dummyinteger25,
4849 &v->dpte_group_bytes[k],
4850 &v->dummyinteger21,
4851 &v->dummyinteger22,
4852 &v->dummyinteger18,
4853 &v->dummyinteger5,
4854 &v->dummyinteger6);
4855 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4856 mode_lib,
4857 v->VRatio[k],
4858 v->vtaps[k],
4859 v->Interlace[k],
4860 v->ProgressiveToInterlaceUnitInOPP,
4861 v->SwathHeightYThisState[k],
4862 v->ViewportYStartY[k],
4863 &v->PrefillY[k],
4864 &v->MaxNumSwY[k]);
4865 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4866 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4867 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4868
4869 CalculateRowBandwidth(
4870 v->GPUVMEnable,
4871 v->SourcePixelFormat[k],
4872 v->VRatio[k],
4873 v->VRatioChroma[k],
4874 v->DCCEnable[k],
4875 v->HTotal[k] / v->PixelClock[k],
4876 v->MetaRowBytesY,
4877 v->MetaRowBytesC,
4878 v->meta_row_height[k],
4879 v->meta_row_height_chroma[k],
4880 v->DPTEBytesPerRowY,
4881 v->DPTEBytesPerRowC,
4882 v->dpte_row_height[k],
4883 v->dpte_row_height_chroma[k],
4884 &v->meta_row_bandwidth[i][j][k],
4885 &v->dpte_row_bandwidth[i][j][k]);
4886 }
4887 /*
4888 * DCCMetaBufferSizeSupport(i, j) = True
4889 * For k = 0 To NumberOfActivePlanes - 1
4890 * If MetaRowBytes(i, j, k) > 24064 Then
4891 * DCCMetaBufferSizeSupport(i, j) = False
4892 * End If
4893 * Next k
4894 */
4895 v->DCCMetaBufferSizeSupport[i][j] = true;
4896 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4897 if (v->MetaRowBytes[i][j][k] > 24064)
4898 v->DCCMetaBufferSizeSupport[i][j] = false;
4899 }
4900 v->UrgLatency[i] = CalculateUrgentLatency(
4901 v->UrgentLatencyPixelDataOnly,
4902 v->UrgentLatencyPixelMixedWithVMData,
4903 v->UrgentLatencyVMDataOnly,
4904 v->DoUrgentLatencyAdjustment,
4905 v->UrgentLatencyAdjustmentFabricClockComponent,
4906 v->UrgentLatencyAdjustmentFabricClockReference,
4907 v->FabricClockPerState[i]);
4908
4909 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4910 CalculateUrgentBurstFactor(
4911 v->swath_width_luma_ub_this_state[k],
4912 v->swath_width_chroma_ub_this_state[k],
4913 v->SwathHeightYThisState[k],
4914 v->SwathHeightCThisState[k],
4915 v->HTotal[k] / v->PixelClock[k],
4916 v->UrgLatency[i],
4917 v->CursorBufferSize,
4918 v->CursorWidth[k][0],
4919 v->CursorBPP[k][0],
4920 v->VRatio[k],
4921 v->VRatioChroma[k],
4922 v->BytePerPixelInDETY[k],
4923 v->BytePerPixelInDETC[k],
4924 v->DETBufferSizeYThisState[k],
4925 v->DETBufferSizeCThisState[k],
4926 &v->UrgentBurstFactorCursor[k],
4927 &v->UrgentBurstFactorLuma[k],
4928 &v->UrgentBurstFactorChroma[k],
4929 &NotUrgentLatencyHiding[k]);
4930 }
4931
4932 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4934 if (NotUrgentLatencyHiding[k]) {
4935 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4936 }
4937 }
4938
4939 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4940 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4941 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4942 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4943 }
4944
4945 v->TotalVActivePixelBandwidth[i][j] = 0;
4946 v->TotalVActiveCursorBandwidth[i][j] = 0;
4947 v->TotalMetaRowBandwidth[i][j] = 0;
4948 v->TotalDPTERowBandwidth[i][j] = 0;
4949 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4950 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4951 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4952 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4953 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4954 }
4955 }
4956 }
4957
4958 //Calculate Return BW
4959 for (i = 0; i < v->soc.num_states; ++i) {
4960 for (j = 0; j <= 1; ++j) {
4961 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4962 if (v->BlendingAndTiming[k] == k) {
4963 if (v->WritebackEnable[k] == true) {
4964 v->WritebackDelayTime[k] = v->WritebackLatency
4965 + CalculateWriteBackDelay(
4966 v->WritebackPixelFormat[k],
4967 v->WritebackHRatio[k],
4968 v->WritebackVRatio[k],
4969 v->WritebackVTaps[k],
4970 v->WritebackDestinationWidth[k],
4971 v->WritebackDestinationHeight[k],
4972 v->WritebackSourceHeight[k],
4973 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4974 } else {
4975 v->WritebackDelayTime[k] = 0.0;
4976 }
4977 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4978 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4979 v->WritebackDelayTime[k] = dml_max(
4980 v->WritebackDelayTime[k],
4981 v->WritebackLatency
4982 + CalculateWriteBackDelay(
4983 v->WritebackPixelFormat[m],
4984 v->WritebackHRatio[m],
4985 v->WritebackVRatio[m],
4986 v->WritebackVTaps[m],
4987 v->WritebackDestinationWidth[m],
4988 v->WritebackDestinationHeight[m],
4989 v->WritebackSourceHeight[m],
4990 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4991 }
4992 }
4993 }
4994 }
4995 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4996 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4997 if (v->BlendingAndTiming[k] == m) {
4998 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4999 }
5000 }
5001 }
5002 v->MaxMaxVStartup[i][j] = 0;
5003 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5004 v->MaximumVStartup[i][j][k] =
5005 CalculateMaxVStartup(
5006 v->VTotal[k],
5007 v->VActive[k],
5008 v->VBlankNom[k],
5009 v->HTotal[k],
5010 v->PixelClock[k],
5011 v->ProgressiveToInterlaceUnitInOPP,
5012 v->Interlace[k],
5013 v->ip.VBlankNomDefaultUS,
5014 v->WritebackDelayTime[k]);
5015 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5016 }
5017 }
5018 }
5019
5020 ReorderingBytes = v->NumberOfChannels
5021 * dml_max3(
5022 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5023 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5024 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5025
5026 for (i = 0; i < v->soc.num_states; ++i) {
5027 for (j = 0; j <= 1; ++j) {
5028 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5029 }
5030 }
5031
5032 if (v->UseMinimumRequiredDCFCLK == true)
5033 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5034
5035 for (i = 0; i < v->soc.num_states; ++i) {
5036 for (j = 0; j <= 1; ++j) {
5037 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5038 v->ReturnBusWidth * v->DCFCLKState[i][j],
5039 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5040 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5041 double PixelDataOnlyReturnBWPerState = dml_min(
5042 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5043 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5044 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5045 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5046 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5047
5048 if (v->HostVMEnable != true) {
5049 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5050 } else {
5051 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5052 }
5053 }
5054 }
5055
5056 //Re-ordering Buffer Support Check
5057 for (i = 0; i < v->soc.num_states; ++i) {
5058 for (j = 0; j <= 1; ++j) {
5059 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5060 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5061 v->ROBSupport[i][j] = true;
5062 } else {
5063 v->ROBSupport[i][j] = false;
5064 }
5065 }
5066 }
5067
5068 //Vertical Active BW support check
5069
5070 MaxTotalVActiveRDBandwidth = 0;
5071 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5072 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5073 }
5074
5075 for (i = 0; i < v->soc.num_states; ++i) {
5076 for (j = 0; j <= 1; ++j) {
5077 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5078 dml_min(
5079 v->ReturnBusWidth * v->DCFCLKState[i][j],
5080 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5081 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5082 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5083 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5084
5085 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5086 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5087 } else {
5088 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5089 }
5090 }
5091 }
5092
5093 v->UrgentLatency = CalculateUrgentLatency(
5094 v->UrgentLatencyPixelDataOnly,
5095 v->UrgentLatencyPixelMixedWithVMData,
5096 v->UrgentLatencyVMDataOnly,
5097 v->DoUrgentLatencyAdjustment,
5098 v->UrgentLatencyAdjustmentFabricClockComponent,
5099 v->UrgentLatencyAdjustmentFabricClockReference,
5100 v->FabricClock);
5101 //Prefetch Check
5102 for (i = 0; i < v->soc.num_states; ++i) {
5103 for (j = 0; j <= 1; ++j) {
5104 double VMDataOnlyReturnBWPerState;
5105 double HostVMInefficiencyFactor = 1;
5106 int NextPrefetchModeState = MinPrefetchMode;
5107 bool UnboundedRequestEnabledThisState = false;
5108 int CompressedBufferSizeInkByteThisState = 0;
5109 double dummy;
5110
5111 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5112
5113 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5114 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5115 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5116 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5117 }
5118
5119 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5120 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5121 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5122 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5123 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5124 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5125 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5126 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5127 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5128 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5129 }
5130
5131 VMDataOnlyReturnBWPerState = dml_min(
5132 dml_min(
5133 v->ReturnBusWidth * v->DCFCLKState[i][j],
5134 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5135 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5136 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5137 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5138 if (v->GPUVMEnable && v->HostVMEnable)
5139 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5140
5141 v->ExtraLatency = CalculateExtraLatency(
5142 v->RoundTripPingLatencyCycles,
5143 ReorderingBytes,
5144 v->DCFCLKState[i][j],
5145 v->TotalNumberOfActiveDPP[i][j],
5146 v->PixelChunkSizeInKByte,
5147 v->TotalNumberOfDCCActiveDPP[i][j],
5148 v->MetaChunkSize,
5149 v->ReturnBWPerState[i][j],
5150 v->GPUVMEnable,
5151 v->HostVMEnable,
5152 v->NumberOfActivePlanes,
5153 v->NoOfDPPThisState,
5154 v->dpte_group_bytes,
5155 HostVMInefficiencyFactor,
5156 v->HostVMMinPageSize,
5157 v->HostVMMaxNonCachedPageTableLevels);
5158
5159 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5160 do {
5161 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5162 v->MaxVStartup = v->NextMaxVStartup;
5163
5164 v->TWait = CalculateTWait(
5165 v->PrefetchModePerState[i][j],
5166 v->DRAMClockChangeLatency,
5167 v->UrgLatency[i],
5168 v->SREnterPlusExitTime);
5169
5170 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5171 CalculatePrefetchSchedulePerPlane(mode_lib,
5172 HostVMInefficiencyFactor,
5173 i, j, k);
5174 }
5175
5176 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5177 CalculateUrgentBurstFactor(
5178 v->swath_width_luma_ub_this_state[k],
5179 v->swath_width_chroma_ub_this_state[k],
5180 v->SwathHeightYThisState[k],
5181 v->SwathHeightCThisState[k],
5182 v->HTotal[k] / v->PixelClock[k],
5183 v->UrgentLatency,
5184 v->CursorBufferSize,
5185 v->CursorWidth[k][0],
5186 v->CursorBPP[k][0],
5187 v->VRatioPreY[i][j][k],
5188 v->VRatioPreC[i][j][k],
5189 v->BytePerPixelInDETY[k],
5190 v->BytePerPixelInDETC[k],
5191 v->DETBufferSizeYThisState[k],
5192 v->DETBufferSizeCThisState[k],
5193 &v->UrgentBurstFactorCursorPre[k],
5194 &v->UrgentBurstFactorLumaPre[k],
5195 &v->UrgentBurstFactorChroma[k],
5196 &v->NotUrgentLatencyHidingPre[k]);
5197 }
5198
5199 v->MaximumReadBandwidthWithPrefetch = 0.0;
5200 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5201 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5202 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5203
5204 v->MaximumReadBandwidthWithPrefetch =
5205 v->MaximumReadBandwidthWithPrefetch
5206 + dml_max3(
5207 v->VActivePixelBandwidth[i][j][k]
5208 + v->VActiveCursorBandwidth[i][j][k]
5209 + v->NoOfDPP[i][j][k]
5210 * (v->meta_row_bandwidth[i][j][k]
5211 + v->dpte_row_bandwidth[i][j][k]),
5212 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5213 v->NoOfDPP[i][j][k]
5214 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5215 * v->UrgentBurstFactorLumaPre[k]
5216 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5217 * v->UrgentBurstFactorChromaPre[k])
5218 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5219 }
5220
5221 v->NotEnoughUrgentLatencyHidingPre = false;
5222 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5223 if (v->NotUrgentLatencyHidingPre[k] == true) {
5224 v->NotEnoughUrgentLatencyHidingPre = true;
5225 }
5226 }
5227
5228 v->PrefetchSupported[i][j] = true;
5229 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5230 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5231 v->PrefetchSupported[i][j] = false;
5232 }
5233 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5234 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5235 || v->NoTimeForPrefetch[i][j][k] == true) {
5236 v->PrefetchSupported[i][j] = false;
5237 }
5238 }
5239
5240 v->DynamicMetadataSupported[i][j] = true;
5241 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5242 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5243 v->DynamicMetadataSupported[i][j] = false;
5244 }
5245 }
5246
5247 v->VRatioInPrefetchSupported[i][j] = true;
5248 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5249 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5250 v->VRatioInPrefetchSupported[i][j] = false;
5251 }
5252 }
5253 v->AnyLinesForVMOrRowTooLarge = false;
5254 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5255 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5256 v->AnyLinesForVMOrRowTooLarge = true;
5257 }
5258 }
5259
5260 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5261
5262 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5263 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5264 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5265 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5266 - dml_max(
5267 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5268 v->NoOfDPP[i][j][k]
5269 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5270 * v->UrgentBurstFactorLumaPre[k]
5271 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5272 * v->UrgentBurstFactorChromaPre[k])
5273 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5274 }
5275 v->TotImmediateFlipBytes = 0.0;
5276 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5277 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5278 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5279 + v->DPTEBytesPerRow[i][j][k];
5280 }
5281
5282 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5283 CalculateFlipSchedule(
5284 mode_lib,
5285 k,
5286 HostVMInefficiencyFactor,
5287 v->ExtraLatency,
5288 v->UrgLatency[i],
5289 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5290 v->MetaRowBytes[i][j][k],
5291 v->DPTEBytesPerRow[i][j][k]);
5292 }
5293 v->total_dcn_read_bw_with_flip = 0.0;
5294 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5295 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5296 + dml_max3(
5297 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5298 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5299 + v->VActiveCursorBandwidth[i][j][k],
5300 v->NoOfDPP[i][j][k]
5301 * (v->final_flip_bw[k]
5302 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5303 * v->UrgentBurstFactorLumaPre[k]
5304 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5305 * v->UrgentBurstFactorChromaPre[k])
5306 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5307 }
5308 v->ImmediateFlipSupportedForState[i][j] = true;
5309 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5310 v->ImmediateFlipSupportedForState[i][j] = false;
5311 }
5312 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5313 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5314 v->ImmediateFlipSupportedForState[i][j] = false;
5315 }
5316 }
5317 } else {
5318 v->ImmediateFlipSupportedForState[i][j] = false;
5319 }
5320
5321 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5322 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5323 NextPrefetchModeState = NextPrefetchModeState + 1;
5324 } else {
5325 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5326 }
5327 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5328 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5329 && ((v->HostVMEnable == false &&
5330 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5331 || v->ImmediateFlipSupportedForState[i][j] == true))
5332 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5333
5334 CalculateUnboundedRequestAndCompressedBufferSize(
5335 v->DETBufferSizeInKByte[0],
5336 v->ConfigReturnBufferSizeInKByte,
5337 v->UseUnboundedRequesting,
5338 v->TotalNumberOfActiveDPP[i][j],
5339 NoChroma,
5340 v->MaxNumDPP,
5341 v->CompressedBufferSegmentSizeInkByte,
5342 v->Output,
5343 &UnboundedRequestEnabledThisState,
5344 &CompressedBufferSizeInkByteThisState);
5345
5346 CalculateWatermarksAndDRAMSpeedChangeSupport(
5347 mode_lib,
5348 v->PrefetchModePerState[i][j],
5349 v->DCFCLKState[i][j],
5350 v->ReturnBWPerState[i][j],
5351 v->UrgLatency[i],
5352 v->ExtraLatency,
5353 v->SOCCLKPerState[i],
5354 v->ProjectedDCFCLKDeepSleep[i][j],
5355 v->DETBufferSizeYThisState,
5356 v->DETBufferSizeCThisState,
5357 v->SwathHeightYThisState,
5358 v->SwathHeightCThisState,
5359 v->SwathWidthYThisState,
5360 v->SwathWidthCThisState,
5361 v->NoOfDPPThisState,
5362 v->BytePerPixelInDETY,
5363 v->BytePerPixelInDETC,
5364 UnboundedRequestEnabledThisState,
5365 CompressedBufferSizeInkByteThisState,
5366 &v->DRAMClockChangeSupport[i][j],
5367 &dummy,
5368 &dummy,
5369 &dummy,
5370 &dummy);
5371 }
5372 }
5373
5374 /*PTE Buffer Size Check*/
5375 for (i = 0; i < v->soc.num_states; i++) {
5376 for (j = 0; j < 2; j++) {
5377 v->PTEBufferSizeNotExceeded[i][j] = true;
5378 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5379 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5380 v->PTEBufferSizeNotExceeded[i][j] = false;
5381 }
5382 }
5383 }
5384 }
5385
5386 /*Cursor Support Check*/
5387 v->CursorSupport = true;
5388 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5389 if (v->CursorWidth[k][0] > 0.0) {
5390 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5391 v->CursorSupport = false;
5392 }
5393 }
5394 }
5395
5396 /*Valid Pitch Check*/
5397 v->PitchSupport = true;
5398 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5399 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5400 if (v->DCCEnable[k] == true) {
5401 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5402 } else {
5403 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5404 }
5405 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5406 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5407 && v->SourcePixelFormat[k] != dm_mono_8) {
5408 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5409 if (v->DCCEnable[k] == true) {
5410 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5411 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5412 64.0 * v->Read256BlockWidthC[k]);
5413 } else {
5414 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5415 }
5416 } else {
5417 v->AlignedCPitch[k] = v->PitchC[k];
5418 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5419 }
5420 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5421 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5422 v->PitchSupport = false;
5423 }
5424 }
5425
5426 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5427 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5428 ViewportExceedsSurface = true;
5429 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5430 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5431 && v->SourcePixelFormat[k] != dm_rgbe) {
5432 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5433 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5434 ViewportExceedsSurface = true;
5435 }
5436 }
5437 }
5438 }
5439
5440 /*Mode Support, Voltage State and SOC Configuration*/
5441 for (i = v->soc.num_states - 1; i >= 0; i--) {
5442 for (j = 0; j < 2; j++) {
5443 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5444 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5445 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5446 && v->DTBCLKRequiredMoreThanSupported[i] == false
5447 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5448 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5449 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5450 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5451 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5452 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5453 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5454 && ((v->HostVMEnable == false
5455 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5456 || v->ImmediateFlipSupportedForState[i][j] == true)
5457 && FMTBufferExceeded == false) {
5458 v->ModeSupport[i][j] = true;
5459 } else {
5460 v->ModeSupport[i][j] = false;
5461 }
5462 }
5463 }
5464
5465 {
5466 unsigned int MaximumMPCCombine = 0;
5467
5468 for (i = v->soc.num_states; i >= 0; i--) {
5469 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5470 v->VoltageLevel = i;
5471 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5472 if (v->ModeSupport[i][0] == true) {
5473 MaximumMPCCombine = 0;
5474 } else {
5475 MaximumMPCCombine = 1;
5476 }
5477 }
5478 }
5479 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5480 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5481 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5482 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5483 }
5484 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5485 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5486 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5487 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5488 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5489 v->maxMpcComb = MaximumMPCCombine;
5490 }
5491 }
5492
5493 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5494 struct display_mode_lib *mode_lib,
5495 unsigned int PrefetchMode,
5496 double DCFCLK,
5497 double ReturnBW,
5498 double UrgentLatency,
5499 double ExtraLatency,
5500 double SOCCLK,
5501 double DCFCLKDeepSleep,
5502 unsigned int DETBufferSizeY[],
5503 unsigned int DETBufferSizeC[],
5504 unsigned int SwathHeightY[],
5505 unsigned int SwathHeightC[],
5506 double SwathWidthY[],
5507 double SwathWidthC[],
5508 unsigned int DPPPerPlane[],
5509 double BytePerPixelDETY[],
5510 double BytePerPixelDETC[],
5511 bool UnboundedRequestEnabled,
5512 unsigned int CompressedBufferSizeInkByte,
5513 enum clock_change_support *DRAMClockChangeSupport,
5514 double *StutterExitWatermark,
5515 double *StutterEnterPlusExitWatermark,
5516 double *Z8StutterExitWatermark,
5517 double *Z8StutterEnterPlusExitWatermark)
5518 {
5519 struct vba_vars_st *v = &mode_lib->vba;
5520 double EffectiveLBLatencyHidingY;
5521 double EffectiveLBLatencyHidingC;
5522 double LinesInDETY[DC__NUM_DPP__MAX];
5523 double LinesInDETC;
5524 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5525 unsigned int LinesInDETCRoundedDownToSwath;
5526 double FullDETBufferingTimeY;
5527 double FullDETBufferingTimeC;
5528 double ActiveDRAMClockChangeLatencyMarginY;
5529 double ActiveDRAMClockChangeLatencyMarginC;
5530 double WritebackDRAMClockChangeLatencyMargin;
5531 double PlaneWithMinActiveDRAMClockChangeMargin;
5532 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5533 double WritebackDRAMClockChangeLatencyHiding;
5534 double TotalPixelBW = 0.0;
5535 int k, j;
5536
5537 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5538
5539 #ifdef __DML_VBA_DEBUG__
5540 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5541 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5542 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5543 #endif
5544
5545 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5546
5547 #ifdef __DML_VBA_DEBUG__
5548 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5549 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5550 #endif
5551
5552 v->TotalActiveWriteback = 0;
5553 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5554 if (v->WritebackEnable[k] == true) {
5555 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5556 }
5557 }
5558
5559 if (v->TotalActiveWriteback <= 1) {
5560 v->WritebackUrgentWatermark = v->WritebackLatency;
5561 } else {
5562 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5563 }
5564
5565 if (v->TotalActiveWriteback <= 1) {
5566 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5567 } else {
5568 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5569 }
5570
5571 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5572 TotalPixelBW = TotalPixelBW
5573 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5574 / (v->HTotal[k] / v->PixelClock[k]);
5575 }
5576
5577 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5578 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5579
5580 v->LBLatencyHidingSourceLinesY = dml_min(
5581 (double) v->MaxLineBufferLines,
5582 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5583
5584 v->LBLatencyHidingSourceLinesC = dml_min(
5585 (double) v->MaxLineBufferLines,
5586 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5587
5588 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5589
5590 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5591
5592 if (UnboundedRequestEnabled) {
5593 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5594 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5595 }
5596
5597 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5598 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5599 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5600 if (BytePerPixelDETC[k] > 0) {
5601 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5602 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5603 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5604 } else {
5605 LinesInDETC = 0;
5606 FullDETBufferingTimeC = 999999;
5607 }
5608
5609 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5610 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5611
5612 if (v->NumberOfActivePlanes > 1) {
5613 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5614 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5615 }
5616
5617 if (BytePerPixelDETC[k] > 0) {
5618 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5619 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5620
5621 if (v->NumberOfActivePlanes > 1) {
5622 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5623 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5624 }
5625 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5626 } else {
5627 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5628 }
5629
5630 if (v->WritebackEnable[k] == true) {
5631 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5632 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5633 if (v->WritebackPixelFormat[k] == dm_444_64) {
5634 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5635 }
5636 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5637 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5638 }
5639 }
5640
5641 v->MinActiveDRAMClockChangeMargin = 999999;
5642 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5643 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5644 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5645 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5646 if (v->BlendingAndTiming[k] == k) {
5647 PlaneWithMinActiveDRAMClockChangeMargin = k;
5648 } else {
5649 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5650 if (v->BlendingAndTiming[k] == j) {
5651 PlaneWithMinActiveDRAMClockChangeMargin = j;
5652 }
5653 }
5654 }
5655 }
5656 }
5657
5658 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5659
5660 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5661 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5662 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5663 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5664 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5665 }
5666 }
5667
5668 v->TotalNumberOfActiveOTG = 0;
5669
5670 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5671 if (v->BlendingAndTiming[k] == k) {
5672 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5673 }
5674 }
5675
5676 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5677 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5678 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5679 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5680 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5681 } else {
5682 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5683 }
5684
5685 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5686 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5687 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5688 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5689
5690 #ifdef __DML_VBA_DEBUG__
5691 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5692 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5693 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5694 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5695 #endif
5696 }
5697
5698 static void CalculateDCFCLKDeepSleep(
5699 struct display_mode_lib *mode_lib,
5700 unsigned int NumberOfActivePlanes,
5701 int BytePerPixelY[],
5702 int BytePerPixelC[],
5703 double VRatio[],
5704 double VRatioChroma[],
5705 double SwathWidthY[],
5706 double SwathWidthC[],
5707 unsigned int DPPPerPlane[],
5708 double HRatio[],
5709 double HRatioChroma[],
5710 double PixelClock[],
5711 double PSCL_THROUGHPUT[],
5712 double PSCL_THROUGHPUT_CHROMA[],
5713 double DPPCLK[],
5714 double ReadBandwidthLuma[],
5715 double ReadBandwidthChroma[],
5716 int ReturnBusWidth,
5717 double *DCFCLKDeepSleep)
5718 {
5719 struct vba_vars_st *v = &mode_lib->vba;
5720 double DisplayPipeLineDeliveryTimeLuma;
5721 double DisplayPipeLineDeliveryTimeChroma;
5722 double ReadBandwidth = 0.0;
5723 int k;
5724
5725 for (k = 0; k < NumberOfActivePlanes; ++k) {
5726
5727 if (VRatio[k] <= 1) {
5728 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5729 } else {
5730 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5731 }
5732 if (BytePerPixelC[k] == 0) {
5733 DisplayPipeLineDeliveryTimeChroma = 0;
5734 } else {
5735 if (VRatioChroma[k] <= 1) {
5736 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5737 } else {
5738 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5739 }
5740 }
5741
5742 if (BytePerPixelC[k] > 0) {
5743 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5744 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5745 } else {
5746 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5747 }
5748 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5749
5750 }
5751
5752 for (k = 0; k < NumberOfActivePlanes; ++k) {
5753 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5754 }
5755
5756 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5757
5758 for (k = 0; k < NumberOfActivePlanes; ++k) {
5759 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5760 }
5761 }
5762
/*
 * Compute the urgent burst factors for one plane's cursor, luma and chroma
 * buffers and report whether urgent latency can be hidden.
 *
 * Each factor is T / (T - UrgentLatency), where T is the time covered by the
 * lines held in the corresponding buffer (cursor buffer or DET) at the given
 * LineTime and VRatio. If T - UrgentLatency <= 0 the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set; if VRatio is not > 0 the factor is 1.
 *
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0 and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; otherwise they
 * are left untouched.
 *
 * NOTE(review): VRatioC is accepted but never read; the chroma path is gated
 * on and scaled by VRatio. Confirm with HW whether that is intended before
 * changing it.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double det_lines;
	double buffer_time;
	double hiding_margin;
	unsigned int cursor_lines;
	unsigned int cursor_lines_log2;

	*NotEnoughUrgentLatencyHiding = false;

	/* Cursor buffer. */
	if (CursorWidth > 0) {
		/* Whole cursor lines that fit in the buffer, rounded down to a power of two. */
		cursor_lines_log2 = (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		cursor_lines = 1 << cursor_lines_log2;
		if (VRatio > 0) {
			buffer_time = cursor_lines * LineTime / VRatio;
			hiding_margin = buffer_time - UrgentLatency;
			if (hiding_margin <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = buffer_time / hiding_margin;
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma DET. */
	det_lines = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		buffer_time = dml_floor(det_lines, SwathHeightY) * LineTime / VRatio;
		hiding_margin = buffer_time - UrgentLatency;
		if (hiding_margin <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = buffer_time / hiding_margin;
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma DET, only for surfaces that have a chroma plane. */
	if (BytePerPixelInDETC > 0) {
		det_lines = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		if (VRatio > 0) {
			buffer_time = dml_floor(det_lines, SwathHeightC) * LineTime / VRatio;
			hiding_margin = buffer_time - UrgentLatency;
			if (hiding_margin <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = buffer_time / hiding_margin;
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5836
5837 static void CalculatePixelDeliveryTimes(
5838 unsigned int NumberOfActivePlanes,
5839 double VRatio[],
5840 double VRatioChroma[],
5841 double VRatioPrefetchY[],
5842 double VRatioPrefetchC[],
5843 unsigned int swath_width_luma_ub[],
5844 unsigned int swath_width_chroma_ub[],
5845 unsigned int DPPPerPlane[],
5846 double HRatio[],
5847 double HRatioChroma[],
5848 double PixelClock[],
5849 double PSCL_THROUGHPUT[],
5850 double PSCL_THROUGHPUT_CHROMA[],
5851 double DPPCLK[],
5852 int BytePerPixelC[],
5853 enum scan_direction_class SourceScan[],
5854 unsigned int NumberOfCursors[],
5855 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5856 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5857 unsigned int BlockWidth256BytesY[],
5858 unsigned int BlockHeight256BytesY[],
5859 unsigned int BlockWidth256BytesC[],
5860 unsigned int BlockHeight256BytesC[],
5861 double DisplayPipeLineDeliveryTimeLuma[],
5862 double DisplayPipeLineDeliveryTimeChroma[],
5863 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5864 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5865 double DisplayPipeRequestDeliveryTimeLuma[],
5866 double DisplayPipeRequestDeliveryTimeChroma[],
5867 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5868 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5869 double CursorRequestDeliveryTime[],
5870 double CursorRequestDeliveryTimePrefetch[])
5871 {
5872 double req_per_swath_ub;
5873 int k;
5874
5875 for (k = 0; k < NumberOfActivePlanes; ++k) {
5876 if (VRatio[k] <= 1) {
5877 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5878 } else {
5879 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5880 }
5881
5882 if (BytePerPixelC[k] == 0) {
5883 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5884 } else {
5885 if (VRatioChroma[k] <= 1) {
5886 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5887 } else {
5888 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5889 }
5890 }
5891
5892 if (VRatioPrefetchY[k] <= 1) {
5893 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5894 } else {
5895 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5896 }
5897
5898 if (BytePerPixelC[k] == 0) {
5899 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5900 } else {
5901 if (VRatioPrefetchC[k] <= 1) {
5902 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5903 } else {
5904 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5905 }
5906 }
5907 }
5908
5909 for (k = 0; k < NumberOfActivePlanes; ++k) {
5910 if (SourceScan[k] != dm_vert) {
5911 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5912 } else {
5913 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5914 }
5915 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5916 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5917 if (BytePerPixelC[k] == 0) {
5918 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5919 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5920 } else {
5921 if (SourceScan[k] != dm_vert) {
5922 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5923 } else {
5924 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5925 }
5926 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5927 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5928 }
5929 #ifdef __DML_VBA_DEBUG__
5930 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5931 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5932 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5933 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5934 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5935 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5936 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5937 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5938 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5939 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5940 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5941 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
5942 #endif
5943 }
5944
5945 for (k = 0; k < NumberOfActivePlanes; ++k) {
5946 int cursor_req_per_width;
5947
5948 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5949 if (NumberOfCursors[k] > 0) {
5950 if (VRatio[k] <= 1) {
5951 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5952 } else {
5953 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5954 }
5955 if (VRatioPrefetchY[k] <= 1) {
5956 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5957 } else {
5958 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5959 }
5960 } else {
5961 CursorRequestDeliveryTime[k] = 0;
5962 CursorRequestDeliveryTimePrefetch[k] = 0;
5963 }
5964 #ifdef __DML_VBA_DEBUG__
5965 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
5966 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
5967 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
5968 #endif
5969 }
5970 }
5971
5972 static void CalculateMetaAndPTETimes(
5973 int NumberOfActivePlanes,
5974 bool GPUVMEnable,
5975 int MetaChunkSize,
5976 int MinMetaChunkSizeBytes,
5977 int HTotal[],
5978 double VRatio[],
5979 double VRatioChroma[],
5980 double DestinationLinesToRequestRowInVBlank[],
5981 double DestinationLinesToRequestRowInImmediateFlip[],
5982 bool DCCEnable[],
5983 double PixelClock[],
5984 int BytePerPixelY[],
5985 int BytePerPixelC[],
5986 enum scan_direction_class SourceScan[],
5987 int dpte_row_height[],
5988 int dpte_row_height_chroma[],
5989 int meta_row_width[],
5990 int meta_row_width_chroma[],
5991 int meta_row_height[],
5992 int meta_row_height_chroma[],
5993 int meta_req_width[],
5994 int meta_req_width_chroma[],
5995 int meta_req_height[],
5996 int meta_req_height_chroma[],
5997 int dpte_group_bytes[],
5998 int PTERequestSizeY[],
5999 int PTERequestSizeC[],
6000 int PixelPTEReqWidthY[],
6001 int PixelPTEReqHeightY[],
6002 int PixelPTEReqWidthC[],
6003 int PixelPTEReqHeightC[],
6004 int dpte_row_width_luma_ub[],
6005 int dpte_row_width_chroma_ub[],
6006 double DST_Y_PER_PTE_ROW_NOM_L[],
6007 double DST_Y_PER_PTE_ROW_NOM_C[],
6008 double DST_Y_PER_META_ROW_NOM_L[],
6009 double DST_Y_PER_META_ROW_NOM_C[],
6010 double TimePerMetaChunkNominal[],
6011 double TimePerChromaMetaChunkNominal[],
6012 double TimePerMetaChunkVBlank[],
6013 double TimePerChromaMetaChunkVBlank[],
6014 double TimePerMetaChunkFlip[],
6015 double TimePerChromaMetaChunkFlip[],
6016 double time_per_pte_group_nom_luma[],
6017 double time_per_pte_group_vblank_luma[],
6018 double time_per_pte_group_flip_luma[],
6019 double time_per_pte_group_nom_chroma[],
6020 double time_per_pte_group_vblank_chroma[],
6021 double time_per_pte_group_flip_chroma[])
6022 {
6023 unsigned int meta_chunk_width;
6024 unsigned int min_meta_chunk_width;
6025 unsigned int meta_chunk_per_row_int;
6026 unsigned int meta_row_remainder;
6027 unsigned int meta_chunk_threshold;
6028 unsigned int meta_chunks_per_row_ub;
6029 unsigned int meta_chunk_width_chroma;
6030 unsigned int min_meta_chunk_width_chroma;
6031 unsigned int meta_chunk_per_row_int_chroma;
6032 unsigned int meta_row_remainder_chroma;
6033 unsigned int meta_chunk_threshold_chroma;
6034 unsigned int meta_chunks_per_row_ub_chroma;
6035 unsigned int dpte_group_width_luma;
6036 unsigned int dpte_groups_per_row_luma_ub;
6037 unsigned int dpte_group_width_chroma;
6038 unsigned int dpte_groups_per_row_chroma_ub;
6039 int k;
6040
6041 for (k = 0; k < NumberOfActivePlanes; ++k) {
6042 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6043 if (BytePerPixelC[k] == 0) {
6044 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6045 } else {
6046 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6047 }
6048 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6049 if (BytePerPixelC[k] == 0) {
6050 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6051 } else {
6052 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6053 }
6054 }
6055
6056 for (k = 0; k < NumberOfActivePlanes; ++k) {
6057 if (DCCEnable[k] == true) {
6058 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6059 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6060 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6061 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6062 if (SourceScan[k] != dm_vert) {
6063 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6064 } else {
6065 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6066 }
6067 if (meta_row_remainder <= meta_chunk_threshold) {
6068 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6069 } else {
6070 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6071 }
6072 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6073 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6074 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6075 if (BytePerPixelC[k] == 0) {
6076 TimePerChromaMetaChunkNominal[k] = 0;
6077 TimePerChromaMetaChunkVBlank[k] = 0;
6078 TimePerChromaMetaChunkFlip[k] = 0;
6079 } else {
6080 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6081 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6082 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6083 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6084 if (SourceScan[k] != dm_vert) {
6085 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6086 } else {
6087 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6088 }
6089 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6090 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6091 } else {
6092 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6093 }
6094 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6095 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6096 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6097 }
6098 } else {
6099 TimePerMetaChunkNominal[k] = 0;
6100 TimePerMetaChunkVBlank[k] = 0;
6101 TimePerMetaChunkFlip[k] = 0;
6102 TimePerChromaMetaChunkNominal[k] = 0;
6103 TimePerChromaMetaChunkVBlank[k] = 0;
6104 TimePerChromaMetaChunkFlip[k] = 0;
6105 }
6106 }
6107
6108 for (k = 0; k < NumberOfActivePlanes; ++k) {
6109 if (GPUVMEnable == true) {
6110 if (SourceScan[k] != dm_vert) {
6111 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6112 } else {
6113 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6114 }
6115 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6116 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6117 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6118 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6119 if (BytePerPixelC[k] == 0) {
6120 time_per_pte_group_nom_chroma[k] = 0;
6121 time_per_pte_group_vblank_chroma[k] = 0;
6122 time_per_pte_group_flip_chroma[k] = 0;
6123 } else {
6124 if (SourceScan[k] != dm_vert) {
6125 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6126 } else {
6127 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6128 }
6129 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6130 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6131 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6132 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6133 }
6134 } else {
6135 time_per_pte_group_nom_luma[k] = 0;
6136 time_per_pte_group_vblank_luma[k] = 0;
6137 time_per_pte_group_flip_luma[k] = 0;
6138 time_per_pte_group_nom_chroma[k] = 0;
6139 time_per_pte_group_vblank_chroma[k] = 0;
6140 time_per_pte_group_flip_chroma[k] = 0;
6141 }
6142 }
6143 }
6144
/*
 * Compute, for every active plane, the time available per VM group and per VM
 * request in the VBlank and immediate-flip cases.
 *
 * The outputs are 0 unless GPUVMEnable is set and the plane either has DCC
 * enabled or GPUVMMaxPageTableLevels > 1. Otherwise the request time
 * (destination lines * HTotal / PixelClock) is divided by the number of
 * groups, respectively 64-byte requests, counted from:
 *   - DCC off:                the dpde0 bytes per frame;
 *   - DCC on, one page level: the meta PTE bytes per frame;
 *   - DCC on, more levels:    both, plus one extra group per plane component.
 * Chroma byte counts are included only when BytePerPixelC[k] > 0. All four
 * results are halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int groups_per_stage;
	int reqs_per_stage;
	double vblank_vm_time;
	double flip_vm_time;
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool has_chroma = BytePerPixelC[k] > 0;

		/* Nothing to fetch: no GPUVM, or neither DCC nor multi-level page tables. */
		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (DCCEnable[k] == false) {
			/* Page directory entries only. */
			if (has_chroma) {
				groups_per_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (has_chroma) {
				groups_per_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Both page directory entries and meta PTEs. */
			if (has_chroma) {
				groups_per_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		/* Time available for the VM fetch, shared by the group and request results. */
		vblank_vm_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_vm_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_vm_time / groups_per_stage;
		TimePerVMGroupFlip[k] = flip_vm_time / groups_per_stage;
		TimePerVMRequestVBlank[k] = vblank_vm_time / reqs_per_stage;
		TimePerVMRequestFlip[k] = flip_vm_time / reqs_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6244
6245 static void CalculateStutterEfficiency(
6246 struct display_mode_lib *mode_lib,
6247 int CompressedBufferSizeInkByte,
6248 bool UnboundedRequestEnabled,
6249 int ConfigReturnBufferSizeInKByte,
6250 int MetaFIFOSizeInKEntries,
6251 int ZeroSizeBufferEntries,
6252 int NumberOfActivePlanes,
6253 int ROBBufferSizeInKByte,
6254 double TotalDataReadBandwidth,
6255 double DCFCLK,
6256 double ReturnBW,
6257 double COMPBUF_RESERVED_SPACE_64B,
6258 double COMPBUF_RESERVED_SPACE_ZS,
6259 double SRExitTime,
6260 double SRExitZ8Time,
6261 bool SynchronizedVBlank,
6262 double Z8StutterEnterPlusExitWatermark,
6263 double StutterEnterPlusExitWatermark,
6264 bool ProgressiveToInterlaceUnitInOPP,
6265 bool Interlace[],
6266 double MinTTUVBlank[],
6267 int DPPPerPlane[],
6268 unsigned int DETBufferSizeY[],
6269 int BytePerPixelY[],
6270 double BytePerPixelDETY[],
6271 double SwathWidthY[],
6272 int SwathHeightY[],
6273 int SwathHeightC[],
6274 double NetDCCRateLuma[],
6275 double NetDCCRateChroma[],
6276 double DCCFractionOfZeroSizeRequestsLuma[],
6277 double DCCFractionOfZeroSizeRequestsChroma[],
6278 int HTotal[],
6279 int VTotal[],
6280 double PixelClock[],
6281 double VRatio[],
6282 enum scan_direction_class SourceScan[],
6283 int BlockHeight256BytesY[],
6284 int BlockWidth256BytesY[],
6285 int BlockHeight256BytesC[],
6286 int BlockWidth256BytesC[],
6287 int DCCYMaxUncompressedBlock[],
6288 int DCCCMaxUncompressedBlock[],
6289 int VActive[],
6290 bool DCCEnable[],
6291 bool WritebackEnable[],
6292 double ReadBandwidthPlaneLuma[],
6293 double ReadBandwidthPlaneChroma[],
6294 double meta_row_bw[],
6295 double dpte_row_bw[],
6296 double *StutterEfficiencyNotIncludingVBlank,
6297 double *StutterEfficiency,
6298 int *NumberOfStutterBurstsPerFrame,
6299 double *Z8StutterEfficiencyNotIncludingVBlank,
6300 double *Z8StutterEfficiency,
6301 int *Z8NumberOfStutterBurstsPerFrame,
6302 double *StutterPeriod)
6303 {
6304 struct vba_vars_st *v = &mode_lib->vba;
6305
6306 double DETBufferingTimeY;
6307 double SwathWidthYCriticalPlane = 0;
6308 double VActiveTimeCriticalPlane = 0;
6309 double FrameTimeCriticalPlane = 0;
6310 int BytePerPixelYCriticalPlane = 0;
6311 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6312 double MinTTUVBlankCriticalPlane = 0;
6313 double TotalCompressedReadBandwidth;
6314 double TotalRowReadBandwidth;
6315 double AverageDCCCompressionRate;
6316 double EffectiveCompressedBufferSize;
6317 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6318 double StutterBurstTime;
6319 int TotalActiveWriteback;
6320 double LinesInDETY;
6321 double LinesInDETYRoundedDownToSwath;
6322 double MaximumEffectiveCompressionLuma;
6323 double MaximumEffectiveCompressionChroma;
6324 double TotalZeroSizeRequestReadBandwidth;
6325 double TotalZeroSizeCompressedReadBandwidth;
6326 double AverageDCCZeroSizeFraction;
6327 double AverageZeroSizeCompressionRate;
6328 int TotalNumberOfActiveOTG = 0;
6329 double LastStutterPeriod = 0.0;
6330 double LastZ8StutterPeriod = 0.0;
6331 int k;
6332
6333 TotalZeroSizeRequestReadBandwidth = 0;
6334 TotalZeroSizeCompressedReadBandwidth = 0;
6335 TotalRowReadBandwidth = 0;
6336 TotalCompressedReadBandwidth = 0;
6337
6338 for (k = 0; k < NumberOfActivePlanes; ++k) {
6339 if (DCCEnable[k] == true) {
6340 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6341 || DCCYMaxUncompressedBlock[k] < 256) {
6342 MaximumEffectiveCompressionLuma = 2;
6343 } else {
6344 MaximumEffectiveCompressionLuma = 4;
6345 }
6346 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6347 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6348 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6349 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6350 if (ReadBandwidthPlaneChroma[k] > 0) {
6351 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6352 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6353 MaximumEffectiveCompressionChroma = 2;
6354 } else {
6355 MaximumEffectiveCompressionChroma = 4;
6356 }
6357 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6358 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6359 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6360 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6361 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6362 }
6363 } else {
6364 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6365 }
6366 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6367 }
6368
6369 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6370 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6371
6372 #ifdef __DML_VBA_DEBUG__
6373 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6374 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6375 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6376 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6377 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6378 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6379 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6380 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6381 #endif
6382
6383 if (AverageDCCZeroSizeFraction == 1) {
6384 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6385 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6386 } else if (AverageDCCZeroSizeFraction > 0) {
6387 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6388 EffectiveCompressedBufferSize = dml_min(
6389 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6390 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6391 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6392 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6393 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6394 dml_print(
6395 "DML::%s: min 2 = %f\n",
6396 __func__,
6397 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6398 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6399 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6400 } else {
6401 EffectiveCompressedBufferSize = dml_min(
6402 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6403 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6404 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6405 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6406 }
6407
6408 #ifdef __DML_VBA_DEBUG__
6409 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6410 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6411 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6412 #endif
6413
6414 *StutterPeriod = 0;
6415 for (k = 0; k < NumberOfActivePlanes; ++k) {
6416 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6417 / BytePerPixelDETY[k] / SwathWidthY[k];
6418 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6419 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6420 #ifdef __DML_VBA_DEBUG__
6421 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6422 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6423 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6424 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6425 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6426 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6427 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6428 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6429 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6430 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6431 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6432 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6433 #endif
6434
6435 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6436 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6437
6438 *StutterPeriod = DETBufferingTimeY;
6439 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6440 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6441 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6442 SwathWidthYCriticalPlane = SwathWidthY[k];
6443 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6444 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6445
6446 #ifdef __DML_VBA_DEBUG__
6447 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6448 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6449 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6450 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6451 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6452 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6453 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6454 #endif
6455 }
6456 }
6457
6458 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6459 #ifdef __DML_VBA_DEBUG__
6460 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6461 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6462 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6463 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6464 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6465 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6466 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6467 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6468 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6469 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6470 #endif
6471
6472 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6473 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6474 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6475 #ifdef __DML_VBA_DEBUG__
6476 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6477 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6478 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6479 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6480 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6481 #endif
6482 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6483
6484 dml_print(
6485 "DML::%s: Time to finish residue swath=%f\n",
6486 __func__,
6487 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6488
6489 TotalActiveWriteback = 0;
6490 for (k = 0; k < NumberOfActivePlanes; ++k) {
6491 if (WritebackEnable[k]) {
6492 TotalActiveWriteback = TotalActiveWriteback + 1;
6493 }
6494 }
6495
6496 if (TotalActiveWriteback == 0) {
6497 #ifdef __DML_VBA_DEBUG__
6498 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6499 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6500 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6501 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6502 #endif
6503 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6504 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6505 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6506 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6507 } else {
6508 *StutterEfficiencyNotIncludingVBlank = 0.;
6509 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6510 *NumberOfStutterBurstsPerFrame = 0;
6511 *Z8NumberOfStutterBurstsPerFrame = 0;
6512 }
6513 #ifdef __DML_VBA_DEBUG__
6514 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6515 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6516 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6517 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6518 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6519 #endif
6520
6521 for (k = 0; k < NumberOfActivePlanes; ++k) {
6522 if (v->BlendingAndTiming[k] == k) {
6523 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6524 }
6525 }
6526
6527 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6528 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6529
6530 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6531 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6532 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6533 } else {
6534 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6535 }
6536 } else {
6537 *StutterEfficiency = 0;
6538 }
6539
6540 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6541 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6542 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6543 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6544 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6545 } else {
6546 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6547 }
6548 } else {
6549 *Z8StutterEfficiency = 0.;
6550 }
6551
6552 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6553 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6554 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6555 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6556 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6557 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6558 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6559 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6560 }
6561
6562 static void CalculateSwathAndDETConfiguration(
6563 bool ForceSingleDPP,
6564 int NumberOfActivePlanes,
6565 unsigned int DETBufferSizeInKByte,
6566 double MaximumSwathWidthLuma[],
6567 double MaximumSwathWidthChroma[],
6568 enum scan_direction_class SourceScan[],
6569 enum source_format_class SourcePixelFormat[],
6570 enum dm_swizzle_mode SurfaceTiling[],
6571 int ViewportWidth[],
6572 int ViewportHeight[],
6573 int SurfaceWidthY[],
6574 int SurfaceWidthC[],
6575 int SurfaceHeightY[],
6576 int SurfaceHeightC[],
6577 int Read256BytesBlockHeightY[],
6578 int Read256BytesBlockHeightC[],
6579 int Read256BytesBlockWidthY[],
6580 int Read256BytesBlockWidthC[],
6581 enum odm_combine_mode ODMCombineEnabled[],
6582 int BlendingAndTiming[],
6583 int BytePerPixY[],
6584 int BytePerPixC[],
6585 double BytePerPixDETY[],
6586 double BytePerPixDETC[],
6587 int HActive[],
6588 double HRatio[],
6589 double HRatioChroma[],
6590 int DPPPerPlane[],
6591 int swath_width_luma_ub[],
6592 int swath_width_chroma_ub[],
6593 double SwathWidth[],
6594 double SwathWidthChroma[],
6595 int SwathHeightY[],
6596 int SwathHeightC[],
6597 unsigned int DETBufferSizeY[],
6598 unsigned int DETBufferSizeC[],
6599 bool ViewportSizeSupportPerPlane[],
6600 bool *ViewportSizeSupport)
6601 {
6602 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6603 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6604 int MinimumSwathHeightY;
6605 int MinimumSwathHeightC;
6606 int RoundedUpMaxSwathSizeBytesY;
6607 int RoundedUpMaxSwathSizeBytesC;
6608 int RoundedUpMinSwathSizeBytesY;
6609 int RoundedUpMinSwathSizeBytesC;
6610 int RoundedUpSwathSizeBytesY;
6611 int RoundedUpSwathSizeBytesC;
6612 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6613 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6614 int k;
6615
6616 CalculateSwathWidth(
6617 ForceSingleDPP,
6618 NumberOfActivePlanes,
6619 SourcePixelFormat,
6620 SourceScan,
6621 ViewportWidth,
6622 ViewportHeight,
6623 SurfaceWidthY,
6624 SurfaceWidthC,
6625 SurfaceHeightY,
6626 SurfaceHeightC,
6627 ODMCombineEnabled,
6628 BytePerPixY,
6629 BytePerPixC,
6630 Read256BytesBlockHeightY,
6631 Read256BytesBlockHeightC,
6632 Read256BytesBlockWidthY,
6633 Read256BytesBlockWidthC,
6634 BlendingAndTiming,
6635 HActive,
6636 HRatio,
6637 DPPPerPlane,
6638 SwathWidthSingleDPP,
6639 SwathWidthSingleDPPChroma,
6640 SwathWidth,
6641 SwathWidthChroma,
6642 MaximumSwathHeightY,
6643 MaximumSwathHeightC,
6644 swath_width_luma_ub,
6645 swath_width_chroma_ub);
6646
6647 *ViewportSizeSupport = true;
6648 for (k = 0; k < NumberOfActivePlanes; ++k) {
6649 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6650 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6651 if (SurfaceTiling[k] == dm_sw_linear
6652 || (SourcePixelFormat[k] == dm_444_64
6653 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6654 && SourceScan[k] != dm_vert)) {
6655 MinimumSwathHeightY = MaximumSwathHeightY[k];
6656 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6657 MinimumSwathHeightY = MaximumSwathHeightY[k];
6658 } else {
6659 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6660 }
6661 MinimumSwathHeightC = MaximumSwathHeightC[k];
6662 } else {
6663 if (SurfaceTiling[k] == dm_sw_linear) {
6664 MinimumSwathHeightY = MaximumSwathHeightY[k];
6665 MinimumSwathHeightC = MaximumSwathHeightC[k];
6666 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6667 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6668 MinimumSwathHeightC = MaximumSwathHeightC[k];
6669 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6670 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6671 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6672 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6673 MinimumSwathHeightY = MaximumSwathHeightY[k];
6674 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6675 } else {
6676 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6677 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6678 }
6679 }
6680
6681 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6682 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6683 if (SourcePixelFormat[k] == dm_420_10) {
6684 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6685 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6686 }
6687 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6688 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6689 if (SourcePixelFormat[k] == dm_420_10) {
6690 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6691 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6692 }
6693
6694 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6695 SwathHeightY[k] = MaximumSwathHeightY[k];
6696 SwathHeightC[k] = MaximumSwathHeightC[k];
6697 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6698 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6699 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6700 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6701 SwathHeightY[k] = MinimumSwathHeightY;
6702 SwathHeightC[k] = MaximumSwathHeightC[k];
6703 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6704 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6705 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6706 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6707 SwathHeightY[k] = MaximumSwathHeightY[k];
6708 SwathHeightC[k] = MinimumSwathHeightC;
6709 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6710 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6711 } else {
6712 SwathHeightY[k] = MinimumSwathHeightY;
6713 SwathHeightC[k] = MinimumSwathHeightC;
6714 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6715 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6716 }
6717 {
6718 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6719
6720 if (SwathHeightC[k] == 0) {
6721 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6722 DETBufferSizeC[k] = 0;
6723 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6724 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6725 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6726 } else {
6727 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6728 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6729 }
6730
6731 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6732 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6733 *ViewportSizeSupport = false;
6734 ViewportSizeSupportPerPlane[k] = false;
6735 } else {
6736 ViewportSizeSupportPerPlane[k] = true;
6737 }
6738 }
6739 }
6740 }
6741
6742 static void CalculateSwathWidth(
6743 bool ForceSingleDPP,
6744 int NumberOfActivePlanes,
6745 enum source_format_class SourcePixelFormat[],
6746 enum scan_direction_class SourceScan[],
6747 int ViewportWidth[],
6748 int ViewportHeight[],
6749 int SurfaceWidthY[],
6750 int SurfaceWidthC[],
6751 int SurfaceHeightY[],
6752 int SurfaceHeightC[],
6753 enum odm_combine_mode ODMCombineEnabled[],
6754 int BytePerPixY[],
6755 int BytePerPixC[],
6756 int Read256BytesBlockHeightY[],
6757 int Read256BytesBlockHeightC[],
6758 int Read256BytesBlockWidthY[],
6759 int Read256BytesBlockWidthC[],
6760 int BlendingAndTiming[],
6761 int HActive[],
6762 double HRatio[],
6763 int DPPPerPlane[],
6764 double SwathWidthSingleDPPY[],
6765 double SwathWidthSingleDPPC[],
6766 double SwathWidthY[],
6767 double SwathWidthC[],
6768 int MaximumSwathHeightY[],
6769 int MaximumSwathHeightC[],
6770 int swath_width_luma_ub[],
6771 int swath_width_chroma_ub[])
6772 {
6773 enum odm_combine_mode MainPlaneODMCombine;
6774 int j, k;
6775
6776 #ifdef __DML_VBA_DEBUG__
6777 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6778 #endif
6779
6780 for (k = 0; k < NumberOfActivePlanes; ++k) {
6781 if (SourceScan[k] != dm_vert) {
6782 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6783 } else {
6784 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6785 }
6786
6787 #ifdef __DML_VBA_DEBUG__
6788 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6789 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6790 #endif
6791
6792 MainPlaneODMCombine = ODMCombineEnabled[k];
6793 for (j = 0; j < NumberOfActivePlanes; ++j) {
6794 if (BlendingAndTiming[k] == j) {
6795 MainPlaneODMCombine = ODMCombineEnabled[j];
6796 }
6797 }
6798
6799 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6800 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6801 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6802 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6803 else if (DPPPerPlane[k] == 2)
6804 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6805 else
6806 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6807
6808 #ifdef __DML_VBA_DEBUG__
6809 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6810 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6811 #endif
6812
6813 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6814 SwathWidthC[k] = SwathWidthY[k] / 2;
6815 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6816 } else {
6817 SwathWidthC[k] = SwathWidthY[k];
6818 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6819 }
6820
6821 if (ForceSingleDPP == true) {
6822 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6823 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6824 }
6825 {
6826 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6827 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6828
6829 #ifdef __DML_VBA_DEBUG__
6830 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6831 #endif
6832
6833 if (SourceScan[k] != dm_vert) {
6834 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6835 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6836 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6837 if (BytePerPixC[k] > 0) {
6838 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6839
6840 swath_width_chroma_ub[k] = dml_min(
6841 surface_width_ub_c,
6842 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6843 } else {
6844 swath_width_chroma_ub[k] = 0;
6845 }
6846 } else {
6847 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6848 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6849 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6850 if (BytePerPixC[k] > 0) {
6851 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6852
6853 swath_width_chroma_ub[k] = dml_min(
6854 surface_height_ub_c,
6855 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6856 } else {
6857 swath_width_chroma_ub[k] = 0;
6858 }
6859 }
6860 }
6861 }
6862 }
6863
6864 static double CalculateExtraLatency(
6865 int RoundTripPingLatencyCycles,
6866 int ReorderingBytes,
6867 double DCFCLK,
6868 int TotalNumberOfActiveDPP,
6869 int PixelChunkSizeInKByte,
6870 int TotalNumberOfDCCActiveDPP,
6871 int MetaChunkSize,
6872 double ReturnBW,
6873 bool GPUVMEnable,
6874 bool HostVMEnable,
6875 int NumberOfActivePlanes,
6876 int NumberOfDPP[],
6877 int dpte_group_bytes[],
6878 double HostVMInefficiencyFactor,
6879 double HostVMMinPageSize,
6880 int HostVMMaxNonCachedPageTableLevels)
6881 {
6882 double ExtraLatencyBytes;
6883 double ExtraLatency;
6884
6885 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6886 ReorderingBytes,
6887 TotalNumberOfActiveDPP,
6888 PixelChunkSizeInKByte,
6889 TotalNumberOfDCCActiveDPP,
6890 MetaChunkSize,
6891 GPUVMEnable,
6892 HostVMEnable,
6893 NumberOfActivePlanes,
6894 NumberOfDPP,
6895 dpte_group_bytes,
6896 HostVMInefficiencyFactor,
6897 HostVMMinPageSize,
6898 HostVMMaxNonCachedPageTableLevels);
6899
6900 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6901
6902 #ifdef __DML_VBA_DEBUG__
6903 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6904 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6905 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6906 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6907 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6908 #endif
6909
6910 return ExtraLatency;
6911 }
6912
/*
 * CalculateExtraLatencyBytes - byte count used for the extra latency term.
 *
 * The base amount is ReorderingBytes plus 1024 bytes for every KB of
 * TotalNumberOfActiveDPP * PixelChunkSizeInKByte and
 * TotalNumberOfDCCActiveDPP * MetaChunkSize.
 *
 * When GPUVMEnable is set, each active plane additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 * HostVMInefficiencyFactor, where "levels" is 0 unless both GPUVMEnable and
 * HostVMEnable are set. In that case it is
 * HostVMMaxNonCachedPageTableLevels for HostVMMinPageSize below 2048, one
 * less for sizes below 1048576 and two less otherwise; the reduced values go
 * through dml_max(0, ...).
 *
 * Returns the total as a double.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double TotalBytes;
	int DynamicLevels = 0;
	int k;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no per-plane page-table contribution. */
	if (!GPUVMEnable)
		return TotalBytes;

	for (k = 0; k < NumberOfActivePlanes; ++k)
		TotalBytes += NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * DynamicLevels) * HostVMInefficiencyFactor;

	return TotalBytes;
}
6950
/*
 * CalculateUrgentLatency - combine the three urgent latency figures.
 *
 * Starts from dml_max3() of the pixel-only, pixel-mixed-with-VM and VM-only
 * latencies. When DoUrgentLatencyAdjustment is set, adds
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Returns the resulting latency.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double BaseLatency = dml_max3(
			UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return BaseLatency;

	return BaseLatency + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6967
6968 static void UseMinimumDCFCLK(
6969 struct display_mode_lib *mode_lib,
6970 int MaxPrefetchMode,
6971 int ReorderingBytes)
6972 {
6973 struct vba_vars_st *v = &mode_lib->vba;
6974 int dummy1, i, j, k;
6975 double NormalEfficiency, dummy2, dummy3;
6976 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
6977
6978 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
6979 for (i = 0; i < v->soc.num_states; ++i) {
6980 for (j = 0; j <= 1; ++j) {
6981 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
6982 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
6983 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
6984 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
6985 double MinimumTWait;
6986 double NonDPTEBandwidth;
6987 double DPTEBandwidth;
6988 double DCFCLKRequiredForAverageBandwidth;
6989 double ExtraLatencyBytes;
6990 double ExtraLatencyCycles;
6991 double DCFCLKRequiredForPeakBandwidth;
6992 int NoOfDPPState[DC__NUM_DPP__MAX];
6993 double MinimumTvmPlus2Tr0;
6994
6995 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6996 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6997 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6998 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6999 }
7000
7001 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7002 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7003
7004 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7005 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7006 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7007 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7008 DCFCLKRequiredForAverageBandwidth = dml_max3(
7009 v->ProjectedDCFCLKDeepSleep[i][j],
7010 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7011 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7012 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7013
7014 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7015 ReorderingBytes,
7016 v->TotalNumberOfActiveDPP[i][j],
7017 v->PixelChunkSizeInKByte,
7018 v->TotalNumberOfDCCActiveDPP[i][j],
7019 v->MetaChunkSize,
7020 v->GPUVMEnable,
7021 v->HostVMEnable,
7022 v->NumberOfActivePlanes,
7023 NoOfDPPState,
7024 v->dpte_group_bytes,
7025 1,
7026 v->HostVMMinPageSize,
7027 v->HostVMMaxNonCachedPageTableLevels);
7028 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7029 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7030 double DCFCLKCyclesRequiredInPrefetch;
7031 double ExpectedPrefetchBWAcceleration;
7032 double PrefetchTime;
7033
7034 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7035 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7036 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7037 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7038 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7039 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7040 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7041 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7042 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7043 DynamicMetadataVMExtraLatency[k] =
7044 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7045 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7046 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7047 - v->UrgLatency[i]
7048 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7049 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7050 - DynamicMetadataVMExtraLatency[k];
7051
7052 if (PrefetchTime > 0) {
7053 double ExpectedVRatioPrefetch;
7054
7055 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7056 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7057 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7058 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7059 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7060 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7061 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7062 }
7063 } else {
7064 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7065 }
7066 if (v->DynamicMetadataEnable[k] == true) {
7067 double TSetupPipe;
7068 double TdmbfPipe;
7069 double TdmsksPipe;
7070 double TdmecPipe;
7071 double AllowedTimeForUrgentExtraLatency;
7072
7073 CalculateVupdateAndDynamicMetadataParameters(
7074 v->MaxInterDCNTileRepeaters,
7075 v->RequiredDPPCLK[i][j][k],
7076 v->RequiredDISPCLK[i][j],
7077 v->ProjectedDCFCLKDeepSleep[i][j],
7078 v->PixelClock[k],
7079 v->HTotal[k],
7080 v->VTotal[k] - v->VActive[k],
7081 v->DynamicMetadataTransmittedBytes[k],
7082 v->DynamicMetadataLinesBeforeActiveRequired[k],
7083 v->Interlace[k],
7084 v->ProgressiveToInterlaceUnitInOPP,
7085 &TSetupPipe,
7086 &TdmbfPipe,
7087 &TdmecPipe,
7088 &TdmsksPipe,
7089 &dummy1,
7090 &dummy2,
7091 &dummy3);
7092 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7093 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7094 if (AllowedTimeForUrgentExtraLatency > 0) {
7095 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7096 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7097 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7098 } else {
7099 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7100 }
7101 }
7102 }
7103 DCFCLKRequiredForPeakBandwidth = 0;
7104 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7105 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7106
7107 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7108 * (v->GPUVMEnable == true ?
7109 (v->HostVMEnable == true ?
7110 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7111 0);
7112 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7113 double MaximumTvmPlus2Tr0PlusTsw;
7114
7115 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7116 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7117 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7118 } else {
7119 DCFCLKRequiredForPeakBandwidth = dml_max3(
7120 DCFCLKRequiredForPeakBandwidth,
7121 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7122 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7123 }
7124 }
7125 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7126 }
7127 }
7128 }
7129
7130 static void CalculateUnboundedRequestAndCompressedBufferSize(
7131 unsigned int DETBufferSizeInKByte,
7132 int ConfigReturnBufferSizeInKByte,
7133 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7134 int TotalActiveDPP,
7135 bool NoChromaPlanes,
7136 int MaxNumDPP,
7137 int CompressedBufferSegmentSizeInkByteFinal,
7138 enum output_encoder_class *Output,
7139 bool *UnboundedRequestEnabled,
7140 int *CompressedBufferSizeInkByte)
7141 {
7142 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7143
7144 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7145 *CompressedBufferSizeInkByte = (
7146 *UnboundedRequestEnabled == true ?
7147 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7148 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7149 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7150
7151 #ifdef __DML_VBA_DEBUG__
7152 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7153 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7154 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7155 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7156 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7157 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7158 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7159 #endif
7160 }
7161
7162 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7163 {
7164 bool ret_val = false;
7165
7166 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7167 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7168 ret_val = false;
7169 return ret_val;
7170 }
7171
/*
 * Compute the largest VStartup value allowed for a timing, capped at 1023.
 *
 * The usable vertical blank is the smaller of the actual blank
 * (VTotal - VActive) and the "available" blank, which is VBlankNom when it is
 * non-zero and otherwise VBlankNomDefaultUS converted to whole lines using
 * the line time HTotal / PixelClock. VBlankNom is used as given; it is not
 * limited by the default.
 *
 * For interlaced timings that are not converted in the OPP the result is half
 * of the usable blank, rounded down. In every other case the result is the
 * usable blank minus the writeback delay expressed in lines (rounded up, and
 * never less than one line).
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	const double LineTime = HTotal / PixelClock;
	const unsigned int ActualVBlankLines = VTotal - VActive;
	const unsigned int DefaultVBlankLines = dml_floor(VBlankNomDefaultUS / LineTime, 1.0);
	unsigned int AvailableVBlankLines;
	unsigned int UsableVBlankLines;
	unsigned int Result;

	if (VBlankNom == 0)
		AvailableVBlankLines = DefaultVBlankLines;
	else
		AvailableVBlankLines = VBlankNom;

	UsableVBlankLines = (unsigned int) dml_min(ActualVBlankLines, AvailableVBlankLines);

	if (Interlace && !ProgressiveTointerlaceUnitinOPP) {
		Result = dml_floor(UsableVBlankLines / 2.0, 1.0);
	} else {
		double WritebackDelayLines = dml_max(1.0, dml_ceil(WritebackDelayTime / LineTime, 1.0));

		/*
		 * NOTE(review): the subtraction is evaluated in double and then
		 * converted to unsigned; this presumably relies on the delay
		 * never exceeding the usable blank - confirm against callers.
		 */
		Result = UsableVBlankLines - WritebackDelayLines;
	}

	return Result > 1023 ? 1023 : Result;
}
7200