1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "dc_link.h"
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41 #define BPP_INVALID 0
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
45
// For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
48
// Move these to IP parameters/constants
50
51 // At which vstartup the DML start to try if the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
53
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
56
// fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
59
/*
 * Per-pipe parameter bundle. In this file it is handed to
 * CalculatePrefetchSchedule() through its "Pipe *myPipe" argument so that the
 * already very long parameter list does not have to carry each of these
 * values individually.
 *
 * NOTE(review): units and value ranges are not visible in this part of the
 * file; the field notes below only restate what the names and types say and
 * should be confirmed against the code that fills the structure.
 */
typedef struct {
	/* Clock values for this pipe (presumably MHz -- TODO confirm). */
	double DPPCLK;
	double DISPCLK;
	double PixelClock;
	double DCFCLKDeepSleep;
	/* Number of DPPs assigned to the plane. */
	unsigned int DPPPerPlane;
	/* Scaler state and vertical scale ratios (luma / chroma). */
	bool ScalerEnabled;
	double VRatio;
	double VRatioChroma;
	/* Scan direction of the source surface. */
	enum scan_direction_class SourceScan;
	/* Dimensions of a 256-byte block for luma (Y) and chroma (C). */
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* Timing-related inputs; the "Enable" fields are stored as integers. */
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;
	unsigned int DCCEnable;
	bool ODMCombineIsEnabled;
	/* Source surface format and its per-plane byte sizes. */
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
85
86 #define BPP_INVALID 0
87 #define BPP_BLENDED_PIPE 0xffffffff
88
89 static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102 static unsigned int dscceComputeDelay(
103 unsigned int bpc,
104 double BPP,
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110 static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
113 Pipe *myPipe,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
126 bool GPUVMEnable,
127 bool HostVMEnable,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
136 double TCalc,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
152 double TWait,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
164 double *Tno_bw,
165 double *prefetch_vmrow_bw,
166 double *Tdmdl_vm,
167 double *Tdmdl,
168 double *TSetup,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174 static void CalculateDCCConfiguration(
175 bool DCCEnabled,
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197 static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
199 double VRatio,
200 double vtaps,
201 bool Interlace,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207 static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
209 bool DCCEnable,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
218 bool GPUVMEnable,
219 bool HostVMEnable,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
224 unsigned int Pitch,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
236 int *vm_group_bytes,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244 static void CalculateRowBandwidth(
245 bool GPUVMEnable,
246 enum source_format_class SourcePixelFormat,
247 double VRatio,
248 double VRatioChroma,
249 bool DCCEnable,
250 double LineTime,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
259 double *meta_row_bw,
260 double *dpte_row_bw);
261
262 static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
264 double HostVMInefficiencyFactor,
265 double UrgentExtraLatency,
266 double UrgentLatency,
267 unsigned int GPUVMMaxPageTableLevels,
268 bool HostVMEnable,
269 unsigned int HostVMMaxNonCachedPageTableLevels,
270 bool GPUVMEnable,
271 double HostVMMinPageSize,
272 double PDEAndMetaPTEBytesPerFrame,
273 double MetaRowBytes,
274 double DPTEBytesPerRow,
275 double BandwidthAvailableForImmediateFlip,
276 unsigned int TotImmediateFlipBytes,
277 enum source_format_class SourcePixelFormat,
278 double LineTime,
279 double VRatio,
280 double VRatioChroma,
281 double Tno_bw,
282 bool DCCEnable,
283 unsigned int dpte_row_height,
284 unsigned int meta_row_height,
285 unsigned int dpte_row_height_chroma,
286 unsigned int meta_row_height_chroma,
287 double *DestinationLinesToRequestVMInImmediateFlip,
288 double *DestinationLinesToRequestRowInImmediateFlip,
289 double *final_flip_bw,
290 bool *ImmediateFlipSupportedForPipe);
291 static double CalculateWriteBackDelay(
292 enum source_format_class WritebackPixelFormat,
293 double WritebackHRatio,
294 double WritebackVRatio,
295 unsigned int WritebackVTaps,
296 int WritebackDestinationWidth,
297 int WritebackDestinationHeight,
298 int WritebackSourceHeight,
299 unsigned int HTotal);
300
301 static void CalculateVupdateAndDynamicMetadataParameters(
302 int MaxInterDCNTileRepeaters,
303 double DPPCLK,
304 double DISPCLK,
305 double DCFClkDeepSleep,
306 double PixelClock,
307 int HTotal,
308 int VBlank,
309 int DynamicMetadataTransmittedBytes,
310 int DynamicMetadataLinesBeforeActiveRequired,
311 int InterlaceEnable,
312 bool ProgressiveToInterlaceUnitInOPP,
313 double *TSetup,
314 double *Tdmbf,
315 double *Tdmec,
316 double *Tdmsks,
317 int *VUpdateOffsetPix,
318 double *VUpdateWidthPix,
319 double *VReadyOffsetPix);
320
321 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
322 struct display_mode_lib *mode_lib,
323 unsigned int PrefetchMode,
324 unsigned int NumberOfActivePlanes,
325 unsigned int MaxLineBufferLines,
326 unsigned int LineBufferSize,
327 unsigned int WritebackInterfaceBufferSize,
328 double DCFCLK,
329 double ReturnBW,
330 bool SynchronizedVBlank,
331 unsigned int dpte_group_bytes[],
332 unsigned int MetaChunkSize,
333 double UrgentLatency,
334 double ExtraLatency,
335 double WritebackLatency,
336 double WritebackChunkSize,
337 double SOCCLK,
338 double DRAMClockChangeLatency,
339 double SRExitTime,
340 double SREnterPlusExitTime,
341 double SRExitZ8Time,
342 double SREnterPlusExitZ8Time,
343 double DCFCLKDeepSleep,
344 unsigned int DETBufferSizeY[],
345 unsigned int DETBufferSizeC[],
346 unsigned int SwathHeightY[],
347 unsigned int SwathHeightC[],
348 unsigned int LBBitPerPixel[],
349 double SwathWidthY[],
350 double SwathWidthC[],
351 double HRatio[],
352 double HRatioChroma[],
353 unsigned int vtaps[],
354 unsigned int VTAPsChroma[],
355 double VRatio[],
356 double VRatioChroma[],
357 unsigned int HTotal[],
358 double PixelClock[],
359 unsigned int BlendingAndTiming[],
360 unsigned int DPPPerPlane[],
361 double BytePerPixelDETY[],
362 double BytePerPixelDETC[],
363 double DSTXAfterScaler[],
364 double DSTYAfterScaler[],
365 bool WritebackEnable[],
366 enum source_format_class WritebackPixelFormat[],
367 double WritebackDestinationWidth[],
368 double WritebackDestinationHeight[],
369 double WritebackSourceHeight[],
370 bool UnboundedRequestEnabled,
371 int unsigned CompressedBufferSizeInkByte,
372 enum clock_change_support *DRAMClockChangeSupport,
373 double *UrgentWatermark,
374 double *WritebackUrgentWatermark,
375 double *DRAMClockChangeWatermark,
376 double *WritebackDRAMClockChangeWatermark,
377 double *StutterExitWatermark,
378 double *StutterEnterPlusExitWatermark,
379 double *Z8StutterExitWatermark,
380 double *Z8StutterEnterPlusExitWatermark,
381 double *MinActiveDRAMClockChangeLatencySupported);
382
383 static void CalculateDCFCLKDeepSleep(
384 struct display_mode_lib *mode_lib,
385 unsigned int NumberOfActivePlanes,
386 int BytePerPixelY[],
387 int BytePerPixelC[],
388 double VRatio[],
389 double VRatioChroma[],
390 double SwathWidthY[],
391 double SwathWidthC[],
392 unsigned int DPPPerPlane[],
393 double HRatio[],
394 double HRatioChroma[],
395 double PixelClock[],
396 double PSCL_THROUGHPUT[],
397 double PSCL_THROUGHPUT_CHROMA[],
398 double DPPCLK[],
399 double ReadBandwidthLuma[],
400 double ReadBandwidthChroma[],
401 int ReturnBusWidth,
402 double *DCFCLKDeepSleep);
403
404 static void CalculateUrgentBurstFactor(
405 int swath_width_luma_ub,
406 int swath_width_chroma_ub,
407 unsigned int SwathHeightY,
408 unsigned int SwathHeightC,
409 double LineTime,
410 double UrgentLatency,
411 double CursorBufferSize,
412 unsigned int CursorWidth,
413 unsigned int CursorBPP,
414 double VRatio,
415 double VRatioC,
416 double BytePerPixelInDETY,
417 double BytePerPixelInDETC,
418 double DETBufferSizeY,
419 double DETBufferSizeC,
420 double *UrgentBurstFactorCursor,
421 double *UrgentBurstFactorLuma,
422 double *UrgentBurstFactorChroma,
423 bool *NotEnoughUrgentLatencyHiding);
424
425 static void UseMinimumDCFCLK(
426 struct display_mode_lib *mode_lib,
427 int MaxPrefetchMode,
428 int ReorderingBytes);
429
430 static void CalculatePixelDeliveryTimes(
431 unsigned int NumberOfActivePlanes,
432 double VRatio[],
433 double VRatioChroma[],
434 double VRatioPrefetchY[],
435 double VRatioPrefetchC[],
436 unsigned int swath_width_luma_ub[],
437 unsigned int swath_width_chroma_ub[],
438 unsigned int DPPPerPlane[],
439 double HRatio[],
440 double HRatioChroma[],
441 double PixelClock[],
442 double PSCL_THROUGHPUT[],
443 double PSCL_THROUGHPUT_CHROMA[],
444 double DPPCLK[],
445 int BytePerPixelC[],
446 enum scan_direction_class SourceScan[],
447 unsigned int NumberOfCursors[],
448 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
449 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
450 unsigned int BlockWidth256BytesY[],
451 unsigned int BlockHeight256BytesY[],
452 unsigned int BlockWidth256BytesC[],
453 unsigned int BlockHeight256BytesC[],
454 double DisplayPipeLineDeliveryTimeLuma[],
455 double DisplayPipeLineDeliveryTimeChroma[],
456 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
457 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
458 double DisplayPipeRequestDeliveryTimeLuma[],
459 double DisplayPipeRequestDeliveryTimeChroma[],
460 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
461 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
462 double CursorRequestDeliveryTime[],
463 double CursorRequestDeliveryTimePrefetch[]);
464
465 static void CalculateMetaAndPTETimes(
466 int NumberOfActivePlanes,
467 bool GPUVMEnable,
468 int MetaChunkSize,
469 int MinMetaChunkSizeBytes,
470 int HTotal[],
471 double VRatio[],
472 double VRatioChroma[],
473 double DestinationLinesToRequestRowInVBlank[],
474 double DestinationLinesToRequestRowInImmediateFlip[],
475 bool DCCEnable[],
476 double PixelClock[],
477 int BytePerPixelY[],
478 int BytePerPixelC[],
479 enum scan_direction_class SourceScan[],
480 int dpte_row_height[],
481 int dpte_row_height_chroma[],
482 int meta_row_width[],
483 int meta_row_width_chroma[],
484 int meta_row_height[],
485 int meta_row_height_chroma[],
486 int meta_req_width[],
487 int meta_req_width_chroma[],
488 int meta_req_height[],
489 int meta_req_height_chroma[],
490 int dpte_group_bytes[],
491 int PTERequestSizeY[],
492 int PTERequestSizeC[],
493 int PixelPTEReqWidthY[],
494 int PixelPTEReqHeightY[],
495 int PixelPTEReqWidthC[],
496 int PixelPTEReqHeightC[],
497 int dpte_row_width_luma_ub[],
498 int dpte_row_width_chroma_ub[],
499 double DST_Y_PER_PTE_ROW_NOM_L[],
500 double DST_Y_PER_PTE_ROW_NOM_C[],
501 double DST_Y_PER_META_ROW_NOM_L[],
502 double DST_Y_PER_META_ROW_NOM_C[],
503 double TimePerMetaChunkNominal[],
504 double TimePerChromaMetaChunkNominal[],
505 double TimePerMetaChunkVBlank[],
506 double TimePerChromaMetaChunkVBlank[],
507 double TimePerMetaChunkFlip[],
508 double TimePerChromaMetaChunkFlip[],
509 double time_per_pte_group_nom_luma[],
510 double time_per_pte_group_vblank_luma[],
511 double time_per_pte_group_flip_luma[],
512 double time_per_pte_group_nom_chroma[],
513 double time_per_pte_group_vblank_chroma[],
514 double time_per_pte_group_flip_chroma[]);
515
516 static void CalculateVMGroupAndRequestTimes(
517 unsigned int NumberOfActivePlanes,
518 bool GPUVMEnable,
519 unsigned int GPUVMMaxPageTableLevels,
520 unsigned int HTotal[],
521 int BytePerPixelC[],
522 double DestinationLinesToRequestVMInVBlank[],
523 double DestinationLinesToRequestVMInImmediateFlip[],
524 bool DCCEnable[],
525 double PixelClock[],
526 int dpte_row_width_luma_ub[],
527 int dpte_row_width_chroma_ub[],
528 int vm_group_bytes[],
529 unsigned int dpde0_bytes_per_frame_ub_l[],
530 unsigned int dpde0_bytes_per_frame_ub_c[],
531 int meta_pte_bytes_per_frame_ub_l[],
532 int meta_pte_bytes_per_frame_ub_c[],
533 double TimePerVMGroupVBlank[],
534 double TimePerVMGroupFlip[],
535 double TimePerVMRequestVBlank[],
536 double TimePerVMRequestFlip[]);
537
538 static void CalculateStutterEfficiency(
539 struct display_mode_lib *mode_lib,
540 int CompressedBufferSizeInkByte,
541 bool UnboundedRequestEnabled,
542 int ConfigReturnBufferSizeInKByte,
543 int MetaFIFOSizeInKEntries,
544 int ZeroSizeBufferEntries,
545 int NumberOfActivePlanes,
546 int ROBBufferSizeInKByte,
547 double TotalDataReadBandwidth,
548 double DCFCLK,
549 double ReturnBW,
550 double COMPBUF_RESERVED_SPACE_64B,
551 double COMPBUF_RESERVED_SPACE_ZS,
552 double SRExitTime,
553 double SRExitZ8Time,
554 bool SynchronizedVBlank,
555 double Z8StutterEnterPlusExitWatermark,
556 double StutterEnterPlusExitWatermark,
557 bool ProgressiveToInterlaceUnitInOPP,
558 bool Interlace[],
559 double MinTTUVBlank[],
560 int DPPPerPlane[],
561 unsigned int DETBufferSizeY[],
562 int BytePerPixelY[],
563 double BytePerPixelDETY[],
564 double SwathWidthY[],
565 int SwathHeightY[],
566 int SwathHeightC[],
567 double NetDCCRateLuma[],
568 double NetDCCRateChroma[],
569 double DCCFractionOfZeroSizeRequestsLuma[],
570 double DCCFractionOfZeroSizeRequestsChroma[],
571 int HTotal[],
572 int VTotal[],
573 double PixelClock[],
574 double VRatio[],
575 enum scan_direction_class SourceScan[],
576 int BlockHeight256BytesY[],
577 int BlockWidth256BytesY[],
578 int BlockHeight256BytesC[],
579 int BlockWidth256BytesC[],
580 int DCCYMaxUncompressedBlock[],
581 int DCCCMaxUncompressedBlock[],
582 int VActive[],
583 bool DCCEnable[],
584 bool WritebackEnable[],
585 double ReadBandwidthPlaneLuma[],
586 double ReadBandwidthPlaneChroma[],
587 double meta_row_bw[],
588 double dpte_row_bw[],
589 double *StutterEfficiencyNotIncludingVBlank,
590 double *StutterEfficiency,
591 int *NumberOfStutterBurstsPerFrame,
592 double *Z8StutterEfficiencyNotIncludingVBlank,
593 double *Z8StutterEfficiency,
594 int *Z8NumberOfStutterBurstsPerFrame,
595 double *StutterPeriod);
596
597 static void CalculateSwathAndDETConfiguration(
598 bool ForceSingleDPP,
599 int NumberOfActivePlanes,
600 unsigned int DETBufferSizeInKByte,
601 double MaximumSwathWidthLuma[],
602 double MaximumSwathWidthChroma[],
603 enum scan_direction_class SourceScan[],
604 enum source_format_class SourcePixelFormat[],
605 enum dm_swizzle_mode SurfaceTiling[],
606 int ViewportWidth[],
607 int ViewportHeight[],
608 int SurfaceWidthY[],
609 int SurfaceWidthC[],
610 int SurfaceHeightY[],
611 int SurfaceHeightC[],
612 int Read256BytesBlockHeightY[],
613 int Read256BytesBlockHeightC[],
614 int Read256BytesBlockWidthY[],
615 int Read256BytesBlockWidthC[],
616 enum odm_combine_mode ODMCombineEnabled[],
617 int BlendingAndTiming[],
618 int BytePerPixY[],
619 int BytePerPixC[],
620 double BytePerPixDETY[],
621 double BytePerPixDETC[],
622 int HActive[],
623 double HRatio[],
624 double HRatioChroma[],
625 int DPPPerPlane[],
626 int swath_width_luma_ub[],
627 int swath_width_chroma_ub[],
628 double SwathWidth[],
629 double SwathWidthChroma[],
630 int SwathHeightY[],
631 int SwathHeightC[],
632 unsigned int DETBufferSizeY[],
633 unsigned int DETBufferSizeC[],
634 bool ViewportSizeSupportPerPlane[],
635 bool *ViewportSizeSupport);
636 static void CalculateSwathWidth(
637 bool ForceSingleDPP,
638 int NumberOfActivePlanes,
639 enum source_format_class SourcePixelFormat[],
640 enum scan_direction_class SourceScan[],
641 int ViewportWidth[],
642 int ViewportHeight[],
643 int SurfaceWidthY[],
644 int SurfaceWidthC[],
645 int SurfaceHeightY[],
646 int SurfaceHeightC[],
647 enum odm_combine_mode ODMCombineEnabled[],
648 int BytePerPixY[],
649 int BytePerPixC[],
650 int Read256BytesBlockHeightY[],
651 int Read256BytesBlockHeightC[],
652 int Read256BytesBlockWidthY[],
653 int Read256BytesBlockWidthC[],
654 int BlendingAndTiming[],
655 int HActive[],
656 double HRatio[],
657 int DPPPerPlane[],
658 double SwathWidthSingleDPPY[],
659 double SwathWidthSingleDPPC[],
660 double SwathWidthY[],
661 double SwathWidthC[],
662 int MaximumSwathHeightY[],
663 int MaximumSwathHeightC[],
664 int swath_width_luma_ub[],
665 int swath_width_chroma_ub[]);
666
667 static double CalculateExtraLatency(
668 int RoundTripPingLatencyCycles,
669 int ReorderingBytes,
670 double DCFCLK,
671 int TotalNumberOfActiveDPP,
672 int PixelChunkSizeInKByte,
673 int TotalNumberOfDCCActiveDPP,
674 int MetaChunkSize,
675 double ReturnBW,
676 bool GPUVMEnable,
677 bool HostVMEnable,
678 int NumberOfActivePlanes,
679 int NumberOfDPP[],
680 int dpte_group_bytes[],
681 double HostVMInefficiencyFactor,
682 double HostVMMinPageSize,
683 int HostVMMaxNonCachedPageTableLevels);
684
685 static double CalculateExtraLatencyBytes(
686 int ReorderingBytes,
687 int TotalNumberOfActiveDPP,
688 int PixelChunkSizeInKByte,
689 int TotalNumberOfDCCActiveDPP,
690 int MetaChunkSize,
691 bool GPUVMEnable,
692 bool HostVMEnable,
693 int NumberOfActivePlanes,
694 int NumberOfDPP[],
695 int dpte_group_bytes[],
696 double HostVMInefficiencyFactor,
697 double HostVMMinPageSize,
698 int HostVMMaxNonCachedPageTableLevels);
699
700 static double CalculateUrgentLatency(
701 double UrgentLatencyPixelDataOnly,
702 double UrgentLatencyPixelMixedWithVMData,
703 double UrgentLatencyVMDataOnly,
704 bool DoUrgentLatencyAdjustment,
705 double UrgentLatencyAdjustmentFabricClockComponent,
706 double UrgentLatencyAdjustmentFabricClockReference,
707 double FabricClockSingle);
708
709 static void CalculateUnboundedRequestAndCompressedBufferSize(
710 unsigned int DETBufferSizeInKByte,
711 int ConfigReturnBufferSizeInKByte,
712 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
713 int TotalActiveDPP,
714 bool NoChromaPlanes,
715 int MaxNumDPP,
716 int CompressedBufferSegmentSizeInkByteFinal,
717 enum output_encoder_class *Output,
718 bool *UnboundedRequestEnabled,
719 int *CompressedBufferSizeInkByte);
720
721 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
722
/*
 * dml31_recalculate - run the full recalculation sequence on @mode_lib.
 *
 * The steps are executed in a fixed order, each operating on the same
 * display_mode_lib instance:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Steps 1 and 2 are not declared in this file's visible prototypes and come
 * from the included headers; steps 3 and 4 are static functions of this file.
 * Nothing is returned; all results are left in @mode_lib by the callees.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
#ifdef __DML_VBA_DEBUG__
	/* Debug builds only: trace entry into the final (largest) stage. */
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
733
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)734 static unsigned int dscceComputeDelay(
735 unsigned int bpc,
736 double BPP,
737 unsigned int sliceWidth,
738 unsigned int numSlices,
739 enum output_format_class pixelFormat,
740 enum output_encoder_class Output)
741 {
742 // valid bpc = source bits per component in the set of {8, 10, 12}
743 // valid bpp = increments of 1/16 of a bit
744 // min = 6/7/8 in N420/N422/444, respectively
745 // max = such that compression is 1:1
746 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
747 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
748 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
749
750 // fixed value
751 unsigned int rcModelSize = 8192;
752
753 // N422/N420 operate at 2 pixels per clock
754 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
755
756 if (pixelFormat == dm_420)
757 pixelsPerClock = 2;
758 else if (pixelFormat == dm_444)
759 pixelsPerClock = 1;
760 else if (pixelFormat == dm_n422)
761 pixelsPerClock = 2;
762 // #all other modes operate at 1 pixel per clock
763 else
764 pixelsPerClock = 1;
765
766 //initial transmit delay as per PPS
767 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
768
769 //compute ssm delay
770 if (bpc == 8)
771 D = 81;
772 else if (bpc == 10)
773 D = 89;
774 else
775 D = 113;
776
777 //divide by pixel per cycle to compute slice width as seen by DSC
778 w = sliceWidth / pixelsPerClock;
779
780 //422 mode has an additional cycle of delay
781 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
782 s = 0;
783 else
784 s = 1;
785
786 //main calculation for the dscce
787 ix = initalXmitDelay + 45;
788 wx = (w + 2) / 3;
789 P = 3 * wx - w;
790 l0 = ix / w;
791 a = ix + P * l0;
792 ax = (a + 2) / 3 + D + 6 + 1;
793 L = (ax + wx - 1) / wx;
794 if ((ix % w) == 0 && P != 0)
795 lstall = 1;
796 else
797 lstall = 0;
798 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
799
800 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
801 pixels = Delay * 3 * pixelsPerClock;
802 return pixels;
803 }
804
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)805 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
806 {
807 unsigned int Delay = 0;
808
809 if (pixelFormat == dm_420) {
810 // sfr
811 Delay = Delay + 2;
812 // dsccif
813 Delay = Delay + 0;
814 // dscc - input deserializer
815 Delay = Delay + 3;
816 // dscc gets pixels every other cycle
817 Delay = Delay + 2;
818 // dscc - input cdc fifo
819 Delay = Delay + 12;
820 // dscc gets pixels every other cycle
821 Delay = Delay + 13;
822 // dscc - cdc uncertainty
823 Delay = Delay + 2;
824 // dscc - output cdc fifo
825 Delay = Delay + 7;
826 // dscc gets pixels every other cycle
827 Delay = Delay + 3;
828 // dscc - cdc uncertainty
829 Delay = Delay + 2;
830 // dscc - output serializer
831 Delay = Delay + 1;
832 // sft
833 Delay = Delay + 1;
834 } else if (pixelFormat == dm_n422) {
835 // sfr
836 Delay = Delay + 2;
837 // dsccif
838 Delay = Delay + 1;
839 // dscc - input deserializer
840 Delay = Delay + 5;
841 // dscc - input cdc fifo
842 Delay = Delay + 25;
843 // dscc - cdc uncertainty
844 Delay = Delay + 2;
845 // dscc - output cdc fifo
846 Delay = Delay + 10;
847 // dscc - cdc uncertainty
848 Delay = Delay + 2;
849 // dscc - output serializer
850 Delay = Delay + 1;
851 // sft
852 Delay = Delay + 1;
853 } else {
854 // sfr
855 Delay = Delay + 2;
856 // dsccif
857 Delay = Delay + 0;
858 // dscc - input deserializer
859 Delay = Delay + 3;
860 // dscc - input cdc fifo
861 Delay = Delay + 12;
862 // dscc - cdc uncertainty
863 Delay = Delay + 2;
864 // dscc - output cdc fifo
865 Delay = Delay + 7;
866 // dscc - output serializer
867 Delay = Delay + 1;
868 // dscc - cdc uncertainty
869 Delay = Delay + 2;
870 // sft
871 Delay = Delay + 1;
872 }
873
874 return Delay;
875 }
876
/*
 * CalculatePrefetchSchedule() - work out the prefetch schedule of one pipe.
 *
 * Two candidate schedules are evaluated:
 *   - "oto": built from prefetch_bw_oto, giving dst_y_prefetch_oto lines;
 *   - "equ": fitted into dst_y_prefetch_equ, i.e. VStartup minus the lines
 *     taken by TSetup, max(TWait + TCalc, Tdmdl) and the after-scaler delay.
 * The candidate needing fewer destination lines is reported.
 *
 * Inputs are only read. mode_lib, HostVMMinPageSize, SwathWidthY and
 * SwathWidthC are accepted but not referenced by the body.
 *
 * Outputs (all written through pointers):
 *   TSetup, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix
 *       handed straight to CalculateVupdateAndDynamicMetadataParameters().
 *   Tdmdl, Tdmdl_vm, NotEnoughTimeForDynamicMetadata
 *       dynamic metadata timing and whether VStartup leaves room for it.
 *   DSTXAfterScaler, DSTYAfterScaler
 *       pipeline delay after the scaler, split into lines and pixels.
 *   Tno_bw, DestinationLinesForPrefetch, PrefetchBandwidth,
 *   DestinationLinesToRequestVMInVBlank, DestinationLinesToRequestRowInVBlank,
 *   VRatioPrefetchY, VRatioPrefetchC, RequiredPrefetchPixDataBWLuma,
 *   RequiredPrefetchPixDataBWChroma, prefetch_vmrow_bw
 *       the selected schedule.
 *
 * Return: true when no valid schedule was found. In that case the prefetch
 * outputs are zeroed, except *prefetch_vmrow_bw and *Tno_bw which keep the
 * values computed so far. true is also returned early, before any
 * after-scaler or prefetch output has been written, when myPipe->DPPCLK or
 * myPipe->DISPCLK is 0.0. false means the outputs describe a usable schedule.
 */
static bool CalculatePrefetchSchedule(
		struct display_mode_lib *mode_lib,
		double HostVMInefficiencyFactor,
		Pipe *myPipe,
		unsigned int DSCDelay,
		double DPPCLKDelaySubtotalPlusCNVCFormater,
		double DPPCLKDelaySCL,
		double DPPCLKDelaySCLLBOnly,
		double DPPCLKDelayCNVCCursor,
		double DISPCLKDelaySubtotal,
		unsigned int DPP_RECOUT_WIDTH,
		enum output_format_class OutputFormat,
		unsigned int MaxInterDCNTileRepeaters,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		unsigned int GPUVMPageTableLevels,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		double HostVMMinPageSize,
		bool DynamicMetadataEnable,
		bool DynamicMetadataVMEnabled,
		int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int DynamicMetadataTransmittedBytes,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		double VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		double VInitPreFillC,
		unsigned int MaxNumSwathC,
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double Tsw_oto;
	double prefetch_bw_oto;
	double prefetch_bw_pr;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double trip_to_mem;
	double Tvm_trips;
	double Tr0_trips;
	double Tvm_trips_rounded;
	double Tr0_trips_rounded;
	double Lsw_oto;
	double Tpre_rounded;
	double prefetch_bw_equ;
	double Tvm_equ;
	double Tr0_equ;
	double Tdmbf;
	double Tdmec;
	double Tdmsks;
	double prefetch_sw_bytes;
	double bytes_pp;
	double dep_bytes;
	int max_vratio_pre = 4;
	double min_Lsw;
	double Tsw_est1 = 0;
	double Tsw_est3 = 0;
	double max_Tsw = 0;

	/* Host VM page-walk trips only count when both GPUVM and HostVM are on. */
	if (GPUVMEnable == true && HostVMEnable == true) {
		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMDynamicLevelsTrips = 0;
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
#endif
	CalculateVupdateAndDynamicMetadataParameters(
			MaxInterDCNTileRepeaters,
			myPipe->DPPCLK,
			myPipe->DISPCLK,
			myPipe->DCFCLKDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			DynamicMetadataTransmittedBytes,
			DynamicMetadataLinesBeforeActiveRequired,
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	/* The #ifdef only selects which condition guards the same if/else body. */
#ifdef __DML_VBA_ALLOW_DELTA__
	if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
#else
	if (DynamicMetadataVMEnabled == true) {
#endif
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	} else {
		*Tdmdl = TWait + UrgentExtraLatency;
	}

#ifdef __DML_VBA_ALLOW_DELTA__
	if (DynamicMetadataEnable == false) {
		*Tdmdl = 0.0;
	}
#endif

	if (DynamicMetadataEnable == true) {
		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			*NotEnoughTimeForDynamicMetadata = true;
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);

	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;

	DISPCLKCycles = DISPCLKDelaySubtotal;

	/*
	 * Both clocks are divisors below. Bail out with an error before the
	 * after-scaler and prefetch outputs are touched.
	 */
	if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
	/* *DSTXAfterScaler is a double, so it must be printed with %f. */
	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
	dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
#endif

	*DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;

	if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Re-split the total pixel delay into whole lines plus a pixel remainder. */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
#endif

	MyError = false;

	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
	Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
	Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (!myPipe->DCCEnable) {
		Tr0_trips = 0.0;
		Tr0_trips_rounded = 0.0;
	}
#endif

	if (!GPUVMEnable) {
		Tvm_trips = 0.0;
		Tvm_trips_rounded = 0.0;
	}

	if (GPUVMEnable) {
		if (GPUVMPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
		} else {
			*Tno_bw = 0;
		}
	} else if (!myPipe->DCCEnable) {
		*Tno_bw = LineTime;
	} else {
		*Tno_bw = LineTime / 4;
	}

	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	else
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
	/*rev 99*/
	prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
	max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	/* NOTE: this first value is overwritten by the statement right after it. */
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
	prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);

	min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
	Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
	Tsw_oto = Lsw_oto * LineTime;

	/* Recompute the oto bandwidth from the rounded swath time. */
	prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML: HTotal: %d\n", myPipe->HTotal);
	dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
	dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
	dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
	dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
	dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
#endif

	if (GPUVMEnable == true)
		Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
	else
		Tvm_oto = LineTime / 4.0;

	if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
				LineTime - Tvm_oto,
				LineTime / 4);
	} else {
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	/* Print the value the label names (this used to pass MetaRowByte). */
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
#endif

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;

	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
	dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
	dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
	dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
	dml_print("DML: LineTime: %f\n", LineTime);
	dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);

	dml_print("DML: LineTime: %f\n", LineTime);
	dml_print("DML: VStartup: %d\n", VStartup);
	dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
	dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
	dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
	dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
	dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
	dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
	dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
	dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
	dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
	dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
	dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;

		/*
		 * Four candidate "equ" bandwidths; a candidate is 0 when its time
		 * budget is not positive. 1: VM and row fetches both bandwidth
		 * bound; 2: row fetch bound by Tr0_trips_rounded; 3: VM fetch
		 * bound by Tvm_trips_rounded; 4: both bound by their trip times.
		 */
		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else {
			PrefetchBandwidth1 = 0;
		}

		if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else {
			PrefetchBandwidth3 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
#endif
		if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth4 = 0;

		{
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			/* A case is OK when its bandwidth is consistent with its own assumption. */
			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			/* First consistent case wins; case 4 is the fallback. */
			if (Case1OK) {
				prefetch_bw_equ = PrefetchBandwidth1;
			} else if (Case2OK) {
				prefetch_bw_equ = PrefetchBandwidth2;
			} else if (Case3OK) {
				prefetch_bw_equ = PrefetchBandwidth3;
			} else {
				prefetch_bw_equ = PrefetchBandwidth4;
			}

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

			if (prefetch_bw_equ > 0) {
				if (GPUVMEnable == true) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
					Tr0_equ = dml_max4(
							(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
							Tr0_trips,
							(LineTime - Tvm_equ) / 2,
							LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
			}
		}

		/* Report whichever schedule needs fewer destination lines. */
		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
		} else {
			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			TimeForFetchingMetaPTE = Tvm_equ;
			TimeForFetchingRowInVBlank = Tr0_equ;
			*PrefetchBandwidth = prefetch_bw_equ;
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

#ifdef __DML_VBA_ALLOW_DELTA__
		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
		// See note above dated 5/30/2018
		//                      - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
				- ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
#else
		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
#endif

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {

			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY = dml_max(
							(double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
							(double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
#endif
			/* NOTE(review): chroma uses || here where luma uses &&; kept exactly as received. */
			if ((SwathHeightC > 4) || VInitPreFillC > 3) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC = dml_max(
							*VRatioPrefetchC,
							(double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
#endif

			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
					/ LineTime;
		} else {
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
			dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}

		dml_print(
				"DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
				(double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML:  Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
		dml_print(
				"DML:  Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
				(double) LinesToRequestPrefetchPixelData * LineTime);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
				(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
				(double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
				VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
				- (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);

	} else {
		/* One line or less available for the "equ" schedule: no prefetch possible. */
		MyError = true;
		dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		/* Having bytes to fetch but no lines to fetch them in is an error. */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error, hand back an all-zero schedule rather than partial results. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
}
1522
/*
 * Returns 4 * VCOSpeed divided by the divider obtained from
 * dml_floor(4 * VCOSpeed / Clock, 1).
 * NOTE(review): assuming dml_floor rounds down to a multiple of its second
 * argument, the result is never below Clock - confirm against the helper.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1527
/*
 * Returns 4 * VCOSpeed divided by the divider obtained from
 * dml_ceil(4 * VCOSpeed / Clock, 1).
 * NOTE(review): assuming dml_ceil rounds up to a multiple of its second
 * argument, the result is never above Clock - confirm against the helper.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1532
1533 static void CalculateDCCConfiguration(
1534 bool DCCEnabled,
1535 bool DCCProgrammingAssumesScanDirectionUnknown,
1536 enum source_format_class SourcePixelFormat,
1537 unsigned int SurfaceWidthLuma,
1538 unsigned int SurfaceWidthChroma,
1539 unsigned int SurfaceHeightLuma,
1540 unsigned int SurfaceHeightChroma,
1541 double DETBufferSize,
1542 unsigned int RequestHeight256ByteLuma,
1543 unsigned int RequestHeight256ByteChroma,
1544 enum dm_swizzle_mode TilingFormat,
1545 unsigned int BytePerPixelY,
1546 unsigned int BytePerPixelC,
1547 double BytePerPixelDETY,
1548 double BytePerPixelDETC,
1549 enum scan_direction_class ScanOrientation,
1550 unsigned int *MaxUncompressedBlockLuma,
1551 unsigned int *MaxUncompressedBlockChroma,
1552 unsigned int *MaxCompressedBlockLuma,
1553 unsigned int *MaxCompressedBlockChroma,
1554 unsigned int *IndependentBlockLuma,
1555 unsigned int *IndependentBlockChroma)
1556 {
1557 int yuv420;
1558 int horz_div_l;
1559 int horz_div_c;
1560 int vert_div_l;
1561 int vert_div_c;
1562
1563 int swath_buf_size;
1564 double detile_buf_vp_horz_limit;
1565 double detile_buf_vp_vert_limit;
1566
1567 int MAS_vp_horz_limit;
1568 int MAS_vp_vert_limit;
1569 int max_vp_horz_width;
1570 int max_vp_vert_height;
1571 int eff_surf_width_l;
1572 int eff_surf_width_c;
1573 int eff_surf_height_l;
1574 int eff_surf_height_c;
1575
1576 int full_swath_bytes_horz_wc_l;
1577 int full_swath_bytes_horz_wc_c;
1578 int full_swath_bytes_vert_wc_l;
1579 int full_swath_bytes_vert_wc_c;
1580 int req128_horz_wc_l;
1581 int req128_horz_wc_c;
1582 int req128_vert_wc_l;
1583 int req128_vert_wc_c;
1584 int segment_order_horz_contiguous_luma;
1585 int segment_order_horz_contiguous_chroma;
1586 int segment_order_vert_contiguous_luma;
1587 int segment_order_vert_contiguous_chroma;
1588
1589 typedef enum {
1590 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1591 } RequestType;
1592 RequestType RequestLuma;
1593 RequestType RequestChroma;
1594
1595 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1596 horz_div_l = 1;
1597 horz_div_c = 1;
1598 vert_div_l = 1;
1599 vert_div_c = 1;
1600
1601 if (BytePerPixelY == 1)
1602 vert_div_l = 0;
1603 if (BytePerPixelC == 1)
1604 vert_div_c = 0;
1605 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1606 horz_div_l = 0;
1607 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1608 horz_div_c = 0;
1609
1610 if (BytePerPixelC == 0) {
1611 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1612 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1613 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1614 } else {
1615 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1616 detile_buf_vp_horz_limit = (double) swath_buf_size
1617 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1618 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1619 detile_buf_vp_vert_limit = (double) swath_buf_size
1620 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1621 }
1622
1623 if (SourcePixelFormat == dm_420_10) {
1624 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1625 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1626 }
1627
1628 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1629 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1630
1631 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1632 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1633 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1634 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1635 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1636 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1637 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1638 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1639
1640 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1641 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1642 if (BytePerPixelC > 0) {
1643 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1644 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1645 } else {
1646 full_swath_bytes_horz_wc_c = 0;
1647 full_swath_bytes_vert_wc_c = 0;
1648 }
1649
1650 if (SourcePixelFormat == dm_420_10) {
1651 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1652 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1653 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1654 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1655 }
1656
1657 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1658 req128_horz_wc_l = 0;
1659 req128_horz_wc_c = 0;
1660 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1661 req128_horz_wc_l = 0;
1662 req128_horz_wc_c = 1;
1663 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1664 req128_horz_wc_l = 1;
1665 req128_horz_wc_c = 0;
1666 } else {
1667 req128_horz_wc_l = 1;
1668 req128_horz_wc_c = 1;
1669 }
1670
1671 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1672 req128_vert_wc_l = 0;
1673 req128_vert_wc_c = 0;
1674 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1675 req128_vert_wc_l = 0;
1676 req128_vert_wc_c = 1;
1677 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1678 req128_vert_wc_l = 1;
1679 req128_vert_wc_c = 0;
1680 } else {
1681 req128_vert_wc_l = 1;
1682 req128_vert_wc_c = 1;
1683 }
1684
1685 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1686 segment_order_horz_contiguous_luma = 0;
1687 } else {
1688 segment_order_horz_contiguous_luma = 1;
1689 }
1690 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1691 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1692 segment_order_vert_contiguous_luma = 0;
1693 } else {
1694 segment_order_vert_contiguous_luma = 1;
1695 }
1696 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1697 segment_order_horz_contiguous_chroma = 0;
1698 } else {
1699 segment_order_horz_contiguous_chroma = 1;
1700 }
1701 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1702 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1703 segment_order_vert_contiguous_chroma = 0;
1704 } else {
1705 segment_order_vert_contiguous_chroma = 1;
1706 }
1707
1708 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1709 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1710 RequestLuma = REQ_256Bytes;
1711 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1712 RequestLuma = REQ_128BytesNonContiguous;
1713 } else {
1714 RequestLuma = REQ_128BytesContiguous;
1715 }
1716 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1717 RequestChroma = REQ_256Bytes;
1718 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1719 RequestChroma = REQ_128BytesNonContiguous;
1720 } else {
1721 RequestChroma = REQ_128BytesContiguous;
1722 }
1723 } else if (ScanOrientation != dm_vert) {
1724 if (req128_horz_wc_l == 0) {
1725 RequestLuma = REQ_256Bytes;
1726 } else if (segment_order_horz_contiguous_luma == 0) {
1727 RequestLuma = REQ_128BytesNonContiguous;
1728 } else {
1729 RequestLuma = REQ_128BytesContiguous;
1730 }
1731 if (req128_horz_wc_c == 0) {
1732 RequestChroma = REQ_256Bytes;
1733 } else if (segment_order_horz_contiguous_chroma == 0) {
1734 RequestChroma = REQ_128BytesNonContiguous;
1735 } else {
1736 RequestChroma = REQ_128BytesContiguous;
1737 }
1738 } else {
1739 if (req128_vert_wc_l == 0) {
1740 RequestLuma = REQ_256Bytes;
1741 } else if (segment_order_vert_contiguous_luma == 0) {
1742 RequestLuma = REQ_128BytesNonContiguous;
1743 } else {
1744 RequestLuma = REQ_128BytesContiguous;
1745 }
1746 if (req128_vert_wc_c == 0) {
1747 RequestChroma = REQ_256Bytes;
1748 } else if (segment_order_vert_contiguous_chroma == 0) {
1749 RequestChroma = REQ_128BytesNonContiguous;
1750 } else {
1751 RequestChroma = REQ_128BytesContiguous;
1752 }
1753 }
1754
1755 if (RequestLuma == REQ_256Bytes) {
1756 *MaxUncompressedBlockLuma = 256;
1757 *MaxCompressedBlockLuma = 256;
1758 *IndependentBlockLuma = 0;
1759 } else if (RequestLuma == REQ_128BytesContiguous) {
1760 *MaxUncompressedBlockLuma = 256;
1761 *MaxCompressedBlockLuma = 128;
1762 *IndependentBlockLuma = 128;
1763 } else {
1764 *MaxUncompressedBlockLuma = 256;
1765 *MaxCompressedBlockLuma = 64;
1766 *IndependentBlockLuma = 64;
1767 }
1768
1769 if (RequestChroma == REQ_256Bytes) {
1770 *MaxUncompressedBlockChroma = 256;
1771 *MaxCompressedBlockChroma = 256;
1772 *IndependentBlockChroma = 0;
1773 } else if (RequestChroma == REQ_128BytesContiguous) {
1774 *MaxUncompressedBlockChroma = 256;
1775 *MaxCompressedBlockChroma = 128;
1776 *IndependentBlockChroma = 128;
1777 } else {
1778 *MaxUncompressedBlockChroma = 256;
1779 *MaxCompressedBlockChroma = 64;
1780 *IndependentBlockChroma = 64;
1781 }
1782
1783 if (DCCEnabled != true || BytePerPixelC == 0) {
1784 *MaxUncompressedBlockChroma = 0;
1785 *MaxCompressedBlockChroma = 0;
1786 *IndependentBlockChroma = 0;
1787 }
1788
1789 if (DCCEnabled != true) {
1790 *MaxUncompressedBlockLuma = 0;
1791 *MaxCompressedBlockLuma = 0;
1792 *IndependentBlockLuma = 0;
1793 }
1794 }
1795
1796 static double CalculatePrefetchSourceLines(
1797 struct display_mode_lib *mode_lib,
1798 double VRatio,
1799 double vtaps,
1800 bool Interlace,
1801 bool ProgressiveToInterlaceUnitInOPP,
1802 unsigned int SwathHeight,
1803 unsigned int ViewportYStart,
1804 double *VInitPreFill,
1805 unsigned int *MaxNumSwath)
1806 {
1807 struct vba_vars_st *v = &mode_lib->vba;
1808 unsigned int MaxPartialSwath;
1809
1810 if (ProgressiveToInterlaceUnitInOPP)
1811 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1812 else
1813 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1814
1815 if (!v->IgnoreViewportPositioning) {
1816
1817 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1818
1819 if (*VInitPreFill > 1.0)
1820 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1821 else
1822 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1823 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1824
1825 } else {
1826
1827 if (ViewportYStart != 0)
1828 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1829
1830 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1831
1832 if (*VInitPreFill > 1.0)
1833 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1834 else
1835 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1836 }
1837
1838 #ifdef __DML_VBA_DEBUG__
1839 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1840 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1841 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1842 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1843 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1844 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1845 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1846 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1847 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1848 #endif
1849 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1850 }
1851
1852 static unsigned int CalculateVMAndRowBytes(
1853 struct display_mode_lib *mode_lib,
1854 bool DCCEnable,
1855 unsigned int BlockHeight256Bytes,
1856 unsigned int BlockWidth256Bytes,
1857 enum source_format_class SourcePixelFormat,
1858 unsigned int SurfaceTiling,
1859 unsigned int BytePerPixel,
1860 enum scan_direction_class ScanDirection,
1861 unsigned int SwathWidth,
1862 unsigned int ViewportHeight,
1863 bool GPUVMEnable,
1864 bool HostVMEnable,
1865 unsigned int HostVMMaxNonCachedPageTableLevels,
1866 unsigned int GPUVMMinPageSize,
1867 unsigned int HostVMMinPageSize,
1868 unsigned int PTEBufferSizeInRequests,
1869 unsigned int Pitch,
1870 unsigned int DCCMetaPitch,
1871 unsigned int *MacroTileWidth,
1872 unsigned int *MetaRowByte,
1873 unsigned int *PixelPTEBytesPerRow,
1874 bool *PTEBufferSizeNotExceeded,
1875 int *dpte_row_width_ub,
1876 unsigned int *dpte_row_height,
1877 unsigned int *MetaRequestWidth,
1878 unsigned int *MetaRequestHeight,
1879 unsigned int *meta_row_width,
1880 unsigned int *meta_row_height,
1881 int *vm_group_bytes,
1882 unsigned int *dpte_group_bytes,
1883 unsigned int *PixelPTEReqWidth,
1884 unsigned int *PixelPTEReqHeight,
1885 unsigned int *PTERequestSize,
1886 int *DPDE0BytesFrame,
1887 int *MetaPTEBytesFrame)
1888 {
1889 struct vba_vars_st *v = &mode_lib->vba;
1890 unsigned int MPDEBytesFrame;
1891 unsigned int DCCMetaSurfaceBytes;
1892 unsigned int MacroTileSizeBytes;
1893 unsigned int MacroTileHeight;
1894 unsigned int ExtraDPDEBytesFrame;
1895 unsigned int PDEAndMetaPTEBytesFrame;
1896 unsigned int PixelPTEReqHeightPTEs = 0;
1897 unsigned int HostVMDynamicLevels = 0;
1898 double FractionOfPTEReturnDrop;
1899
1900 if (GPUVMEnable == true && HostVMEnable == true) {
1901 if (HostVMMinPageSize < 2048) {
1902 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1903 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1904 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1905 } else {
1906 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1907 }
1908 }
1909
1910 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1911 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1912 if (ScanDirection != dm_vert) {
1913 *meta_row_height = *MetaRequestHeight;
1914 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1915 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1916 } else {
1917 *meta_row_height = *MetaRequestWidth;
1918 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1919 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1920 }
1921 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1922 if (GPUVMEnable == true) {
1923 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1924 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1925 } else {
1926 *MetaPTEBytesFrame = 0;
1927 MPDEBytesFrame = 0;
1928 }
1929
1930 if (DCCEnable != true) {
1931 *MetaPTEBytesFrame = 0;
1932 MPDEBytesFrame = 0;
1933 *MetaRowByte = 0;
1934 }
1935
1936 if (SurfaceTiling == dm_sw_linear) {
1937 MacroTileSizeBytes = 256;
1938 MacroTileHeight = BlockHeight256Bytes;
1939 } else {
1940 MacroTileSizeBytes = 65536;
1941 MacroTileHeight = 16 * BlockHeight256Bytes;
1942 }
1943 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1944
1945 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1946 if (ScanDirection != dm_vert) {
1947 *DPDE0BytesFrame = 64
1948 * (dml_ceil(
1949 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1950 / (8 * 2097152),
1951 1) + 1);
1952 } else {
1953 *DPDE0BytesFrame = 64
1954 * (dml_ceil(
1955 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1956 / (8 * 2097152),
1957 1) + 1);
1958 }
1959 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1960 } else {
1961 *DPDE0BytesFrame = 0;
1962 ExtraDPDEBytesFrame = 0;
1963 }
1964
1965 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1966
1967 #ifdef __DML_VBA_DEBUG__
1968 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1969 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1970 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1971 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1972 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1973 #endif
1974
1975 if (HostVMEnable == true) {
1976 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1977 }
1978 #ifdef __DML_VBA_DEBUG__
1979 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1980 #endif
1981
1982 if (SurfaceTiling == dm_sw_linear) {
1983 PixelPTEReqHeightPTEs = 1;
1984 *PixelPTEReqHeight = 1;
1985 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1986 *PTERequestSize = 64;
1987 FractionOfPTEReturnDrop = 0;
1988 } else if (MacroTileSizeBytes == 4096) {
1989 PixelPTEReqHeightPTEs = 1;
1990 *PixelPTEReqHeight = MacroTileHeight;
1991 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1992 *PTERequestSize = 64;
1993 if (ScanDirection != dm_vert)
1994 FractionOfPTEReturnDrop = 0;
1995 else
1996 FractionOfPTEReturnDrop = 7 / 8;
1997 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1998 PixelPTEReqHeightPTEs = 16;
1999 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2000 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2001 *PTERequestSize = 128;
2002 FractionOfPTEReturnDrop = 0;
2003 } else {
2004 PixelPTEReqHeightPTEs = 1;
2005 *PixelPTEReqHeight = MacroTileHeight;
2006 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2007 *PTERequestSize = 64;
2008 FractionOfPTEReturnDrop = 0;
2009 }
2010
2011 if (SurfaceTiling == dm_sw_linear) {
2012 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2013 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2014 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2015 } else if (ScanDirection != dm_vert) {
2016 *dpte_row_height = *PixelPTEReqHeight;
2017 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2018 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2019 } else {
2020 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2021 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2022 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2023 }
2024
2025 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2026 *PTEBufferSizeNotExceeded = true;
2027 } else {
2028 *PTEBufferSizeNotExceeded = false;
2029 }
2030
2031 if (GPUVMEnable != true) {
2032 *PixelPTEBytesPerRow = 0;
2033 *PTEBufferSizeNotExceeded = true;
2034 }
2035
2036 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2037
2038 if (HostVMEnable == true) {
2039 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2040 }
2041
2042 if (HostVMEnable == true) {
2043 *vm_group_bytes = 512;
2044 *dpte_group_bytes = 512;
2045 } else if (GPUVMEnable == true) {
2046 *vm_group_bytes = 2048;
2047 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2048 *dpte_group_bytes = 512;
2049 } else {
2050 *dpte_group_bytes = 2048;
2051 }
2052 } else {
2053 *vm_group_bytes = 0;
2054 *dpte_group_bytes = 0;
2055 }
2056 return PDEAndMetaPTEBytesFrame;
2057 }
2058
2059 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2060 {
2061 struct vba_vars_st *v = &mode_lib->vba;
2062 unsigned int j, k;
2063 double HostVMInefficiencyFactor = 1.0;
2064 bool NoChromaPlanes = true;
2065 int ReorderBytes;
2066 double VMDataOnlyReturnBW;
2067 double MaxTotalRDBandwidth = 0;
2068 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2069
2070 v->WritebackDISPCLK = 0.0;
2071 v->DISPCLKWithRamping = 0;
2072 v->DISPCLKWithoutRamping = 0;
2073 v->GlobalDPPCLK = 0.0;
/* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2075 {
2076 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2077 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2078 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2079 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2080 if (v->HostVMEnable != true) {
2081 v->ReturnBW = dml_min(
2082 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2083 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2084 } else {
2085 v->ReturnBW = dml_min(
2086 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2087 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2088 }
2089 }
2090 /* End DAL custom code */
2091
2092 // DISPCLK and DPPCLK Calculation
2093 //
2094 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2095 if (v->WritebackEnable[k]) {
2096 v->WritebackDISPCLK = dml_max(
2097 v->WritebackDISPCLK,
2098 dml31_CalculateWriteBackDISPCLK(
2099 v->WritebackPixelFormat[k],
2100 v->PixelClock[k],
2101 v->WritebackHRatio[k],
2102 v->WritebackVRatio[k],
2103 v->WritebackHTaps[k],
2104 v->WritebackVTaps[k],
2105 v->WritebackSourceWidth[k],
2106 v->WritebackDestinationWidth[k],
2107 v->HTotal[k],
2108 v->WritebackLineBufferSize));
2109 }
2110 }
2111
2112 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2113 if (v->HRatio[k] > 1) {
2114 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2115 v->MaxDCHUBToPSCLThroughput,
2116 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2117 } else {
2118 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2119 }
2120
2121 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2122 * dml_max(
2123 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2124 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2125
2126 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2127 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2128 }
2129
2130 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2131 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2132 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2133 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2134 } else {
2135 if (v->HRatioChroma[k] > 1) {
2136 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2137 v->MaxDCHUBToPSCLThroughput,
2138 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2139 } else {
2140 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2141 }
2142 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2143 * dml_max3(
2144 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2145 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2146 1.0);
2147
2148 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2149 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2150 }
2151
2152 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2153 }
2154 }
2155
2156 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2157 if (v->BlendingAndTiming[k] != k)
2158 continue;
2159 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2160 v->DISPCLKWithRamping = dml_max(
2161 v->DISPCLKWithRamping,
2162 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2163 * (1 + v->DISPCLKRampingMargin / 100));
2164 v->DISPCLKWithoutRamping = dml_max(
2165 v->DISPCLKWithoutRamping,
2166 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2167 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2168 v->DISPCLKWithRamping = dml_max(
2169 v->DISPCLKWithRamping,
2170 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2171 * (1 + v->DISPCLKRampingMargin / 100));
2172 v->DISPCLKWithoutRamping = dml_max(
2173 v->DISPCLKWithoutRamping,
2174 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2175 } else {
2176 v->DISPCLKWithRamping = dml_max(
2177 v->DISPCLKWithRamping,
2178 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2179 v->DISPCLKWithoutRamping = dml_max(
2180 v->DISPCLKWithoutRamping,
2181 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2182 }
2183 }
2184
2185 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2186 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2187
2188 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2189 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2190 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2191 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2192 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2193 v->DISPCLKDPPCLKVCOSpeed);
2194 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2195 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2196 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2197 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2198 } else {
2199 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2200 }
2201 v->DISPCLK = v->DISPCLK_calculated;
2202 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2203
2204 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2205 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2206 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2207 }
2208 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2209 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2210 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2211 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2212 }
2213
2214 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2215 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2216 }
2217
2218 // Urgent and B P-State/DRAM Clock Change Watermark
2219 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2220 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2221
2222 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2223 CalculateBytePerPixelAnd256BBlockSizes(
2224 v->SourcePixelFormat[k],
2225 v->SurfaceTiling[k],
2226 &v->BytePerPixelY[k],
2227 &v->BytePerPixelC[k],
2228 &v->BytePerPixelDETY[k],
2229 &v->BytePerPixelDETC[k],
2230 &v->BlockHeight256BytesY[k],
2231 &v->BlockHeight256BytesC[k],
2232 &v->BlockWidth256BytesY[k],
2233 &v->BlockWidth256BytesC[k]);
2234 }
2235
2236 CalculateSwathWidth(
2237 false,
2238 v->NumberOfActivePlanes,
2239 v->SourcePixelFormat,
2240 v->SourceScan,
2241 v->ViewportWidth,
2242 v->ViewportHeight,
2243 v->SurfaceWidthY,
2244 v->SurfaceWidthC,
2245 v->SurfaceHeightY,
2246 v->SurfaceHeightC,
2247 v->ODMCombineEnabled,
2248 v->BytePerPixelY,
2249 v->BytePerPixelC,
2250 v->BlockHeight256BytesY,
2251 v->BlockHeight256BytesC,
2252 v->BlockWidth256BytesY,
2253 v->BlockWidth256BytesC,
2254 v->BlendingAndTiming,
2255 v->HActive,
2256 v->HRatio,
2257 v->DPPPerPlane,
2258 v->SwathWidthSingleDPPY,
2259 v->SwathWidthSingleDPPC,
2260 v->SwathWidthY,
2261 v->SwathWidthC,
2262 v->dummyinteger3,
2263 v->dummyinteger4,
2264 v->swath_width_luma_ub,
2265 v->swath_width_chroma_ub);
2266
2267 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2268 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2269 * v->VRatio[k];
2270 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2271 * v->VRatioChroma[k];
2272 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2273 }
2274
2275 // DCFCLK Deep Sleep
2276 CalculateDCFCLKDeepSleep(
2277 mode_lib,
2278 v->NumberOfActivePlanes,
2279 v->BytePerPixelY,
2280 v->BytePerPixelC,
2281 v->VRatio,
2282 v->VRatioChroma,
2283 v->SwathWidthY,
2284 v->SwathWidthC,
2285 v->DPPPerPlane,
2286 v->HRatio,
2287 v->HRatioChroma,
2288 v->PixelClock,
2289 v->PSCL_THROUGHPUT_LUMA,
2290 v->PSCL_THROUGHPUT_CHROMA,
2291 v->DPPCLK,
2292 v->ReadBandwidthPlaneLuma,
2293 v->ReadBandwidthPlaneChroma,
2294 v->ReturnBusWidth,
2295 &v->DCFCLKDeepSleep);
2296
2297 // DSCCLK
2298 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2299 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2300 v->DSCCLK_calculated[k] = 0.0;
2301 } else {
2302 if (v->OutputFormat[k] == dm_420)
2303 v->DSCFormatFactor = 2;
2304 else if (v->OutputFormat[k] == dm_444)
2305 v->DSCFormatFactor = 1;
2306 else if (v->OutputFormat[k] == dm_n422)
2307 v->DSCFormatFactor = 2;
2308 else
2309 v->DSCFormatFactor = 1;
2310 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2311 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2312 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2313 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2314 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2315 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2316 else
2317 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2318 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2319 }
2320 }
2321
2322 // DSC Delay
2323 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2324 double BPP = v->OutputBpp[k];
2325
2326 if (v->DSCEnabled[k] && BPP != 0) {
2327 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2328 v->DSCDelay[k] = dscceComputeDelay(
2329 v->DSCInputBitPerComponent[k],
2330 BPP,
2331 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2332 v->NumberOfDSCSlices[k],
2333 v->OutputFormat[k],
2334 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2335 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2336 v->DSCDelay[k] = 2
2337 * (dscceComputeDelay(
2338 v->DSCInputBitPerComponent[k],
2339 BPP,
2340 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2341 v->NumberOfDSCSlices[k] / 2.0,
2342 v->OutputFormat[k],
2343 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2344 } else {
2345 v->DSCDelay[k] = 4
2346 * (dscceComputeDelay(
2347 v->DSCInputBitPerComponent[k],
2348 BPP,
2349 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2350 v->NumberOfDSCSlices[k] / 4.0,
2351 v->OutputFormat[k],
2352 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2353 }
2354 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2355 } else {
2356 v->DSCDelay[k] = 0;
2357 }
2358 }
2359
2360 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2361 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2362 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2363 v->DSCDelay[k] = v->DSCDelay[j];
2364
2365 // Prefetch
2366 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2367 unsigned int PDEAndMetaPTEBytesFrameY;
2368 unsigned int PixelPTEBytesPerRowY;
2369 unsigned int MetaRowByteY;
2370 unsigned int MetaRowByteC;
2371 unsigned int PDEAndMetaPTEBytesFrameC;
2372 unsigned int PixelPTEBytesPerRowC;
2373 bool PTEBufferSizeNotExceededY;
2374 bool PTEBufferSizeNotExceededC;
2375
2376 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2377 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2378 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2379 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2380 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2381 } else {
2382 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2383 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2384 }
2385
2386 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2387 mode_lib,
2388 v->DCCEnable[k],
2389 v->BlockHeight256BytesC[k],
2390 v->BlockWidth256BytesC[k],
2391 v->SourcePixelFormat[k],
2392 v->SurfaceTiling[k],
2393 v->BytePerPixelC[k],
2394 v->SourceScan[k],
2395 v->SwathWidthC[k],
2396 v->ViewportHeightChroma[k],
2397 v->GPUVMEnable,
2398 v->HostVMEnable,
2399 v->HostVMMaxNonCachedPageTableLevels,
2400 v->GPUVMMinPageSize,
2401 v->HostVMMinPageSize,
2402 v->PTEBufferSizeInRequestsForChroma,
2403 v->PitchC[k],
2404 v->DCCMetaPitchC[k],
2405 &v->MacroTileWidthC[k],
2406 &MetaRowByteC,
2407 &PixelPTEBytesPerRowC,
2408 &PTEBufferSizeNotExceededC,
2409 &v->dpte_row_width_chroma_ub[k],
2410 &v->dpte_row_height_chroma[k],
2411 &v->meta_req_width_chroma[k],
2412 &v->meta_req_height_chroma[k],
2413 &v->meta_row_width_chroma[k],
2414 &v->meta_row_height_chroma[k],
2415 &v->dummyinteger1,
2416 &v->dummyinteger2,
2417 &v->PixelPTEReqWidthC[k],
2418 &v->PixelPTEReqHeightC[k],
2419 &v->PTERequestSizeC[k],
2420 &v->dpde0_bytes_per_frame_ub_c[k],
2421 &v->meta_pte_bytes_per_frame_ub_c[k]);
2422
2423 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2424 mode_lib,
2425 v->VRatioChroma[k],
2426 v->VTAPsChroma[k],
2427 v->Interlace[k],
2428 v->ProgressiveToInterlaceUnitInOPP,
2429 v->SwathHeightC[k],
2430 v->ViewportYStartC[k],
2431 &v->VInitPreFillC[k],
2432 &v->MaxNumSwathC[k]);
2433 } else {
2434 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2435 v->PTEBufferSizeInRequestsForChroma = 0;
2436 PixelPTEBytesPerRowC = 0;
2437 PDEAndMetaPTEBytesFrameC = 0;
2438 MetaRowByteC = 0;
2439 v->MaxNumSwathC[k] = 0;
2440 v->PrefetchSourceLinesC[k] = 0;
2441 }
2442
2443 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2444 mode_lib,
2445 v->DCCEnable[k],
2446 v->BlockHeight256BytesY[k],
2447 v->BlockWidth256BytesY[k],
2448 v->SourcePixelFormat[k],
2449 v->SurfaceTiling[k],
2450 v->BytePerPixelY[k],
2451 v->SourceScan[k],
2452 v->SwathWidthY[k],
2453 v->ViewportHeight[k],
2454 v->GPUVMEnable,
2455 v->HostVMEnable,
2456 v->HostVMMaxNonCachedPageTableLevels,
2457 v->GPUVMMinPageSize,
2458 v->HostVMMinPageSize,
2459 v->PTEBufferSizeInRequestsForLuma,
2460 v->PitchY[k],
2461 v->DCCMetaPitchY[k],
2462 &v->MacroTileWidthY[k],
2463 &MetaRowByteY,
2464 &PixelPTEBytesPerRowY,
2465 &PTEBufferSizeNotExceededY,
2466 &v->dpte_row_width_luma_ub[k],
2467 &v->dpte_row_height[k],
2468 &v->meta_req_width[k],
2469 &v->meta_req_height[k],
2470 &v->meta_row_width[k],
2471 &v->meta_row_height[k],
2472 &v->vm_group_bytes[k],
2473 &v->dpte_group_bytes[k],
2474 &v->PixelPTEReqWidthY[k],
2475 &v->PixelPTEReqHeightY[k],
2476 &v->PTERequestSizeY[k],
2477 &v->dpde0_bytes_per_frame_ub_l[k],
2478 &v->meta_pte_bytes_per_frame_ub_l[k]);
2479
2480 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2481 mode_lib,
2482 v->VRatio[k],
2483 v->vtaps[k],
2484 v->Interlace[k],
2485 v->ProgressiveToInterlaceUnitInOPP,
2486 v->SwathHeightY[k],
2487 v->ViewportYStartY[k],
2488 &v->VInitPreFillY[k],
2489 &v->MaxNumSwathY[k]);
2490 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2491 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2492 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2493
2494 CalculateRowBandwidth(
2495 v->GPUVMEnable,
2496 v->SourcePixelFormat[k],
2497 v->VRatio[k],
2498 v->VRatioChroma[k],
2499 v->DCCEnable[k],
2500 v->HTotal[k] / v->PixelClock[k],
2501 MetaRowByteY,
2502 MetaRowByteC,
2503 v->meta_row_height[k],
2504 v->meta_row_height_chroma[k],
2505 PixelPTEBytesPerRowY,
2506 PixelPTEBytesPerRowC,
2507 v->dpte_row_height[k],
2508 v->dpte_row_height_chroma[k],
2509 &v->meta_row_bw[k],
2510 &v->dpte_row_bw[k]);
2511 }
2512
2513 v->TotalDCCActiveDPP = 0;
2514 v->TotalActiveDPP = 0;
2515 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2516 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2517 if (v->DCCEnable[k])
2518 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2519 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2520 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2521 NoChromaPlanes = false;
2522 }
2523
2524 ReorderBytes = v->NumberOfChannels
2525 * dml_max3(
2526 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2527 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2528 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2529
2530 VMDataOnlyReturnBW = dml_min(
2531 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2532 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2533 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2534 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2535
2536 #ifdef __DML_VBA_DEBUG__
2537 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2538 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2539 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2540 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2541 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2542 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2543 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2544 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2545 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2546 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2547 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2548 #endif
2549
2550 if (v->GPUVMEnable && v->HostVMEnable)
2551 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2552
2553 v->UrgentExtraLatency = CalculateExtraLatency(
2554 v->RoundTripPingLatencyCycles,
2555 ReorderBytes,
2556 v->DCFCLK,
2557 v->TotalActiveDPP,
2558 v->PixelChunkSizeInKByte,
2559 v->TotalDCCActiveDPP,
2560 v->MetaChunkSize,
2561 v->ReturnBW,
2562 v->GPUVMEnable,
2563 v->HostVMEnable,
2564 v->NumberOfActivePlanes,
2565 v->DPPPerPlane,
2566 v->dpte_group_bytes,
2567 HostVMInefficiencyFactor,
2568 v->HostVMMinPageSize,
2569 v->HostVMMaxNonCachedPageTableLevels);
2570
2571 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2572
2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2574 if (v->BlendingAndTiming[k] == k) {
2575 if (v->WritebackEnable[k] == true) {
2576 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2577 + CalculateWriteBackDelay(
2578 v->WritebackPixelFormat[k],
2579 v->WritebackHRatio[k],
2580 v->WritebackVRatio[k],
2581 v->WritebackVTaps[k],
2582 v->WritebackDestinationWidth[k],
2583 v->WritebackDestinationHeight[k],
2584 v->WritebackSourceHeight[k],
2585 v->HTotal[k]) / v->DISPCLK;
2586 } else
2587 v->WritebackDelay[v->VoltageLevel][k] = 0;
2588 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2589 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2590 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2591 v->WritebackDelay[v->VoltageLevel][k],
2592 v->WritebackLatency
2593 + CalculateWriteBackDelay(
2594 v->WritebackPixelFormat[j],
2595 v->WritebackHRatio[j],
2596 v->WritebackVRatio[j],
2597 v->WritebackVTaps[j],
2598 v->WritebackDestinationWidth[j],
2599 v->WritebackDestinationHeight[j],
2600 v->WritebackSourceHeight[j],
2601 v->HTotal[k]) / v->DISPCLK);
2602 }
2603 }
2604 }
2605 }
2606
2607 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2608 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2609 if (v->BlendingAndTiming[k] == j)
2610 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2611
2612 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2613 v->MaxVStartupLines[k] =
2614 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2615 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2616 v->VTotal[k] - v->VActive[k]
2617 - dml_max(
2618 1.0,
2619 dml_ceil(
2620 (double) v->WritebackDelay[v->VoltageLevel][k]
2621 / (v->HTotal[k] / v->PixelClock[k]),
2622 1));
2623 if (v->MaxVStartupLines[k] > 1023)
2624 v->MaxVStartupLines[k] = 1023;
2625
2626 #ifdef __DML_VBA_DEBUG__
2627 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2628 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2629 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2630 #endif
2631 }
2632
2633 v->MaximumMaxVStartupLines = 0;
2634 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2635 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2636
2637 // VBA_DELTA
2638 // We don't really care to iterate between the various prefetch modes
2639 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2640
2641 v->UrgentLatency = CalculateUrgentLatency(
2642 v->UrgentLatencyPixelDataOnly,
2643 v->UrgentLatencyPixelMixedWithVMData,
2644 v->UrgentLatencyVMDataOnly,
2645 v->DoUrgentLatencyAdjustment,
2646 v->UrgentLatencyAdjustmentFabricClockComponent,
2647 v->UrgentLatencyAdjustmentFabricClockReference,
2648 v->FabricClock);
2649
2650 v->FractionOfUrgentBandwidth = 0.0;
2651 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2652
2653 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2654
2655 do {
2656 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2657 bool DestinationLineTimesForPrefetchLessThan2 = false;
2658 bool VRatioPrefetchMoreThan4 = false;
2659 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2660 MaxTotalRDBandwidth = 0;
2661
2662 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2663
2664 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2665 Pipe myPipe;
2666
2667 myPipe.DPPCLK = v->DPPCLK[k];
2668 myPipe.DISPCLK = v->DISPCLK;
2669 myPipe.PixelClock = v->PixelClock[k];
2670 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2671 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2672 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2673 myPipe.VRatio = v->VRatio[k];
2674 myPipe.VRatioChroma = v->VRatioChroma[k];
2675 myPipe.SourceScan = v->SourceScan[k];
2676 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2677 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2678 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2679 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2680 myPipe.InterlaceEnable = v->Interlace[k];
2681 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2682 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2683 myPipe.HTotal = v->HTotal[k];
2684 myPipe.DCCEnable = v->DCCEnable[k];
2685 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2686 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2687 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2688 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2689 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2690 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2691 v->ErrorResult[k] = CalculatePrefetchSchedule(
2692 mode_lib,
2693 HostVMInefficiencyFactor,
2694 &myPipe,
2695 v->DSCDelay[k],
2696 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2697 v->DPPCLKDelaySCL,
2698 v->DPPCLKDelaySCLLBOnly,
2699 v->DPPCLKDelayCNVCCursor,
2700 v->DISPCLKDelaySubtotal,
2701 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2702 v->OutputFormat[k],
2703 v->MaxInterDCNTileRepeaters,
2704 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2705 v->MaxVStartupLines[k],
2706 v->GPUVMMaxPageTableLevels,
2707 v->GPUVMEnable,
2708 v->HostVMEnable,
2709 v->HostVMMaxNonCachedPageTableLevels,
2710 v->HostVMMinPageSize,
2711 v->DynamicMetadataEnable[k],
2712 v->DynamicMetadataVMEnabled,
2713 v->DynamicMetadataLinesBeforeActiveRequired[k],
2714 v->DynamicMetadataTransmittedBytes[k],
2715 v->UrgentLatency,
2716 v->UrgentExtraLatency,
2717 v->TCalc,
2718 v->PDEAndMetaPTEBytesFrame[k],
2719 v->MetaRowByte[k],
2720 v->PixelPTEBytesPerRow[k],
2721 v->PrefetchSourceLinesY[k],
2722 v->SwathWidthY[k],
2723 v->VInitPreFillY[k],
2724 v->MaxNumSwathY[k],
2725 v->PrefetchSourceLinesC[k],
2726 v->SwathWidthC[k],
2727 v->VInitPreFillC[k],
2728 v->MaxNumSwathC[k],
2729 v->swath_width_luma_ub[k],
2730 v->swath_width_chroma_ub[k],
2731 v->SwathHeightY[k],
2732 v->SwathHeightC[k],
2733 TWait,
2734 &v->DSTXAfterScaler[k],
2735 &v->DSTYAfterScaler[k],
2736 &v->DestinationLinesForPrefetch[k],
2737 &v->PrefetchBandwidth[k],
2738 &v->DestinationLinesToRequestVMInVBlank[k],
2739 &v->DestinationLinesToRequestRowInVBlank[k],
2740 &v->VRatioPrefetchY[k],
2741 &v->VRatioPrefetchC[k],
2742 &v->RequiredPrefetchPixDataBWLuma[k],
2743 &v->RequiredPrefetchPixDataBWChroma[k],
2744 &v->NotEnoughTimeForDynamicMetadata[k],
2745 &v->Tno_bw[k],
2746 &v->prefetch_vmrow_bw[k],
2747 &v->Tdmdl_vm[k],
2748 &v->Tdmdl[k],
2749 &v->TSetup[k],
2750 &v->VUpdateOffsetPix[k],
2751 &v->VUpdateWidthPix[k],
2752 &v->VReadyOffsetPix[k]);
2753
2754 #ifdef __DML_VBA_DEBUG__
2755 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2756 #endif
2757 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2758 }
2759
2760 v->NoEnoughUrgentLatencyHiding = false;
2761 v->NoEnoughUrgentLatencyHidingPre = false;
2762
2763 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2764 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2765 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2766 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2767 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2768
2769 CalculateUrgentBurstFactor(
2770 v->swath_width_luma_ub[k],
2771 v->swath_width_chroma_ub[k],
2772 v->SwathHeightY[k],
2773 v->SwathHeightC[k],
2774 v->HTotal[k] / v->PixelClock[k],
2775 v->UrgentLatency,
2776 v->CursorBufferSize,
2777 v->CursorWidth[k][0],
2778 v->CursorBPP[k][0],
2779 v->VRatio[k],
2780 v->VRatioChroma[k],
2781 v->BytePerPixelDETY[k],
2782 v->BytePerPixelDETC[k],
2783 v->DETBufferSizeY[k],
2784 v->DETBufferSizeC[k],
2785 &v->UrgBurstFactorCursor[k],
2786 &v->UrgBurstFactorLuma[k],
2787 &v->UrgBurstFactorChroma[k],
2788 &v->NoUrgentLatencyHiding[k]);
2789
2790 CalculateUrgentBurstFactor(
2791 v->swath_width_luma_ub[k],
2792 v->swath_width_chroma_ub[k],
2793 v->SwathHeightY[k],
2794 v->SwathHeightC[k],
2795 v->HTotal[k] / v->PixelClock[k],
2796 v->UrgentLatency,
2797 v->CursorBufferSize,
2798 v->CursorWidth[k][0],
2799 v->CursorBPP[k][0],
2800 v->VRatioPrefetchY[k],
2801 v->VRatioPrefetchC[k],
2802 v->BytePerPixelDETY[k],
2803 v->BytePerPixelDETC[k],
2804 v->DETBufferSizeY[k],
2805 v->DETBufferSizeC[k],
2806 &v->UrgBurstFactorCursorPre[k],
2807 &v->UrgBurstFactorLumaPre[k],
2808 &v->UrgBurstFactorChromaPre[k],
2809 &v->NoUrgentLatencyHidingPre[k]);
2810
2811 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2812 + dml_max3(
2813 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2814 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2815 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2816 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2817 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2818 v->DPPPerPlane[k]
2819 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2820 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2821 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2822
2823 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2824 + dml_max3(
2825 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2826 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2827 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2828 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2829 + v->cursor_bw_pre[k]);
2830
2831 #ifdef __DML_VBA_DEBUG__
2832 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2833 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2834 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2835 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2836 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2837
2838 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2839 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2840
2841 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2842 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2843 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2844 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2845 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2846 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2847 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2848 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2849 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2850 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2851 #endif
2852
2853 if (v->DestinationLinesForPrefetch[k] < 2)
2854 DestinationLineTimesForPrefetchLessThan2 = true;
2855
2856 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2857 VRatioPrefetchMoreThan4 = true;
2858
2859 if (v->NoUrgentLatencyHiding[k] == true)
2860 v->NoEnoughUrgentLatencyHiding = true;
2861
2862 if (v->NoUrgentLatencyHidingPre[k] == true)
2863 v->NoEnoughUrgentLatencyHidingPre = true;
2864 }
2865
2866 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2867
2868 #ifdef __DML_VBA_DEBUG__
2869 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2870 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2871 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2872 #endif
2873
2874 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2875 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2876 v->PrefetchModeSupported = true;
2877 else {
2878 v->PrefetchModeSupported = false;
2879 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2880 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2881 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2882 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2883 }
2884
2885 // PREVIOUS_ERROR
2886 // This error result check used to be done after the PrefetchModeSupported evaluation, so
2887 // the flip schedule would still be calculated even if the prefetch mode was not supported
2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2889 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2890 v->PrefetchModeSupported = false;
2891 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2892 }
2893 }
2894
2895 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2896 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2897 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2898 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2899 - dml_max(
2900 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2901 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2902 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2903 v->DPPPerPlane[k]
2904 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2905 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2906 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2907 }
2908
2909 v->TotImmediateFlipBytes = 0;
2910 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2911 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2912 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2913 }
2914 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2915 CalculateFlipSchedule(
2916 mode_lib,
2917 HostVMInefficiencyFactor,
2918 v->UrgentExtraLatency,
2919 v->UrgentLatency,
2920 v->GPUVMMaxPageTableLevels,
2921 v->HostVMEnable,
2922 v->HostVMMaxNonCachedPageTableLevels,
2923 v->GPUVMEnable,
2924 v->HostVMMinPageSize,
2925 v->PDEAndMetaPTEBytesFrame[k],
2926 v->MetaRowByte[k],
2927 v->PixelPTEBytesPerRow[k],
2928 v->BandwidthAvailableForImmediateFlip,
2929 v->TotImmediateFlipBytes,
2930 v->SourcePixelFormat[k],
2931 v->HTotal[k] / v->PixelClock[k],
2932 v->VRatio[k],
2933 v->VRatioChroma[k],
2934 v->Tno_bw[k],
2935 v->DCCEnable[k],
2936 v->dpte_row_height[k],
2937 v->meta_row_height[k],
2938 v->dpte_row_height_chroma[k],
2939 v->meta_row_height_chroma[k],
2940 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2941 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2942 &v->final_flip_bw[k],
2943 &v->ImmediateFlipSupportedForPipe[k]);
2944 }
2945
2946 v->total_dcn_read_bw_with_flip = 0.0;
2947 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2948 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2949 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2950 + dml_max3(
2951 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2952 v->DPPPerPlane[k] * v->final_flip_bw[k]
2953 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2954 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2955 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2956 v->DPPPerPlane[k]
2957 * (v->final_flip_bw[k]
2958 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2959 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2960 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2961 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2962 + dml_max3(
2963 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2964 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2965 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2966 v->DPPPerPlane[k]
2967 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2968 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2969 }
2970 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2971
2972 v->ImmediateFlipSupported = true;
2973 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2974 #ifdef __DML_VBA_DEBUG__
2975 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2976 #endif
2977 v->ImmediateFlipSupported = false;
2978 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2979 }
2980 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2981 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2982 #ifdef __DML_VBA_DEBUG__
2983 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2984 __func__, k);
2985 #endif
2986 v->ImmediateFlipSupported = false;
2987 }
2988 }
2989 } else {
2990 v->ImmediateFlipSupported = false;
2991 }
2992
2993 v->PrefetchAndImmediateFlipSupported =
2994 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2995 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2996 v->ImmediateFlipSupported)) ? true : false;
2997 #ifdef __DML_VBA_DEBUG__
2998 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2999 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
3000 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
3001 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
3002 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
3003 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
3004 #endif
3005 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
3006
3007 v->VStartupLines = v->VStartupLines + 1;
3008 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
3009 ASSERT(v->PrefetchAndImmediateFlipSupported);
3010
3011 // Unbounded Request Enabled
3012 CalculateUnboundedRequestAndCompressedBufferSize(
3013 v->DETBufferSizeInKByte[0],
3014 v->ConfigReturnBufferSizeInKByte,
3015 v->UseUnboundedRequesting,
3016 v->TotalActiveDPP,
3017 NoChromaPlanes,
3018 v->MaxNumDPP,
3019 v->CompressedBufferSegmentSizeInkByte,
3020 v->Output,
3021 &v->UnboundedRequestEnabled,
3022 &v->CompressedBufferSizeInkByte);
3023
3024 //Watermarks and NB P-State/DRAM Clock Change Support
3025 {
3026 enum clock_change_support DRAMClockChangeSupport; // dummy
3027 CalculateWatermarksAndDRAMSpeedChangeSupport(
3028 mode_lib,
3029 PrefetchMode,
3030 v->NumberOfActivePlanes,
3031 v->MaxLineBufferLines,
3032 v->LineBufferSize,
3033 v->WritebackInterfaceBufferSize,
3034 v->DCFCLK,
3035 v->ReturnBW,
3036 v->SynchronizedVBlank,
3037 v->dpte_group_bytes,
3038 v->MetaChunkSize,
3039 v->UrgentLatency,
3040 v->UrgentExtraLatency,
3041 v->WritebackLatency,
3042 v->WritebackChunkSize,
3043 v->SOCCLK,
3044 v->DRAMClockChangeLatency,
3045 v->SRExitTime,
3046 v->SREnterPlusExitTime,
3047 v->SRExitZ8Time,
3048 v->SREnterPlusExitZ8Time,
3049 v->DCFCLKDeepSleep,
3050 v->DETBufferSizeY,
3051 v->DETBufferSizeC,
3052 v->SwathHeightY,
3053 v->SwathHeightC,
3054 v->LBBitPerPixel,
3055 v->SwathWidthY,
3056 v->SwathWidthC,
3057 v->HRatio,
3058 v->HRatioChroma,
3059 v->vtaps,
3060 v->VTAPsChroma,
3061 v->VRatio,
3062 v->VRatioChroma,
3063 v->HTotal,
3064 v->PixelClock,
3065 v->BlendingAndTiming,
3066 v->DPPPerPlane,
3067 v->BytePerPixelDETY,
3068 v->BytePerPixelDETC,
3069 v->DSTXAfterScaler,
3070 v->DSTYAfterScaler,
3071 v->WritebackEnable,
3072 v->WritebackPixelFormat,
3073 v->WritebackDestinationWidth,
3074 v->WritebackDestinationHeight,
3075 v->WritebackSourceHeight,
3076 v->UnboundedRequestEnabled,
3077 v->CompressedBufferSizeInkByte,
3078 &DRAMClockChangeSupport,
3079 &v->UrgentWatermark,
3080 &v->WritebackUrgentWatermark,
3081 &v->DRAMClockChangeWatermark,
3082 &v->WritebackDRAMClockChangeWatermark,
3083 &v->StutterExitWatermark,
3084 &v->StutterEnterPlusExitWatermark,
3085 &v->Z8StutterExitWatermark,
3086 &v->Z8StutterEnterPlusExitWatermark,
3087 &v->MinActiveDRAMClockChangeLatencySupported);
3088
3089 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3090 if (v->WritebackEnable[k] == true) {
3091 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3092 0,
3093 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3094 } else {
3095 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3096 }
3097 }
3098 }
3099
3100 //Display Pipeline Delivery Time in Prefetch, Groups
3101 CalculatePixelDeliveryTimes(
3102 v->NumberOfActivePlanes,
3103 v->VRatio,
3104 v->VRatioChroma,
3105 v->VRatioPrefetchY,
3106 v->VRatioPrefetchC,
3107 v->swath_width_luma_ub,
3108 v->swath_width_chroma_ub,
3109 v->DPPPerPlane,
3110 v->HRatio,
3111 v->HRatioChroma,
3112 v->PixelClock,
3113 v->PSCL_THROUGHPUT_LUMA,
3114 v->PSCL_THROUGHPUT_CHROMA,
3115 v->DPPCLK,
3116 v->BytePerPixelC,
3117 v->SourceScan,
3118 v->NumberOfCursors,
3119 v->CursorWidth,
3120 v->CursorBPP,
3121 v->BlockWidth256BytesY,
3122 v->BlockHeight256BytesY,
3123 v->BlockWidth256BytesC,
3124 v->BlockHeight256BytesC,
3125 v->DisplayPipeLineDeliveryTimeLuma,
3126 v->DisplayPipeLineDeliveryTimeChroma,
3127 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3128 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3129 v->DisplayPipeRequestDeliveryTimeLuma,
3130 v->DisplayPipeRequestDeliveryTimeChroma,
3131 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3132 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3133 v->CursorRequestDeliveryTime,
3134 v->CursorRequestDeliveryTimePrefetch);
3135
3136 CalculateMetaAndPTETimes(
3137 v->NumberOfActivePlanes,
3138 v->GPUVMEnable,
3139 v->MetaChunkSize,
3140 v->MinMetaChunkSizeBytes,
3141 v->HTotal,
3142 v->VRatio,
3143 v->VRatioChroma,
3144 v->DestinationLinesToRequestRowInVBlank,
3145 v->DestinationLinesToRequestRowInImmediateFlip,
3146 v->DCCEnable,
3147 v->PixelClock,
3148 v->BytePerPixelY,
3149 v->BytePerPixelC,
3150 v->SourceScan,
3151 v->dpte_row_height,
3152 v->dpte_row_height_chroma,
3153 v->meta_row_width,
3154 v->meta_row_width_chroma,
3155 v->meta_row_height,
3156 v->meta_row_height_chroma,
3157 v->meta_req_width,
3158 v->meta_req_width_chroma,
3159 v->meta_req_height,
3160 v->meta_req_height_chroma,
3161 v->dpte_group_bytes,
3162 v->PTERequestSizeY,
3163 v->PTERequestSizeC,
3164 v->PixelPTEReqWidthY,
3165 v->PixelPTEReqHeightY,
3166 v->PixelPTEReqWidthC,
3167 v->PixelPTEReqHeightC,
3168 v->dpte_row_width_luma_ub,
3169 v->dpte_row_width_chroma_ub,
3170 v->DST_Y_PER_PTE_ROW_NOM_L,
3171 v->DST_Y_PER_PTE_ROW_NOM_C,
3172 v->DST_Y_PER_META_ROW_NOM_L,
3173 v->DST_Y_PER_META_ROW_NOM_C,
3174 v->TimePerMetaChunkNominal,
3175 v->TimePerChromaMetaChunkNominal,
3176 v->TimePerMetaChunkVBlank,
3177 v->TimePerChromaMetaChunkVBlank,
3178 v->TimePerMetaChunkFlip,
3179 v->TimePerChromaMetaChunkFlip,
3180 v->time_per_pte_group_nom_luma,
3181 v->time_per_pte_group_vblank_luma,
3182 v->time_per_pte_group_flip_luma,
3183 v->time_per_pte_group_nom_chroma,
3184 v->time_per_pte_group_vblank_chroma,
3185 v->time_per_pte_group_flip_chroma);
3186
3187 CalculateVMGroupAndRequestTimes(
3188 v->NumberOfActivePlanes,
3189 v->GPUVMEnable,
3190 v->GPUVMMaxPageTableLevels,
3191 v->HTotal,
3192 v->BytePerPixelC,
3193 v->DestinationLinesToRequestVMInVBlank,
3194 v->DestinationLinesToRequestVMInImmediateFlip,
3195 v->DCCEnable,
3196 v->PixelClock,
3197 v->dpte_row_width_luma_ub,
3198 v->dpte_row_width_chroma_ub,
3199 v->vm_group_bytes,
3200 v->dpde0_bytes_per_frame_ub_l,
3201 v->dpde0_bytes_per_frame_ub_c,
3202 v->meta_pte_bytes_per_frame_ub_l,
3203 v->meta_pte_bytes_per_frame_ub_c,
3204 v->TimePerVMGroupVBlank,
3205 v->TimePerVMGroupFlip,
3206 v->TimePerVMRequestVBlank,
3207 v->TimePerVMRequestFlip);
3208
3209 // Min TTUVBlank
3210 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3211 if (PrefetchMode == 0) {
3212 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3213 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3214 v->MinTTUVBlank[k] = dml_max(
3215 v->DRAMClockChangeWatermark,
3216 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3217 } else if (PrefetchMode == 1) {
3218 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3219 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3220 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3221 } else {
3222 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3223 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3224 v->MinTTUVBlank[k] = v->UrgentWatermark;
3225 }
3226 if (!v->DynamicMetadataEnable[k])
3227 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3228 }
3229
3230 // DCC Configuration
3231 v->ActiveDPPs = 0;
3232 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3233 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3234 v->SourcePixelFormat[k],
3235 v->SurfaceWidthY[k],
3236 v->SurfaceWidthC[k],
3237 v->SurfaceHeightY[k],
3238 v->SurfaceHeightC[k],
3239 v->DETBufferSizeInKByte[0] * 1024,
3240 v->BlockHeight256BytesY[k],
3241 v->BlockHeight256BytesC[k],
3242 v->SurfaceTiling[k],
3243 v->BytePerPixelY[k],
3244 v->BytePerPixelC[k],
3245 v->BytePerPixelDETY[k],
3246 v->BytePerPixelDETC[k],
3247 v->SourceScan[k],
3248 &v->DCCYMaxUncompressedBlock[k],
3249 &v->DCCCMaxUncompressedBlock[k],
3250 &v->DCCYMaxCompressedBlock[k],
3251 &v->DCCCMaxCompressedBlock[k],
3252 &v->DCCYIndependentBlock[k],
3253 &v->DCCCIndependentBlock[k]);
3254 }
3255
3256 // VStartup Adjustment
3257 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3258 bool isInterlaceTiming;
3259 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3260 #ifdef __DML_VBA_DEBUG__
3261 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3262 #endif
3263
3264 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3265
3266 #ifdef __DML_VBA_DEBUG__
3267 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3268 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3269 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3270 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3271 #endif
3272
3273 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3274 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3275 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3276 }
3277
3278 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3279
3280 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3281 - v->VFrontPorch[k])
3282 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3283 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3284
3285 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3286
3287 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3288 <= (isInterlaceTiming ?
3289 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3290 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3291 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3292 } else {
3293 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3294 }
3295 #ifdef __DML_VBA_DEBUG__
3296 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3297 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3298 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3299 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3300 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3301 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3302 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3303 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3304 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3305 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3306 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3307 #endif
3308 }
3309
3310 {
3311 //Maximum Bandwidth Used
3312 double TotalWRBandwidth = 0;
3313 double MaxPerPlaneVActiveWRBandwidth = 0;
3314 double WRBandwidth = 0;
3315 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3316 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3317 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3318 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3319 } else if (v->WritebackEnable[k] == true) {
3320 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3321 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3322 }
3323 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3324 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3325 }
3326
3327 v->TotalDataReadBandwidth = 0;
3328 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3329 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3330 }
3331 }
3332 // Stutter Efficiency
3333 CalculateStutterEfficiency(
3334 mode_lib,
3335 v->CompressedBufferSizeInkByte,
3336 v->UnboundedRequestEnabled,
3337 v->ConfigReturnBufferSizeInKByte,
3338 v->MetaFIFOSizeInKEntries,
3339 v->ZeroSizeBufferEntries,
3340 v->NumberOfActivePlanes,
3341 v->ROBBufferSizeInKByte,
3342 v->TotalDataReadBandwidth,
3343 v->DCFCLK,
3344 v->ReturnBW,
3345 v->COMPBUF_RESERVED_SPACE_64B,
3346 v->COMPBUF_RESERVED_SPACE_ZS,
3347 v->SRExitTime,
3348 v->SRExitZ8Time,
3349 v->SynchronizedVBlank,
3350 v->StutterEnterPlusExitWatermark,
3351 v->Z8StutterEnterPlusExitWatermark,
3352 v->ProgressiveToInterlaceUnitInOPP,
3353 v->Interlace,
3354 v->MinTTUVBlank,
3355 v->DPPPerPlane,
3356 v->DETBufferSizeY,
3357 v->BytePerPixelY,
3358 v->BytePerPixelDETY,
3359 v->SwathWidthY,
3360 v->SwathHeightY,
3361 v->SwathHeightC,
3362 v->DCCRateLuma,
3363 v->DCCRateChroma,
3364 v->DCCFractionOfZeroSizeRequestsLuma,
3365 v->DCCFractionOfZeroSizeRequestsChroma,
3366 v->HTotal,
3367 v->VTotal,
3368 v->PixelClock,
3369 v->VRatio,
3370 v->SourceScan,
3371 v->BlockHeight256BytesY,
3372 v->BlockWidth256BytesY,
3373 v->BlockHeight256BytesC,
3374 v->BlockWidth256BytesC,
3375 v->DCCYMaxUncompressedBlock,
3376 v->DCCCMaxUncompressedBlock,
3377 v->VActive,
3378 v->DCCEnable,
3379 v->WritebackEnable,
3380 v->ReadBandwidthPlaneLuma,
3381 v->ReadBandwidthPlaneChroma,
3382 v->meta_row_bw,
3383 v->dpte_row_bw,
3384 &v->StutterEfficiencyNotIncludingVBlank,
3385 &v->StutterEfficiency,
3386 &v->NumberOfStutterBurstsPerFrame,
3387 &v->Z8StutterEfficiencyNotIncludingVBlank,
3388 &v->Z8StutterEfficiency,
3389 &v->Z8NumberOfStutterBurstsPerFrame,
3390 &v->StutterPeriod);
3391 }
3392
3393 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3394 {
3395 struct vba_vars_st *v = &mode_lib->vba;
3396 // Display Pipe Configuration
3397 double BytePerPixDETY[DC__NUM_DPP__MAX];
3398 double BytePerPixDETC[DC__NUM_DPP__MAX];
3399 int BytePerPixY[DC__NUM_DPP__MAX];
3400 int BytePerPixC[DC__NUM_DPP__MAX];
3401 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3402 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3403 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3404 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3405 double dummy1[DC__NUM_DPP__MAX];
3406 double dummy2[DC__NUM_DPP__MAX];
3407 double dummy3[DC__NUM_DPP__MAX];
3408 double dummy4[DC__NUM_DPP__MAX];
3409 int dummy5[DC__NUM_DPP__MAX];
3410 int dummy6[DC__NUM_DPP__MAX];
3411 bool dummy7[DC__NUM_DPP__MAX];
3412 bool dummysinglestring;
3413
3414 unsigned int k;
3415
3416 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3417
3418 CalculateBytePerPixelAnd256BBlockSizes(
3419 v->SourcePixelFormat[k],
3420 v->SurfaceTiling[k],
3421 &BytePerPixY[k],
3422 &BytePerPixC[k],
3423 &BytePerPixDETY[k],
3424 &BytePerPixDETC[k],
3425 &Read256BytesBlockHeightY[k],
3426 &Read256BytesBlockHeightC[k],
3427 &Read256BytesBlockWidthY[k],
3428 &Read256BytesBlockWidthC[k]);
3429 }
3430
3431 CalculateSwathAndDETConfiguration(
3432 false,
3433 v->NumberOfActivePlanes,
3434 v->DETBufferSizeInKByte[0],
3435 dummy1,
3436 dummy2,
3437 v->SourceScan,
3438 v->SourcePixelFormat,
3439 v->SurfaceTiling,
3440 v->ViewportWidth,
3441 v->ViewportHeight,
3442 v->SurfaceWidthY,
3443 v->SurfaceWidthC,
3444 v->SurfaceHeightY,
3445 v->SurfaceHeightC,
3446 Read256BytesBlockHeightY,
3447 Read256BytesBlockHeightC,
3448 Read256BytesBlockWidthY,
3449 Read256BytesBlockWidthC,
3450 v->ODMCombineEnabled,
3451 v->BlendingAndTiming,
3452 BytePerPixY,
3453 BytePerPixC,
3454 BytePerPixDETY,
3455 BytePerPixDETC,
3456 v->HActive,
3457 v->HRatio,
3458 v->HRatioChroma,
3459 v->DPPPerPlane,
3460 dummy5,
3461 dummy6,
3462 dummy3,
3463 dummy4,
3464 v->SwathHeightY,
3465 v->SwathHeightC,
3466 v->DETBufferSizeY,
3467 v->DETBufferSizeC,
3468 dummy7,
3469 &dummysinglestring);
3470 }
3471
3472 static bool CalculateBytePerPixelAnd256BBlockSizes(
3473 enum source_format_class SourcePixelFormat,
3474 enum dm_swizzle_mode SurfaceTiling,
3475 unsigned int *BytePerPixelY,
3476 unsigned int *BytePerPixelC,
3477 double *BytePerPixelDETY,
3478 double *BytePerPixelDETC,
3479 unsigned int *BlockHeight256BytesY,
3480 unsigned int *BlockHeight256BytesC,
3481 unsigned int *BlockWidth256BytesY,
3482 unsigned int *BlockWidth256BytesC)
3483 {
3484 if (SourcePixelFormat == dm_444_64) {
3485 *BytePerPixelDETY = 8;
3486 *BytePerPixelDETC = 0;
3487 *BytePerPixelY = 8;
3488 *BytePerPixelC = 0;
3489 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3490 *BytePerPixelDETY = 4;
3491 *BytePerPixelDETC = 0;
3492 *BytePerPixelY = 4;
3493 *BytePerPixelC = 0;
3494 } else if (SourcePixelFormat == dm_444_16) {
3495 *BytePerPixelDETY = 2;
3496 *BytePerPixelDETC = 0;
3497 *BytePerPixelY = 2;
3498 *BytePerPixelC = 0;
3499 } else if (SourcePixelFormat == dm_444_8) {
3500 *BytePerPixelDETY = 1;
3501 *BytePerPixelDETC = 0;
3502 *BytePerPixelY = 1;
3503 *BytePerPixelC = 0;
3504 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3505 *BytePerPixelDETY = 4;
3506 *BytePerPixelDETC = 1;
3507 *BytePerPixelY = 4;
3508 *BytePerPixelC = 1;
3509 } else if (SourcePixelFormat == dm_420_8) {
3510 *BytePerPixelDETY = 1;
3511 *BytePerPixelDETC = 2;
3512 *BytePerPixelY = 1;
3513 *BytePerPixelC = 2;
3514 } else if (SourcePixelFormat == dm_420_12) {
3515 *BytePerPixelDETY = 2;
3516 *BytePerPixelDETC = 4;
3517 *BytePerPixelY = 2;
3518 *BytePerPixelC = 4;
3519 } else {
3520 *BytePerPixelDETY = 4.0 / 3;
3521 *BytePerPixelDETC = 8.0 / 3;
3522 *BytePerPixelY = 2;
3523 *BytePerPixelC = 4;
3524 }
3525
3526 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3527 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3528 if (SurfaceTiling == dm_sw_linear) {
3529 *BlockHeight256BytesY = 1;
3530 } else if (SourcePixelFormat == dm_444_64) {
3531 *BlockHeight256BytesY = 4;
3532 } else if (SourcePixelFormat == dm_444_8) {
3533 *BlockHeight256BytesY = 16;
3534 } else {
3535 *BlockHeight256BytesY = 8;
3536 }
3537 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3538 *BlockHeight256BytesC = 0;
3539 *BlockWidth256BytesC = 0;
3540 } else {
3541 if (SurfaceTiling == dm_sw_linear) {
3542 *BlockHeight256BytesY = 1;
3543 *BlockHeight256BytesC = 1;
3544 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3545 *BlockHeight256BytesY = 8;
3546 *BlockHeight256BytesC = 16;
3547 } else if (SourcePixelFormat == dm_420_8) {
3548 *BlockHeight256BytesY = 16;
3549 *BlockHeight256BytesC = 8;
3550 } else {
3551 *BlockHeight256BytesY = 8;
3552 *BlockHeight256BytesC = 8;
3553 }
3554 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3555 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3556 }
3557 return true;
3558 }
3559
/*
 * Select the wait time to budget for, depending on the prefetch mode:
 *   mode 0      - the largest of (DRAM clock change latency + urgent latency),
 *                 the self-refresh enter+exit time and the urgent latency;
 *   mode 1      - the larger of the self-refresh enter+exit time and the
 *                 urgent latency;
 *   other modes - the urgent latency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3570
3571 double dml31_CalculateWriteBackDISPCLK(
3572 enum source_format_class WritebackPixelFormat,
3573 double PixelClock,
3574 double WritebackHRatio,
3575 double WritebackVRatio,
3576 unsigned int WritebackHTaps,
3577 unsigned int WritebackVTaps,
3578 long WritebackSourceWidth,
3579 long WritebackDestinationWidth,
3580 unsigned int HTotal,
3581 unsigned int WritebackLineBufferSize)
3582 {
3583 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3584
3585 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3586 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3587 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3588 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3589 }
3590
3591 static double CalculateWriteBackDelay(
3592 enum source_format_class WritebackPixelFormat,
3593 double WritebackHRatio,
3594 double WritebackVRatio,
3595 unsigned int WritebackVTaps,
3596 int WritebackDestinationWidth,
3597 int WritebackDestinationHeight,
3598 int WritebackSourceHeight,
3599 unsigned int HTotal)
3600 {
3601 double CalculateWriteBackDelay;
3602 double Line_length;
3603 double Output_lines_last_notclamped;
3604 double WritebackVInit;
3605
3606 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3607 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3608 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3609 if (Output_lines_last_notclamped < 0) {
3610 CalculateWriteBackDelay = 0;
3611 } else {
3612 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3613 }
3614 return CalculateWriteBackDelay;
3615 }
3616
/*
 * Derive the VUPDATE/VREADY pixel offsets and the dynamic-metadata timing
 * terms for one pipe.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil of (14/DCFClkDeepSleep + 12/DPPCLK + repeater
 *                       delay) scaled by PixelClock.
 *   *VReadyOffsetPix  - ceil of max(150/DPPCLK, repeater delay +
 *                       20/DCFClkDeepSleep + 10/DPPCLK) scaled by PixelClock.
 *   *VUpdateOffsetPix - ceil(HTotal / 4).
 *   *TSetup           - sum of the three pixel counts above / PixelClock.
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK.
 *   *Tdmec            - one line time (HTotal / PixelClock).
 *   *Tdmsks           - half of the vertical blank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved again for
 *                       interlaced timing that is not produced by the
 *                       progressive-to-interlace unit in OPP.
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix point at doubles, so they must be
	 * printed with %f; only VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3660
3661 static void CalculateRowBandwidth(
3662 bool GPUVMEnable,
3663 enum source_format_class SourcePixelFormat,
3664 double VRatio,
3665 double VRatioChroma,
3666 bool DCCEnable,
3667 double LineTime,
3668 unsigned int MetaRowByteLuma,
3669 unsigned int MetaRowByteChroma,
3670 unsigned int meta_row_height_luma,
3671 unsigned int meta_row_height_chroma,
3672 unsigned int PixelPTEBytesPerRowLuma,
3673 unsigned int PixelPTEBytesPerRowChroma,
3674 unsigned int dpte_row_height_luma,
3675 unsigned int dpte_row_height_chroma,
3676 double *meta_row_bw,
3677 double *dpte_row_bw)
3678 {
3679 if (DCCEnable != true) {
3680 *meta_row_bw = 0;
3681 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3682 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3683 } else {
3684 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3685 }
3686
3687 if (GPUVMEnable != true) {
3688 *dpte_row_bw = 0;
3689 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3690 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3691 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3692 } else {
3693 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3694 }
3695 }
3696
3697 static void CalculateFlipSchedule(
3698 struct display_mode_lib *mode_lib,
3699 double HostVMInefficiencyFactor,
3700 double UrgentExtraLatency,
3701 double UrgentLatency,
3702 unsigned int GPUVMMaxPageTableLevels,
3703 bool HostVMEnable,
3704 unsigned int HostVMMaxNonCachedPageTableLevels,
3705 bool GPUVMEnable,
3706 double HostVMMinPageSize,
3707 double PDEAndMetaPTEBytesPerFrame,
3708 double MetaRowBytes,
3709 double DPTEBytesPerRow,
3710 double BandwidthAvailableForImmediateFlip,
3711 unsigned int TotImmediateFlipBytes,
3712 enum source_format_class SourcePixelFormat,
3713 double LineTime,
3714 double VRatio,
3715 double VRatioChroma,
3716 double Tno_bw,
3717 bool DCCEnable,
3718 unsigned int dpte_row_height,
3719 unsigned int meta_row_height,
3720 unsigned int dpte_row_height_chroma,
3721 unsigned int meta_row_height_chroma,
3722 double *DestinationLinesToRequestVMInImmediateFlip,
3723 double *DestinationLinesToRequestRowInImmediateFlip,
3724 double *final_flip_bw,
3725 bool *ImmediateFlipSupportedForPipe)
3726 {
3727 double min_row_time = 0.0;
3728 unsigned int HostVMDynamicLevelsTrips;
3729 double TimeForFetchingMetaPTEImmediateFlip;
3730 double TimeForFetchingRowInVBlankImmediateFlip;
3731 double ImmediateFlipBW;
3732
3733 if (GPUVMEnable == true && HostVMEnable == true) {
3734 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3735 } else {
3736 HostVMDynamicLevelsTrips = 0;
3737 }
3738
3739 if (GPUVMEnable == true || DCCEnable == true) {
3740 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3741 }
3742
3743 if (GPUVMEnable == true) {
3744 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3745 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3746 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3747 LineTime / 4.0);
3748 } else {
3749 TimeForFetchingMetaPTEImmediateFlip = 0;
3750 }
3751
3752 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3753 if ((GPUVMEnable == true || DCCEnable == true)) {
3754 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3755 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3756 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3757 LineTime / 4);
3758 } else {
3759 TimeForFetchingRowInVBlankImmediateFlip = 0;
3760 }
3761
3762 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3763
3764 if (GPUVMEnable == true) {
3765 *final_flip_bw = dml_max(
3766 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3767 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3768 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3769 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3770 } else {
3771 *final_flip_bw = 0;
3772 }
3773
3774 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3775 if (GPUVMEnable == true && DCCEnable != true) {
3776 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3777 } else if (GPUVMEnable != true && DCCEnable == true) {
3778 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3779 } else {
3780 min_row_time = dml_min4(
3781 dpte_row_height * LineTime / VRatio,
3782 meta_row_height * LineTime / VRatio,
3783 dpte_row_height_chroma * LineTime / VRatioChroma,
3784 meta_row_height_chroma * LineTime / VRatioChroma);
3785 }
3786 } else {
3787 if (GPUVMEnable == true && DCCEnable != true) {
3788 min_row_time = dpte_row_height * LineTime / VRatio;
3789 } else if (GPUVMEnable != true && DCCEnable == true) {
3790 min_row_time = meta_row_height * LineTime / VRatio;
3791 } else {
3792 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3793 }
3794 }
3795
3796 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3797 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3798 *ImmediateFlipSupportedForPipe = false;
3799 } else {
3800 *ImmediateFlipSupportedForPipe = true;
3801 }
3802
3803 #ifdef __DML_VBA_DEBUG__
3804 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3805 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3806 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3807 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3808 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3809 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
3810 #endif
3811
3812 }
3813
3814 static double TruncToValidBPP(
3815 double LinkBitRate,
3816 int Lanes,
3817 int HTotal,
3818 int HActive,
3819 double PixelClock,
3820 double DesiredBPP,
3821 bool DSCEnable,
3822 enum output_encoder_class Output,
3823 enum output_format_class Format,
3824 unsigned int DSCInputBitPerComponent,
3825 int DSCSlices,
3826 int AudioRate,
3827 int AudioLayout,
3828 enum odm_combine_mode ODMCombine)
3829 {
3830 double MaxLinkBPP;
3831 int MinDSCBPP;
3832 double MaxDSCBPP;
3833 int NonDSCBPP0;
3834 int NonDSCBPP1;
3835 int NonDSCBPP2;
3836
3837 if (Format == dm_420) {
3838 NonDSCBPP0 = 12;
3839 NonDSCBPP1 = 15;
3840 NonDSCBPP2 = 18;
3841 MinDSCBPP = 6;
3842 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3843 } else if (Format == dm_444) {
3844 NonDSCBPP0 = 24;
3845 NonDSCBPP1 = 30;
3846 NonDSCBPP2 = 36;
3847 MinDSCBPP = 8;
3848 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3849 } else {
3850
3851 NonDSCBPP0 = 16;
3852 NonDSCBPP1 = 20;
3853 NonDSCBPP2 = 24;
3854
3855 if (Format == dm_n422) {
3856 MinDSCBPP = 7;
3857 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3858 } else {
3859 MinDSCBPP = 8;
3860 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3861 }
3862 }
3863
3864 if (DSCEnable && Output == dm_dp) {
3865 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3866 } else {
3867 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3868 }
3869
3870 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3871 MaxLinkBPP = 16;
3872 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3873 MaxLinkBPP = 32;
3874 }
3875
3876 if (DesiredBPP == 0) {
3877 if (DSCEnable) {
3878 if (MaxLinkBPP < MinDSCBPP) {
3879 return BPP_INVALID;
3880 } else if (MaxLinkBPP >= MaxDSCBPP) {
3881 return MaxDSCBPP;
3882 } else {
3883 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3884 }
3885 } else {
3886 if (MaxLinkBPP >= NonDSCBPP2) {
3887 return NonDSCBPP2;
3888 } else if (MaxLinkBPP >= NonDSCBPP1) {
3889 return NonDSCBPP1;
3890 } else if (MaxLinkBPP >= NonDSCBPP0) {
3891 return 16.0;
3892 } else {
3893 return BPP_INVALID;
3894 }
3895 }
3896 } else {
3897 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3898 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3899 return BPP_INVALID;
3900 } else {
3901 return DesiredBPP;
3902 }
3903 }
3904 return BPP_INVALID;
3905 }
3906
3907 static noinline void CalculatePrefetchSchedulePerPlane(
3908 struct display_mode_lib *mode_lib,
3909 double HostVMInefficiencyFactor,
3910 int i,
3911 unsigned j,
3912 unsigned k)
3913 {
3914 struct vba_vars_st *v = &mode_lib->vba;
3915 Pipe myPipe;
3916
3917 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3918 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3919 myPipe.PixelClock = v->PixelClock[k];
3920 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3921 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3922 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3923 myPipe.VRatio = mode_lib->vba.VRatio[k];
3924 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3925
3926 myPipe.SourceScan = v->SourceScan[k];
3927 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3928 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3929 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3930 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3931 myPipe.InterlaceEnable = v->Interlace[k];
3932 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3933 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3934 myPipe.HTotal = v->HTotal[k];
3935 myPipe.DCCEnable = v->DCCEnable[k];
3936 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3937 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3938 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3939 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3940 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3941 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3942 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3943 mode_lib,
3944 HostVMInefficiencyFactor,
3945 &myPipe,
3946 v->DSCDelayPerState[i][k],
3947 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3948 v->DPPCLKDelaySCL,
3949 v->DPPCLKDelaySCLLBOnly,
3950 v->DPPCLKDelayCNVCCursor,
3951 v->DISPCLKDelaySubtotal,
3952 v->SwathWidthYThisState[k] / v->HRatio[k],
3953 v->OutputFormat[k],
3954 v->MaxInterDCNTileRepeaters,
3955 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3956 v->MaximumVStartup[i][j][k],
3957 v->GPUVMMaxPageTableLevels,
3958 v->GPUVMEnable,
3959 v->HostVMEnable,
3960 v->HostVMMaxNonCachedPageTableLevels,
3961 v->HostVMMinPageSize,
3962 v->DynamicMetadataEnable[k],
3963 v->DynamicMetadataVMEnabled,
3964 v->DynamicMetadataLinesBeforeActiveRequired[k],
3965 v->DynamicMetadataTransmittedBytes[k],
3966 v->UrgLatency[i],
3967 v->ExtraLatency,
3968 v->TimeCalc,
3969 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3970 v->MetaRowBytes[i][j][k],
3971 v->DPTEBytesPerRow[i][j][k],
3972 v->PrefetchLinesY[i][j][k],
3973 v->SwathWidthYThisState[k],
3974 v->PrefillY[k],
3975 v->MaxNumSwY[k],
3976 v->PrefetchLinesC[i][j][k],
3977 v->SwathWidthCThisState[k],
3978 v->PrefillC[k],
3979 v->MaxNumSwC[k],
3980 v->swath_width_luma_ub_this_state[k],
3981 v->swath_width_chroma_ub_this_state[k],
3982 v->SwathHeightYThisState[k],
3983 v->SwathHeightCThisState[k],
3984 v->TWait,
3985 &v->DSTXAfterScaler[k],
3986 &v->DSTYAfterScaler[k],
3987 &v->LineTimesForPrefetch[k],
3988 &v->PrefetchBW[k],
3989 &v->LinesForMetaPTE[k],
3990 &v->LinesForMetaAndDPTERow[k],
3991 &v->VRatioPreY[i][j][k],
3992 &v->VRatioPreC[i][j][k],
3993 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3994 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3995 &v->NoTimeForDynamicMetadata[i][j][k],
3996 &v->Tno_bw[k],
3997 &v->prefetch_vmrow_bw[k],
3998 &v->dummy7[k],
3999 &v->dummy8[k],
4000 &v->dummy13[k],
4001 &v->VUpdateOffsetPix[k],
4002 &v->VUpdateWidthPix[k],
4003 &v->VReadyOffsetPix[k]);
4004 }
4005
4006 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
4007 {
4008 struct vba_vars_st *v = &mode_lib->vba;
4009
4010 int i, j;
4011 unsigned int k, m;
4012 int ReorderingBytes;
4013 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
4014 bool NoChroma = true;
4015 bool EnoughWritebackUnits = true;
4016 bool P2IWith420 = false;
4017 bool DSCOnlyIfNecessaryWithBPP = false;
4018 bool DSC422NativeNotSupported = false;
4019 double MaxTotalVActiveRDBandwidth;
4020 bool ViewportExceedsSurface = false;
4021 bool FMTBufferExceeded = false;
4022
4023 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
4024
4025 CalculateMinAndMaxPrefetchMode(
4026 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
4027 &MinPrefetchMode, &MaxPrefetchMode);
4028
4029 /*Scale Ratio, taps Support Check*/
4030
4031 v->ScaleRatioAndTapsSupport = true;
4032 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4033 if (v->ScalerEnabled[k] == false
4034 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
4035 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
4036 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
4037 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
4038 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
4039 v->ScaleRatioAndTapsSupport = false;
4040 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
4041 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
4042 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
4043 || v->VRatio[k] > v->vtaps[k]
4044 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
4045 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
4046 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
4047 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
4048 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
4049 || v->HRatioChroma[k] > v->MaxHSCLRatio
4050 || v->VRatioChroma[k] > v->MaxVSCLRatio
4051 || v->HRatioChroma[k] > v->HTAPsChroma[k]
4052 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
4053 v->ScaleRatioAndTapsSupport = false;
4054 }
4055 }
4056 /*Source Format, Pixel Format and Scan Support Check*/
4057
4058 v->SourceFormatPixelAndScanSupport = true;
4059 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4060 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
4061 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
4062 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
4063 v->SourceFormatPixelAndScanSupport = false;
4064 }
4065 }
4066 /*Bandwidth Support Check*/
4067
4068 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4069 CalculateBytePerPixelAnd256BBlockSizes(
4070 v->SourcePixelFormat[k],
4071 v->SurfaceTiling[k],
4072 &v->BytePerPixelY[k],
4073 &v->BytePerPixelC[k],
4074 &v->BytePerPixelInDETY[k],
4075 &v->BytePerPixelInDETC[k],
4076 &v->Read256BlockHeightY[k],
4077 &v->Read256BlockHeightC[k],
4078 &v->Read256BlockWidthY[k],
4079 &v->Read256BlockWidthC[k]);
4080 }
4081 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4082 if (v->SourceScan[k] != dm_vert) {
4083 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4084 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4085 } else {
4086 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4087 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4088 }
4089 }
4090 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4091 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4092 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4093 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4094 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4095 }
4096 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4097 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4098 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4099 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4100 } else if (v->WritebackEnable[k] == true) {
4101 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4102 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4103 } else {
4104 v->WriteBandwidth[k] = 0.0;
4105 }
4106 }
4107
4108 /*Writeback Latency support check*/
4109
4110 v->WritebackLatencySupport = true;
4111 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4112 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4113 v->WritebackLatencySupport = false;
4114 }
4115 }
4116
4117 /*Writeback Mode Support Check*/
4118
4119 v->TotalNumberOfActiveWriteback = 0;
4120 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4121 if (v->WritebackEnable[k] == true) {
4122 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4123 }
4124 }
4125
4126 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4127 EnoughWritebackUnits = false;
4128 }
4129
4130 /*Writeback Scale Ratio and Taps Support Check*/
4131
4132 v->WritebackScaleRatioAndTapsSupport = true;
4133 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4134 if (v->WritebackEnable[k] == true) {
4135 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4136 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4137 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4138 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4139 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4140 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4141 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4142 v->WritebackScaleRatioAndTapsSupport = false;
4143 }
4144 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4145 v->WritebackScaleRatioAndTapsSupport = false;
4146 }
4147 }
4148 }
4149 /*Maximum DISPCLK/DPPCLK Support check*/
4150
4151 v->WritebackRequiredDISPCLK = 0.0;
4152 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4153 if (v->WritebackEnable[k] == true) {
4154 v->WritebackRequiredDISPCLK = dml_max(
4155 v->WritebackRequiredDISPCLK,
4156 dml31_CalculateWriteBackDISPCLK(
4157 v->WritebackPixelFormat[k],
4158 v->PixelClock[k],
4159 v->WritebackHRatio[k],
4160 v->WritebackVRatio[k],
4161 v->WritebackHTaps[k],
4162 v->WritebackVTaps[k],
4163 v->WritebackSourceWidth[k],
4164 v->WritebackDestinationWidth[k],
4165 v->HTotal[k],
4166 v->WritebackLineBufferSize));
4167 }
4168 }
4169 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4170 if (v->HRatio[k] > 1.0) {
4171 v->PSCL_FACTOR[k] = dml_min(
4172 v->MaxDCHUBToPSCLThroughput,
4173 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4174 } else {
4175 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4176 }
4177 if (v->BytePerPixelC[k] == 0.0) {
4178 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4179 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4180 * dml_max3(
4181 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4182 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4183 1.0);
4184 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4185 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4186 }
4187 } else {
4188 if (v->HRatioChroma[k] > 1.0) {
4189 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4190 v->MaxDCHUBToPSCLThroughput,
4191 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4192 } else {
4193 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4194 }
4195 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4196 * dml_max5(
4197 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4198 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4199 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4200 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4201 1.0);
4202 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4203 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4204 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4205 }
4206 }
4207 }
4208 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4209 int MaximumSwathWidthSupportLuma;
4210 int MaximumSwathWidthSupportChroma;
4211
4212 if (v->SurfaceTiling[k] == dm_sw_linear) {
4213 MaximumSwathWidthSupportLuma = 8192.0;
4214 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4215 MaximumSwathWidthSupportLuma = 2880.0;
4216 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4217 MaximumSwathWidthSupportLuma = 3840.0;
4218 } else {
4219 MaximumSwathWidthSupportLuma = 5760.0;
4220 }
4221
4222 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4223 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4224 } else {
4225 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4226 }
4227 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4228 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4229 if (v->BytePerPixelC[k] == 0.0) {
4230 v->MaximumSwathWidthInLineBufferChroma = 0;
4231 } else {
4232 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4233 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4234 }
4235 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4236 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4237 }
4238
4239 CalculateSwathAndDETConfiguration(
4240 true,
4241 v->NumberOfActivePlanes,
4242 v->DETBufferSizeInKByte[0],
4243 v->MaximumSwathWidthLuma,
4244 v->MaximumSwathWidthChroma,
4245 v->SourceScan,
4246 v->SourcePixelFormat,
4247 v->SurfaceTiling,
4248 v->ViewportWidth,
4249 v->ViewportHeight,
4250 v->SurfaceWidthY,
4251 v->SurfaceWidthC,
4252 v->SurfaceHeightY,
4253 v->SurfaceHeightC,
4254 v->Read256BlockHeightY,
4255 v->Read256BlockHeightC,
4256 v->Read256BlockWidthY,
4257 v->Read256BlockWidthC,
4258 v->odm_combine_dummy,
4259 v->BlendingAndTiming,
4260 v->BytePerPixelY,
4261 v->BytePerPixelC,
4262 v->BytePerPixelInDETY,
4263 v->BytePerPixelInDETC,
4264 v->HActive,
4265 v->HRatio,
4266 v->HRatioChroma,
4267 v->NoOfDPPThisState,
4268 v->swath_width_luma_ub_this_state,
4269 v->swath_width_chroma_ub_this_state,
4270 v->SwathWidthYThisState,
4271 v->SwathWidthCThisState,
4272 v->SwathHeightYThisState,
4273 v->SwathHeightCThisState,
4274 v->DETBufferSizeYThisState,
4275 v->DETBufferSizeCThisState,
4276 v->SingleDPPViewportSizeSupportPerPlane,
4277 &v->ViewportSizeSupport[0][0]);
4278
4279 for (i = 0; i < v->soc.num_states; i++) {
4280 for (j = 0; j < 2; j++) {
4281 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4282 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4283 v->RequiredDISPCLK[i][j] = 0.0;
4284 v->DISPCLK_DPPCLK_Support[i][j] = true;
4285 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4286 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4287 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4288 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4289 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4290 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4291 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4292 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4293 }
4294 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4295 * (1 + v->DISPCLKRampingMargin / 100.0);
4296 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4297 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4298 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4299 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4300 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4301 }
4302 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4303 * (1 + v->DISPCLKRampingMargin / 100.0);
4304 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4305 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4306 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4307 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4308 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4309 }
4310
4311 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4312 || !(v->Output[k] == dm_dp ||
4313 v->Output[k] == dm_edp)) {
4314 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4315 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4316
4317 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4318 FMTBufferExceeded = true;
4319 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4320 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4321 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4322 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4323 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4324 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4325 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4326 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4327 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4328 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4329 } else {
4330 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4331 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4332 }
4333 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4334 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4335 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4336 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4337 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4338 } else {
4339 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4340 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4341 }
4342 }
4343 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4344 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4345 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4346 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4347 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4348
4349 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4350 FMTBufferExceeded = true;
4351 } else {
4352 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4353 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4354 }
4355 }
4356 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4357 v->MPCCombine[i][j][k] = false;
4358 v->NoOfDPP[i][j][k] = 4;
4359 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4360 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4361 v->MPCCombine[i][j][k] = false;
4362 v->NoOfDPP[i][j][k] = 2;
4363 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4364 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4365 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4366 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4367 v->MPCCombine[i][j][k] = false;
4368 v->NoOfDPP[i][j][k] = 1;
4369 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4370 } else {
4371 v->MPCCombine[i][j][k] = true;
4372 v->NoOfDPP[i][j][k] = 2;
4373 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4374 }
4375 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4376 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4377 > v->MaxDppclkRoundedDownToDFSGranularity)
4378 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4379 v->DISPCLK_DPPCLK_Support[i][j] = false;
4380 }
4381 }
4382 v->TotalNumberOfActiveDPP[i][j] = 0;
4383 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4384 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4385 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4386 if (v->NoOfDPP[i][j][k] == 1)
4387 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4388 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4389 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4390 NoChroma = false;
4391 }
4392
4393 // UPTO
4394 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4395 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4396 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4397 double BWOfNonSplitPlaneOfMaximumBandwidth;
4398 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4399 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4400 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4401 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4402 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4403 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4404 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4405 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4406 }
4407 }
4408 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4409 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4410 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4411 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4412 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4413 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4414 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4415 }
4416 }
4417 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4418 v->RequiredDISPCLK[i][j] = 0.0;
4419 v->DISPCLK_DPPCLK_Support[i][j] = true;
4420 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4421 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4422 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4423 v->MPCCombine[i][j][k] = true;
4424 v->NoOfDPP[i][j][k] = 2;
4425 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4426 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4427 } else {
4428 v->MPCCombine[i][j][k] = false;
4429 v->NoOfDPP[i][j][k] = 1;
4430 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4431 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4432 }
4433 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4434 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4435 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4436 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4437 } else {
4438 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4439 }
4440 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4441 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4442 > v->MaxDppclkRoundedDownToDFSGranularity)
4443 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4444 v->DISPCLK_DPPCLK_Support[i][j] = false;
4445 }
4446 }
4447 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4448 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4449 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4450 }
4451 }
4452 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4453 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4454 v->DISPCLK_DPPCLK_Support[i][j] = false;
4455 }
4456 }
4457 }
4458
4459 /*Total Available Pipes Support Check*/
4460
4461 for (i = 0; i < v->soc.num_states; i++) {
4462 for (j = 0; j < 2; j++) {
4463 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4464 v->TotalAvailablePipesSupport[i][j] = true;
4465 } else {
4466 v->TotalAvailablePipesSupport[i][j] = false;
4467 }
4468 }
4469 }
4470 /*Display IO and DSC Support Check*/
4471
4472 v->NonsupportedDSCInputBPC = false;
4473 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4474 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4475 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4476 v->NonsupportedDSCInputBPC = true;
4477 }
4478 }
4479
4480 /*Number Of DSC Slices*/
4481 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4482 if (v->BlendingAndTiming[k] == k) {
4483 if (v->PixelClockBackEnd[k] > 3200) {
4484 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4485 } else if (v->PixelClockBackEnd[k] > 1360) {
4486 v->NumberOfDSCSlices[k] = 8;
4487 } else if (v->PixelClockBackEnd[k] > 680) {
4488 v->NumberOfDSCSlices[k] = 4;
4489 } else if (v->PixelClockBackEnd[k] > 340) {
4490 v->NumberOfDSCSlices[k] = 2;
4491 } else {
4492 v->NumberOfDSCSlices[k] = 1;
4493 }
4494 } else {
4495 v->NumberOfDSCSlices[k] = 0;
4496 }
4497 }
4498
4499 for (i = 0; i < v->soc.num_states; i++) {
4500 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4501 v->RequiresDSC[i][k] = false;
4502 v->RequiresFEC[i][k] = false;
4503 if (v->BlendingAndTiming[k] == k) {
4504 if (v->Output[k] == dm_hdmi) {
4505 v->RequiresDSC[i][k] = false;
4506 v->RequiresFEC[i][k] = false;
4507 v->OutputBppPerState[i][k] = TruncToValidBPP(
4508 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4509 3,
4510 v->HTotal[k],
4511 v->HActive[k],
4512 v->PixelClockBackEnd[k],
4513 v->ForcedOutputLinkBPP[k],
4514 false,
4515 v->Output[k],
4516 v->OutputFormat[k],
4517 v->DSCInputBitPerComponent[k],
4518 v->NumberOfDSCSlices[k],
4519 v->AudioSampleRate[k],
4520 v->AudioSampleLayout[k],
4521 v->ODMCombineEnablePerState[i][k]);
4522 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4523 if (v->DSCEnable[k] == true) {
4524 v->RequiresDSC[i][k] = true;
4525 v->LinkDSCEnable = true;
4526 if (v->Output[k] == dm_dp) {
4527 v->RequiresFEC[i][k] = true;
4528 } else {
4529 v->RequiresFEC[i][k] = false;
4530 }
4531 } else {
4532 v->RequiresDSC[i][k] = false;
4533 v->LinkDSCEnable = false;
4534 v->RequiresFEC[i][k] = false;
4535 }
4536
4537 v->Outbpp = BPP_INVALID;
4538 if (v->PHYCLKPerState[i] >= 270.0) {
4539 v->Outbpp = TruncToValidBPP(
4540 (1.0 - v->Downspreading / 100.0) * 2700,
4541 v->OutputLinkDPLanes[k],
4542 v->HTotal[k],
4543 v->HActive[k],
4544 v->PixelClockBackEnd[k],
4545 v->ForcedOutputLinkBPP[k],
4546 v->LinkDSCEnable,
4547 v->Output[k],
4548 v->OutputFormat[k],
4549 v->DSCInputBitPerComponent[k],
4550 v->NumberOfDSCSlices[k],
4551 v->AudioSampleRate[k],
4552 v->AudioSampleLayout[k],
4553 v->ODMCombineEnablePerState[i][k]);
4554 v->OutputBppPerState[i][k] = v->Outbpp;
4555 // TODO: Need some other way to handle this nonsense
4556 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4557 }
4558 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4559 v->Outbpp = TruncToValidBPP(
4560 (1.0 - v->Downspreading / 100.0) * 5400,
4561 v->OutputLinkDPLanes[k],
4562 v->HTotal[k],
4563 v->HActive[k],
4564 v->PixelClockBackEnd[k],
4565 v->ForcedOutputLinkBPP[k],
4566 v->LinkDSCEnable,
4567 v->Output[k],
4568 v->OutputFormat[k],
4569 v->DSCInputBitPerComponent[k],
4570 v->NumberOfDSCSlices[k],
4571 v->AudioSampleRate[k],
4572 v->AudioSampleLayout[k],
4573 v->ODMCombineEnablePerState[i][k]);
4574 v->OutputBppPerState[i][k] = v->Outbpp;
4575 // TODO: Need some other way to handle this nonsense
4576 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4577 }
4578 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4579 v->Outbpp = TruncToValidBPP(
4580 (1.0 - v->Downspreading / 100.0) * 8100,
4581 v->OutputLinkDPLanes[k],
4582 v->HTotal[k],
4583 v->HActive[k],
4584 v->PixelClockBackEnd[k],
4585 v->ForcedOutputLinkBPP[k],
4586 v->LinkDSCEnable,
4587 v->Output[k],
4588 v->OutputFormat[k],
4589 v->DSCInputBitPerComponent[k],
4590 v->NumberOfDSCSlices[k],
4591 v->AudioSampleRate[k],
4592 v->AudioSampleLayout[k],
4593 v->ODMCombineEnablePerState[i][k]);
4594 v->OutputBppPerState[i][k] = v->Outbpp;
4595 // TODO: Need some other way to handle this nonsense
4596 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4597 }
4598 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4599 v->Outbpp = TruncToValidBPP(
4600 (1.0 - v->Downspreading / 100.0) * 10000,
4601 4,
4602 v->HTotal[k],
4603 v->HActive[k],
4604 v->PixelClockBackEnd[k],
4605 v->ForcedOutputLinkBPP[k],
4606 v->LinkDSCEnable,
4607 v->Output[k],
4608 v->OutputFormat[k],
4609 v->DSCInputBitPerComponent[k],
4610 v->NumberOfDSCSlices[k],
4611 v->AudioSampleRate[k],
4612 v->AudioSampleLayout[k],
4613 v->ODMCombineEnablePerState[i][k]);
4614 v->OutputBppPerState[i][k] = v->Outbpp;
4615 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4616 }
4617 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4618 v->Outbpp = TruncToValidBPP(
4619 12000,
4620 4,
4621 v->HTotal[k],
4622 v->HActive[k],
4623 v->PixelClockBackEnd[k],
4624 v->ForcedOutputLinkBPP[k],
4625 v->LinkDSCEnable,
4626 v->Output[k],
4627 v->OutputFormat[k],
4628 v->DSCInputBitPerComponent[k],
4629 v->NumberOfDSCSlices[k],
4630 v->AudioSampleRate[k],
4631 v->AudioSampleLayout[k],
4632 v->ODMCombineEnablePerState[i][k]);
4633 v->OutputBppPerState[i][k] = v->Outbpp;
4634 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4635 }
4636 }
4637 } else {
4638 v->OutputBppPerState[i][k] = 0;
4639 }
4640 }
4641 }
4642
4643 for (i = 0; i < v->soc.num_states; i++) {
4644 v->LinkCapacitySupport[i] = true;
4645 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4646 if (v->BlendingAndTiming[k] == k
4647 && (v->Output[k] == dm_dp ||
4648 v->Output[k] == dm_edp ||
4649 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4650 v->LinkCapacitySupport[i] = false;
4651 }
4652 }
4653 }
4654
4655 // UPTO 2172
4656 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4657 if (v->BlendingAndTiming[k] == k
4658 && (v->Output[k] == dm_dp ||
4659 v->Output[k] == dm_edp ||
4660 v->Output[k] == dm_hdmi)) {
4661 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4662 P2IWith420 = true;
4663 }
4664 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4665 && !v->DSC422NativeSupport) {
4666 DSC422NativeNotSupported = true;
4667 }
4668 }
4669 }
4670
4671 for (i = 0; i < v->soc.num_states; ++i) {
4672 v->ODMCombine4To1SupportCheckOK[i] = true;
4673 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4674 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4675 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4676 || v->Output[k] == dm_hdmi)) {
4677 v->ODMCombine4To1SupportCheckOK[i] = false;
4678 }
4679 }
4680 }
4681
4682 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4683
4684 for (i = 0; i < v->soc.num_states; i++) {
4685 v->NotEnoughDSCUnits[i] = false;
4686 v->TotalDSCUnitsRequired = 0.0;
4687 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4688 if (v->RequiresDSC[i][k] == true) {
4689 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4690 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4691 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4692 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4693 } else {
4694 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4695 }
4696 }
4697 }
4698 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4699 v->NotEnoughDSCUnits[i] = true;
4700 }
4701 }
4702 /*DSC Delay per state*/
4703
4704 for (i = 0; i < v->soc.num_states; i++) {
4705 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4706 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4707 v->BPP = 0.0;
4708 } else {
4709 v->BPP = v->OutputBppPerState[i][k];
4710 }
4711 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4712 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4713 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4714 v->DSCInputBitPerComponent[k],
4715 v->BPP,
4716 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4717 v->NumberOfDSCSlices[k],
4718 v->OutputFormat[k],
4719 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4720 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4721 v->DSCDelayPerState[i][k] = 2.0
4722 * (dscceComputeDelay(
4723 v->DSCInputBitPerComponent[k],
4724 v->BPP,
4725 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4726 v->NumberOfDSCSlices[k] / 2,
4727 v->OutputFormat[k],
4728 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4729 } else {
4730 v->DSCDelayPerState[i][k] = 4.0
4731 * (dscceComputeDelay(
4732 v->DSCInputBitPerComponent[k],
4733 v->BPP,
4734 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4735 v->NumberOfDSCSlices[k] / 4,
4736 v->OutputFormat[k],
4737 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4738 }
4739 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4740 } else {
4741 v->DSCDelayPerState[i][k] = 0.0;
4742 }
4743 }
4744 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4745 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4746 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4747 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4748 }
4749 }
4750 }
4751 }
4752
4753 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4754 //
4755 for (i = 0; i < v->soc.num_states; ++i) {
4756 for (j = 0; j <= 1; ++j) {
4757 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4758 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4759 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4760 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4761 }
4762
4763 CalculateSwathAndDETConfiguration(
4764 false,
4765 v->NumberOfActivePlanes,
4766 v->DETBufferSizeInKByte[0],
4767 v->MaximumSwathWidthLuma,
4768 v->MaximumSwathWidthChroma,
4769 v->SourceScan,
4770 v->SourcePixelFormat,
4771 v->SurfaceTiling,
4772 v->ViewportWidth,
4773 v->ViewportHeight,
4774 v->SurfaceWidthY,
4775 v->SurfaceWidthC,
4776 v->SurfaceHeightY,
4777 v->SurfaceHeightC,
4778 v->Read256BlockHeightY,
4779 v->Read256BlockHeightC,
4780 v->Read256BlockWidthY,
4781 v->Read256BlockWidthC,
4782 v->ODMCombineEnableThisState,
4783 v->BlendingAndTiming,
4784 v->BytePerPixelY,
4785 v->BytePerPixelC,
4786 v->BytePerPixelInDETY,
4787 v->BytePerPixelInDETC,
4788 v->HActive,
4789 v->HRatio,
4790 v->HRatioChroma,
4791 v->NoOfDPPThisState,
4792 v->swath_width_luma_ub_this_state,
4793 v->swath_width_chroma_ub_this_state,
4794 v->SwathWidthYThisState,
4795 v->SwathWidthCThisState,
4796 v->SwathHeightYThisState,
4797 v->SwathHeightCThisState,
4798 v->DETBufferSizeYThisState,
4799 v->DETBufferSizeCThisState,
4800 v->dummystring,
4801 &v->ViewportSizeSupport[i][j]);
4802
4803 CalculateDCFCLKDeepSleep(
4804 mode_lib,
4805 v->NumberOfActivePlanes,
4806 v->BytePerPixelY,
4807 v->BytePerPixelC,
4808 v->VRatio,
4809 v->VRatioChroma,
4810 v->SwathWidthYThisState,
4811 v->SwathWidthCThisState,
4812 v->NoOfDPPThisState,
4813 v->HRatio,
4814 v->HRatioChroma,
4815 v->PixelClock,
4816 v->PSCL_FACTOR,
4817 v->PSCL_FACTOR_CHROMA,
4818 v->RequiredDPPCLKThisState,
4819 v->ReadBandwidthLuma,
4820 v->ReadBandwidthChroma,
4821 v->ReturnBusWidth,
4822 &v->ProjectedDCFCLKDeepSleep[i][j]);
4823
4824 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4825 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4826 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4827 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4828 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4829 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4830 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4831 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4832 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4833 }
4834 }
4835 }
4836
4837 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4838 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4839 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4840 }
4841
4842 for (i = 0; i < v->soc.num_states; i++) {
4843 for (j = 0; j < 2; j++) {
4844 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4845
4846 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4847 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4848 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4849 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4850 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4851 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4852 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4853 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4854 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4855 }
4856
4857 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4859 if (v->DCCEnable[k] == true) {
4860 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4861 }
4862 }
4863
4864 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4865 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4866 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4867
4868 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4869 && v->SourceScan[k] != dm_vert) {
4870 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4871 / 2;
4872 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4873 } else {
4874 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4875 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4876 }
4877
4878 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4879 mode_lib,
4880 v->DCCEnable[k],
4881 v->Read256BlockHeightC[k],
4882 v->Read256BlockWidthC[k],
4883 v->SourcePixelFormat[k],
4884 v->SurfaceTiling[k],
4885 v->BytePerPixelC[k],
4886 v->SourceScan[k],
4887 v->SwathWidthCThisState[k],
4888 v->ViewportHeightChroma[k],
4889 v->GPUVMEnable,
4890 v->HostVMEnable,
4891 v->HostVMMaxNonCachedPageTableLevels,
4892 v->GPUVMMinPageSize,
4893 v->HostVMMinPageSize,
4894 v->PTEBufferSizeInRequestsForChroma,
4895 v->PitchC[k],
4896 0.0,
4897 &v->MacroTileWidthC[k],
4898 &v->MetaRowBytesC,
4899 &v->DPTEBytesPerRowC,
4900 &v->PTEBufferSizeNotExceededC[i][j][k],
4901 &v->dummyinteger7,
4902 &v->dpte_row_height_chroma[k],
4903 &v->dummyinteger28,
4904 &v->dummyinteger26,
4905 &v->dummyinteger23,
4906 &v->meta_row_height_chroma[k],
4907 &v->dummyinteger8,
4908 &v->dummyinteger9,
4909 &v->dummyinteger19,
4910 &v->dummyinteger20,
4911 &v->dummyinteger17,
4912 &v->dummyinteger10,
4913 &v->dummyinteger11);
4914
4915 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4916 mode_lib,
4917 v->VRatioChroma[k],
4918 v->VTAPsChroma[k],
4919 v->Interlace[k],
4920 v->ProgressiveToInterlaceUnitInOPP,
4921 v->SwathHeightCThisState[k],
4922 v->ViewportYStartC[k],
4923 &v->PrefillC[k],
4924 &v->MaxNumSwC[k]);
4925 } else {
4926 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4927 v->PTEBufferSizeInRequestsForChroma = 0;
4928 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4929 v->MetaRowBytesC = 0.0;
4930 v->DPTEBytesPerRowC = 0.0;
4931 v->PrefetchLinesC[i][j][k] = 0.0;
4932 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4933 }
4934 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4935 mode_lib,
4936 v->DCCEnable[k],
4937 v->Read256BlockHeightY[k],
4938 v->Read256BlockWidthY[k],
4939 v->SourcePixelFormat[k],
4940 v->SurfaceTiling[k],
4941 v->BytePerPixelY[k],
4942 v->SourceScan[k],
4943 v->SwathWidthYThisState[k],
4944 v->ViewportHeight[k],
4945 v->GPUVMEnable,
4946 v->HostVMEnable,
4947 v->HostVMMaxNonCachedPageTableLevels,
4948 v->GPUVMMinPageSize,
4949 v->HostVMMinPageSize,
4950 v->PTEBufferSizeInRequestsForLuma,
4951 v->PitchY[k],
4952 v->DCCMetaPitchY[k],
4953 &v->MacroTileWidthY[k],
4954 &v->MetaRowBytesY,
4955 &v->DPTEBytesPerRowY,
4956 &v->PTEBufferSizeNotExceededY[i][j][k],
4957 &v->dummyinteger7,
4958 &v->dpte_row_height[k],
4959 &v->dummyinteger29,
4960 &v->dummyinteger27,
4961 &v->dummyinteger24,
4962 &v->meta_row_height[k],
4963 &v->dummyinteger25,
4964 &v->dpte_group_bytes[k],
4965 &v->dummyinteger21,
4966 &v->dummyinteger22,
4967 &v->dummyinteger18,
4968 &v->dummyinteger5,
4969 &v->dummyinteger6);
4970 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4971 mode_lib,
4972 v->VRatio[k],
4973 v->vtaps[k],
4974 v->Interlace[k],
4975 v->ProgressiveToInterlaceUnitInOPP,
4976 v->SwathHeightYThisState[k],
4977 v->ViewportYStartY[k],
4978 &v->PrefillY[k],
4979 &v->MaxNumSwY[k]);
4980 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4981 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4982 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4983
4984 CalculateRowBandwidth(
4985 v->GPUVMEnable,
4986 v->SourcePixelFormat[k],
4987 v->VRatio[k],
4988 v->VRatioChroma[k],
4989 v->DCCEnable[k],
4990 v->HTotal[k] / v->PixelClock[k],
4991 v->MetaRowBytesY,
4992 v->MetaRowBytesC,
4993 v->meta_row_height[k],
4994 v->meta_row_height_chroma[k],
4995 v->DPTEBytesPerRowY,
4996 v->DPTEBytesPerRowC,
4997 v->dpte_row_height[k],
4998 v->dpte_row_height_chroma[k],
4999 &v->meta_row_bandwidth[i][j][k],
5000 &v->dpte_row_bandwidth[i][j][k]);
5001 }
5002 /*DCCMetaBufferSizeSupport(i, j) = True
5003 For k = 0 To NumberOfActivePlanes - 1
5004 If MetaRowBytes(i, j, k) > 24064 Then
5005 DCCMetaBufferSizeSupport(i, j) = False
5006 End If
5007 Next k*/
5008 v->DCCMetaBufferSizeSupport[i][j] = true;
5009 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5010 if (v->MetaRowBytes[i][j][k] > 24064)
5011 v->DCCMetaBufferSizeSupport[i][j] = false;
5012 }
5013 v->UrgLatency[i] = CalculateUrgentLatency(
5014 v->UrgentLatencyPixelDataOnly,
5015 v->UrgentLatencyPixelMixedWithVMData,
5016 v->UrgentLatencyVMDataOnly,
5017 v->DoUrgentLatencyAdjustment,
5018 v->UrgentLatencyAdjustmentFabricClockComponent,
5019 v->UrgentLatencyAdjustmentFabricClockReference,
5020 v->FabricClockPerState[i]);
5021
5022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5023 CalculateUrgentBurstFactor(
5024 v->swath_width_luma_ub_this_state[k],
5025 v->swath_width_chroma_ub_this_state[k],
5026 v->SwathHeightYThisState[k],
5027 v->SwathHeightCThisState[k],
5028 v->HTotal[k] / v->PixelClock[k],
5029 v->UrgLatency[i],
5030 v->CursorBufferSize,
5031 v->CursorWidth[k][0],
5032 v->CursorBPP[k][0],
5033 v->VRatio[k],
5034 v->VRatioChroma[k],
5035 v->BytePerPixelInDETY[k],
5036 v->BytePerPixelInDETC[k],
5037 v->DETBufferSizeYThisState[k],
5038 v->DETBufferSizeCThisState[k],
5039 &v->UrgentBurstFactorCursor[k],
5040 &v->UrgentBurstFactorLuma[k],
5041 &v->UrgentBurstFactorChroma[k],
5042 &NotUrgentLatencyHiding[k]);
5043 }
5044
5045 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5046 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5047 if (NotUrgentLatencyHiding[k]) {
5048 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5049 }
5050 }
5051
5052 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5053 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5054 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5055 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5056 }
5057
5058 v->TotalVActivePixelBandwidth[i][j] = 0;
5059 v->TotalVActiveCursorBandwidth[i][j] = 0;
5060 v->TotalMetaRowBandwidth[i][j] = 0;
5061 v->TotalDPTERowBandwidth[i][j] = 0;
5062 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5063 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5064 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5065 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5066 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5067 }
5068 }
5069 }
5070
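	//Calculate Writeback Delay Time and Maximum VStartup per state (Return BW is computed in the ReturnBWPerState loop further below)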
5072 for (i = 0; i < v->soc.num_states; ++i) {
5073 for (j = 0; j <= 1; ++j) {
5074 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5075 if (v->BlendingAndTiming[k] == k) {
5076 if (v->WritebackEnable[k] == true) {
5077 v->WritebackDelayTime[k] = v->WritebackLatency
5078 + CalculateWriteBackDelay(
5079 v->WritebackPixelFormat[k],
5080 v->WritebackHRatio[k],
5081 v->WritebackVRatio[k],
5082 v->WritebackVTaps[k],
5083 v->WritebackDestinationWidth[k],
5084 v->WritebackDestinationHeight[k],
5085 v->WritebackSourceHeight[k],
5086 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5087 } else {
5088 v->WritebackDelayTime[k] = 0.0;
5089 }
5090 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5091 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5092 v->WritebackDelayTime[k] = dml_max(
5093 v->WritebackDelayTime[k],
5094 v->WritebackLatency
5095 + CalculateWriteBackDelay(
5096 v->WritebackPixelFormat[m],
5097 v->WritebackHRatio[m],
5098 v->WritebackVRatio[m],
5099 v->WritebackVTaps[m],
5100 v->WritebackDestinationWidth[m],
5101 v->WritebackDestinationHeight[m],
5102 v->WritebackSourceHeight[m],
5103 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5104 }
5105 }
5106 }
5107 }
5108 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5109 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5110 if (v->BlendingAndTiming[k] == m) {
5111 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5112 }
5113 }
5114 }
5115 v->MaxMaxVStartup[i][j] = 0;
5116 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5117 v->MaximumVStartup[i][j][k] =
5118 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5119 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5120 v->VTotal[k] - v->VActive[k]
5121 - dml_max(
5122 1.0,
5123 dml_ceil(
5124 1.0 * v->WritebackDelayTime[k]
5125 / (v->HTotal[k]
5126 / v->PixelClock[k]),
5127 1.0));
5128 if (v->MaximumVStartup[i][j][k] > 1023)
5129 v->MaximumVStartup[i][j][k] = 1023;
5130 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5131 }
5132 }
5133 }
5134
5135 ReorderingBytes = v->NumberOfChannels
5136 * dml_max3(
5137 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5138 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5139 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5140
5141 for (i = 0; i < v->soc.num_states; ++i) {
5142 for (j = 0; j <= 1; ++j) {
5143 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5144 }
5145 }
5146
5147 if (v->UseMinimumRequiredDCFCLK == true)
5148 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5149
5150 for (i = 0; i < v->soc.num_states; ++i) {
5151 for (j = 0; j <= 1; ++j) {
5152 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5153 v->ReturnBusWidth * v->DCFCLKState[i][j],
5154 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5155 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5156 double PixelDataOnlyReturnBWPerState = dml_min(
5157 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5158 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5159 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5160 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5161 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5162
5163 if (v->HostVMEnable != true) {
5164 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5165 } else {
5166 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5167 }
5168 }
5169 }
5170
5171 //Re-ordering Buffer Support Check
5172 for (i = 0; i < v->soc.num_states; ++i) {
5173 for (j = 0; j <= 1; ++j) {
5174 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5175 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5176 v->ROBSupport[i][j] = true;
5177 } else {
5178 v->ROBSupport[i][j] = false;
5179 }
5180 }
5181 }
5182
5183 //Vertical Active BW support check
5184
5185 MaxTotalVActiveRDBandwidth = 0;
5186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5187 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5188 }
5189
5190 for (i = 0; i < v->soc.num_states; ++i) {
5191 for (j = 0; j <= 1; ++j) {
5192 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5193 dml_min(
5194 v->ReturnBusWidth * v->DCFCLKState[i][j],
5195 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5196 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5197 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5198 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5199
5200 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5201 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5202 } else {
5203 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5204 }
5205 }
5206 }
5207
5208 v->UrgentLatency = CalculateUrgentLatency(
5209 v->UrgentLatencyPixelDataOnly,
5210 v->UrgentLatencyPixelMixedWithVMData,
5211 v->UrgentLatencyVMDataOnly,
5212 v->DoUrgentLatencyAdjustment,
5213 v->UrgentLatencyAdjustmentFabricClockComponent,
5214 v->UrgentLatencyAdjustmentFabricClockReference,
5215 v->FabricClock);
5216 //Prefetch Check
5217 for (i = 0; i < v->soc.num_states; ++i) {
5218 for (j = 0; j <= 1; ++j) {
5219 double VMDataOnlyReturnBWPerState;
5220 double HostVMInefficiencyFactor = 1;
5221 int NextPrefetchModeState = MinPrefetchMode;
5222 bool UnboundedRequestEnabledThisState = false;
5223 int CompressedBufferSizeInkByteThisState = 0;
5224 double dummy;
5225
5226 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5227
5228 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5229 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5230 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5231 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5232 }
5233
5234 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5235 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5236 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5237 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5238 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5239 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5240 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5241 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5242 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5243 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5244 }
5245
5246 VMDataOnlyReturnBWPerState = dml_min(
5247 dml_min(
5248 v->ReturnBusWidth * v->DCFCLKState[i][j],
5249 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5250 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5251 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5252 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5253 if (v->GPUVMEnable && v->HostVMEnable)
5254 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5255
5256 v->ExtraLatency = CalculateExtraLatency(
5257 v->RoundTripPingLatencyCycles,
5258 ReorderingBytes,
5259 v->DCFCLKState[i][j],
5260 v->TotalNumberOfActiveDPP[i][j],
5261 v->PixelChunkSizeInKByte,
5262 v->TotalNumberOfDCCActiveDPP[i][j],
5263 v->MetaChunkSize,
5264 v->ReturnBWPerState[i][j],
5265 v->GPUVMEnable,
5266 v->HostVMEnable,
5267 v->NumberOfActivePlanes,
5268 v->NoOfDPPThisState,
5269 v->dpte_group_bytes,
5270 HostVMInefficiencyFactor,
5271 v->HostVMMinPageSize,
5272 v->HostVMMaxNonCachedPageTableLevels);
5273
5274 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5275 do {
5276 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5277 v->MaxVStartup = v->NextMaxVStartup;
5278
5279 v->TWait = CalculateTWait(
5280 v->PrefetchModePerState[i][j],
5281 v->DRAMClockChangeLatency,
5282 v->UrgLatency[i],
5283 v->SREnterPlusExitTime);
5284
5285 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5286 CalculatePrefetchSchedulePerPlane(mode_lib,
5287 HostVMInefficiencyFactor,
5288 i, j, k);
5289 }
5290
5291 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5292 CalculateUrgentBurstFactor(
5293 v->swath_width_luma_ub_this_state[k],
5294 v->swath_width_chroma_ub_this_state[k],
5295 v->SwathHeightYThisState[k],
5296 v->SwathHeightCThisState[k],
5297 v->HTotal[k] / v->PixelClock[k],
5298 v->UrgentLatency,
5299 v->CursorBufferSize,
5300 v->CursorWidth[k][0],
5301 v->CursorBPP[k][0],
5302 v->VRatioPreY[i][j][k],
5303 v->VRatioPreC[i][j][k],
5304 v->BytePerPixelInDETY[k],
5305 v->BytePerPixelInDETC[k],
5306 v->DETBufferSizeYThisState[k],
5307 v->DETBufferSizeCThisState[k],
5308 &v->UrgentBurstFactorCursorPre[k],
5309 &v->UrgentBurstFactorLumaPre[k],
5310 &v->UrgentBurstFactorChroma[k],
5311 &v->NotUrgentLatencyHidingPre[k]);
5312 }
5313
5314 v->MaximumReadBandwidthWithPrefetch = 0.0;
5315 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5316 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5317 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5318
5319 v->MaximumReadBandwidthWithPrefetch =
5320 v->MaximumReadBandwidthWithPrefetch
5321 + dml_max3(
5322 v->VActivePixelBandwidth[i][j][k]
5323 + v->VActiveCursorBandwidth[i][j][k]
5324 + v->NoOfDPP[i][j][k]
5325 * (v->meta_row_bandwidth[i][j][k]
5326 + v->dpte_row_bandwidth[i][j][k]),
5327 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5328 v->NoOfDPP[i][j][k]
5329 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5330 * v->UrgentBurstFactorLumaPre[k]
5331 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5332 * v->UrgentBurstFactorChromaPre[k])
5333 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5334 }
5335
5336 v->NotEnoughUrgentLatencyHidingPre = false;
5337 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5338 if (v->NotUrgentLatencyHidingPre[k] == true) {
5339 v->NotEnoughUrgentLatencyHidingPre = true;
5340 }
5341 }
5342
5343 v->PrefetchSupported[i][j] = true;
5344 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5345 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5346 v->PrefetchSupported[i][j] = false;
5347 }
5348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5349 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5350 || v->NoTimeForPrefetch[i][j][k] == true) {
5351 v->PrefetchSupported[i][j] = false;
5352 }
5353 }
5354
5355 v->DynamicMetadataSupported[i][j] = true;
5356 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5357 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5358 v->DynamicMetadataSupported[i][j] = false;
5359 }
5360 }
5361
5362 v->VRatioInPrefetchSupported[i][j] = true;
5363 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5364 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5365 v->VRatioInPrefetchSupported[i][j] = false;
5366 }
5367 }
5368 v->AnyLinesForVMOrRowTooLarge = false;
5369 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5370 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5371 v->AnyLinesForVMOrRowTooLarge = true;
5372 }
5373 }
5374
5375 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5376
5377 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5378 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5379 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5380 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5381 - dml_max(
5382 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5383 v->NoOfDPP[i][j][k]
5384 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5385 * v->UrgentBurstFactorLumaPre[k]
5386 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5387 * v->UrgentBurstFactorChromaPre[k])
5388 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5389 }
5390 v->TotImmediateFlipBytes = 0.0;
5391 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5392 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5393 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5394 + v->DPTEBytesPerRow[i][j][k];
5395 }
5396
5397 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5398 CalculateFlipSchedule(
5399 mode_lib,
5400 HostVMInefficiencyFactor,
5401 v->ExtraLatency,
5402 v->UrgLatency[i],
5403 v->GPUVMMaxPageTableLevels,
5404 v->HostVMEnable,
5405 v->HostVMMaxNonCachedPageTableLevels,
5406 v->GPUVMEnable,
5407 v->HostVMMinPageSize,
5408 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5409 v->MetaRowBytes[i][j][k],
5410 v->DPTEBytesPerRow[i][j][k],
5411 v->BandwidthAvailableForImmediateFlip,
5412 v->TotImmediateFlipBytes,
5413 v->SourcePixelFormat[k],
5414 v->HTotal[k] / v->PixelClock[k],
5415 v->VRatio[k],
5416 v->VRatioChroma[k],
5417 v->Tno_bw[k],
5418 v->DCCEnable[k],
5419 v->dpte_row_height[k],
5420 v->meta_row_height[k],
5421 v->dpte_row_height_chroma[k],
5422 v->meta_row_height_chroma[k],
5423 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5424 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5425 &v->final_flip_bw[k],
5426 &v->ImmediateFlipSupportedForPipe[k]);
5427 }
5428 v->total_dcn_read_bw_with_flip = 0.0;
5429 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5430 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5431 + dml_max3(
5432 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5433 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5434 + v->VActiveCursorBandwidth[i][j][k],
5435 v->NoOfDPP[i][j][k]
5436 * (v->final_flip_bw[k]
5437 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5438 * v->UrgentBurstFactorLumaPre[k]
5439 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5440 * v->UrgentBurstFactorChromaPre[k])
5441 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5442 }
5443 v->ImmediateFlipSupportedForState[i][j] = true;
5444 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5445 v->ImmediateFlipSupportedForState[i][j] = false;
5446 }
5447 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5448 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5449 v->ImmediateFlipSupportedForState[i][j] = false;
5450 }
5451 }
5452 } else {
5453 v->ImmediateFlipSupportedForState[i][j] = false;
5454 }
5455
5456 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5457 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5458 NextPrefetchModeState = NextPrefetchModeState + 1;
5459 } else {
5460 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5461 }
5462 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5463 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5464 && ((v->HostVMEnable == false &&
5465 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5466 || v->ImmediateFlipSupportedForState[i][j] == true))
5467 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5468
5469 CalculateUnboundedRequestAndCompressedBufferSize(
5470 v->DETBufferSizeInKByte[0],
5471 v->ConfigReturnBufferSizeInKByte,
5472 v->UseUnboundedRequesting,
5473 v->TotalNumberOfActiveDPP[i][j],
5474 NoChroma,
5475 v->MaxNumDPP,
5476 v->CompressedBufferSegmentSizeInkByte,
5477 v->Output,
5478 &UnboundedRequestEnabledThisState,
5479 &CompressedBufferSizeInkByteThisState);
5480
5481 CalculateWatermarksAndDRAMSpeedChangeSupport(
5482 mode_lib,
5483 v->PrefetchModePerState[i][j],
5484 v->NumberOfActivePlanes,
5485 v->MaxLineBufferLines,
5486 v->LineBufferSize,
5487 v->WritebackInterfaceBufferSize,
5488 v->DCFCLKState[i][j],
5489 v->ReturnBWPerState[i][j],
5490 v->SynchronizedVBlank,
5491 v->dpte_group_bytes,
5492 v->MetaChunkSize,
5493 v->UrgLatency[i],
5494 v->ExtraLatency,
5495 v->WritebackLatency,
5496 v->WritebackChunkSize,
5497 v->SOCCLKPerState[i],
5498 v->DRAMClockChangeLatency,
5499 v->SRExitTime,
5500 v->SREnterPlusExitTime,
5501 v->SRExitZ8Time,
5502 v->SREnterPlusExitZ8Time,
5503 v->ProjectedDCFCLKDeepSleep[i][j],
5504 v->DETBufferSizeYThisState,
5505 v->DETBufferSizeCThisState,
5506 v->SwathHeightYThisState,
5507 v->SwathHeightCThisState,
5508 v->LBBitPerPixel,
5509 v->SwathWidthYThisState,
5510 v->SwathWidthCThisState,
5511 v->HRatio,
5512 v->HRatioChroma,
5513 v->vtaps,
5514 v->VTAPsChroma,
5515 v->VRatio,
5516 v->VRatioChroma,
5517 v->HTotal,
5518 v->PixelClock,
5519 v->BlendingAndTiming,
5520 v->NoOfDPPThisState,
5521 v->BytePerPixelInDETY,
5522 v->BytePerPixelInDETC,
5523 v->DSTXAfterScaler,
5524 v->DSTYAfterScaler,
5525 v->WritebackEnable,
5526 v->WritebackPixelFormat,
5527 v->WritebackDestinationWidth,
5528 v->WritebackDestinationHeight,
5529 v->WritebackSourceHeight,
5530 UnboundedRequestEnabledThisState,
5531 CompressedBufferSizeInkByteThisState,
5532 &v->DRAMClockChangeSupport[i][j],
5533 &v->UrgentWatermark,
5534 &v->WritebackUrgentWatermark,
5535 &v->DRAMClockChangeWatermark,
5536 &v->WritebackDRAMClockChangeWatermark,
5537 &dummy,
5538 &dummy,
5539 &dummy,
5540 &dummy,
5541 &v->MinActiveDRAMClockChangeLatencySupported);
5542 }
5543 }
5544
5545 /*PTE Buffer Size Check*/
5546 for (i = 0; i < v->soc.num_states; i++) {
5547 for (j = 0; j < 2; j++) {
5548 v->PTEBufferSizeNotExceeded[i][j] = true;
5549 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5550 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5551 v->PTEBufferSizeNotExceeded[i][j] = false;
5552 }
5553 }
5554 }
5555 }
5556
5557 /*Cursor Support Check*/
5558 v->CursorSupport = true;
5559 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5560 if (v->CursorWidth[k][0] > 0.0) {
5561 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5562 v->CursorSupport = false;
5563 }
5564 }
5565 }
5566
5567 /*Valid Pitch Check*/
5568 v->PitchSupport = true;
5569 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5570 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5571 if (v->DCCEnable[k] == true) {
5572 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5573 } else {
5574 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5575 }
5576 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5577 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5578 && v->SourcePixelFormat[k] != dm_mono_8) {
5579 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5580 if (v->DCCEnable[k] == true) {
5581 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5582 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5583 64.0 * v->Read256BlockWidthC[k]);
5584 } else {
5585 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5586 }
5587 } else {
5588 v->AlignedCPitch[k] = v->PitchC[k];
5589 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5590 }
5591 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5592 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5593 v->PitchSupport = false;
5594 }
5595 }
5596
5597 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5598 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5599 ViewportExceedsSurface = true;
5600 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5601 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5602 && v->SourcePixelFormat[k] != dm_rgbe) {
5603 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5604 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5605 ViewportExceedsSurface = true;
5606 }
5607 }
5608 }
5609 }
5610
5611 /*Mode Support, Voltage State and SOC Configuration*/
5612 for (i = v->soc.num_states - 1; i >= 0; i--) {
5613 for (j = 0; j < 2; j++) {
5614 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5615 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5616 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5617 && v->DTBCLKRequiredMoreThanSupported[i] == false
5618 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5619 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5620 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5621 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5622 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5623 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5624 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5625 && ((v->HostVMEnable == false
5626 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5627 || v->ImmediateFlipSupportedForState[i][j] == true)
5628 && FMTBufferExceeded == false) {
5629 v->ModeSupport[i][j] = true;
5630 } else {
5631 v->ModeSupport[i][j] = false;
5632 }
5633 }
5634 }
5635
5636 {
5637 unsigned int MaximumMPCCombine = 0;
5638 for (i = v->soc.num_states; i >= 0; i--) {
5639 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5640 v->VoltageLevel = i;
5641 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5642 if (v->ModeSupport[i][0] == true) {
5643 MaximumMPCCombine = 0;
5644 } else {
5645 MaximumMPCCombine = 1;
5646 }
5647 }
5648 }
5649 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5650 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5651 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5652 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5653 }
5654 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5655 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5656 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5657 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5658 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5659 v->maxMpcComb = MaximumMPCCombine;
5660 }
5661 }
5662
5663 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5664 struct display_mode_lib *mode_lib,
5665 unsigned int PrefetchMode,
5666 unsigned int NumberOfActivePlanes,
5667 unsigned int MaxLineBufferLines,
5668 unsigned int LineBufferSize,
5669 unsigned int WritebackInterfaceBufferSize,
5670 double DCFCLK,
5671 double ReturnBW,
5672 bool SynchronizedVBlank,
5673 unsigned int dpte_group_bytes[],
5674 unsigned int MetaChunkSize,
5675 double UrgentLatency,
5676 double ExtraLatency,
5677 double WritebackLatency,
5678 double WritebackChunkSize,
5679 double SOCCLK,
5680 double DRAMClockChangeLatency,
5681 double SRExitTime,
5682 double SREnterPlusExitTime,
5683 double SRExitZ8Time,
5684 double SREnterPlusExitZ8Time,
5685 double DCFCLKDeepSleep,
5686 unsigned int DETBufferSizeY[],
5687 unsigned int DETBufferSizeC[],
5688 unsigned int SwathHeightY[],
5689 unsigned int SwathHeightC[],
5690 unsigned int LBBitPerPixel[],
5691 double SwathWidthY[],
5692 double SwathWidthC[],
5693 double HRatio[],
5694 double HRatioChroma[],
5695 unsigned int vtaps[],
5696 unsigned int VTAPsChroma[],
5697 double VRatio[],
5698 double VRatioChroma[],
5699 unsigned int HTotal[],
5700 double PixelClock[],
5701 unsigned int BlendingAndTiming[],
5702 unsigned int DPPPerPlane[],
5703 double BytePerPixelDETY[],
5704 double BytePerPixelDETC[],
5705 double DSTXAfterScaler[],
5706 double DSTYAfterScaler[],
5707 bool WritebackEnable[],
5708 enum source_format_class WritebackPixelFormat[],
5709 double WritebackDestinationWidth[],
5710 double WritebackDestinationHeight[],
5711 double WritebackSourceHeight[],
5712 bool UnboundedRequestEnabled,
5713 int unsigned CompressedBufferSizeInkByte,
5714 enum clock_change_support *DRAMClockChangeSupport,
5715 double *UrgentWatermark,
5716 double *WritebackUrgentWatermark,
5717 double *DRAMClockChangeWatermark,
5718 double *WritebackDRAMClockChangeWatermark,
5719 double *StutterExitWatermark,
5720 double *StutterEnterPlusExitWatermark,
5721 double *Z8StutterExitWatermark,
5722 double *Z8StutterEnterPlusExitWatermark,
5723 double *MinActiveDRAMClockChangeLatencySupported)
5724 {
5725 struct vba_vars_st *v = &mode_lib->vba;
5726 double EffectiveLBLatencyHidingY;
5727 double EffectiveLBLatencyHidingC;
5728 double LinesInDETY[DC__NUM_DPP__MAX];
5729 double LinesInDETC;
5730 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5731 unsigned int LinesInDETCRoundedDownToSwath;
5732 double FullDETBufferingTimeY;
5733 double FullDETBufferingTimeC;
5734 double ActiveDRAMClockChangeLatencyMarginY;
5735 double ActiveDRAMClockChangeLatencyMarginC;
5736 double WritebackDRAMClockChangeLatencyMargin;
5737 double PlaneWithMinActiveDRAMClockChangeMargin;
5738 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5739 double WritebackDRAMClockChangeLatencyHiding;
5740 double TotalPixelBW = 0.0;
5741 int k, j;
5742
5743 *UrgentWatermark = UrgentLatency + ExtraLatency;
5744
5745 #ifdef __DML_VBA_DEBUG__
5746 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5747 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5748 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
5749 #endif
5750
5751 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5752
5753 #ifdef __DML_VBA_DEBUG__
5754 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5755 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
5756 #endif
5757
5758 v->TotalActiveWriteback = 0;
5759 for (k = 0; k < NumberOfActivePlanes; ++k) {
5760 if (WritebackEnable[k] == true) {
5761 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5762 }
5763 }
5764
5765 if (v->TotalActiveWriteback <= 1) {
5766 *WritebackUrgentWatermark = WritebackLatency;
5767 } else {
5768 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5769 }
5770
5771 if (v->TotalActiveWriteback <= 1) {
5772 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5773 } else {
5774 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5775 }
5776
5777 for (k = 0; k < NumberOfActivePlanes; ++k) {
5778 TotalPixelBW = TotalPixelBW
5779 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5780 / (HTotal[k] / PixelClock[k]);
5781 }
5782
5783 for (k = 0; k < NumberOfActivePlanes; ++k) {
5784 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5785
5786 v->LBLatencyHidingSourceLinesY = dml_min(
5787 (double) MaxLineBufferLines,
5788 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5789
5790 v->LBLatencyHidingSourceLinesC = dml_min(
5791 (double) MaxLineBufferLines,
5792 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5793
5794 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5795
5796 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5797
5798 if (UnboundedRequestEnabled) {
5799 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5800 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
5801 }
5802
5803 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5804 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5805 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5806 if (BytePerPixelDETC[k] > 0) {
5807 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5808 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5809 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5810 } else {
5811 LinesInDETC = 0;
5812 FullDETBufferingTimeC = 999999;
5813 }
5814
5815 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5816 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5817
5818 if (NumberOfActivePlanes > 1) {
5819 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5820 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5821 }
5822
5823 if (BytePerPixelDETC[k] > 0) {
5824 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5825 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5826
5827 if (NumberOfActivePlanes > 1) {
5828 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5829 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5830 }
5831 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5832 } else {
5833 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5834 }
5835
5836 if (WritebackEnable[k] == true) {
5837 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5838 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5839 if (WritebackPixelFormat[k] == dm_444_64) {
5840 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5841 }
5842 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5843 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5844 }
5845 }
5846
5847 v->MinActiveDRAMClockChangeMargin = 999999;
5848 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5849 for (k = 0; k < NumberOfActivePlanes; ++k) {
5850 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5851 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5852 if (BlendingAndTiming[k] == k) {
5853 PlaneWithMinActiveDRAMClockChangeMargin = k;
5854 } else {
5855 for (j = 0; j < NumberOfActivePlanes; ++j) {
5856 if (BlendingAndTiming[k] == j) {
5857 PlaneWithMinActiveDRAMClockChangeMargin = j;
5858 }
5859 }
5860 }
5861 }
5862 }
5863
5864 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5865
5866 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5867 for (k = 0; k < NumberOfActivePlanes; ++k) {
5868 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5869 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5870 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5871 }
5872 }
5873
5874 v->TotalNumberOfActiveOTG = 0;
5875
5876 for (k = 0; k < NumberOfActivePlanes; ++k) {
5877 if (BlendingAndTiming[k] == k) {
5878 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5879 }
5880 }
5881
5882 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5883 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5884 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5885 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5886 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5887 } else {
5888 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5889 }
5890
5891 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5892 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5893 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5894 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5895
5896 #ifdef __DML_VBA_DEBUG__
5897 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5898 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5899 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5900 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5901 #endif
5902 }
5903
5904 static void CalculateDCFCLKDeepSleep(
5905 struct display_mode_lib *mode_lib,
5906 unsigned int NumberOfActivePlanes,
5907 int BytePerPixelY[],
5908 int BytePerPixelC[],
5909 double VRatio[],
5910 double VRatioChroma[],
5911 double SwathWidthY[],
5912 double SwathWidthC[],
5913 unsigned int DPPPerPlane[],
5914 double HRatio[],
5915 double HRatioChroma[],
5916 double PixelClock[],
5917 double PSCL_THROUGHPUT[],
5918 double PSCL_THROUGHPUT_CHROMA[],
5919 double DPPCLK[],
5920 double ReadBandwidthLuma[],
5921 double ReadBandwidthChroma[],
5922 int ReturnBusWidth,
5923 double *DCFCLKDeepSleep)
5924 {
5925 struct vba_vars_st *v = &mode_lib->vba;
5926 double DisplayPipeLineDeliveryTimeLuma;
5927 double DisplayPipeLineDeliveryTimeChroma;
5928 double ReadBandwidth = 0.0;
5929 int k;
5930
5931 for (k = 0; k < NumberOfActivePlanes; ++k) {
5932
5933 if (VRatio[k] <= 1) {
5934 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5935 } else {
5936 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5937 }
5938 if (BytePerPixelC[k] == 0) {
5939 DisplayPipeLineDeliveryTimeChroma = 0;
5940 } else {
5941 if (VRatioChroma[k] <= 1) {
5942 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5943 } else {
5944 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5945 }
5946 }
5947
5948 if (BytePerPixelC[k] > 0) {
5949 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5950 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5951 } else {
5952 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5953 }
5954 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5955
5956 }
5957
5958 for (k = 0; k < NumberOfActivePlanes; ++k) {
5959 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5960 }
5961
5962 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5963
5964 for (k = 0; k < NumberOfActivePlanes; ++k) {
5965 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5966 }
5967 }
5968
/*
 * Compute the urgent burst factors for the cursor, luma and chroma streams
 * of one plane.
 *
 * For each stream the buffered amount is converted into a time
 * (lines * LineTime / vertical ratio). When that time does not exceed
 * UrgentLatency the stream's factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1; otherwise the factor is
 * BufferTime / (BufferTime - UrgentLatency). A vertical ratio that is not
 * greater than 0 yields a factor of 1.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding - always written (0, or 1 on any shortfall).
 *   *UrgentBurstFactorLuma        - always written.
 *   *UrgentBurstFactorCursor      - written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma      - written only when BytePerPixelInDETC > 0.
 * The conditionally written outputs keep whatever value the caller stored in
 * them when their condition is not met.
 *
 * The cursor stream is timed with the luma VRatio; the chroma stream is
 * timed with VRatioC.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* 1 << floor(log2(buffer size * 1024 / bytes per cursor line)) */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the buffered time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * The chroma surface drains at the chroma vertical ratio, so its
		 * buffered time must be derived from VRatioC rather than from the
		 * luma VRatio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
6042
6043 static void CalculatePixelDeliveryTimes(
6044 unsigned int NumberOfActivePlanes,
6045 double VRatio[],
6046 double VRatioChroma[],
6047 double VRatioPrefetchY[],
6048 double VRatioPrefetchC[],
6049 unsigned int swath_width_luma_ub[],
6050 unsigned int swath_width_chroma_ub[],
6051 unsigned int DPPPerPlane[],
6052 double HRatio[],
6053 double HRatioChroma[],
6054 double PixelClock[],
6055 double PSCL_THROUGHPUT[],
6056 double PSCL_THROUGHPUT_CHROMA[],
6057 double DPPCLK[],
6058 int BytePerPixelC[],
6059 enum scan_direction_class SourceScan[],
6060 unsigned int NumberOfCursors[],
6061 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6062 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6063 unsigned int BlockWidth256BytesY[],
6064 unsigned int BlockHeight256BytesY[],
6065 unsigned int BlockWidth256BytesC[],
6066 unsigned int BlockHeight256BytesC[],
6067 double DisplayPipeLineDeliveryTimeLuma[],
6068 double DisplayPipeLineDeliveryTimeChroma[],
6069 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6070 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6071 double DisplayPipeRequestDeliveryTimeLuma[],
6072 double DisplayPipeRequestDeliveryTimeChroma[],
6073 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6074 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6075 double CursorRequestDeliveryTime[],
6076 double CursorRequestDeliveryTimePrefetch[])
6077 {
6078 double req_per_swath_ub;
6079 int k;
6080
6081 for (k = 0; k < NumberOfActivePlanes; ++k) {
6082 if (VRatio[k] <= 1) {
6083 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6084 } else {
6085 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6086 }
6087
6088 if (BytePerPixelC[k] == 0) {
6089 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6090 } else {
6091 if (VRatioChroma[k] <= 1) {
6092 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6093 } else {
6094 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6095 }
6096 }
6097
6098 if (VRatioPrefetchY[k] <= 1) {
6099 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6100 } else {
6101 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6102 }
6103
6104 if (BytePerPixelC[k] == 0) {
6105 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6106 } else {
6107 if (VRatioPrefetchC[k] <= 1) {
6108 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6109 } else {
6110 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6111 }
6112 }
6113 }
6114
6115 for (k = 0; k < NumberOfActivePlanes; ++k) {
6116 if (SourceScan[k] != dm_vert) {
6117 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6118 } else {
6119 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6120 }
6121 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6122 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6123 if (BytePerPixelC[k] == 0) {
6124 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6125 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6126 } else {
6127 if (SourceScan[k] != dm_vert) {
6128 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6129 } else {
6130 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6131 }
6132 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6133 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6134 }
6135 #ifdef __DML_VBA_DEBUG__
6136 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6137 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6138 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6139 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6140 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6141 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6142 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6143 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6144 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6145 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6146 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6147 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6148 #endif
6149 }
6150
6151 for (k = 0; k < NumberOfActivePlanes; ++k) {
6152 int cursor_req_per_width;
6153 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6154 if (NumberOfCursors[k] > 0) {
6155 if (VRatio[k] <= 1) {
6156 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6157 } else {
6158 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6159 }
6160 if (VRatioPrefetchY[k] <= 1) {
6161 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6162 } else {
6163 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6164 }
6165 } else {
6166 CursorRequestDeliveryTime[k] = 0;
6167 CursorRequestDeliveryTimePrefetch[k] = 0;
6168 }
6169 #ifdef __DML_VBA_DEBUG__
6170 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6171 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6172 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6173 #endif
6174 }
6175 }
6176
6177 static void CalculateMetaAndPTETimes(
6178 int NumberOfActivePlanes,
6179 bool GPUVMEnable,
6180 int MetaChunkSize,
6181 int MinMetaChunkSizeBytes,
6182 int HTotal[],
6183 double VRatio[],
6184 double VRatioChroma[],
6185 double DestinationLinesToRequestRowInVBlank[],
6186 double DestinationLinesToRequestRowInImmediateFlip[],
6187 bool DCCEnable[],
6188 double PixelClock[],
6189 int BytePerPixelY[],
6190 int BytePerPixelC[],
6191 enum scan_direction_class SourceScan[],
6192 int dpte_row_height[],
6193 int dpte_row_height_chroma[],
6194 int meta_row_width[],
6195 int meta_row_width_chroma[],
6196 int meta_row_height[],
6197 int meta_row_height_chroma[],
6198 int meta_req_width[],
6199 int meta_req_width_chroma[],
6200 int meta_req_height[],
6201 int meta_req_height_chroma[],
6202 int dpte_group_bytes[],
6203 int PTERequestSizeY[],
6204 int PTERequestSizeC[],
6205 int PixelPTEReqWidthY[],
6206 int PixelPTEReqHeightY[],
6207 int PixelPTEReqWidthC[],
6208 int PixelPTEReqHeightC[],
6209 int dpte_row_width_luma_ub[],
6210 int dpte_row_width_chroma_ub[],
6211 double DST_Y_PER_PTE_ROW_NOM_L[],
6212 double DST_Y_PER_PTE_ROW_NOM_C[],
6213 double DST_Y_PER_META_ROW_NOM_L[],
6214 double DST_Y_PER_META_ROW_NOM_C[],
6215 double TimePerMetaChunkNominal[],
6216 double TimePerChromaMetaChunkNominal[],
6217 double TimePerMetaChunkVBlank[],
6218 double TimePerChromaMetaChunkVBlank[],
6219 double TimePerMetaChunkFlip[],
6220 double TimePerChromaMetaChunkFlip[],
6221 double time_per_pte_group_nom_luma[],
6222 double time_per_pte_group_vblank_luma[],
6223 double time_per_pte_group_flip_luma[],
6224 double time_per_pte_group_nom_chroma[],
6225 double time_per_pte_group_vblank_chroma[],
6226 double time_per_pte_group_flip_chroma[])
6227 {
6228 unsigned int meta_chunk_width;
6229 unsigned int min_meta_chunk_width;
6230 unsigned int meta_chunk_per_row_int;
6231 unsigned int meta_row_remainder;
6232 unsigned int meta_chunk_threshold;
6233 unsigned int meta_chunks_per_row_ub;
6234 unsigned int meta_chunk_width_chroma;
6235 unsigned int min_meta_chunk_width_chroma;
6236 unsigned int meta_chunk_per_row_int_chroma;
6237 unsigned int meta_row_remainder_chroma;
6238 unsigned int meta_chunk_threshold_chroma;
6239 unsigned int meta_chunks_per_row_ub_chroma;
6240 unsigned int dpte_group_width_luma;
6241 unsigned int dpte_groups_per_row_luma_ub;
6242 unsigned int dpte_group_width_chroma;
6243 unsigned int dpte_groups_per_row_chroma_ub;
6244 int k;
6245
6246 for (k = 0; k < NumberOfActivePlanes; ++k) {
6247 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6248 if (BytePerPixelC[k] == 0) {
6249 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6250 } else {
6251 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6252 }
6253 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6254 if (BytePerPixelC[k] == 0) {
6255 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6256 } else {
6257 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6258 }
6259 }
6260
6261 for (k = 0; k < NumberOfActivePlanes; ++k) {
6262 if (DCCEnable[k] == true) {
6263 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6264 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6265 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6266 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6267 if (SourceScan[k] != dm_vert) {
6268 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6269 } else {
6270 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6271 }
6272 if (meta_row_remainder <= meta_chunk_threshold) {
6273 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6274 } else {
6275 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6276 }
6277 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6278 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6279 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6280 if (BytePerPixelC[k] == 0) {
6281 TimePerChromaMetaChunkNominal[k] = 0;
6282 TimePerChromaMetaChunkVBlank[k] = 0;
6283 TimePerChromaMetaChunkFlip[k] = 0;
6284 } else {
6285 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6286 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6287 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6288 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6289 if (SourceScan[k] != dm_vert) {
6290 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6291 } else {
6292 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6293 }
6294 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6295 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6296 } else {
6297 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6298 }
6299 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6300 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6301 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6302 }
6303 } else {
6304 TimePerMetaChunkNominal[k] = 0;
6305 TimePerMetaChunkVBlank[k] = 0;
6306 TimePerMetaChunkFlip[k] = 0;
6307 TimePerChromaMetaChunkNominal[k] = 0;
6308 TimePerChromaMetaChunkVBlank[k] = 0;
6309 TimePerChromaMetaChunkFlip[k] = 0;
6310 }
6311 }
6312
6313 for (k = 0; k < NumberOfActivePlanes; ++k) {
6314 if (GPUVMEnable == true) {
6315 if (SourceScan[k] != dm_vert) {
6316 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6317 } else {
6318 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6319 }
6320 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6321 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6322 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6323 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6324 if (BytePerPixelC[k] == 0) {
6325 time_per_pte_group_nom_chroma[k] = 0;
6326 time_per_pte_group_vblank_chroma[k] = 0;
6327 time_per_pte_group_flip_chroma[k] = 0;
6328 } else {
6329 if (SourceScan[k] != dm_vert) {
6330 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6331 } else {
6332 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6333 }
6334 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6335 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6336 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6337 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6338 }
6339 } else {
6340 time_per_pte_group_nom_luma[k] = 0;
6341 time_per_pte_group_vblank_luma[k] = 0;
6342 time_per_pte_group_flip_luma[k] = 0;
6343 time_per_pte_group_nom_chroma[k] = 0;
6344 time_per_pte_group_vblank_chroma[k] = 0;
6345 time_per_pte_group_flip_chroma[k] = 0;
6346 }
6347 }
6348 }
6349
/*
 * Per-plane time available for each VM group and each VM request, during
 * VBlank and during an immediate flip.
 *
 * A plane gets non-zero times only when GPUVMEnable is set and either DCC is
 * enabled for it or GPUVMMaxPageTableLevels > 1; otherwise all four outputs
 * for that plane are 0. The group count is built from the dpde0 and/or
 * meta-PTE byte counts divided by vm_group_bytes (rounded up); the request
 * count uses the same byte counts divided by 64. With more than two page
 * table levels all four times are halved.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int plane;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		const bool HasChroma = BytePerPixelC[plane] > 0;
		int GroupCount;
		int RequestCount;
		double VBlankTime;
		double FlipTime;

		if (GPUVMEnable != true || (DCCEnable[plane] != true && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[plane] = 0;
			TimePerVMGroupFlip[plane] = 0;
			TimePerVMRequestVBlank[plane] = 0;
			TimePerVMRequestFlip[plane] = 0;
			continue;
		}

		if (DCCEnable[plane] == false) {
			/* No DCC: only the dpde0 bytes contribute. */
			GroupCount = HasChroma
					? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			RequestCount = HasChroma
					? dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
					: dpde0_bytes_per_frame_ub_l[plane] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* DCC with a single page table level: only the meta PTE bytes contribute. */
			GroupCount = HasChroma
					? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			RequestCount = HasChroma
					? meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64
					: meta_pte_bytes_per_frame_ub_l[plane] / 64;
		} else {
			/* DCC with other level counts: dpde0 and meta PTE bytes both contribute, plus a constant 1 (luma only) or 2 (with chroma) groups. */
			GroupCount = HasChroma
					? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			RequestCount = HasChroma
					? dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
						+ meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64
					: dpde0_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_l[plane] / 64;
		}

		/* Total time budget for the VM fetch, shared by the groups and by the requests. */
		VBlankTime = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane];
		FlipTime = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane];

		TimePerVMGroupVBlank[plane] = VBlankTime / GroupCount;
		TimePerVMGroupFlip[plane] = FlipTime / GroupCount;
		TimePerVMRequestVBlank[plane] = VBlankTime / RequestCount;
		TimePerVMRequestFlip[plane] = FlipTime / RequestCount;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[plane] = TimePerVMGroupVBlank[plane] / 2;
			TimePerVMGroupFlip[plane] = TimePerVMGroupFlip[plane] / 2;
			TimePerVMRequestVBlank[plane] = TimePerVMRequestVBlank[plane] / 2;
			TimePerVMRequestFlip[plane] = TimePerVMRequestFlip[plane] / 2;
		}
	}
}
6449
6450 static void CalculateStutterEfficiency(
6451 struct display_mode_lib *mode_lib,
6452 int CompressedBufferSizeInkByte,
6453 bool UnboundedRequestEnabled,
6454 int ConfigReturnBufferSizeInKByte,
6455 int MetaFIFOSizeInKEntries,
6456 int ZeroSizeBufferEntries,
6457 int NumberOfActivePlanes,
6458 int ROBBufferSizeInKByte,
6459 double TotalDataReadBandwidth,
6460 double DCFCLK,
6461 double ReturnBW,
6462 double COMPBUF_RESERVED_SPACE_64B,
6463 double COMPBUF_RESERVED_SPACE_ZS,
6464 double SRExitTime,
6465 double SRExitZ8Time,
6466 bool SynchronizedVBlank,
6467 double Z8StutterEnterPlusExitWatermark,
6468 double StutterEnterPlusExitWatermark,
6469 bool ProgressiveToInterlaceUnitInOPP,
6470 bool Interlace[],
6471 double MinTTUVBlank[],
6472 int DPPPerPlane[],
6473 unsigned int DETBufferSizeY[],
6474 int BytePerPixelY[],
6475 double BytePerPixelDETY[],
6476 double SwathWidthY[],
6477 int SwathHeightY[],
6478 int SwathHeightC[],
6479 double NetDCCRateLuma[],
6480 double NetDCCRateChroma[],
6481 double DCCFractionOfZeroSizeRequestsLuma[],
6482 double DCCFractionOfZeroSizeRequestsChroma[],
6483 int HTotal[],
6484 int VTotal[],
6485 double PixelClock[],
6486 double VRatio[],
6487 enum scan_direction_class SourceScan[],
6488 int BlockHeight256BytesY[],
6489 int BlockWidth256BytesY[],
6490 int BlockHeight256BytesC[],
6491 int BlockWidth256BytesC[],
6492 int DCCYMaxUncompressedBlock[],
6493 int DCCCMaxUncompressedBlock[],
6494 int VActive[],
6495 bool DCCEnable[],
6496 bool WritebackEnable[],
6497 double ReadBandwidthPlaneLuma[],
6498 double ReadBandwidthPlaneChroma[],
6499 double meta_row_bw[],
6500 double dpte_row_bw[],
6501 double *StutterEfficiencyNotIncludingVBlank,
6502 double *StutterEfficiency,
6503 int *NumberOfStutterBurstsPerFrame,
6504 double *Z8StutterEfficiencyNotIncludingVBlank,
6505 double *Z8StutterEfficiency,
6506 int *Z8NumberOfStutterBurstsPerFrame,
6507 double *StutterPeriod)
6508 {
6509 struct vba_vars_st *v = &mode_lib->vba;
6510
6511 double DETBufferingTimeY;
6512 double SwathWidthYCriticalPlane = 0;
6513 double VActiveTimeCriticalPlane = 0;
6514 double FrameTimeCriticalPlane = 0;
6515 int BytePerPixelYCriticalPlane = 0;
6516 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6517 double MinTTUVBlankCriticalPlane = 0;
6518 double TotalCompressedReadBandwidth;
6519 double TotalRowReadBandwidth;
6520 double AverageDCCCompressionRate;
6521 double EffectiveCompressedBufferSize;
6522 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6523 double StutterBurstTime;
6524 int TotalActiveWriteback;
6525 double LinesInDETY;
6526 double LinesInDETYRoundedDownToSwath;
6527 double MaximumEffectiveCompressionLuma;
6528 double MaximumEffectiveCompressionChroma;
6529 double TotalZeroSizeRequestReadBandwidth;
6530 double TotalZeroSizeCompressedReadBandwidth;
6531 double AverageDCCZeroSizeFraction;
6532 double AverageZeroSizeCompressionRate;
6533 int TotalNumberOfActiveOTG = 0;
6534 double LastStutterPeriod = 0.0;
6535 double LastZ8StutterPeriod = 0.0;
6536 int k;
6537
6538 TotalZeroSizeRequestReadBandwidth = 0;
6539 TotalZeroSizeCompressedReadBandwidth = 0;
6540 TotalRowReadBandwidth = 0;
6541 TotalCompressedReadBandwidth = 0;
6542
6543 for (k = 0; k < NumberOfActivePlanes; ++k) {
6544 if (DCCEnable[k] == true) {
6545 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6546 || DCCYMaxUncompressedBlock[k] < 256) {
6547 MaximumEffectiveCompressionLuma = 2;
6548 } else {
6549 MaximumEffectiveCompressionLuma = 4;
6550 }
6551 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6552 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6553 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6554 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6555 if (ReadBandwidthPlaneChroma[k] > 0) {
6556 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6557 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6558 MaximumEffectiveCompressionChroma = 2;
6559 } else {
6560 MaximumEffectiveCompressionChroma = 4;
6561 }
6562 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6563 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6564 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6565 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6566 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6567 }
6568 } else {
6569 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6570 }
6571 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6572 }
6573
6574 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6575 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6576
6577 #ifdef __DML_VBA_DEBUG__
6578 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6579 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6580 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6581 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6582 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6583 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6584 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6585 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6586 #endif
6587
6588 if (AverageDCCZeroSizeFraction == 1) {
6589 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6590 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6591 } else if (AverageDCCZeroSizeFraction > 0) {
6592 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6593 EffectiveCompressedBufferSize = dml_min(
6594 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6595 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6596 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6597 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6598 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6599 dml_print(
6600 "DML::%s: min 2 = %f\n",
6601 __func__,
6602 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6603 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6604 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6605 } else {
6606 EffectiveCompressedBufferSize = dml_min(
6607 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6608 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6609 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6610 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6611 }
6612
6613 #ifdef __DML_VBA_DEBUG__
6614 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6615 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6616 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6617 #endif
6618
6619 *StutterPeriod = 0;
6620 for (k = 0; k < NumberOfActivePlanes; ++k) {
6621 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6622 / BytePerPixelDETY[k] / SwathWidthY[k];
6623 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6624 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6625 #ifdef __DML_VBA_DEBUG__
6626 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6627 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6628 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6629 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6630 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6631 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6632 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6633 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6634 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6635 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6636 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6637 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6638 #endif
6639
6640 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6641 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6642
6643 *StutterPeriod = DETBufferingTimeY;
6644 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6645 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6646 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6647 SwathWidthYCriticalPlane = SwathWidthY[k];
6648 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6649 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6650
6651 #ifdef __DML_VBA_DEBUG__
6652 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6653 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6654 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6655 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6656 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6657 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6658 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6659 #endif
6660 }
6661 }
6662
6663 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6664 #ifdef __DML_VBA_DEBUG__
6665 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6666 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6667 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6668 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6669 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6670 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6671 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6672 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6673 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6674 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6675 #endif
6676
6677 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6678 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6679 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6680 #ifdef __DML_VBA_DEBUG__
6681 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6682 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6683 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6684 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6685 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6686 #endif
6687 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6688
6689 dml_print(
6690 "DML::%s: Time to finish residue swath=%f\n",
6691 __func__,
6692 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6693
6694 TotalActiveWriteback = 0;
6695 for (k = 0; k < NumberOfActivePlanes; ++k) {
6696 if (WritebackEnable[k]) {
6697 TotalActiveWriteback = TotalActiveWriteback + 1;
6698 }
6699 }
6700
6701 if (TotalActiveWriteback == 0) {
6702 #ifdef __DML_VBA_DEBUG__
6703 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6704 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6705 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6706 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6707 #endif
6708 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6709 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6710 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6711 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6712 } else {
6713 *StutterEfficiencyNotIncludingVBlank = 0.;
6714 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6715 *NumberOfStutterBurstsPerFrame = 0;
6716 *Z8NumberOfStutterBurstsPerFrame = 0;
6717 }
6718 #ifdef __DML_VBA_DEBUG__
6719 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6720 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6721 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6722 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6723 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6724 #endif
6725
6726 for (k = 0; k < NumberOfActivePlanes; ++k) {
6727 if (v->BlendingAndTiming[k] == k) {
6728 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6729 }
6730 }
6731
6732 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6733 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6734
6735 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6736 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6737 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6738 } else {
6739 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6740 }
6741 } else {
6742 *StutterEfficiency = 0;
6743 }
6744
6745 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6746 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6747 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6748 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6749 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6750 } else {
6751 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6752 }
6753 } else {
6754 *Z8StutterEfficiency = 0.;
6755 }
6756
6757 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6758 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6759 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6760 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6761 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6762 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6763 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6764 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6765 }
6766
6767 static void CalculateSwathAndDETConfiguration(
6768 bool ForceSingleDPP,
6769 int NumberOfActivePlanes,
6770 unsigned int DETBufferSizeInKByte,
6771 double MaximumSwathWidthLuma[],
6772 double MaximumSwathWidthChroma[],
6773 enum scan_direction_class SourceScan[],
6774 enum source_format_class SourcePixelFormat[],
6775 enum dm_swizzle_mode SurfaceTiling[],
6776 int ViewportWidth[],
6777 int ViewportHeight[],
6778 int SurfaceWidthY[],
6779 int SurfaceWidthC[],
6780 int SurfaceHeightY[],
6781 int SurfaceHeightC[],
6782 int Read256BytesBlockHeightY[],
6783 int Read256BytesBlockHeightC[],
6784 int Read256BytesBlockWidthY[],
6785 int Read256BytesBlockWidthC[],
6786 enum odm_combine_mode ODMCombineEnabled[],
6787 int BlendingAndTiming[],
6788 int BytePerPixY[],
6789 int BytePerPixC[],
6790 double BytePerPixDETY[],
6791 double BytePerPixDETC[],
6792 int HActive[],
6793 double HRatio[],
6794 double HRatioChroma[],
6795 int DPPPerPlane[],
6796 int swath_width_luma_ub[],
6797 int swath_width_chroma_ub[],
6798 double SwathWidth[],
6799 double SwathWidthChroma[],
6800 int SwathHeightY[],
6801 int SwathHeightC[],
6802 unsigned int DETBufferSizeY[],
6803 unsigned int DETBufferSizeC[],
6804 bool ViewportSizeSupportPerPlane[],
6805 bool *ViewportSizeSupport)
6806 {
6807 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6808 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6809 int MinimumSwathHeightY;
6810 int MinimumSwathHeightC;
6811 int RoundedUpMaxSwathSizeBytesY;
6812 int RoundedUpMaxSwathSizeBytesC;
6813 int RoundedUpMinSwathSizeBytesY;
6814 int RoundedUpMinSwathSizeBytesC;
6815 int RoundedUpSwathSizeBytesY;
6816 int RoundedUpSwathSizeBytesC;
6817 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6818 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6819 int k;
6820
6821 CalculateSwathWidth(
6822 ForceSingleDPP,
6823 NumberOfActivePlanes,
6824 SourcePixelFormat,
6825 SourceScan,
6826 ViewportWidth,
6827 ViewportHeight,
6828 SurfaceWidthY,
6829 SurfaceWidthC,
6830 SurfaceHeightY,
6831 SurfaceHeightC,
6832 ODMCombineEnabled,
6833 BytePerPixY,
6834 BytePerPixC,
6835 Read256BytesBlockHeightY,
6836 Read256BytesBlockHeightC,
6837 Read256BytesBlockWidthY,
6838 Read256BytesBlockWidthC,
6839 BlendingAndTiming,
6840 HActive,
6841 HRatio,
6842 DPPPerPlane,
6843 SwathWidthSingleDPP,
6844 SwathWidthSingleDPPChroma,
6845 SwathWidth,
6846 SwathWidthChroma,
6847 MaximumSwathHeightY,
6848 MaximumSwathHeightC,
6849 swath_width_luma_ub,
6850 swath_width_chroma_ub);
6851
6852 *ViewportSizeSupport = true;
6853 for (k = 0; k < NumberOfActivePlanes; ++k) {
6854 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6855 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6856 if (SurfaceTiling[k] == dm_sw_linear
6857 || (SourcePixelFormat[k] == dm_444_64
6858 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6859 && SourceScan[k] != dm_vert)) {
6860 MinimumSwathHeightY = MaximumSwathHeightY[k];
6861 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6862 MinimumSwathHeightY = MaximumSwathHeightY[k];
6863 } else {
6864 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6865 }
6866 MinimumSwathHeightC = MaximumSwathHeightC[k];
6867 } else {
6868 if (SurfaceTiling[k] == dm_sw_linear) {
6869 MinimumSwathHeightY = MaximumSwathHeightY[k];
6870 MinimumSwathHeightC = MaximumSwathHeightC[k];
6871 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6872 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6873 MinimumSwathHeightC = MaximumSwathHeightC[k];
6874 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6875 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6876 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6877 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6878 MinimumSwathHeightY = MaximumSwathHeightY[k];
6879 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6880 } else {
6881 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6882 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6883 }
6884 }
6885
6886 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6887 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6888 if (SourcePixelFormat[k] == dm_420_10) {
6889 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6890 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6891 }
6892 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6893 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6894 if (SourcePixelFormat[k] == dm_420_10) {
6895 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6896 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6897 }
6898
6899 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6900 SwathHeightY[k] = MaximumSwathHeightY[k];
6901 SwathHeightC[k] = MaximumSwathHeightC[k];
6902 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6903 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6904 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6905 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6906 SwathHeightY[k] = MinimumSwathHeightY;
6907 SwathHeightC[k] = MaximumSwathHeightC[k];
6908 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6909 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6910 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6911 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6912 SwathHeightY[k] = MaximumSwathHeightY[k];
6913 SwathHeightC[k] = MinimumSwathHeightC;
6914 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6915 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6916 } else {
6917 SwathHeightY[k] = MinimumSwathHeightY;
6918 SwathHeightC[k] = MinimumSwathHeightC;
6919 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6920 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6921 }
6922 {
6923 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6924 if (SwathHeightC[k] == 0) {
6925 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6926 DETBufferSizeC[k] = 0;
6927 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6928 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6929 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6930 } else {
6931 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6932 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6933 }
6934
6935 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6936 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6937 *ViewportSizeSupport = false;
6938 ViewportSizeSupportPerPlane[k] = false;
6939 } else {
6940 ViewportSizeSupportPerPlane[k] = true;
6941 }
6942 }
6943 }
6944 }
6945
6946 static void CalculateSwathWidth(
6947 bool ForceSingleDPP,
6948 int NumberOfActivePlanes,
6949 enum source_format_class SourcePixelFormat[],
6950 enum scan_direction_class SourceScan[],
6951 int ViewportWidth[],
6952 int ViewportHeight[],
6953 int SurfaceWidthY[],
6954 int SurfaceWidthC[],
6955 int SurfaceHeightY[],
6956 int SurfaceHeightC[],
6957 enum odm_combine_mode ODMCombineEnabled[],
6958 int BytePerPixY[],
6959 int BytePerPixC[],
6960 int Read256BytesBlockHeightY[],
6961 int Read256BytesBlockHeightC[],
6962 int Read256BytesBlockWidthY[],
6963 int Read256BytesBlockWidthC[],
6964 int BlendingAndTiming[],
6965 int HActive[],
6966 double HRatio[],
6967 int DPPPerPlane[],
6968 double SwathWidthSingleDPPY[],
6969 double SwathWidthSingleDPPC[],
6970 double SwathWidthY[],
6971 double SwathWidthC[],
6972 int MaximumSwathHeightY[],
6973 int MaximumSwathHeightC[],
6974 int swath_width_luma_ub[],
6975 int swath_width_chroma_ub[])
6976 {
6977 enum odm_combine_mode MainPlaneODMCombine;
6978 int j, k;
6979
6980 #ifdef __DML_VBA_DEBUG__
6981 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6982 #endif
6983
6984 for (k = 0; k < NumberOfActivePlanes; ++k) {
6985 if (SourceScan[k] != dm_vert) {
6986 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6987 } else {
6988 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6989 }
6990
6991 #ifdef __DML_VBA_DEBUG__
6992 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6993 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6994 #endif
6995
6996 MainPlaneODMCombine = ODMCombineEnabled[k];
6997 for (j = 0; j < NumberOfActivePlanes; ++j) {
6998 if (BlendingAndTiming[k] == j) {
6999 MainPlaneODMCombine = ODMCombineEnabled[j];
7000 }
7001 }
7002
7003 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
7004 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
7005 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
7006 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
7007 } else if (DPPPerPlane[k] == 2) {
7008 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
7009 } else {
7010 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7011 }
7012
7013 #ifdef __DML_VBA_DEBUG__
7014 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
7015 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
7016 #endif
7017
7018 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
7019 SwathWidthC[k] = SwathWidthY[k] / 2;
7020 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
7021 } else {
7022 SwathWidthC[k] = SwathWidthY[k];
7023 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
7024 }
7025
7026 if (ForceSingleDPP == true) {
7027 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7028 SwathWidthC[k] = SwathWidthSingleDPPC[k];
7029 }
7030 {
7031 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
7032 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
7033 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
7034 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7035
7036 #ifdef __DML_VBA_DEBUG__
7037 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
7038 #endif
7039
7040 if (SourceScan[k] != dm_vert) {
7041 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
7042 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
7043 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
7044 if (BytePerPixC[k] > 0) {
7045 swath_width_chroma_ub[k] = dml_min(
7046 surface_width_ub_c,
7047 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7048 } else {
7049 swath_width_chroma_ub[k] = 0;
7050 }
7051 } else {
7052 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7053 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7054 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7055 if (BytePerPixC[k] > 0) {
7056 swath_width_chroma_ub[k] = dml_min(
7057 surface_height_ub_c,
7058 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7059 } else {
7060 swath_width_chroma_ub[k] = 0;
7061 }
7062 }
7063 }
7064 }
7065 }
7066
7067 static double CalculateExtraLatency(
7068 int RoundTripPingLatencyCycles,
7069 int ReorderingBytes,
7070 double DCFCLK,
7071 int TotalNumberOfActiveDPP,
7072 int PixelChunkSizeInKByte,
7073 int TotalNumberOfDCCActiveDPP,
7074 int MetaChunkSize,
7075 double ReturnBW,
7076 bool GPUVMEnable,
7077 bool HostVMEnable,
7078 int NumberOfActivePlanes,
7079 int NumberOfDPP[],
7080 int dpte_group_bytes[],
7081 double HostVMInefficiencyFactor,
7082 double HostVMMinPageSize,
7083 int HostVMMaxNonCachedPageTableLevels)
7084 {
7085 double ExtraLatencyBytes;
7086 double ExtraLatency;
7087
7088 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7089 ReorderingBytes,
7090 TotalNumberOfActiveDPP,
7091 PixelChunkSizeInKByte,
7092 TotalNumberOfDCCActiveDPP,
7093 MetaChunkSize,
7094 GPUVMEnable,
7095 HostVMEnable,
7096 NumberOfActivePlanes,
7097 NumberOfDPP,
7098 dpte_group_bytes,
7099 HostVMInefficiencyFactor,
7100 HostVMMinPageSize,
7101 HostVMMaxNonCachedPageTableLevels);
7102
7103 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7104
7105 #ifdef __DML_VBA_DEBUG__
7106 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7107 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7108 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7109 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7110 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7111 #endif
7112
7113 return ExtraLatency;
7114 }
7115
/*
 * CalculateExtraLatencyBytes - byte count used for the extra-latency term.
 *
 * Base: ReorderingBytes plus one pixel chunk per active DPP and one meta
 * chunk per DCC-active DPP (chunk sizes are multiplied by 1024).
 * With GPUVM enabled, each plane adds NumberOfDPP[k] * dpte_group_bytes[k],
 * scaled by (1 + 8 * dynamic HostVM levels) and HostVMInefficiencyFactor.
 *
 * The dynamic HostVM level count is non-zero only when both GPUVM and HostVM
 * are enabled; it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (floored at 0) depending on HostVMMinPageSize being below 2048, below
 * 1048576, or larger.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double Bytes;
	int DynamicLevels = 0;
	int k;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	Bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return Bytes;

	/* Page-table fetch overhead per plane. */
	for (k = 0; k < NumberOfActivePlanes; ++k)
		Bytes += NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * DynamicLevels) * HostVMInefficiencyFactor;

	return Bytes;
}
7155
/*
 * CalculateUrgentLatency - pick the urgent latency, optionally adjusted for
 * the fabric clock.
 *
 * Returns the largest of the three urgent-latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double WorstCaseLatency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCaseLatency;

	return WorstCaseLatency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7173
7174 static void UseMinimumDCFCLK(
7175 struct display_mode_lib *mode_lib,
7176 int MaxPrefetchMode,
7177 int ReorderingBytes)
7178 {
7179 struct vba_vars_st *v = &mode_lib->vba;
7180 int dummy1, i, j, k;
7181 double NormalEfficiency, dummy2, dummy3;
7182 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7183
7184 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7185 for (i = 0; i < v->soc.num_states; ++i) {
7186 for (j = 0; j <= 1; ++j) {
7187 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7188 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7189 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7190 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7191 double MinimumTWait;
7192 double NonDPTEBandwidth;
7193 double DPTEBandwidth;
7194 double DCFCLKRequiredForAverageBandwidth;
7195 double ExtraLatencyBytes;
7196 double ExtraLatencyCycles;
7197 double DCFCLKRequiredForPeakBandwidth;
7198 int NoOfDPPState[DC__NUM_DPP__MAX];
7199 double MinimumTvmPlus2Tr0;
7200
7201 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7202 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7203 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7204 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7205 }
7206
7207 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7208 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7209 }
7210
7211 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7212 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7213 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7214 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7215 DCFCLKRequiredForAverageBandwidth = dml_max3(
7216 v->ProjectedDCFCLKDeepSleep[i][j],
7217 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7218 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7219 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7220
7221 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7222 ReorderingBytes,
7223 v->TotalNumberOfActiveDPP[i][j],
7224 v->PixelChunkSizeInKByte,
7225 v->TotalNumberOfDCCActiveDPP[i][j],
7226 v->MetaChunkSize,
7227 v->GPUVMEnable,
7228 v->HostVMEnable,
7229 v->NumberOfActivePlanes,
7230 NoOfDPPState,
7231 v->dpte_group_bytes,
7232 1,
7233 v->HostVMMinPageSize,
7234 v->HostVMMaxNonCachedPageTableLevels);
7235 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7236 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7237 double DCFCLKCyclesRequiredInPrefetch;
7238 double ExpectedPrefetchBWAcceleration;
7239 double PrefetchTime;
7240
7241 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7242 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7243 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7244 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7245 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7246 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7247 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7248 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7249 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7250 DynamicMetadataVMExtraLatency[k] =
7251 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7252 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7253 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7254 - v->UrgLatency[i]
7255 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7256 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7257 - DynamicMetadataVMExtraLatency[k];
7258
7259 if (PrefetchTime > 0) {
7260 double ExpectedVRatioPrefetch;
7261 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7262 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7263 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7264 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7265 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7266 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7267 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7268 }
7269 } else {
7270 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7271 }
7272 if (v->DynamicMetadataEnable[k] == true) {
7273 double TSetupPipe;
7274 double TdmbfPipe;
7275 double TdmsksPipe;
7276 double TdmecPipe;
7277 double AllowedTimeForUrgentExtraLatency;
7278
7279 CalculateVupdateAndDynamicMetadataParameters(
7280 v->MaxInterDCNTileRepeaters,
7281 v->RequiredDPPCLK[i][j][k],
7282 v->RequiredDISPCLK[i][j],
7283 v->ProjectedDCFCLKDeepSleep[i][j],
7284 v->PixelClock[k],
7285 v->HTotal[k],
7286 v->VTotal[k] - v->VActive[k],
7287 v->DynamicMetadataTransmittedBytes[k],
7288 v->DynamicMetadataLinesBeforeActiveRequired[k],
7289 v->Interlace[k],
7290 v->ProgressiveToInterlaceUnitInOPP,
7291 &TSetupPipe,
7292 &TdmbfPipe,
7293 &TdmecPipe,
7294 &TdmsksPipe,
7295 &dummy1,
7296 &dummy2,
7297 &dummy3);
7298 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7299 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7300 if (AllowedTimeForUrgentExtraLatency > 0) {
7301 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7302 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7303 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7304 } else {
7305 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7306 }
7307 }
7308 }
7309 DCFCLKRequiredForPeakBandwidth = 0;
7310 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7311 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7312 }
7313 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7314 * (v->GPUVMEnable == true ?
7315 (v->HostVMEnable == true ?
7316 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7317 0);
7318 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7319 double MaximumTvmPlus2Tr0PlusTsw;
7320 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7321 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7322 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7323 } else {
7324 DCFCLKRequiredForPeakBandwidth = dml_max3(
7325 DCFCLKRequiredForPeakBandwidth,
7326 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7327 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7328 }
7329 }
7330 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7331 }
7332 }
7333 }
7334
7335 static void CalculateUnboundedRequestAndCompressedBufferSize(
7336 unsigned int DETBufferSizeInKByte,
7337 int ConfigReturnBufferSizeInKByte,
7338 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7339 int TotalActiveDPP,
7340 bool NoChromaPlanes,
7341 int MaxNumDPP,
7342 int CompressedBufferSegmentSizeInkByteFinal,
7343 enum output_encoder_class *Output,
7344 bool *UnboundedRequestEnabled,
7345 int *CompressedBufferSizeInkByte)
7346 {
7347 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7348
7349 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7350 *CompressedBufferSizeInkByte = (
7351 *UnboundedRequestEnabled == true ?
7352 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7353 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7354 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7355
7356 #ifdef __DML_VBA_DEBUG__
7357 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7358 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7359 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7360 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7361 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7362 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7363 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7364 #endif
7365 }
7366
7367 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7368 {
7369 bool ret_val = false;
7370
7371 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7372 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7373 ret_val = false;
7374 }
7375 return (ret_val);
7376 }
7377
7378