1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #ifdef CONFIG_DRM_AMD_DC_DCN
27 #include "dc.h"
28 #include "dc_link.h"
29 #include "../display_mode_lib.h"
30 #include "display_mode_vba_30.h"
31 #include "../dml_inline_defs.h"
32
33
34 /*
35 * NOTE:
36 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
37 *
38 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
39 * ways. Unless there is something clearly wrong with it the code should
40 * remain as-is as it provides us with a guarantee from HW that it is correct.
41 */
42
43
/*
 * Bundle of per-pipe parameters. The only use visible in this part of the
 * file is the `Pipe *myPipe` argument of CalculatePrefetchSchedule().
 *
 * NOTE(review): the field comments below are inferred from the field names
 * only; confirm against the code that fills this structure in.
 */
typedef struct {
	/* Clock values; units are not stated here (presumably MHz - TODO confirm). */
	double DPPCLK;
	double DISPCLK;
	double PixelClock;
	double DCFCLKDeepSleep;
	unsigned int DPPPerPlane;
	bool ScalerEnabled;
	enum scan_direction_class SourceScan;
	/* 256-byte block dimensions, kept separately for the Y and C planes. */
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* Declared as unsigned int, unlike the bool flags in this struct. */
	unsigned int InterlaceEnable;
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;
	/* Declared as unsigned int, unlike the bool flags in this struct. */
	unsigned int DCCEnable;
	bool ODMCombineEnabled;
} Pipe;
63
/*
 * Special bits-per-pixel marker values.
 * NOTE(review): their users are outside this part of the file; the names
 * suggest "no valid BPP" and "pipe is blended into another" - confirm.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
/*
 * Width limits. The values match the 5184 and 4096 (420 mode) slice-width
 * bounds quoted in the comments at the top of dscceComputeDelay().
 */
#define DCN30_MAX_DSC_IMAGE_WIDTH 5184
#define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
68
69 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
70 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
71 struct display_mode_lib *mode_lib);
72 static unsigned int dscceComputeDelay(
73 unsigned int bpc,
74 double BPP,
75 unsigned int sliceWidth,
76 unsigned int numSlices,
77 enum output_format_class pixelFormat,
78 enum output_encoder_class Output);
79 static unsigned int dscComputeDelay(
80 enum output_format_class pixelFormat,
81 enum output_encoder_class Output);
// Super monster function with 64 parameters
83 static bool CalculatePrefetchSchedule(
84 struct display_mode_lib *mode_lib,
85 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
86 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
87 Pipe *myPipe,
88 unsigned int DSCDelay,
89 double DPPCLKDelaySubtotalPlusCNVCFormater,
90 double DPPCLKDelaySCL,
91 double DPPCLKDelaySCLLBOnly,
92 double DPPCLKDelayCNVCCursor,
93 double DISPCLKDelaySubtotal,
94 unsigned int DPP_RECOUT_WIDTH,
95 enum output_format_class OutputFormat,
96 unsigned int MaxInterDCNTileRepeaters,
97 unsigned int VStartup,
98 unsigned int MaxVStartup,
99 unsigned int GPUVMPageTableLevels,
100 bool GPUVMEnable,
101 bool HostVMEnable,
102 unsigned int HostVMMaxNonCachedPageTableLevels,
103 double HostVMMinPageSize,
104 bool DynamicMetadataEnable,
105 bool DynamicMetadataVMEnabled,
106 int DynamicMetadataLinesBeforeActiveRequired,
107 unsigned int DynamicMetadataTransmittedBytes,
108 double UrgentLatency,
109 double UrgentExtraLatency,
110 double TCalc,
111 unsigned int PDEAndMetaPTEBytesFrame,
112 unsigned int MetaRowByte,
113 unsigned int PixelPTEBytesPerRow,
114 double PrefetchSourceLinesY,
115 unsigned int SwathWidthY,
116 int BytePerPixelY,
117 double VInitPreFillY,
118 unsigned int MaxNumSwathY,
119 double PrefetchSourceLinesC,
120 unsigned int SwathWidthC,
121 int BytePerPixelC,
122 double VInitPreFillC,
123 unsigned int MaxNumSwathC,
124 long swath_width_luma_ub,
125 long swath_width_chroma_ub,
126 unsigned int SwathHeightY,
127 unsigned int SwathHeightC,
128 double TWait,
129 bool ProgressiveToInterlaceUnitInOPP,
130 double *DSTXAfterScaler,
131 double *DSTYAfterScaler,
132 double *DestinationLinesForPrefetch,
133 double *PrefetchBandwidth,
134 double *DestinationLinesToRequestVMInVBlank,
135 double *DestinationLinesToRequestRowInVBlank,
136 double *VRatioPrefetchY,
137 double *VRatioPrefetchC,
138 double *RequiredPrefetchPixDataBWLuma,
139 double *RequiredPrefetchPixDataBWChroma,
140 bool *NotEnoughTimeForDynamicMetadata,
141 double *Tno_bw,
142 double *prefetch_vmrow_bw,
143 double *Tdmdl_vm,
144 double *Tdmdl,
145 unsigned int *VUpdateOffsetPix,
146 double *VUpdateWidthPix,
147 double *VReadyOffsetPix);
148 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
149 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
150 static void CalculateDCCConfiguration(
151 bool DCCEnabled,
152 bool DCCProgrammingAssumesScanDirectionUnknown,
153 enum source_format_class SourcePixelFormat,
154 unsigned int ViewportWidthLuma,
155 unsigned int ViewportWidthChroma,
156 unsigned int ViewportHeightLuma,
157 unsigned int ViewportHeightChroma,
158 double DETBufferSize,
159 unsigned int RequestHeight256ByteLuma,
160 unsigned int RequestHeight256ByteChroma,
161 enum dm_swizzle_mode TilingFormat,
162 unsigned int BytePerPixelY,
163 unsigned int BytePerPixelC,
164 double BytePerPixelDETY,
165 double BytePerPixelDETC,
166 enum scan_direction_class ScanOrientation,
167 unsigned int *MaxUncompressedBlockLuma,
168 unsigned int *MaxUncompressedBlockChroma,
169 unsigned int *MaxCompressedBlockLuma,
170 unsigned int *MaxCompressedBlockChroma,
171 unsigned int *IndependentBlockLuma,
172 unsigned int *IndependentBlockChroma);
173 static double CalculatePrefetchSourceLines(
174 struct display_mode_lib *mode_lib,
175 double VRatio,
176 double vtaps,
177 bool Interlace,
178 bool ProgressiveToInterlaceUnitInOPP,
179 unsigned int SwathHeight,
180 unsigned int ViewportYStart,
181 double *VInitPreFill,
182 unsigned int *MaxNumSwath);
183 static unsigned int CalculateVMAndRowBytes(
184 struct display_mode_lib *mode_lib,
185 bool DCCEnable,
186 unsigned int BlockHeight256Bytes,
187 unsigned int BlockWidth256Bytes,
188 enum source_format_class SourcePixelFormat,
189 unsigned int SurfaceTiling,
190 unsigned int BytePerPixel,
191 enum scan_direction_class ScanDirection,
192 unsigned int SwathWidth,
193 unsigned int ViewportHeight,
194 bool GPUVMEnable,
195 bool HostVMEnable,
196 unsigned int HostVMMaxNonCachedPageTableLevels,
197 unsigned int GPUVMMinPageSize,
198 unsigned int HostVMMinPageSize,
199 unsigned int PTEBufferSizeInRequests,
200 unsigned int Pitch,
201 unsigned int DCCMetaPitch,
202 unsigned int *MacroTileWidth,
203 unsigned int *MetaRowByte,
204 unsigned int *PixelPTEBytesPerRow,
205 bool *PTEBufferSizeNotExceeded,
206 unsigned int *dpte_row_width_ub,
207 unsigned int *dpte_row_height,
208 unsigned int *MetaRequestWidth,
209 unsigned int *MetaRequestHeight,
210 unsigned int *meta_row_width,
211 unsigned int *meta_row_height,
212 unsigned int *vm_group_bytes,
213 unsigned int *dpte_group_bytes,
214 unsigned int *PixelPTEReqWidth,
215 unsigned int *PixelPTEReqHeight,
216 unsigned int *PTERequestSize,
217 unsigned int *DPDE0BytesFrame,
218 unsigned int *MetaPTEBytesFrame);
219 static double CalculateTWait(
220 unsigned int PrefetchMode,
221 double DRAMClockChangeLatency,
222 double UrgentLatency,
223 double SREnterPlusExitTime);
224 static void CalculateRowBandwidth(
225 bool GPUVMEnable,
226 enum source_format_class SourcePixelFormat,
227 double VRatio,
228 double VRatioChroma,
229 bool DCCEnable,
230 double LineTime,
231 unsigned int MetaRowByteLuma,
232 unsigned int MetaRowByteChroma,
233 unsigned int meta_row_height_luma,
234 unsigned int meta_row_height_chroma,
235 unsigned int PixelPTEBytesPerRowLuma,
236 unsigned int PixelPTEBytesPerRowChroma,
237 unsigned int dpte_row_height_luma,
238 unsigned int dpte_row_height_chroma,
239 double *meta_row_bw,
240 double *dpte_row_bw);
241 static void CalculateFlipSchedule(
242 struct display_mode_lib *mode_lib,
243 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
244 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
245 double UrgentExtraLatency,
246 double UrgentLatency,
247 unsigned int GPUVMMaxPageTableLevels,
248 bool HostVMEnable,
249 unsigned int HostVMMaxNonCachedPageTableLevels,
250 bool GPUVMEnable,
251 double HostVMMinPageSize,
252 double PDEAndMetaPTEBytesPerFrame,
253 double MetaRowBytes,
254 double DPTEBytesPerRow,
255 double BandwidthAvailableForImmediateFlip,
256 unsigned int TotImmediateFlipBytes,
257 enum source_format_class SourcePixelFormat,
258 double LineTime,
259 double VRatio,
260 double VRatioChroma,
261 double Tno_bw,
262 bool DCCEnable,
263 unsigned int dpte_row_height,
264 unsigned int meta_row_height,
265 unsigned int dpte_row_height_chroma,
266 unsigned int meta_row_height_chroma,
267 double *DestinationLinesToRequestVMInImmediateFlip,
268 double *DestinationLinesToRequestRowInImmediateFlip,
269 double *final_flip_bw,
270 bool *ImmediateFlipSupportedForPipe);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 long WritebackDestinationWidth,
277 long WritebackDestinationHeight,
278 long WritebackSourceHeight,
279 unsigned int HTotal);
280 static void CalculateDynamicMetadataParameters(
281 int MaxInterDCNTileRepeaters,
282 double DPPCLK,
283 double DISPCLK,
284 double DCFClkDeepSleep,
285 double PixelClock,
286 long HTotal,
287 long VBlank,
288 long DynamicMetadataTransmittedBytes,
289 long DynamicMetadataLinesBeforeActiveRequired,
290 int InterlaceEnable,
291 bool ProgressiveToInterlaceUnitInOPP,
292 double *Tsetup,
293 double *Tdmbf,
294 double *Tdmec,
295 double *Tdmsks);
296 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
297 struct display_mode_lib *mode_lib,
298 unsigned int PrefetchMode,
299 unsigned int NumberOfActivePlanes,
300 unsigned int MaxLineBufferLines,
301 unsigned int LineBufferSize,
302 unsigned int DPPOutputBufferPixels,
303 unsigned int DETBufferSizeInKByte,
304 unsigned int WritebackInterfaceBufferSize,
305 double DCFCLK,
306 double ReturnBW,
307 bool GPUVMEnable,
308 unsigned int dpte_group_bytes[],
309 unsigned int MetaChunkSize,
310 double UrgentLatency,
311 double ExtraLatency,
312 double WritebackLatency,
313 double WritebackChunkSize,
314 double SOCCLK,
315 double DRAMClockChangeLatency,
316 double SRExitTime,
317 double SREnterPlusExitTime,
318 double DCFCLKDeepSleep,
319 unsigned int DPPPerPlane[],
320 bool DCCEnable[],
321 double DPPCLK[],
322 unsigned int DETBufferSizeY[],
323 unsigned int DETBufferSizeC[],
324 unsigned int SwathHeightY[],
325 unsigned int SwathHeightC[],
326 unsigned int LBBitPerPixel[],
327 double SwathWidthY[],
328 double SwathWidthC[],
329 double HRatio[],
330 double HRatioChroma[],
331 unsigned int vtaps[],
332 unsigned int VTAPsChroma[],
333 double VRatio[],
334 double VRatioChroma[],
335 unsigned int HTotal[],
336 double PixelClock[],
337 unsigned int BlendingAndTiming[],
338 double BytePerPixelDETY[],
339 double BytePerPixelDETC[],
340 double DSTXAfterScaler[],
341 double DSTYAfterScaler[],
342 bool WritebackEnable[],
343 enum source_format_class WritebackPixelFormat[],
344 double WritebackDestinationWidth[],
345 double WritebackDestinationHeight[],
346 double WritebackSourceHeight[],
347 enum clock_change_support *DRAMClockChangeSupport,
348 double *UrgentWatermark,
349 double *WritebackUrgentWatermark,
350 double *DRAMClockChangeWatermark,
351 double *WritebackDRAMClockChangeWatermark,
352 double *StutterExitWatermark,
353 double *StutterEnterPlusExitWatermark,
354 double *MinActiveDRAMClockChangeLatencySupported);
355 static void CalculateDCFCLKDeepSleep(
356 struct display_mode_lib *mode_lib,
357 unsigned int NumberOfActivePlanes,
358 int BytePerPixelY[],
359 int BytePerPixelC[],
360 double VRatio[],
361 double VRatioChroma[],
362 double SwathWidthY[],
363 double SwathWidthC[],
364 unsigned int DPPPerPlane[],
365 double HRatio[],
366 double HRatioChroma[],
367 double PixelClock[],
368 double PSCL_THROUGHPUT[],
369 double PSCL_THROUGHPUT_CHROMA[],
370 double DPPCLK[],
371 double ReadBandwidthLuma[],
372 double ReadBandwidthChroma[],
373 int ReturnBusWidth,
374 double *DCFCLKDeepSleep);
375 static void CalculateUrgentBurstFactor(
376 long swath_width_luma_ub,
377 long swath_width_chroma_ub,
378 unsigned int DETBufferSizeInKByte,
379 unsigned int SwathHeightY,
380 unsigned int SwathHeightC,
381 double LineTime,
382 double UrgentLatency,
383 double CursorBufferSize,
384 unsigned int CursorWidth,
385 unsigned int CursorBPP,
386 double VRatio,
387 double VRatioC,
388 double BytePerPixelInDETY,
389 double BytePerPixelInDETC,
390 double DETBufferSizeY,
391 double DETBufferSizeC,
392 double *UrgentBurstFactorCursor,
393 double *UrgentBurstFactorLuma,
394 double *UrgentBurstFactorChroma,
395 bool *NotEnoughUrgentLatencyHiding);
396
397 static void UseMinimumDCFCLK(
398 struct display_mode_lib *mode_lib,
399 int MaxInterDCNTileRepeaters,
400 int MaxPrefetchMode,
401 double FinalDRAMClockChangeLatency,
402 double SREnterPlusExitTime,
403 int ReturnBusWidth,
404 int RoundTripPingLatencyCycles,
405 int ReorderingBytes,
406 int PixelChunkSizeInKByte,
407 int MetaChunkSize,
408 bool GPUVMEnable,
409 int GPUVMMaxPageTableLevels,
410 bool HostVMEnable,
411 int NumberOfActivePlanes,
412 double HostVMMinPageSize,
413 int HostVMMaxNonCachedPageTableLevels,
414 bool DynamicMetadataVMEnabled,
415 enum immediate_flip_requirement ImmediateFlipRequirement,
416 bool ProgressiveToInterlaceUnitInOPP,
417 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
418 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
419 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
420 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
421 int VTotal[],
422 int VActive[],
423 int DynamicMetadataTransmittedBytes[],
424 int DynamicMetadataLinesBeforeActiveRequired[],
425 bool Interlace[],
426 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
427 double RequiredDISPCLK[][2],
428 double UrgLatency[],
429 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
430 double ProjectedDCFCLKDeepSleep[][2],
431 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
432 double TotalVActivePixelBandwidth[][2],
433 double TotalVActiveCursorBandwidth[][2],
434 double TotalMetaRowBandwidth[][2],
435 double TotalDPTERowBandwidth[][2],
436 unsigned int TotalNumberOfActiveDPP[][2],
437 unsigned int TotalNumberOfDCCActiveDPP[][2],
438 int dpte_group_bytes[],
439 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
440 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
441 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
442 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
443 int BytePerPixelY[],
444 int BytePerPixelC[],
445 int HTotal[],
446 double PixelClock[],
447 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
448 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
449 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
450 bool DynamicMetadataEnable[],
451 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
452 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
453 double ReadBandwidthLuma[],
454 double ReadBandwidthChroma[],
455 double DCFCLKPerState[],
456 double DCFCLKState[][2]);
457 static void CalculatePixelDeliveryTimes(
458 unsigned int NumberOfActivePlanes,
459 double VRatio[],
460 double VRatioChroma[],
461 double VRatioPrefetchY[],
462 double VRatioPrefetchC[],
463 unsigned int swath_width_luma_ub[],
464 unsigned int swath_width_chroma_ub[],
465 unsigned int DPPPerPlane[],
466 double HRatio[],
467 double HRatioChroma[],
468 double PixelClock[],
469 double PSCL_THROUGHPUT[],
470 double PSCL_THROUGHPUT_CHROMA[],
471 double DPPCLK[],
472 int BytePerPixelC[],
473 enum scan_direction_class SourceScan[],
474 unsigned int NumberOfCursors[],
475 unsigned int CursorWidth[][2],
476 unsigned int CursorBPP[][2],
477 unsigned int BlockWidth256BytesY[],
478 unsigned int BlockHeight256BytesY[],
479 unsigned int BlockWidth256BytesC[],
480 unsigned int BlockHeight256BytesC[],
481 double DisplayPipeLineDeliveryTimeLuma[],
482 double DisplayPipeLineDeliveryTimeChroma[],
483 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
484 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
485 double DisplayPipeRequestDeliveryTimeLuma[],
486 double DisplayPipeRequestDeliveryTimeChroma[],
487 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
488 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
489 double CursorRequestDeliveryTime[],
490 double CursorRequestDeliveryTimePrefetch[]);
491
492 static void CalculateMetaAndPTETimes(
493 int NumberOfActivePlanes,
494 bool GPUVMEnable,
495 int MetaChunkSize,
496 int MinMetaChunkSizeBytes,
497 int HTotal[],
498 double VRatio[],
499 double VRatioChroma[],
500 double DestinationLinesToRequestRowInVBlank[],
501 double DestinationLinesToRequestRowInImmediateFlip[],
502 bool DCCEnable[],
503 double PixelClock[],
504 int BytePerPixelY[],
505 int BytePerPixelC[],
506 enum scan_direction_class SourceScan[],
507 int dpte_row_height[],
508 int dpte_row_height_chroma[],
509 int meta_row_width[],
510 int meta_row_width_chroma[],
511 int meta_row_height[],
512 int meta_row_height_chroma[],
513 int meta_req_width[],
514 int meta_req_width_chroma[],
515 int meta_req_height[],
516 int meta_req_height_chroma[],
517 int dpte_group_bytes[],
518 int PTERequestSizeY[],
519 int PTERequestSizeC[],
520 int PixelPTEReqWidthY[],
521 int PixelPTEReqHeightY[],
522 int PixelPTEReqWidthC[],
523 int PixelPTEReqHeightC[],
524 int dpte_row_width_luma_ub[],
525 int dpte_row_width_chroma_ub[],
526 double DST_Y_PER_PTE_ROW_NOM_L[],
527 double DST_Y_PER_PTE_ROW_NOM_C[],
528 double DST_Y_PER_META_ROW_NOM_L[],
529 double DST_Y_PER_META_ROW_NOM_C[],
530 double TimePerMetaChunkNominal[],
531 double TimePerChromaMetaChunkNominal[],
532 double TimePerMetaChunkVBlank[],
533 double TimePerChromaMetaChunkVBlank[],
534 double TimePerMetaChunkFlip[],
535 double TimePerChromaMetaChunkFlip[],
536 double time_per_pte_group_nom_luma[],
537 double time_per_pte_group_vblank_luma[],
538 double time_per_pte_group_flip_luma[],
539 double time_per_pte_group_nom_chroma[],
540 double time_per_pte_group_vblank_chroma[],
541 double time_per_pte_group_flip_chroma[]);
542
543 static void CalculateVMGroupAndRequestTimes(
544 unsigned int NumberOfActivePlanes,
545 bool GPUVMEnable,
546 unsigned int GPUVMMaxPageTableLevels,
547 unsigned int HTotal[],
548 int BytePerPixelC[],
549 double DestinationLinesToRequestVMInVBlank[],
550 double DestinationLinesToRequestVMInImmediateFlip[],
551 bool DCCEnable[],
552 double PixelClock[],
553 int dpte_row_width_luma_ub[],
554 int dpte_row_width_chroma_ub[],
555 int vm_group_bytes[],
556 unsigned int dpde0_bytes_per_frame_ub_l[],
557 unsigned int dpde0_bytes_per_frame_ub_c[],
558 int meta_pte_bytes_per_frame_ub_l[],
559 int meta_pte_bytes_per_frame_ub_c[],
560 double TimePerVMGroupVBlank[],
561 double TimePerVMGroupFlip[],
562 double TimePerVMRequestVBlank[],
563 double TimePerVMRequestFlip[]);
564
565 static void CalculateStutterEfficiency(
566 int NumberOfActivePlanes,
567 long ROBBufferSizeInKByte,
568 double TotalDataReadBandwidth,
569 double DCFCLK,
570 double ReturnBW,
571 double SRExitTime,
572 bool SynchronizedVBlank,
573 int DPPPerPlane[],
574 unsigned int DETBufferSizeY[],
575 int BytePerPixelY[],
576 double BytePerPixelDETY[],
577 double SwathWidthY[],
578 int SwathHeightY[],
579 int SwathHeightC[],
580 double DCCRateLuma[],
581 double DCCRateChroma[],
582 int HTotal[],
583 int VTotal[],
584 double PixelClock[],
585 double VRatio[],
586 enum scan_direction_class SourceScan[],
587 int BlockHeight256BytesY[],
588 int BlockWidth256BytesY[],
589 int BlockHeight256BytesC[],
590 int BlockWidth256BytesC[],
591 int DCCYMaxUncompressedBlock[],
592 int DCCCMaxUncompressedBlock[],
593 int VActive[],
594 bool DCCEnable[],
595 bool WritebackEnable[],
596 double ReadBandwidthPlaneLuma[],
597 double ReadBandwidthPlaneChroma[],
598 double meta_row_bw[],
599 double dpte_row_bw[],
600 double *StutterEfficiencyNotIncludingVBlank,
601 double *StutterEfficiency,
602 double *StutterPeriodOut);
603
604 static void CalculateSwathAndDETConfiguration(
605 bool ForceSingleDPP,
606 int NumberOfActivePlanes,
607 unsigned int DETBufferSizeInKByte,
608 double MaximumSwathWidthLuma[],
609 double MaximumSwathWidthChroma[],
610 enum scan_direction_class SourceScan[],
611 enum source_format_class SourcePixelFormat[],
612 enum dm_swizzle_mode SurfaceTiling[],
613 int ViewportWidth[],
614 int ViewportHeight[],
615 int SurfaceWidthY[],
616 int SurfaceWidthC[],
617 int SurfaceHeightY[],
618 int SurfaceHeightC[],
619 int Read256BytesBlockHeightY[],
620 int Read256BytesBlockHeightC[],
621 int Read256BytesBlockWidthY[],
622 int Read256BytesBlockWidthC[],
623 enum odm_combine_mode ODMCombineEnabled[],
624 int BlendingAndTiming[],
625 int BytePerPixY[],
626 int BytePerPixC[],
627 double BytePerPixDETY[],
628 double BytePerPixDETC[],
629 int HActive[],
630 double HRatio[],
631 double HRatioChroma[],
632 int DPPPerPlane[],
633 int swath_width_luma_ub[],
634 int swath_width_chroma_ub[],
635 double SwathWidth[],
636 double SwathWidthChroma[],
637 int SwathHeightY[],
638 int SwathHeightC[],
639 unsigned int DETBufferSizeY[],
640 unsigned int DETBufferSizeC[],
641 bool ViewportSizeSupportPerPlane[],
642 bool *ViewportSizeSupport);
643 static void CalculateSwathWidth(
644 bool ForceSingleDPP,
645 int NumberOfActivePlanes,
646 enum source_format_class SourcePixelFormat[],
647 enum scan_direction_class SourceScan[],
648 unsigned int ViewportWidth[],
649 unsigned int ViewportHeight[],
650 unsigned int SurfaceWidthY[],
651 unsigned int SurfaceWidthC[],
652 unsigned int SurfaceHeightY[],
653 unsigned int SurfaceHeightC[],
654 enum odm_combine_mode ODMCombineEnabled[],
655 int BytePerPixY[],
656 int BytePerPixC[],
657 int Read256BytesBlockHeightY[],
658 int Read256BytesBlockHeightC[],
659 int Read256BytesBlockWidthY[],
660 int Read256BytesBlockWidthC[],
661 int BlendingAndTiming[],
662 unsigned int HActive[],
663 double HRatio[],
664 int DPPPerPlane[],
665 double SwathWidthSingleDPPY[],
666 double SwathWidthSingleDPPC[],
667 double SwathWidthY[],
668 double SwathWidthC[],
669 int MaximumSwathHeightY[],
670 int MaximumSwathHeightC[],
671 unsigned int swath_width_luma_ub[],
672 unsigned int swath_width_chroma_ub[]);
673 static double CalculateExtraLatency(
674 long RoundTripPingLatencyCycles,
675 long ReorderingBytes,
676 double DCFCLK,
677 int TotalNumberOfActiveDPP,
678 int PixelChunkSizeInKByte,
679 int TotalNumberOfDCCActiveDPP,
680 int MetaChunkSize,
681 double ReturnBW,
682 bool GPUVMEnable,
683 bool HostVMEnable,
684 int NumberOfActivePlanes,
685 int NumberOfDPP[],
686 int dpte_group_bytes[],
687 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
688 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
689 double HostVMMinPageSize,
690 int HostVMMaxNonCachedPageTableLevels);
691 static double CalculateExtraLatencyBytes(
692 long ReorderingBytes,
693 int TotalNumberOfActiveDPP,
694 int PixelChunkSizeInKByte,
695 int TotalNumberOfDCCActiveDPP,
696 int MetaChunkSize,
697 bool GPUVMEnable,
698 bool HostVMEnable,
699 int NumberOfActivePlanes,
700 int NumberOfDPP[],
701 int dpte_group_bytes[],
702 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
703 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
704 double HostVMMinPageSize,
705 int HostVMMaxNonCachedPageTableLevels);
706 static double CalculateUrgentLatency(
707 double UrgentLatencyPixelDataOnly,
708 double UrgentLatencyPixelMixedWithVMData,
709 double UrgentLatencyVMDataOnly,
710 bool DoUrgentLatencyAdjustment,
711 double UrgentLatencyAdjustmentFabricClockComponent,
712 double UrgentLatencyAdjustmentFabricClockReference,
713 double FabricClockSingle);
714
715 static bool CalculateBytePerPixelAnd256BBlockSizes(
716 enum source_format_class SourcePixelFormat,
717 enum dm_swizzle_mode SurfaceTiling,
718 unsigned int *BytePerPixelY,
719 unsigned int *BytePerPixelC,
720 double *BytePerPixelDETY,
721 double *BytePerPixelDETC,
722 unsigned int *BlockHeight256BytesY,
723 unsigned int *BlockHeight256BytesC,
724 unsigned int *BlockWidth256BytesY,
725 unsigned int *BlockWidth256BytesC);
726
/*
 * dml30_recalculate - run the full recalculation sequence on mode_lib.
 *
 * Non-static entry point of this file. It calls four stages one after the
 * other, each taking the same mode_lib pointer, and returns nothing.
 * mode_lib is passed through without any NULL check.
 *
 * NOTE(review): the first two stages are not declared in this file
 * (presumably they come from an included header) and the bodies of the last
 * two are not shown here; the stages presumably communicate through
 * mode_lib, so the call order should be preserved - confirm.
 */
void dml30_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
734
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)735 static unsigned int dscceComputeDelay(
736 unsigned int bpc,
737 double BPP,
738 unsigned int sliceWidth,
739 unsigned int numSlices,
740 enum output_format_class pixelFormat,
741 enum output_encoder_class Output)
742 {
743 // valid bpc = source bits per component in the set of {8, 10, 12}
744 // valid bpp = increments of 1/16 of a bit
745 // min = 6/7/8 in N420/N422/444, respectively
746 // max = such that compression is 1:1
747 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
748 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
749 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
750
751 // fixed value
752 unsigned int rcModelSize = 8192;
753
754 // N422/N420 operate at 2 pixels per clock
755 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
756 Delay, pixels;
757
758 if (pixelFormat == dm_420)
759 pixelsPerClock = 2;
760 // #all other modes operate at 1 pixel per clock
761 else if (pixelFormat == dm_444)
762 pixelsPerClock = 1;
763 else if (pixelFormat == dm_n422)
764 pixelsPerClock = 2;
765 else
766 pixelsPerClock = 1;
767
768 //initial transmit delay as per PPS
769 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
770
771 //compute ssm delay
772 if (bpc == 8)
773 D = 81;
774 else if (bpc == 10)
775 D = 89;
776 else
777 D = 113;
778
779 //divide by pixel per cycle to compute slice width as seen by DSC
780 w = sliceWidth / pixelsPerClock;
781
782 //422 mode has an additional cycle of delay
783 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
784 s = 0;
785 else
786 s = 1;
787
788 //main calculation for the dscce
789 ix = initalXmitDelay + 45;
790 wx = (w + 2) / 3;
791 P = 3 * wx - w;
792 l0 = ix / w;
793 a = ix + P * l0;
794 ax = (a + 2) / 3 + D + 6 + 1;
795 L = (ax + wx - 1) / wx;
796 if ((ix % w) == 0 && P != 0)
797 lstall = 1;
798 else
799 lstall = 0;
800 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
801
802 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
803 pixels = Delay * 3 * pixelsPerClock;
804 return pixels;
805 }
806
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)807 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
808 {
809 unsigned int Delay = 0;
810
811 if (pixelFormat == dm_420) {
812 // sfr
813 Delay = Delay + 2;
814 // dsccif
815 Delay = Delay + 0;
816 // dscc - input deserializer
817 Delay = Delay + 3;
818 // dscc gets pixels every other cycle
819 Delay = Delay + 2;
820 // dscc - input cdc fifo
821 Delay = Delay + 12;
822 // dscc gets pixels every other cycle
823 Delay = Delay + 13;
824 // dscc - cdc uncertainty
825 Delay = Delay + 2;
826 // dscc - output cdc fifo
827 Delay = Delay + 7;
828 // dscc gets pixels every other cycle
829 Delay = Delay + 3;
830 // dscc - cdc uncertainty
831 Delay = Delay + 2;
832 // dscc - output serializer
833 Delay = Delay + 1;
834 // sft
835 Delay = Delay + 1;
836 } else if (pixelFormat == dm_n422) {
837 // sfr
838 Delay = Delay + 2;
839 // dsccif
840 Delay = Delay + 1;
841 // dscc - input deserializer
842 Delay = Delay + 5;
843 // dscc - input cdc fifo
844 Delay = Delay + 25;
845 // dscc - cdc uncertainty
846 Delay = Delay + 2;
847 // dscc - output cdc fifo
848 Delay = Delay + 10;
849 // dscc - cdc uncertainty
850 Delay = Delay + 2;
851 // dscc - output serializer
852 Delay = Delay + 1;
853 // sft
854 Delay = Delay + 1;
855 }
856 else {
857 // sfr
858 Delay = Delay + 2;
859 // dsccif
860 Delay = Delay + 0;
861 // dscc - input deserializer
862 Delay = Delay + 3;
863 // dscc - input cdc fifo
864 Delay = Delay + 12;
865 // dscc - cdc uncertainty
866 Delay = Delay + 2;
867 // dscc - output cdc fifo
868 Delay = Delay + 7;
869 // dscc - output serializer
870 Delay = Delay + 1;
871 // dscc - cdc uncertainty
872 Delay = Delay + 2;
873 // sft
874 Delay = Delay + 1;
875 }
876
877 return Delay;
878 }
879
/*
 * CalculatePrefetchSchedule() - compute the prefetch schedule of one pipe.
 *
 * Two candidate prefetch durations (in destination lines) are evaluated:
 *   - dst_y_prefetch_equ: the lines left between VStartup and active once
 *     Tsetup, max(TWait + TCalc, *Tdmdl) and the after-scaler delay have
 *     been subtracted, rounded to quarter lines;
 *   - dst_y_prefetch_oto: Tvm_oto + 2 * Tr0_oto (in quarter lines) plus
 *     max(PrefetchSourceLinesY, PrefetchSourceLinesC).
 * The shorter of the two is reported, together with the matching bandwidth
 * and the VM / row fetch times.
 *
 * Inputs of note:
 *   myPipe       per-pipe clocks and timing (DPPCLK, DISPCLK, PixelClock,
 *                HTotal, ...), read only.
 *   VStartup / MaxVStartup
 *                VStartup under test; the "skip pixel data" bandwidth
 *                variants are only considered when VStartup == MaxVStartup.
 *   HostVMMinPageSize, SwathWidthY, SwathWidthC, VUpdateOffsetPix,
 *   VUpdateWidthPix, VReadyOffsetPix
 *                are not referenced by name in this function body.
 *
 * Outputs (written through the pointer parameters):
 *   *Tdmdl, *Tdmdl_vm, *NotEnoughTimeForDynamicMetadata  - always written.
 *   *DSTXAfterScaler, *DSTYAfterScaler, *Tno_bw, *prefetch_vmrow_bw and the
 *   prefetch results (*DestinationLinesForPrefetch, *PrefetchBandwidth,
 *   *DestinationLinesToRequestVMInVBlank,
 *   *DestinationLinesToRequestRowInVBlank, *VRatioPrefetchY/C,
 *   *RequiredPrefetchPixDataBWLuma/Chroma) - written only when both
 *   myPipe->DPPCLK and myPipe->DISPCLK are non-zero.
 *
 * Return: true on failure, false on success. Failure is either the early
 * exit taken when myPipe->DPPCLK or myPipe->DISPCLK is 0.0, or MyError being
 * raised by one of the schedule checks; in the MyError case the prefetch
 * results listed above are zeroed before returning.
 */
static bool CalculatePrefetchSchedule(
		struct display_mode_lib *mode_lib,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		Pipe *myPipe,
		unsigned int DSCDelay,
		double DPPCLKDelaySubtotalPlusCNVCFormater,
		double DPPCLKDelaySCL,
		double DPPCLKDelaySCLLBOnly,
		double DPPCLKDelayCNVCCursor,
		double DISPCLKDelaySubtotal,
		unsigned int DPP_RECOUT_WIDTH,
		enum output_format_class OutputFormat,
		unsigned int MaxInterDCNTileRepeaters,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		unsigned int GPUVMPageTableLevels,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		double HostVMMinPageSize,
		bool DynamicMetadataEnable,
		bool DynamicMetadataVMEnabled,
		int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int DynamicMetadataTransmittedBytes,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		int BytePerPixelY,
		double VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		int BytePerPixelC,
		double VInitPreFillC,
		unsigned int MaxNumSwathC,
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		bool ProgressiveToInterlaceUnitInOPP,
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	bool MyError = false;
	unsigned int DPPCycles = 0, DISPCLKCycles = 0;
	double DSTTotalPixelsAfterScaler = 0;
	double LineTime = 0, Tsetup = 0;
	double dst_y_prefetch_equ = 0;
	double Tsw_oto = 0;
	double prefetch_bw_oto = 0;
	double Tvm_oto = 0;
	double Tr0_oto = 0;
	double Tvm_oto_lines = 0;
	double Tr0_oto_lines = 0;
	double dst_y_prefetch_oto = 0;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double HostVMInefficiencyFactor = 0;
	unsigned int HostVMDynamicLevelsTrips = 0;
	double trip_to_mem = 0;
	double Tvm_trips = 0;
	double Tr0_trips = 0;
	double Tvm_trips_rounded = 0;
	double Tr0_trips_rounded = 0;
	double Lsw_oto = 0;
	double Tpre_rounded = 0;
	double prefetch_bw_equ = 0;
	double Tvm_equ = 0;
	double Tr0_equ = 0;
	double Tdmbf = 0;
	double Tdmec = 0;
	double Tdmsks = 0;

	/* Host VM only adds inefficiency / extra trips when GPUVM is on too. */
	if (GPUVMEnable == true && HostVMEnable == true) {
		HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMInefficiencyFactor = 1;
		HostVMDynamicLevelsTrips = 0;
	}

	/* Fills in Tsetup, Tdmbf, Tdmec and Tdmsks. */
	CalculateDynamicMetadataParameters(
			MaxInterDCNTileRepeaters,
			myPipe->DPPCLK,
			myPipe->DISPCLK,
			myPipe->DCFCLKDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			DynamicMetadataTransmittedBytes,
			DynamicMetadataLinesBeforeActiveRequired,
			myPipe->InterlaceEnable,
			ProgressiveToInterlaceUnitInOPP,
			&Tsetup,
			&Tdmbf,
			&Tdmec,
			&Tdmsks);

	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	} else {
		*Tdmdl = TWait + UrgentExtraLatency;
	}

	if (DynamicMetadataEnable == true) {
		if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			/*
			 * VStartup does not leave room for the dynamic metadata
			 * stages; report the terms of the failed comparison.
			 */
			*NotEnoughTimeForDynamicMetadata = true;
			dml_print("DML: Not Enough Time for Dynamic Meta!\n");
			dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
			dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
			dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
			dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);

	/* Pipeline delay after the scaler, in DPPCLK and DISPCLK cycles. */
	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;

	DISPCLKCycles = DISPCLKDelaySubtotal;

	/* Both clocks are used as divisors below; bail out with an error. */
	if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
		return true;

	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
			+ DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;

	if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Re-split the total pixel delay into whole lines plus a remainder. */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));

	MyError = false;


	/* Trip times, also rounded up to a quarter of a line. */
	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
	Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
	Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;

	if (GPUVMEnable) {
		if (GPUVMPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
		} else
			*Tno_bw = 0;
	} else if (!myPipe->DCCEnable)
		*Tno_bw = LineTime;
	else
		*Tno_bw = LineTime / 4;

	/* Lines available for prefetch given this VStartup. */
	dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
			- (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);

	Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
	Tsw_oto = Lsw_oto * LineTime;

	prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;

	if (GPUVMEnable == true) {
		Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				Tvm_trips,
				LineTime / 4.0);
	} else
		Tvm_oto = LineTime / 4.0;

	if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max3(
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				LineTime - Tvm_oto, LineTime / 4);
	} else
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	/* Quantize to quarter lines (with a 0.125 line bias before flooring). */
	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;

	dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
	dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);

	dml_print("DML: LineTime: %f\n", LineTime);
	dml_print("DML: VStartup: %d\n", VStartup);
	dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
	dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
	dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
	dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
	dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
	dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
	dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
	dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
	dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
	dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
	dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);

	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	if (dst_y_prefetch_equ > 1) {
		/*
		 * Four candidate bandwidths, differing in which stages are
		 * bandwidth-bound and which are bound by the trip times:
		 *   1: VM + rows + pixel data over (Tpre - Tno_bw)
		 *   2: VM + pixel data, rows take 2 * Tr0_trips_rounded
		 *   3: rows + pixel data, VM takes Tvm_trips_rounded
		 *   4: pixel data only, both VM and rows take their trip times
		 * A candidate stays 0 when its time budget is not positive.
		 */
		double PrefetchBandwidth1 = 0;
		double PrefetchBandwidth2 = 0;
		double PrefetchBandwidth3 = 0;
		double PrefetchBandwidth4 = 0;

		if (Tpre_rounded - *Tno_bw > 0)
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
					+ PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
					/ (Tpre_rounded - *Tno_bw);
		else
			PrefetchBandwidth1 = 0;

		if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
					HostVMInefficiencyFactor + PrefetchSourceLinesY *
					swath_width_luma_ub * BytePerPixelY +
					PrefetchSourceLinesC * swath_width_chroma_ub *
					BytePerPixelC) /
					(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0)
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
					HostVMInefficiencyFactor + PrefetchSourceLinesY *
					swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
					swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
					Tvm_trips_rounded);
		else
			PrefetchBandwidth3 = 0;

		if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
					/ (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth4 = 0;

		{
			/*
			 * A case is OK when its bandwidth is self-consistent with
			 * the assumption it was derived under (VM / row fetch at
			 * that bandwidth is at least, or less than, the rounded
			 * trip time). First match wins; case 4 is the fallback.
			 */
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
						>= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
						>= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
						< Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			if (Case1OK) {
				prefetch_bw_equ = PrefetchBandwidth1;
			} else if (Case2OK) {
				prefetch_bw_equ = PrefetchBandwidth2;
			} else if (Case3OK) {
				prefetch_bw_equ = PrefetchBandwidth3;
			} else {
				prefetch_bw_equ = PrefetchBandwidth4;
			}

			dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);

			if (prefetch_bw_equ > 0) {
				if (GPUVMEnable) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((GPUVMEnable || myPipe->DCCEnable)) {
					Tr0_equ = dml_max4(
							(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
							Tr0_trips,
							(LineTime - Tvm_equ) / 2,
							LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
			}
		}

		/* Report whichever schedule needs fewer destination lines. */
		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
		} else {
			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			TimeForFetchingMetaPTE = Tvm_equ;
			TimeForFetchingRowInVBlank = Tr0_equ;
			*PrefetchBandwidth = prefetch_bw_equ;
		}

		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;


		/* What is left after the VM fetch and the two row fetches. */
		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
				- 2 * *DestinationLinesToRequestRowInVBlank;

		if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {

			*VRatioPrefetchY = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
							(double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchY = 0;
				}
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC,
							(double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
					*VRatioPrefetchC = 0;
				}
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
		} else {
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
			dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}

		dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
		dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
		dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);

	} else {
		/* One line or less available for prefetch: no valid schedule. */
		MyError = true;
		dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
	}

	{
		/*
		 * Bandwidth needed for the VM and row fetches. Having bytes to
		 * fetch but zero lines allotted to fetch them in is an error.
		 */
		double prefetch_vm_bw = 0;
		double prefetch_row_bw = 0;

		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}
		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
		} else {
			prefetch_row_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error, hand back an all-zero schedule. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
}
1366
/*
 * Returns (VCOSpeed * 4) divided by the divider obtained from
 * dml_floor(VCOSpeed * 4 / Clock, 1). Assuming dml_floor(x, 1) rounds down
 * to a whole number, the divider is never larger than the exact ratio, so
 * the result is never below Clock.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1371
/*
 * Returns (VCOSpeed * 4) divided by the divider obtained from
 * dml_ceil(VCOSpeed * 4.0 / Clock, 1). Assuming dml_ceil(x, 1) rounds up to
 * a whole number, the divider is never smaller than the exact ratio, so the
 * result is never above Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1376
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,double DETBufferSize,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum scan_direction_class ScanOrientation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)1377 static void CalculateDCCConfiguration(
1378 bool DCCEnabled,
1379 bool DCCProgrammingAssumesScanDirectionUnknown,
1380 enum source_format_class SourcePixelFormat,
1381 unsigned int SurfaceWidthLuma,
1382 unsigned int SurfaceWidthChroma,
1383 unsigned int SurfaceHeightLuma,
1384 unsigned int SurfaceHeightChroma,
1385 double DETBufferSize,
1386 unsigned int RequestHeight256ByteLuma,
1387 unsigned int RequestHeight256ByteChroma,
1388 enum dm_swizzle_mode TilingFormat,
1389 unsigned int BytePerPixelY,
1390 unsigned int BytePerPixelC,
1391 double BytePerPixelDETY,
1392 double BytePerPixelDETC,
1393 enum scan_direction_class ScanOrientation,
1394 unsigned int *MaxUncompressedBlockLuma,
1395 unsigned int *MaxUncompressedBlockChroma,
1396 unsigned int *MaxCompressedBlockLuma,
1397 unsigned int *MaxCompressedBlockChroma,
1398 unsigned int *IndependentBlockLuma,
1399 unsigned int *IndependentBlockChroma)
1400 {
1401 int yuv420 = 0;
1402 int horz_div_l = 0;
1403 int horz_div_c = 0;
1404 int vert_div_l = 0;
1405 int vert_div_c = 0;
1406
1407 int req128_horz_wc_l = 0;
1408 int req128_horz_wc_c = 0;
1409 int req128_vert_wc_l = 0;
1410 int req128_vert_wc_c = 0;
1411 int segment_order_horz_contiguous_luma = 0;
1412 int segment_order_horz_contiguous_chroma = 0;
1413 int segment_order_vert_contiguous_luma = 0;
1414 int segment_order_vert_contiguous_chroma = 0;
1415
1416 long full_swath_bytes_horz_wc_l = 0;
1417 long full_swath_bytes_horz_wc_c = 0;
1418 long full_swath_bytes_vert_wc_l = 0;
1419 long full_swath_bytes_vert_wc_c = 0;
1420
1421 long swath_buf_size = 0;
1422 double detile_buf_vp_horz_limit = 0;
1423 double detile_buf_vp_vert_limit = 0;
1424
1425 long MAS_vp_horz_limit = 0;
1426 long MAS_vp_vert_limit = 0;
1427 long max_vp_horz_width = 0;
1428 long max_vp_vert_height = 0;
1429 long eff_surf_width_l = 0;
1430 long eff_surf_width_c = 0;
1431 long eff_surf_height_l = 0;
1432 long eff_surf_height_c = 0;
1433
1434 typedef enum {
1435 REQ_256Bytes,
1436 REQ_128BytesNonContiguous,
1437 REQ_128BytesContiguous,
1438 REQ_NA
1439 } RequestType;
1440
1441 RequestType RequestLuma;
1442 RequestType RequestChroma;
1443
1444 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1445 horz_div_l = 1;
1446 horz_div_c = 1;
1447 vert_div_l = 1;
1448 vert_div_c = 1;
1449
1450 if (BytePerPixelY == 1)
1451 vert_div_l = 0;
1452 if (BytePerPixelC == 1)
1453 vert_div_c = 0;
1454 if (BytePerPixelY == 8
1455 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1456 || TilingFormat == dm_sw_64kb_s_x))
1457 horz_div_l = 0;
1458 if (BytePerPixelC == 8
1459 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
1460 || TilingFormat == dm_sw_64kb_s_x))
1461 horz_div_c = 0;
1462
1463 if (BytePerPixelC == 0) {
1464 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1465 detile_buf_vp_horz_limit = (double) swath_buf_size
1466 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1467 / (1 + horz_div_l));
1468 detile_buf_vp_vert_limit = (double) swath_buf_size
1469 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1470 } else {
1471 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1472 detile_buf_vp_horz_limit = (double) swath_buf_size
1473 / ((double) RequestHeight256ByteLuma * BytePerPixelY
1474 / (1 + horz_div_l)
1475 + (double) RequestHeight256ByteChroma
1476 * BytePerPixelC / (1 + horz_div_c)
1477 / (1 + yuv420));
1478 detile_buf_vp_vert_limit = (double) swath_buf_size
1479 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
1480 + 256.0 / RequestHeight256ByteChroma
1481 / (1 + vert_div_c) / (1 + yuv420));
1482 }
1483
1484 if (SourcePixelFormat == dm_420_10) {
1485 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1486 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1487 }
1488
1489 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1490 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1491
1492 MAS_vp_horz_limit = 5760;
1493 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1494 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1495 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1496 eff_surf_width_l =
1497 (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1498 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1499 eff_surf_height_l = (
1500 SurfaceHeightLuma > max_vp_vert_height ?
1501 max_vp_vert_height : SurfaceHeightLuma);
1502 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1503
1504 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1505 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1506 if (BytePerPixelC > 0) {
1507 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma
1508 * BytePerPixelC;
1509 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1510 } else {
1511 full_swath_bytes_horz_wc_c = 0;
1512 full_swath_bytes_vert_wc_c = 0;
1513 }
1514
1515 if (SourcePixelFormat == dm_420_10) {
1516 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1517 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1518 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1519 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1520 }
1521
1522 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1523 req128_horz_wc_l = 0;
1524 req128_horz_wc_c = 0;
1525 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
1526 && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c
1527 <= DETBufferSize) {
1528 req128_horz_wc_l = 0;
1529 req128_horz_wc_c = 1;
1530 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
1531 && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c
1532 <= DETBufferSize) {
1533 req128_horz_wc_l = 1;
1534 req128_horz_wc_c = 0;
1535 } else {
1536 req128_horz_wc_l = 1;
1537 req128_horz_wc_c = 1;
1538 }
1539
1540 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1541 req128_vert_wc_l = 0;
1542 req128_vert_wc_c = 0;
1543 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
1544 && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c
1545 <= DETBufferSize) {
1546 req128_vert_wc_l = 0;
1547 req128_vert_wc_c = 1;
1548 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
1549 && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c
1550 <= DETBufferSize) {
1551 req128_vert_wc_l = 1;
1552 req128_vert_wc_c = 0;
1553 } else {
1554 req128_vert_wc_l = 1;
1555 req128_vert_wc_c = 1;
1556 }
1557
1558 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1559 segment_order_horz_contiguous_luma = 0;
1560 } else {
1561 segment_order_horz_contiguous_luma = 1;
1562 }
1563 if ((BytePerPixelY == 8
1564 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1565 || TilingFormat == dm_sw_64kb_d_t
1566 || TilingFormat == dm_sw_64kb_r_x))
1567 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1568 segment_order_vert_contiguous_luma = 0;
1569 } else {
1570 segment_order_vert_contiguous_luma = 1;
1571 }
1572 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1573 segment_order_horz_contiguous_chroma = 0;
1574 } else {
1575 segment_order_horz_contiguous_chroma = 1;
1576 }
1577 if ((BytePerPixelC == 8
1578 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
1579 || TilingFormat == dm_sw_64kb_d_t
1580 || TilingFormat == dm_sw_64kb_r_x))
1581 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1582 segment_order_vert_contiguous_chroma = 0;
1583 } else {
1584 segment_order_vert_contiguous_chroma = 1;
1585 }
1586
1587 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1588 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1589 RequestLuma = REQ_256Bytes;
1590 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0)
1591 || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1592 RequestLuma = REQ_128BytesNonContiguous;
1593 } else {
1594 RequestLuma = REQ_128BytesContiguous;
1595 }
1596 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1597 RequestChroma = REQ_256Bytes;
1598 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0)
1599 || (req128_vert_wc_c == 1
1600 && segment_order_vert_contiguous_chroma == 0)) {
1601 RequestChroma = REQ_128BytesNonContiguous;
1602 } else {
1603 RequestChroma = REQ_128BytesContiguous;
1604 }
1605 } else if (ScanOrientation != dm_vert) {
1606 if (req128_horz_wc_l == 0) {
1607 RequestLuma = REQ_256Bytes;
1608 } else if (segment_order_horz_contiguous_luma == 0) {
1609 RequestLuma = REQ_128BytesNonContiguous;
1610 } else {
1611 RequestLuma = REQ_128BytesContiguous;
1612 }
1613 if (req128_horz_wc_c == 0) {
1614 RequestChroma = REQ_256Bytes;
1615 } else if (segment_order_horz_contiguous_chroma == 0) {
1616 RequestChroma = REQ_128BytesNonContiguous;
1617 } else {
1618 RequestChroma = REQ_128BytesContiguous;
1619 }
1620 } else {
1621 if (req128_vert_wc_l == 0) {
1622 RequestLuma = REQ_256Bytes;
1623 } else if (segment_order_vert_contiguous_luma == 0) {
1624 RequestLuma = REQ_128BytesNonContiguous;
1625 } else {
1626 RequestLuma = REQ_128BytesContiguous;
1627 }
1628 if (req128_vert_wc_c == 0) {
1629 RequestChroma = REQ_256Bytes;
1630 } else if (segment_order_vert_contiguous_chroma == 0) {
1631 RequestChroma = REQ_128BytesNonContiguous;
1632 } else {
1633 RequestChroma = REQ_128BytesContiguous;
1634 }
1635 }
1636
1637 if (RequestLuma == REQ_256Bytes) {
1638 *MaxUncompressedBlockLuma = 256;
1639 *MaxCompressedBlockLuma = 256;
1640 *IndependentBlockLuma = 0;
1641 } else if (RequestLuma == REQ_128BytesContiguous) {
1642 *MaxUncompressedBlockLuma = 256;
1643 *MaxCompressedBlockLuma = 128;
1644 *IndependentBlockLuma = 128;
1645 } else {
1646 *MaxUncompressedBlockLuma = 256;
1647 *MaxCompressedBlockLuma = 64;
1648 *IndependentBlockLuma = 64;
1649 }
1650
1651 if (RequestChroma == REQ_256Bytes) {
1652 *MaxUncompressedBlockChroma = 256;
1653 *MaxCompressedBlockChroma = 256;
1654 *IndependentBlockChroma = 0;
1655 } else if (RequestChroma == REQ_128BytesContiguous) {
1656 *MaxUncompressedBlockChroma = 256;
1657 *MaxCompressedBlockChroma = 128;
1658 *IndependentBlockChroma = 128;
1659 } else {
1660 *MaxUncompressedBlockChroma = 256;
1661 *MaxCompressedBlockChroma = 64;
1662 *IndependentBlockChroma = 64;
1663 }
1664
1665 if (DCCEnabled != true || BytePerPixelC == 0) {
1666 *MaxUncompressedBlockChroma = 0;
1667 *MaxCompressedBlockChroma = 0;
1668 *IndependentBlockChroma = 0;
1669 }
1670
1671 if (DCCEnabled != true) {
1672 *MaxUncompressedBlockLuma = 0;
1673 *MaxCompressedBlockLuma = 0;
1674 *IndependentBlockLuma = 0;
1675 }
1676 }
1677
1678
CalculatePrefetchSourceLines(struct display_mode_lib * mode_lib,double VRatio,double vtaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)1679 static double CalculatePrefetchSourceLines(
1680 struct display_mode_lib *mode_lib,
1681 double VRatio,
1682 double vtaps,
1683 bool Interlace,
1684 bool ProgressiveToInterlaceUnitInOPP,
1685 unsigned int SwathHeight,
1686 unsigned int ViewportYStart,
1687 double *VInitPreFill,
1688 unsigned int *MaxNumSwath)
1689 {
1690 unsigned int MaxPartialSwath = 0;
1691
1692 if (ProgressiveToInterlaceUnitInOPP)
1693 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1694 else
1695 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1696
1697 if (!mode_lib->vba.IgnoreViewportPositioning) {
1698
1699 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1700
1701 if (*VInitPreFill > 1.0)
1702 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1703 else
1704 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2)
1705 % SwathHeight;
1706 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1707
1708 } else {
1709
1710 if (ViewportYStart != 0)
1711 dml_print(
1712 "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1713
1714 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1715
1716 if (*VInitPreFill > 1.0)
1717 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1718 else
1719 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1)
1720 % SwathHeight;
1721 }
1722
1723 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1724 }
1725
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1726 static unsigned int CalculateVMAndRowBytes(
1727 struct display_mode_lib *mode_lib,
1728 bool DCCEnable,
1729 unsigned int BlockHeight256Bytes,
1730 unsigned int BlockWidth256Bytes,
1731 enum source_format_class SourcePixelFormat,
1732 unsigned int SurfaceTiling,
1733 unsigned int BytePerPixel,
1734 enum scan_direction_class ScanDirection,
1735 unsigned int SwathWidth,
1736 unsigned int ViewportHeight,
1737 bool GPUVMEnable,
1738 bool HostVMEnable,
1739 unsigned int HostVMMaxNonCachedPageTableLevels,
1740 unsigned int GPUVMMinPageSize,
1741 unsigned int HostVMMinPageSize,
1742 unsigned int PTEBufferSizeInRequests,
1743 unsigned int Pitch,
1744 unsigned int DCCMetaPitch,
1745 unsigned int *MacroTileWidth,
1746 unsigned int *MetaRowByte,
1747 unsigned int *PixelPTEBytesPerRow,
1748 bool *PTEBufferSizeNotExceeded,
1749 unsigned int *dpte_row_width_ub,
1750 unsigned int *dpte_row_height,
1751 unsigned int *MetaRequestWidth,
1752 unsigned int *MetaRequestHeight,
1753 unsigned int *meta_row_width,
1754 unsigned int *meta_row_height,
1755 unsigned int *vm_group_bytes,
1756 unsigned int *dpte_group_bytes,
1757 unsigned int *PixelPTEReqWidth,
1758 unsigned int *PixelPTEReqHeight,
1759 unsigned int *PTERequestSize,
1760 unsigned int *DPDE0BytesFrame,
1761 unsigned int *MetaPTEBytesFrame)
1762 {
1763 unsigned int MPDEBytesFrame = 0;
1764 unsigned int DCCMetaSurfaceBytes = 0;
1765 unsigned int MacroTileSizeBytes = 0;
1766 unsigned int MacroTileHeight = 0;
1767 unsigned int ExtraDPDEBytesFrame = 0;
1768 unsigned int PDEAndMetaPTEBytesFrame = 0;
1769 unsigned int PixelPTEReqHeightPTEs = 0;
1770 unsigned int HostVMDynamicLevels = 0;
1771
1772 double FractionOfPTEReturnDrop;
1773
1774 if (GPUVMEnable == true && HostVMEnable == true) {
1775 if (HostVMMinPageSize < 2048) {
1776 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1777 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1778 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1779 } else {
1780 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1781 }
1782 }
1783
1784 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1785 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1786 if (ScanDirection != dm_vert) {
1787 *meta_row_height = *MetaRequestHeight;
1788 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1789 + *MetaRequestWidth;
1790 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1791 } else {
1792 *meta_row_height = *MetaRequestWidth;
1793 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1794 + *MetaRequestHeight;
1795 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1796 }
1797 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1798 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1799 if (GPUVMEnable == true) {
1800 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1801 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1802 } else {
1803 *MetaPTEBytesFrame = 0;
1804 MPDEBytesFrame = 0;
1805 }
1806
1807 if (DCCEnable != true) {
1808 *MetaPTEBytesFrame = 0;
1809 MPDEBytesFrame = 0;
1810 *MetaRowByte = 0;
1811 }
1812
1813 if (SurfaceTiling == dm_sw_linear) {
1814 MacroTileSizeBytes = 256;
1815 MacroTileHeight = BlockHeight256Bytes;
1816 } else {
1817 MacroTileSizeBytes = 65536;
1818 MacroTileHeight = 16 * BlockHeight256Bytes;
1819 }
1820 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1821
1822 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1823 if (ScanDirection != dm_vert) {
1824 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1825 } else {
1826 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1827 }
1828 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1829 } else {
1830 *DPDE0BytesFrame = 0;
1831 ExtraDPDEBytesFrame = 0;
1832 }
1833
1834 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1835 + ExtraDPDEBytesFrame;
1836
1837 if (HostVMEnable == true) {
1838 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1839 }
1840
1841 if (SurfaceTiling == dm_sw_linear) {
1842 PixelPTEReqHeightPTEs = 1;
1843 *PixelPTEReqHeight = 1;
1844 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1845 *PTERequestSize = 64;
1846 FractionOfPTEReturnDrop = 0;
1847 } else if (MacroTileSizeBytes == 4096) {
1848 PixelPTEReqHeightPTEs = 1;
1849 *PixelPTEReqHeight = MacroTileHeight;
1850 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1851 *PTERequestSize = 64;
1852 if (ScanDirection != dm_vert)
1853 FractionOfPTEReturnDrop = 0;
1854 else
1855 FractionOfPTEReturnDrop = 7 / 8;
1856 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1857 PixelPTEReqHeightPTEs = 16;
1858 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1859 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1860 *PTERequestSize = 128;
1861 FractionOfPTEReturnDrop = 0;
1862 } else {
1863 PixelPTEReqHeightPTEs = 1;
1864 *PixelPTEReqHeight = MacroTileHeight;
1865 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1866 *PTERequestSize = 64;
1867 FractionOfPTEReturnDrop = 0;
1868 }
1869
1870 if (SurfaceTiling == dm_sw_linear) {
1871 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1872 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1873 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1874 } else if (ScanDirection != dm_vert) {
1875 *dpte_row_height = *PixelPTEReqHeight;
1876 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1877 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1878 } else {
1879 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1880 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1881 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1882 }
1883 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1884 <= 64 * PTEBufferSizeInRequests) {
1885 *PTEBufferSizeNotExceeded = true;
1886 } else {
1887 *PTEBufferSizeNotExceeded = false;
1888 }
1889
1890 if (GPUVMEnable != true) {
1891 *PixelPTEBytesPerRow = 0;
1892 *PTEBufferSizeNotExceeded = true;
1893 }
1894 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1895
1896 if (HostVMEnable == true) {
1897 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1898 }
1899
1900 if (HostVMEnable == true) {
1901 *vm_group_bytes = 512;
1902 *dpte_group_bytes = 512;
1903 } else if (GPUVMEnable == true) {
1904 *vm_group_bytes = 2048;
1905 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1906 *dpte_group_bytes = 512;
1907 } else {
1908 *dpte_group_bytes = 2048;
1909 }
1910 } else {
1911 *vm_group_bytes = 0;
1912 *dpte_group_bytes = 0;
1913 }
1914
1915 return PDEAndMetaPTEBytesFrame;
1916 }
1917
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1918 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1919 struct display_mode_lib *mode_lib)
1920 {
1921 struct vba_vars_st *v = &mode_lib->vba;
1922 unsigned int j, k;
1923 long ReorderBytes = 0;
1924 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1925 double MaxTotalRDBandwidth = 0;
1926 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1927 bool DestinationLineTimesForPrefetchLessThan2 = false;
1928 bool VRatioPrefetchMoreThan4 = false;
1929 double TWait;
1930
1931 v->WritebackDISPCLK = 0.0;
1932 v->DISPCLKWithRamping = 0;
1933 v->DISPCLKWithoutRamping = 0;
1934 v->GlobalDPPCLK = 0.0;
1935 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
1936 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1937 v->ReturnBusWidth * v->DCFCLK,
1938 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1939 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1940 if (v->HostVMEnable != true) {
1941 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1942 } else {
1943 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1944 }
1945 /* End DAL custom code */
1946
1947 // DISPCLK and DPPCLK Calculation
1948 //
1949 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1950 if (v->WritebackEnable[k]) {
1951 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1952 dml30_CalculateWriteBackDISPCLK(
1953 v->WritebackPixelFormat[k],
1954 v->PixelClock[k],
1955 v->WritebackHRatio[k],
1956 v->WritebackVRatio[k],
1957 v->WritebackHTaps[k],
1958 v->WritebackVTaps[k],
1959 v->WritebackSourceWidth[k],
1960 v->WritebackDestinationWidth[k],
1961 v->HTotal[k],
1962 v->WritebackLineBufferSize));
1963 }
1964 }
1965
1966 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1967 if (v->HRatio[k] > 1) {
1968 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1969 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1970 } else {
1971 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1972 v->MaxDCHUBToPSCLThroughput,
1973 v->MaxPSCLToLBThroughput);
1974 }
1975
1976 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1977 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1978 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1979
1980 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1981 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1982 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1983 }
1984
1985 if ((v->SourcePixelFormat[k] != dm_420_8
1986 && v->SourcePixelFormat[k] != dm_420_10
1987 && v->SourcePixelFormat[k] != dm_420_12
1988 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1989 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1990 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1991 } else {
1992 if (v->HRatioChroma[k] > 1) {
1993 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1994 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1995 } else {
1996 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1997 v->MaxDCHUBToPSCLThroughput,
1998 v->MaxPSCLToLBThroughput);
1999 }
2000 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2001 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2002 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
2003
2004 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
2005 && v->DPPCLKUsingSingleDPPChroma
2006 < 2 * v->PixelClock[k]) {
2007 v->DPPCLKUsingSingleDPPChroma = 2
2008 * v->PixelClock[k];
2009 }
2010
2011 v->DPPCLKUsingSingleDPP[k] = dml_max(
2012 v->DPPCLKUsingSingleDPPLuma,
2013 v->DPPCLKUsingSingleDPPChroma);
2014 }
2015 }
2016
2017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2018 if (v->BlendingAndTiming[k] != k)
2019 continue;
2020 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2021 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2022 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2023 * (1 + v->DISPCLKRampingMargin / 100));
2024 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2025 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2026 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2027 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2028 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2029 * (1 + v->DISPCLKRampingMargin / 100));
2030 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2031 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2032 } else {
2033 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
2034 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2035 * (1 + v->DISPCLKRampingMargin / 100));
2036 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
2037 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2038 }
2039 }
2040
2041 v->DISPCLKWithRamping = dml_max(
2042 v->DISPCLKWithRamping,
2043 v->WritebackDISPCLK);
2044 v->DISPCLKWithoutRamping = dml_max(
2045 v->DISPCLKWithoutRamping,
2046 v->WritebackDISPCLK);
2047
2048 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2049 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2050 v->DISPCLKWithRamping,
2051 v->DISPCLKDPPCLKVCOSpeed);
2052 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
2053 v->DISPCLKWithoutRamping,
2054 v->DISPCLKDPPCLKVCOSpeed);
2055 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2056 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
2057 v->DISPCLKDPPCLKVCOSpeed);
2058 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
2059 > v->MaxDispclkRoundedToDFSGranularity) {
2060 v->DISPCLK_calculated =
2061 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2062 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
2063 > v->MaxDispclkRoundedToDFSGranularity) {
2064 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2065 } else {
2066 v->DISPCLK_calculated =
2067 v->DISPCLKWithRampingRoundedToDFSGranularity;
2068 }
2069 v->DISPCLK = v->DISPCLK_calculated;
2070 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2071
2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2073 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2074 / v->DPPPerPlane[k]
2075 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2076 v->GlobalDPPCLK = dml_max(
2077 v->GlobalDPPCLK,
2078 v->DPPCLK_calculated[k]);
2079 }
2080 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2081 v->GlobalDPPCLK,
2082 v->DISPCLKDPPCLKVCOSpeed);
2083 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2084 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2085 * dml_ceil(
2086 v->DPPCLK_calculated[k] * 255.0
2087 / v->GlobalDPPCLK,
2088 1);
2089 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2090 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2091 }
2092
2093 // Urgent and B P-State/DRAM Clock Change Watermark
2094 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2095 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2096
2097 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2098 CalculateBytePerPixelAnd256BBlockSizes(
2099 v->SourcePixelFormat[k],
2100 v->SurfaceTiling[k],
2101 &v->BytePerPixelY[k],
2102 &v->BytePerPixelC[k],
2103 &v->BytePerPixelDETY[k],
2104 &v->BytePerPixelDETC[k],
2105 &v->BlockHeight256BytesY[k],
2106 &v->BlockHeight256BytesC[k],
2107 &v->BlockWidth256BytesY[k],
2108 &v->BlockWidth256BytesC[k]);
2109 }
2110
2111 CalculateSwathWidth(
2112 false,
2113 v->NumberOfActivePlanes,
2114 v->SourcePixelFormat,
2115 v->SourceScan,
2116 v->ViewportWidth,
2117 v->ViewportHeight,
2118 v->SurfaceWidthY,
2119 v->SurfaceWidthC,
2120 v->SurfaceHeightY,
2121 v->SurfaceHeightC,
2122 v->ODMCombineEnabled,
2123 v->BytePerPixelY,
2124 v->BytePerPixelC,
2125 v->BlockHeight256BytesY,
2126 v->BlockHeight256BytesC,
2127 v->BlockWidth256BytesY,
2128 v->BlockWidth256BytesC,
2129 v->BlendingAndTiming,
2130 v->HActive,
2131 v->HRatio,
2132 v->DPPPerPlane,
2133 v->SwathWidthSingleDPPY,
2134 v->SwathWidthSingleDPPC,
2135 v->SwathWidthY,
2136 v->SwathWidthC,
2137 v->dummyinteger3,
2138 v->dummyinteger4,
2139 v->swath_width_luma_ub,
2140 v->swath_width_chroma_ub);
2141
2142
2143 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2144 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2145 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2146 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2147 }
2148
2149
2150 // DCFCLK Deep Sleep
2151 CalculateDCFCLKDeepSleep(
2152 mode_lib,
2153 v->NumberOfActivePlanes,
2154 v->BytePerPixelY,
2155 v->BytePerPixelC,
2156 v->VRatio,
2157 v->VRatioChroma,
2158 v->SwathWidthY,
2159 v->SwathWidthC,
2160 v->DPPPerPlane,
2161 v->HRatio,
2162 v->HRatioChroma,
2163 v->PixelClock,
2164 v->PSCL_THROUGHPUT_LUMA,
2165 v->PSCL_THROUGHPUT_CHROMA,
2166 v->DPPCLK,
2167 v->ReadBandwidthPlaneLuma,
2168 v->ReadBandwidthPlaneChroma,
2169 v->ReturnBusWidth,
2170 &v->DCFCLKDeepSleep);
2171
2172 // DSCCLK
2173 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2174 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2175 v->DSCCLK_calculated[k] = 0.0;
2176 } else {
2177 if (v->OutputFormat[k] == dm_420)
2178 v->DSCFormatFactor = 2;
2179 else if (v->OutputFormat[k] == dm_444)
2180 v->DSCFormatFactor = 1;
2181 else if (v->OutputFormat[k] == dm_n422)
2182 v->DSCFormatFactor = 2;
2183 else
2184 v->DSCFormatFactor = 1;
2185 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2186 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2187 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2188 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2189 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2190 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2191 else
2192 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2193 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2194 }
2195 }
2196
2197 // DSC Delay
2198 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2199 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2200
2201 if (v->DSCEnabled[k] && BPP != 0) {
2202 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2203 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2204 BPP,
2205 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2206 v->NumberOfDSCSlices[k],
2207 v->OutputFormat[k],
2208 v->Output[k])
2209 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2210 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2211 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2212 BPP,
2213 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2214 v->NumberOfDSCSlices[k] / 2.0,
2215 v->OutputFormat[k],
2216 v->Output[k])
2217 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2218 } else {
2219 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2220 BPP,
2221 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2222 v->NumberOfDSCSlices[k] / 4.0,
2223 v->OutputFormat[k],
2224 v->Output[k])
2225 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2226 }
2227 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2228 } else {
2229 v->DSCDelay[k] = 0;
2230 }
2231 }
2232
2233 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2234 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2235 if (j != k && v->BlendingAndTiming[k] == j
2236 && v->DSCEnabled[j])
2237 v->DSCDelay[k] = v->DSCDelay[j];
2238
2239 // Prefetch
2240 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2241 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2242 unsigned int PixelPTEBytesPerRowY = 0;
2243 unsigned int MetaRowByteY = 0;
2244 unsigned int MetaRowByteC = 0;
2245 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2246 unsigned int PixelPTEBytesPerRowC = 0;
2247 bool PTEBufferSizeNotExceededY = 0;
2248 bool PTEBufferSizeNotExceededC = 0;
2249
2250
2251 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2252 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2253 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2254 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2255 } else {
2256 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2257 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2258
2259 }
2260 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2261 mode_lib,
2262 v->DCCEnable[k],
2263 v->BlockHeight256BytesC[k],
2264 v->BlockWidth256BytesC[k],
2265 v->SourcePixelFormat[k],
2266 v->SurfaceTiling[k],
2267 v->BytePerPixelC[k],
2268 v->SourceScan[k],
2269 v->SwathWidthC[k],
2270 v->ViewportHeightChroma[k],
2271 v->GPUVMEnable,
2272 v->HostVMEnable,
2273 v->HostVMMaxNonCachedPageTableLevels,
2274 v->GPUVMMinPageSize,
2275 v->HostVMMinPageSize,
2276 v->PTEBufferSizeInRequestsForChroma,
2277 v->PitchC[k],
2278 v->DCCMetaPitchC[k],
2279 &v->MacroTileWidthC[k],
2280 &MetaRowByteC,
2281 &PixelPTEBytesPerRowC,
2282 &PTEBufferSizeNotExceededC,
2283 &v->dpte_row_width_chroma_ub[k],
2284 &v->dpte_row_height_chroma[k],
2285 &v->meta_req_width_chroma[k],
2286 &v->meta_req_height_chroma[k],
2287 &v->meta_row_width_chroma[k],
2288 &v->meta_row_height_chroma[k],
2289 &v->dummyinteger1,
2290 &v->dummyinteger2,
2291 &v->PixelPTEReqWidthC[k],
2292 &v->PixelPTEReqHeightC[k],
2293 &v->PTERequestSizeC[k],
2294 &v->dpde0_bytes_per_frame_ub_c[k],
2295 &v->meta_pte_bytes_per_frame_ub_c[k]);
2296
2297 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2298 mode_lib,
2299 v->VRatioChroma[k],
2300 v->VTAPsChroma[k],
2301 v->Interlace[k],
2302 v->ProgressiveToInterlaceUnitInOPP,
2303 v->SwathHeightC[k],
2304 v->ViewportYStartC[k],
2305 &v->VInitPreFillC[k],
2306 &v->MaxNumSwathC[k]);
2307 } else {
2308 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2309 v->PTEBufferSizeInRequestsForChroma = 0;
2310 PixelPTEBytesPerRowC = 0;
2311 PDEAndMetaPTEBytesFrameC = 0;
2312 MetaRowByteC = 0;
2313 v->MaxNumSwathC[k] = 0;
2314 v->PrefetchSourceLinesC[k] = 0;
2315 }
2316
2317 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2318 mode_lib,
2319 v->DCCEnable[k],
2320 v->BlockHeight256BytesY[k],
2321 v->BlockWidth256BytesY[k],
2322 v->SourcePixelFormat[k],
2323 v->SurfaceTiling[k],
2324 v->BytePerPixelY[k],
2325 v->SourceScan[k],
2326 v->SwathWidthY[k],
2327 v->ViewportHeight[k],
2328 v->GPUVMEnable,
2329 v->HostVMEnable,
2330 v->HostVMMaxNonCachedPageTableLevels,
2331 v->GPUVMMinPageSize,
2332 v->HostVMMinPageSize,
2333 v->PTEBufferSizeInRequestsForLuma,
2334 v->PitchY[k],
2335 v->DCCMetaPitchY[k],
2336 &v->MacroTileWidthY[k],
2337 &MetaRowByteY,
2338 &PixelPTEBytesPerRowY,
2339 &PTEBufferSizeNotExceededY,
2340 &v->dpte_row_width_luma_ub[k],
2341 &v->dpte_row_height[k],
2342 &v->meta_req_width[k],
2343 &v->meta_req_height[k],
2344 &v->meta_row_width[k],
2345 &v->meta_row_height[k],
2346 &v->vm_group_bytes[k],
2347 &v->dpte_group_bytes[k],
2348 &v->PixelPTEReqWidthY[k],
2349 &v->PixelPTEReqHeightY[k],
2350 &v->PTERequestSizeY[k],
2351 &v->dpde0_bytes_per_frame_ub_l[k],
2352 &v->meta_pte_bytes_per_frame_ub_l[k]);
2353
2354 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2355 mode_lib,
2356 v->VRatio[k],
2357 v->vtaps[k],
2358 v->Interlace[k],
2359 v->ProgressiveToInterlaceUnitInOPP,
2360 v->SwathHeightY[k],
2361 v->ViewportYStartY[k],
2362 &v->VInitPreFillY[k],
2363 &v->MaxNumSwathY[k]);
2364 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2365 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2366 + PDEAndMetaPTEBytesFrameC;
2367 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2368
2369 CalculateRowBandwidth(
2370 v->GPUVMEnable,
2371 v->SourcePixelFormat[k],
2372 v->VRatio[k],
2373 v->VRatioChroma[k],
2374 v->DCCEnable[k],
2375 v->HTotal[k] / v->PixelClock[k],
2376 MetaRowByteY,
2377 MetaRowByteC,
2378 v->meta_row_height[k],
2379 v->meta_row_height_chroma[k],
2380 PixelPTEBytesPerRowY,
2381 PixelPTEBytesPerRowC,
2382 v->dpte_row_height[k],
2383 v->dpte_row_height_chroma[k],
2384 &v->meta_row_bw[k],
2385 &v->dpte_row_bw[k]);
2386 }
2387
2388 v->TotalDCCActiveDPP = 0;
2389 v->TotalActiveDPP = 0;
2390 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2391 v->TotalActiveDPP = v->TotalActiveDPP
2392 + v->DPPPerPlane[k];
2393 if (v->DCCEnable[k])
2394 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2395 + v->DPPPerPlane[k];
2396 }
2397
2398
2399 ReorderBytes = v->NumberOfChannels * dml_max3(
2400 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2401 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2402 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2403
2404 v->UrgentExtraLatency = CalculateExtraLatency(
2405 v->RoundTripPingLatencyCycles,
2406 ReorderBytes,
2407 v->DCFCLK,
2408 v->TotalActiveDPP,
2409 v->PixelChunkSizeInKByte,
2410 v->TotalDCCActiveDPP,
2411 v->MetaChunkSize,
2412 v->ReturnBW,
2413 v->GPUVMEnable,
2414 v->HostVMEnable,
2415 v->NumberOfActivePlanes,
2416 v->DPPPerPlane,
2417 v->dpte_group_bytes,
2418 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2419 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2420 v->HostVMMinPageSize,
2421 v->HostVMMaxNonCachedPageTableLevels);
2422
2423 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2424
2425 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2426 if (v->BlendingAndTiming[k] == k) {
2427 if (v->WritebackEnable[k] == true) {
2428 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2429 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2430 v->WritebackHRatio[k],
2431 v->WritebackVRatio[k],
2432 v->WritebackVTaps[k],
2433 v->WritebackDestinationWidth[k],
2434 v->WritebackDestinationHeight[k],
2435 v->WritebackSourceHeight[k],
2436 v->HTotal[k]) / v->DISPCLK;
2437 } else
2438 v->WritebackDelay[v->VoltageLevel][k] = 0;
2439 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2440 if (v->BlendingAndTiming[j] == k
2441 && v->WritebackEnable[j] == true) {
2442 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2443 v->WritebackLatency + CalculateWriteBackDelay(
2444 v->WritebackPixelFormat[j],
2445 v->WritebackHRatio[j],
2446 v->WritebackVRatio[j],
2447 v->WritebackVTaps[j],
2448 v->WritebackDestinationWidth[j],
2449 v->WritebackDestinationHeight[j],
2450 v->WritebackSourceHeight[j],
2451 v->HTotal[k]) / v->DISPCLK);
2452 }
2453 }
2454 }
2455 }
2456
2457 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2458 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2459 if (v->BlendingAndTiming[k] == j)
2460 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2461
2462 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2463 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2464 }
2465
2466 v->MaximumMaxVStartupLines = 0;
2467 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2468 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2469
2470 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2471 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2472 } else {
2473 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2474 }
2475 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2476
2477
2478 v->FractionOfUrgentBandwidth = 0.0;
2479 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2480
2481 v->VStartupLines = 13;
2482
2483 do {
2484 MaxTotalRDBandwidth = 0;
2485 MaxTotalRDBandwidthNoUrgentBurst = 0;
2486 DestinationLineTimesForPrefetchLessThan2 = false;
2487 VRatioPrefetchMoreThan4 = false;
2488 TWait = CalculateTWait(
2489 PrefetchMode,
2490 v->FinalDRAMClockChangeLatency,
2491 v->UrgentLatency,
2492 v->SREnterPlusExitTime);
2493
2494 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2495 Pipe myPipe = { 0 };
2496
2497 myPipe.DPPCLK = v->DPPCLK[k];
2498 myPipe.DISPCLK = v->DISPCLK;
2499 myPipe.PixelClock = v->PixelClock[k];
2500 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2501 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2502 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2503 myPipe.SourceScan = v->SourceScan[k];
2504 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2505 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2506 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2507 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2508 myPipe.InterlaceEnable = v->Interlace[k];
2509 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2510 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2511 myPipe.HTotal = v->HTotal[k];
2512 myPipe.DCCEnable = v->DCCEnable[k];
2513 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2514
2515 v->ErrorResult[k] = CalculatePrefetchSchedule(
2516 mode_lib,
2517 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2518 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2519 &myPipe,
2520 v->DSCDelay[k],
2521 v->DPPCLKDelaySubtotal
2522 + v->DPPCLKDelayCNVCFormater,
2523 v->DPPCLKDelaySCL,
2524 v->DPPCLKDelaySCLLBOnly,
2525 v->DPPCLKDelayCNVCCursor,
2526 v->DISPCLKDelaySubtotal,
2527 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2528 v->OutputFormat[k],
2529 v->MaxInterDCNTileRepeaters,
2530 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2531 v->MaxVStartupLines[k],
2532 v->GPUVMMaxPageTableLevels,
2533 v->GPUVMEnable,
2534 v->HostVMEnable,
2535 v->HostVMMaxNonCachedPageTableLevels,
2536 v->HostVMMinPageSize,
2537 v->DynamicMetadataEnable[k],
2538 v->DynamicMetadataVMEnabled,
2539 v->DynamicMetadataLinesBeforeActiveRequired[k],
2540 v->DynamicMetadataTransmittedBytes[k],
2541 v->UrgentLatency,
2542 v->UrgentExtraLatency,
2543 v->TCalc,
2544 v->PDEAndMetaPTEBytesFrame[k],
2545 v->MetaRowByte[k],
2546 v->PixelPTEBytesPerRow[k],
2547 v->PrefetchSourceLinesY[k],
2548 v->SwathWidthY[k],
2549 v->BytePerPixelY[k],
2550 v->VInitPreFillY[k],
2551 v->MaxNumSwathY[k],
2552 v->PrefetchSourceLinesC[k],
2553 v->SwathWidthC[k],
2554 v->BytePerPixelC[k],
2555 v->VInitPreFillC[k],
2556 v->MaxNumSwathC[k],
2557 v->swath_width_luma_ub[k],
2558 v->swath_width_chroma_ub[k],
2559 v->SwathHeightY[k],
2560 v->SwathHeightC[k],
2561 TWait,
2562 v->ProgressiveToInterlaceUnitInOPP,
2563 &v->DSTXAfterScaler[k],
2564 &v->DSTYAfterScaler[k],
2565 &v->DestinationLinesForPrefetch[k],
2566 &v->PrefetchBandwidth[k],
2567 &v->DestinationLinesToRequestVMInVBlank[k],
2568 &v->DestinationLinesToRequestRowInVBlank[k],
2569 &v->VRatioPrefetchY[k],
2570 &v->VRatioPrefetchC[k],
2571 &v->RequiredPrefetchPixDataBWLuma[k],
2572 &v->RequiredPrefetchPixDataBWChroma[k],
2573 &v->NotEnoughTimeForDynamicMetadata[k],
2574 &v->Tno_bw[k],
2575 &v->prefetch_vmrow_bw[k],
2576 &v->Tdmdl_vm[k],
2577 &v->Tdmdl[k],
2578 &v->VUpdateOffsetPix[k],
2579 &v->VUpdateWidthPix[k],
2580 &v->VReadyOffsetPix[k]);
2581 if (v->BlendingAndTiming[k] == k) {
2582 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2583 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2584 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2585 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2586 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2587 } else {
2588 int x = v->BlendingAndTiming[k];
2589 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2590 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2591 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2592 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2593 if (!v->MaxVStartupLines[x])
2594 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2595 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2596 }
2597 }
2598
2599 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2600 v->NotEnoughUrgentLatencyHidingPre = false;
2601
2602 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2603 v->cursor_bw[k] = v->NumberOfCursors[k]
2604 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2605 / 8.0
2606 / (v->HTotal[k] / v->PixelClock[k])
2607 * v->VRatio[k];
2608 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2609 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2610 / 8.0
2611 / (v->HTotal[k] / v->PixelClock[k])
2612 * v->VRatioPrefetchY[k];
2613
2614 CalculateUrgentBurstFactor(
2615 v->swath_width_luma_ub[k],
2616 v->swath_width_chroma_ub[k],
2617 v->DETBufferSizeInKByte[0],
2618 v->SwathHeightY[k],
2619 v->SwathHeightC[k],
2620 v->HTotal[k] / v->PixelClock[k],
2621 v->UrgentLatency,
2622 v->CursorBufferSize,
2623 v->CursorWidth[k][0],
2624 v->CursorBPP[k][0],
2625 v->VRatio[k],
2626 v->VRatioChroma[k],
2627 v->BytePerPixelDETY[k],
2628 v->BytePerPixelDETC[k],
2629 v->DETBufferSizeY[k],
2630 v->DETBufferSizeC[k],
2631 &v->UrgentBurstFactorCursor[k],
2632 &v->UrgentBurstFactorLuma[k],
2633 &v->UrgentBurstFactorChroma[k],
2634 &v->NoUrgentLatencyHiding[k]);
2635
2636 CalculateUrgentBurstFactor(
2637 v->swath_width_luma_ub[k],
2638 v->swath_width_chroma_ub[k],
2639 v->DETBufferSizeInKByte[0],
2640 v->SwathHeightY[k],
2641 v->SwathHeightC[k],
2642 v->HTotal[k] / v->PixelClock[k],
2643 v->UrgentLatency,
2644 v->CursorBufferSize,
2645 v->CursorWidth[k][0],
2646 v->CursorBPP[k][0],
2647 v->VRatioPrefetchY[k],
2648 v->VRatioPrefetchC[k],
2649 v->BytePerPixelDETY[k],
2650 v->BytePerPixelDETC[k],
2651 v->DETBufferSizeY[k],
2652 v->DETBufferSizeC[k],
2653 &v->UrgentBurstFactorCursorPre[k],
2654 &v->UrgentBurstFactorLumaPre[k],
2655 &v->UrgentBurstFactorChromaPre[k],
2656 &v->NoUrgentLatencyHidingPre[k]);
2657
2658 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2659 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2660 v->ReadBandwidthPlaneLuma[k] *
2661 v->UrgentBurstFactorLuma[k] +
2662 v->ReadBandwidthPlaneChroma[k] *
2663 v->UrgentBurstFactorChroma[k] +
2664 v->cursor_bw[k] *
2665 v->UrgentBurstFactorCursor[k] +
2666 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2667 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2668 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2669 v->UrgentBurstFactorCursorPre[k]);
2670
2671 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2672 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2673 v->ReadBandwidthPlaneLuma[k] +
2674 v->ReadBandwidthPlaneChroma[k] +
2675 v->cursor_bw[k] +
2676 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2677 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2678
2679 if (v->DestinationLinesForPrefetch[k] < 2)
2680 DestinationLineTimesForPrefetchLessThan2 = true;
2681 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2682 VRatioPrefetchMoreThan4 = true;
2683 if (v->NoUrgentLatencyHiding[k] == true)
2684 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2685
2686 if (v->NoUrgentLatencyHidingPre[k] == true)
2687 v->NotEnoughUrgentLatencyHidingPre = true;
2688 }
2689 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2690
2691
2692 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2693 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2694 && !DestinationLineTimesForPrefetchLessThan2)
2695 v->PrefetchModeSupported = true;
2696 else {
2697 v->PrefetchModeSupported = false;
2698 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2699 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2700 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2701 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2702 }
2703
2704 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2705 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2706 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2707 v->BandwidthAvailableForImmediateFlip =
2708 v->BandwidthAvailableForImmediateFlip
2709 - dml_max(
2710 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2711 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2712 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2713 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2714 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2715 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2716 }
2717
2718 v->TotImmediateFlipBytes = 0;
2719 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2720 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2721 }
2722 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2723 CalculateFlipSchedule(
2724 mode_lib,
2725 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2726 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2727 v->UrgentExtraLatency,
2728 v->UrgentLatency,
2729 v->GPUVMMaxPageTableLevels,
2730 v->HostVMEnable,
2731 v->HostVMMaxNonCachedPageTableLevels,
2732 v->GPUVMEnable,
2733 v->HostVMMinPageSize,
2734 v->PDEAndMetaPTEBytesFrame[k],
2735 v->MetaRowByte[k],
2736 v->PixelPTEBytesPerRow[k],
2737 v->BandwidthAvailableForImmediateFlip,
2738 v->TotImmediateFlipBytes,
2739 v->SourcePixelFormat[k],
2740 v->HTotal[k] / v->PixelClock[k],
2741 v->VRatio[k],
2742 v->VRatioChroma[k],
2743 v->Tno_bw[k],
2744 v->DCCEnable[k],
2745 v->dpte_row_height[k],
2746 v->meta_row_height[k],
2747 v->dpte_row_height_chroma[k],
2748 v->meta_row_height_chroma[k],
2749 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2750 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2751 &v->final_flip_bw[k],
2752 &v->ImmediateFlipSupportedForPipe[k]);
2753 }
2754 v->total_dcn_read_bw_with_flip = 0.0;
2755 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2756 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2757 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2758 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2759 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2760 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2761 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2762 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2763 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2764 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2765 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2766 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2767 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2768 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2769 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2770 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2771 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2772
2773 }
2774 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2775
2776 v->ImmediateFlipSupported = true;
2777 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2778 v->ImmediateFlipSupported = false;
2779 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2780 }
2781 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2782 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2783 v->ImmediateFlipSupported = false;
2784 }
2785 }
2786 } else {
2787 v->ImmediateFlipSupported = false;
2788 }
2789
2790 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2791 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2792 v->PrefetchModeSupported = false;
2793 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2794 }
2795 }
2796
2797 v->VStartupLines = v->VStartupLines + 1;
2798 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2799 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2800 v->ImmediateFlipSupported)) ? true : false;
2801 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2802 ASSERT(v->PrefetchModeSupported);
2803
2804 //Watermarks and NB P-State/DRAM Clock Change Support
2805 {
2806 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2807 CalculateWatermarksAndDRAMSpeedChangeSupport(
2808 mode_lib,
2809 PrefetchMode,
2810 v->NumberOfActivePlanes,
2811 v->MaxLineBufferLines,
2812 v->LineBufferSize,
2813 v->DPPOutputBufferPixels,
2814 v->DETBufferSizeInKByte[0],
2815 v->WritebackInterfaceBufferSize,
2816 v->DCFCLK,
2817 v->ReturnBW,
2818 v->GPUVMEnable,
2819 v->dpte_group_bytes,
2820 v->MetaChunkSize,
2821 v->UrgentLatency,
2822 v->UrgentExtraLatency,
2823 v->WritebackLatency,
2824 v->WritebackChunkSize,
2825 v->SOCCLK,
2826 v->FinalDRAMClockChangeLatency,
2827 v->SRExitTime,
2828 v->SREnterPlusExitTime,
2829 v->DCFCLKDeepSleep,
2830 v->DPPPerPlane,
2831 v->DCCEnable,
2832 v->DPPCLK,
2833 v->DETBufferSizeY,
2834 v->DETBufferSizeC,
2835 v->SwathHeightY,
2836 v->SwathHeightC,
2837 v->LBBitPerPixel,
2838 v->SwathWidthY,
2839 v->SwathWidthC,
2840 v->HRatio,
2841 v->HRatioChroma,
2842 v->vtaps,
2843 v->VTAPsChroma,
2844 v->VRatio,
2845 v->VRatioChroma,
2846 v->HTotal,
2847 v->PixelClock,
2848 v->BlendingAndTiming,
2849 v->BytePerPixelDETY,
2850 v->BytePerPixelDETC,
2851 v->DSTXAfterScaler,
2852 v->DSTYAfterScaler,
2853 v->WritebackEnable,
2854 v->WritebackPixelFormat,
2855 v->WritebackDestinationWidth,
2856 v->WritebackDestinationHeight,
2857 v->WritebackSourceHeight,
2858 &DRAMClockChangeSupport,
2859 &v->UrgentWatermark,
2860 &v->WritebackUrgentWatermark,
2861 &v->DRAMClockChangeWatermark,
2862 &v->WritebackDRAMClockChangeWatermark,
2863 &v->StutterExitWatermark,
2864 &v->StutterEnterPlusExitWatermark,
2865 &v->MinActiveDRAMClockChangeLatencySupported);
2866
2867 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2868 if (v->WritebackEnable[k] == true) {
2869 if (v->BlendingAndTiming[k] == k) {
2870 v->ThisVStartup = v->VStartup[k];
2871 } else {
2872 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2873 if (v->BlendingAndTiming[k] == j) {
2874 v->ThisVStartup = v->VStartup[j];
2875 }
2876 }
2877 }
2878 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2879 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2880 } else {
2881 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2882 }
2883 }
2884
2885 }
2886
2887
2888 //Display Pipeline Delivery Time in Prefetch, Groups
2889 CalculatePixelDeliveryTimes(
2890 v->NumberOfActivePlanes,
2891 v->VRatio,
2892 v->VRatioChroma,
2893 v->VRatioPrefetchY,
2894 v->VRatioPrefetchC,
2895 v->swath_width_luma_ub,
2896 v->swath_width_chroma_ub,
2897 v->DPPPerPlane,
2898 v->HRatio,
2899 v->HRatioChroma,
2900 v->PixelClock,
2901 v->PSCL_THROUGHPUT_LUMA,
2902 v->PSCL_THROUGHPUT_CHROMA,
2903 v->DPPCLK,
2904 v->BytePerPixelC,
2905 v->SourceScan,
2906 v->NumberOfCursors,
2907 v->CursorWidth,
2908 v->CursorBPP,
2909 v->BlockWidth256BytesY,
2910 v->BlockHeight256BytesY,
2911 v->BlockWidth256BytesC,
2912 v->BlockHeight256BytesC,
2913 v->DisplayPipeLineDeliveryTimeLuma,
2914 v->DisplayPipeLineDeliveryTimeChroma,
2915 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2916 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2917 v->DisplayPipeRequestDeliveryTimeLuma,
2918 v->DisplayPipeRequestDeliveryTimeChroma,
2919 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2920 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2921 v->CursorRequestDeliveryTime,
2922 v->CursorRequestDeliveryTimePrefetch);
2923
2924 CalculateMetaAndPTETimes(
2925 v->NumberOfActivePlanes,
2926 v->GPUVMEnable,
2927 v->MetaChunkSize,
2928 v->MinMetaChunkSizeBytes,
2929 v->HTotal,
2930 v->VRatio,
2931 v->VRatioChroma,
2932 v->DestinationLinesToRequestRowInVBlank,
2933 v->DestinationLinesToRequestRowInImmediateFlip,
2934 v->DCCEnable,
2935 v->PixelClock,
2936 v->BytePerPixelY,
2937 v->BytePerPixelC,
2938 v->SourceScan,
2939 v->dpte_row_height,
2940 v->dpte_row_height_chroma,
2941 v->meta_row_width,
2942 v->meta_row_width_chroma,
2943 v->meta_row_height,
2944 v->meta_row_height_chroma,
2945 v->meta_req_width,
2946 v->meta_req_width_chroma,
2947 v->meta_req_height,
2948 v->meta_req_height_chroma,
2949 v->dpte_group_bytes,
2950 v->PTERequestSizeY,
2951 v->PTERequestSizeC,
2952 v->PixelPTEReqWidthY,
2953 v->PixelPTEReqHeightY,
2954 v->PixelPTEReqWidthC,
2955 v->PixelPTEReqHeightC,
2956 v->dpte_row_width_luma_ub,
2957 v->dpte_row_width_chroma_ub,
2958 v->DST_Y_PER_PTE_ROW_NOM_L,
2959 v->DST_Y_PER_PTE_ROW_NOM_C,
2960 v->DST_Y_PER_META_ROW_NOM_L,
2961 v->DST_Y_PER_META_ROW_NOM_C,
2962 v->TimePerMetaChunkNominal,
2963 v->TimePerChromaMetaChunkNominal,
2964 v->TimePerMetaChunkVBlank,
2965 v->TimePerChromaMetaChunkVBlank,
2966 v->TimePerMetaChunkFlip,
2967 v->TimePerChromaMetaChunkFlip,
2968 v->time_per_pte_group_nom_luma,
2969 v->time_per_pte_group_vblank_luma,
2970 v->time_per_pte_group_flip_luma,
2971 v->time_per_pte_group_nom_chroma,
2972 v->time_per_pte_group_vblank_chroma,
2973 v->time_per_pte_group_flip_chroma);
2974
2975 CalculateVMGroupAndRequestTimes(
2976 v->NumberOfActivePlanes,
2977 v->GPUVMEnable,
2978 v->GPUVMMaxPageTableLevels,
2979 v->HTotal,
2980 v->BytePerPixelC,
2981 v->DestinationLinesToRequestVMInVBlank,
2982 v->DestinationLinesToRequestVMInImmediateFlip,
2983 v->DCCEnable,
2984 v->PixelClock,
2985 v->dpte_row_width_luma_ub,
2986 v->dpte_row_width_chroma_ub,
2987 v->vm_group_bytes,
2988 v->dpde0_bytes_per_frame_ub_l,
2989 v->dpde0_bytes_per_frame_ub_c,
2990 v->meta_pte_bytes_per_frame_ub_l,
2991 v->meta_pte_bytes_per_frame_ub_c,
2992 v->TimePerVMGroupVBlank,
2993 v->TimePerVMGroupFlip,
2994 v->TimePerVMRequestVBlank,
2995 v->TimePerVMRequestFlip);
2996
2997
2998 // Min TTUVBlank
2999 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3000 if (PrefetchMode == 0) {
3001 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3002 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3003 v->MinTTUVBlank[k] = dml_max(
3004 v->DRAMClockChangeWatermark,
3005 dml_max(
3006 v->StutterEnterPlusExitWatermark,
3007 v->UrgentWatermark));
3008 } else if (PrefetchMode == 1) {
3009 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3010 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3011 v->MinTTUVBlank[k] = dml_max(
3012 v->StutterEnterPlusExitWatermark,
3013 v->UrgentWatermark);
3014 } else {
3015 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3016 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3017 v->MinTTUVBlank[k] = v->UrgentWatermark;
3018 }
3019 if (!v->DynamicMetadataEnable[k])
3020 v->MinTTUVBlank[k] = v->TCalc
3021 + v->MinTTUVBlank[k];
3022 }
3023
3024 // DCC Configuration
3025 v->ActiveDPPs = 0;
3026 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3027 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3028 v->SourcePixelFormat[k],
3029 v->SurfaceWidthY[k],
3030 v->SurfaceWidthC[k],
3031 v->SurfaceHeightY[k],
3032 v->SurfaceHeightC[k],
3033 v->DETBufferSizeInKByte[0] * 1024,
3034 v->BlockHeight256BytesY[k],
3035 v->BlockHeight256BytesC[k],
3036 v->SurfaceTiling[k],
3037 v->BytePerPixelY[k],
3038 v->BytePerPixelC[k],
3039 v->BytePerPixelDETY[k],
3040 v->BytePerPixelDETC[k],
3041 v->SourceScan[k],
3042 &v->DCCYMaxUncompressedBlock[k],
3043 &v->DCCCMaxUncompressedBlock[k],
3044 &v->DCCYMaxCompressedBlock[k],
3045 &v->DCCCMaxCompressedBlock[k],
3046 &v->DCCYIndependentBlock[k],
3047 &v->DCCCIndependentBlock[k]);
3048 }
3049
3050 {
3051 //Maximum Bandwidth Used
3052 double TotalWRBandwidth = 0;
3053 double MaxPerPlaneVActiveWRBandwidth = 0;
3054 double WRBandwidth = 0;
3055 double MaxUsedBW = 0;
3056 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3057 if (v->WritebackEnable[k] == true
3058 && v->WritebackPixelFormat[k] == dm_444_32) {
3059 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3060 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3061 } else if (v->WritebackEnable[k] == true) {
3062 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3063 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3064 }
3065 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3066 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3067 }
3068
3069 v->TotalDataReadBandwidth = 0;
3070 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3071 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
3072 + v->ReadBandwidthPlaneLuma[k]
3073 + v->ReadBandwidthPlaneChroma[k];
3074 }
3075
3076 {
3077 double MaxPerPlaneVActiveRDBandwidth = 0;
3078 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3079 MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth,
3080 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
3081
3082 }
3083 }
3084
3085 MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth;
3086 }
3087
3088 // VStartup Margin
3089 v->VStartupMargin = 0;
3090 v->FirstMainPlane = true;
3091 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3092 if (v->BlendingAndTiming[k] == k) {
3093 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3094 / v->PixelClock[k];
3095 if (v->FirstMainPlane == true) {
3096 v->VStartupMargin = margin;
3097 v->FirstMainPlane = false;
3098 } else {
3099 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3100 }
3101 }
3102 }
3103
3104 // Stutter Efficiency
3105 CalculateStutterEfficiency(
3106 v->NumberOfActivePlanes,
3107 v->ROBBufferSizeInKByte,
3108 v->TotalDataReadBandwidth,
3109 v->DCFCLK,
3110 v->ReturnBW,
3111 v->SRExitTime,
3112 v->SynchronizedVBlank,
3113 v->DPPPerPlane,
3114 v->DETBufferSizeY,
3115 v->BytePerPixelY,
3116 v->BytePerPixelDETY,
3117 v->SwathWidthY,
3118 v->SwathHeightY,
3119 v->SwathHeightC,
3120 v->DCCRateLuma,
3121 v->DCCRateChroma,
3122 v->HTotal,
3123 v->VTotal,
3124 v->PixelClock,
3125 v->VRatio,
3126 v->SourceScan,
3127 v->BlockHeight256BytesY,
3128 v->BlockWidth256BytesY,
3129 v->BlockHeight256BytesC,
3130 v->BlockWidth256BytesC,
3131 v->DCCYMaxUncompressedBlock,
3132 v->DCCCMaxUncompressedBlock,
3133 v->VActive,
3134 v->DCCEnable,
3135 v->WritebackEnable,
3136 v->ReadBandwidthPlaneLuma,
3137 v->ReadBandwidthPlaneChroma,
3138 v->meta_row_bw,
3139 v->dpte_row_bw,
3140 &v->StutterEfficiencyNotIncludingVBlank,
3141 &v->StutterEfficiency,
3142 &v->StutterPeriod);
3143 }
3144
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)3145 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3146 {
3147 // Display Pipe Configuration
3148 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3149 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3150 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3151 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3152 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3153 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3154 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3155 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3156 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3157 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3158 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3159 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3160 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3161 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3162 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3163 bool dummysinglestring = 0;
3164 unsigned int k;
3165
3166 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3167
3168 CalculateBytePerPixelAnd256BBlockSizes(
3169 mode_lib->vba.SourcePixelFormat[k],
3170 mode_lib->vba.SurfaceTiling[k],
3171 &BytePerPixY[k],
3172 &BytePerPixC[k],
3173 &BytePerPixDETY[k],
3174 &BytePerPixDETC[k],
3175 &Read256BytesBlockHeightY[k],
3176 &Read256BytesBlockHeightC[k],
3177 &Read256BytesBlockWidthY[k],
3178 &Read256BytesBlockWidthC[k]);
3179 }
3180 CalculateSwathAndDETConfiguration(
3181 false,
3182 mode_lib->vba.NumberOfActivePlanes,
3183 mode_lib->vba.DETBufferSizeInKByte[0],
3184 dummy1,
3185 dummy2,
3186 mode_lib->vba.SourceScan,
3187 mode_lib->vba.SourcePixelFormat,
3188 mode_lib->vba.SurfaceTiling,
3189 mode_lib->vba.ViewportWidth,
3190 mode_lib->vba.ViewportHeight,
3191 mode_lib->vba.SurfaceWidthY,
3192 mode_lib->vba.SurfaceWidthC,
3193 mode_lib->vba.SurfaceHeightY,
3194 mode_lib->vba.SurfaceHeightC,
3195 Read256BytesBlockHeightY,
3196 Read256BytesBlockHeightC,
3197 Read256BytesBlockWidthY,
3198 Read256BytesBlockWidthC,
3199 mode_lib->vba.ODMCombineEnabled,
3200 mode_lib->vba.BlendingAndTiming,
3201 BytePerPixY,
3202 BytePerPixC,
3203 BytePerPixDETY,
3204 BytePerPixDETC,
3205 mode_lib->vba.HActive,
3206 mode_lib->vba.HRatio,
3207 mode_lib->vba.HRatioChroma,
3208 mode_lib->vba.DPPPerPlane,
3209 dummy5,
3210 dummy6,
3211 dummy3,
3212 dummy4,
3213 mode_lib->vba.SwathHeightY,
3214 mode_lib->vba.SwathHeightC,
3215 mode_lib->vba.DETBufferSizeY,
3216 mode_lib->vba.DETBufferSizeC,
3217 dummy7,
3218 &dummysinglestring);
3219 }
3220
CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)3221 static bool CalculateBytePerPixelAnd256BBlockSizes(
3222 enum source_format_class SourcePixelFormat,
3223 enum dm_swizzle_mode SurfaceTiling,
3224 unsigned int *BytePerPixelY,
3225 unsigned int *BytePerPixelC,
3226 double *BytePerPixelDETY,
3227 double *BytePerPixelDETC,
3228 unsigned int *BlockHeight256BytesY,
3229 unsigned int *BlockHeight256BytesC,
3230 unsigned int *BlockWidth256BytesY,
3231 unsigned int *BlockWidth256BytesC)
3232 {
3233 if (SourcePixelFormat == dm_444_64) {
3234 *BytePerPixelDETY = 8;
3235 *BytePerPixelDETC = 0;
3236 *BytePerPixelY = 8;
3237 *BytePerPixelC = 0;
3238 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3239 *BytePerPixelDETY = 4;
3240 *BytePerPixelDETC = 0;
3241 *BytePerPixelY = 4;
3242 *BytePerPixelC = 0;
3243 } else if (SourcePixelFormat == dm_444_16) {
3244 *BytePerPixelDETY = 2;
3245 *BytePerPixelDETC = 0;
3246 *BytePerPixelY = 2;
3247 *BytePerPixelC = 0;
3248 } else if (SourcePixelFormat == dm_444_8) {
3249 *BytePerPixelDETY = 1;
3250 *BytePerPixelDETC = 0;
3251 *BytePerPixelY = 1;
3252 *BytePerPixelC = 0;
3253 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3254 *BytePerPixelDETY = 4;
3255 *BytePerPixelDETC = 1;
3256 *BytePerPixelY = 4;
3257 *BytePerPixelC = 1;
3258 } else if (SourcePixelFormat == dm_420_8) {
3259 *BytePerPixelDETY = 1;
3260 *BytePerPixelDETC = 2;
3261 *BytePerPixelY = 1;
3262 *BytePerPixelC = 2;
3263 } else if (SourcePixelFormat == dm_420_12) {
3264 *BytePerPixelDETY = 2;
3265 *BytePerPixelDETC = 4;
3266 *BytePerPixelY = 2;
3267 *BytePerPixelC = 4;
3268 } else {
3269 *BytePerPixelDETY = 4.0 / 3;
3270 *BytePerPixelDETC = 8.0 / 3;
3271 *BytePerPixelY = 2;
3272 *BytePerPixelC = 4;
3273 }
3274
3275 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3276 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3277 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3278 || SourcePixelFormat == dm_rgbe)) {
3279 if (SurfaceTiling == dm_sw_linear) {
3280 *BlockHeight256BytesY = 1;
3281 } else if (SourcePixelFormat == dm_444_64) {
3282 *BlockHeight256BytesY = 4;
3283 } else if (SourcePixelFormat == dm_444_8) {
3284 *BlockHeight256BytesY = 16;
3285 } else {
3286 *BlockHeight256BytesY = 8;
3287 }
3288 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3289 *BlockHeight256BytesC = 0;
3290 *BlockWidth256BytesC = 0;
3291 } else {
3292 if (SurfaceTiling == dm_sw_linear) {
3293 *BlockHeight256BytesY = 1;
3294 *BlockHeight256BytesC = 1;
3295 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3296 *BlockHeight256BytesY = 8;
3297 *BlockHeight256BytesC = 16;
3298 } else if (SourcePixelFormat == dm_420_8) {
3299 *BlockHeight256BytesY = 16;
3300 *BlockHeight256BytesC = 8;
3301 } else {
3302 *BlockHeight256BytesY = 8;
3303 *BlockHeight256BytesC = 8;
3304 }
3305 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3306 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3307 }
3308 return true;
3309 }
3310
/*
 * CalculateTWait - select the wait time for the given prefetch mode.
 *
 * Mode 0: the largest of (DRAMClockChangeLatency + UrgentLatency),
 *         SREnterPlusExitTime and UrgentLatency.
 * Mode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * Any other mode: UrgentLatency.
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3326
/*
 * dml30_CalculateWriteBackDISPCLK - DISPCLK value derived from the writeback
 * configuration.
 *
 * Three candidate clocks are computed and the largest one is returned:
 *   H:  PixelClock * ceil(HTaps / 8) / HRatio
 *   V:  PixelClock * (VTaps * ceil(DestinationWidth / 6) + 8) / HTotal
 *   HB: PixelClock * VTaps * (DestinationWidth * VTaps - LineBufferSize / 57)
 *       / 6 / SourceWidth
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used by the
 * calculation.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long         WritebackSourceWidth,
		long         WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	/* Rounded-up sub-terms shared by the candidate formulas. */
	const double HTapsPer8 = dml_ceil(WritebackHTaps / 8.0, 1);
	const double DestWidthPer6 = dml_ceil(WritebackDestinationWidth / 6.0, 1);
	const double LineBufferTerm = WritebackLineBufferSize / 57.0;

	return dml_max3(
			PixelClock * HTapsPer8 / WritebackHRatio,
			PixelClock * (WritebackVTaps * DestWidthPer6 + 8.0) / HTotal,
			PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - LineBufferTerm) / 6.0 / WritebackSourceWidth);
}
3346
/*
 * CalculateWriteBackDelay - delay figure for a writeback configuration.
 *
 * Computes
 *   VInit      = (VRatio + VTaps + 1) / 2
 *   LineLength = max(DestinationWidth, ceil(DestinationWidth / 6) * VTaps)
 *   LastLine   = DestinationHeight - 1 - ceil((SourceHeight - VInit) / VRatio)
 * and returns 0 when LastLine is negative, otherwise
 *   LastLine * LineLength + (HTotal - DestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long         WritebackDestinationWidth,
		long         WritebackDestinationHeight,
		long         WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double LastLineNotClamped = WritebackDestinationHeight - 1
			- dml_ceil((WritebackSourceHeight - VInit) / WritebackVRatio, 1);

	/* A negative line count contributes no delay at all. */
	if (LastLineNotClamped < 0)
		return 0;

	return LastLineNotClamped * LineLength + (HTotal - WritebackDestinationWidth) + 80;
}
3372
3373
/*
 * CalculateDynamicMetadataParameters - timing terms used for dynamic metadata.
 *
 * Outputs:
 *   *Tsetup: (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock,
 *            with the three pixel counts derived from the clocks, HTotal and
 *            the repeater count exactly as written below.
 *   *Tdmbf:  DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec:  HTotal / PixelClock
 *   *Tdmsks: half of the VBlank duration when
 *            DynamicMetadataLinesBeforeActiveRequired is 0, otherwise the
 *            duration of that many lines; halved again when InterlaceEnable
 *            is 1 and ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double RepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double UpdateWidthPix = (14 / DCFClkDeepSleep + 12 / DPPCLK + RepeaterDelayTime) * PixelClock;
	const double ReadyOffsetPix = dml_max(150.0 / DPPCLK, RepeaterDelayTime + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double UpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	double Tsks;

	*Tsetup = (UpdateOffsetPix + UpdateWidthPix + ReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	/* The line products are formed in integer arithmetic before the division. */
	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		Tsks = VBlank * HTotal / PixelClock / 2.0;
	else
		Tsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		Tsks = Tsks / 2;

	*Tdmsks = Tsks;
}
3399
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3400 static void CalculateRowBandwidth(
3401 bool GPUVMEnable,
3402 enum source_format_class SourcePixelFormat,
3403 double VRatio,
3404 double VRatioChroma,
3405 bool DCCEnable,
3406 double LineTime,
3407 unsigned int MetaRowByteLuma,
3408 unsigned int MetaRowByteChroma,
3409 unsigned int meta_row_height_luma,
3410 unsigned int meta_row_height_chroma,
3411 unsigned int PixelPTEBytesPerRowLuma,
3412 unsigned int PixelPTEBytesPerRowChroma,
3413 unsigned int dpte_row_height_luma,
3414 unsigned int dpte_row_height_chroma,
3415 double *meta_row_bw,
3416 double *dpte_row_bw)
3417 {
3418 if (DCCEnable != true) {
3419 *meta_row_bw = 0;
3420 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3421 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3422 + VRatioChroma * MetaRowByteChroma
3423 / (meta_row_height_chroma * LineTime);
3424 } else {
3425 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3426 }
3427
3428 if (GPUVMEnable != true) {
3429 *dpte_row_bw = 0;
3430 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3431 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3432 + VRatioChroma * PixelPTEBytesPerRowChroma
3433 / (dpte_row_height_chroma * LineTime);
3434 } else {
3435 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3436 }
3437 }
3438
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3439 static void CalculateFlipSchedule(
3440 struct display_mode_lib *mode_lib,
3441 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3442 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3443 double UrgentExtraLatency,
3444 double UrgentLatency,
3445 unsigned int GPUVMMaxPageTableLevels,
3446 bool HostVMEnable,
3447 unsigned int HostVMMaxNonCachedPageTableLevels,
3448 bool GPUVMEnable,
3449 double HostVMMinPageSize,
3450 double PDEAndMetaPTEBytesPerFrame,
3451 double MetaRowBytes,
3452 double DPTEBytesPerRow,
3453 double BandwidthAvailableForImmediateFlip,
3454 unsigned int TotImmediateFlipBytes,
3455 enum source_format_class SourcePixelFormat,
3456 double LineTime,
3457 double VRatio,
3458 double VRatioChroma,
3459 double Tno_bw,
3460 bool DCCEnable,
3461 unsigned int dpte_row_height,
3462 unsigned int meta_row_height,
3463 unsigned int dpte_row_height_chroma,
3464 unsigned int meta_row_height_chroma,
3465 double *DestinationLinesToRequestVMInImmediateFlip,
3466 double *DestinationLinesToRequestRowInImmediateFlip,
3467 double *final_flip_bw,
3468 bool *ImmediateFlipSupportedForPipe)
3469 {
3470 double min_row_time = 0.0;
3471 unsigned int HostVMDynamicLevelsTrips = 0;
3472 double TimeForFetchingMetaPTEImmediateFlip = 0;
3473 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3474 double ImmediateFlipBW = 0;
3475 double HostVMInefficiencyFactor = 0;
3476
3477 if (GPUVMEnable == true && HostVMEnable == true) {
3478 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3479 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3480 } else {
3481 HostVMInefficiencyFactor = 1;
3482 HostVMDynamicLevelsTrips = 0;
3483 }
3484
3485 if (GPUVMEnable == true || DCCEnable == true) {
3486 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3487 }
3488
3489 if (GPUVMEnable == true) {
3490 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3491 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3492 } else {
3493 TimeForFetchingMetaPTEImmediateFlip = 0;
3494 }
3495
3496 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3497 if ((GPUVMEnable == true || DCCEnable == true)) {
3498 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3499 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3500 } else {
3501 TimeForFetchingRowInVBlankImmediateFlip = 0;
3502 }
3503
3504 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3505
3506 if (GPUVMEnable == true) {
3507 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3508 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3509 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3510 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3511 } else {
3512 *final_flip_bw = 0;
3513 }
3514
3515
3516 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3517 if (GPUVMEnable == true && DCCEnable != true) {
3518 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3519 } else if (GPUVMEnable != true && DCCEnable == true) {
3520 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3521 } else {
3522 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3523 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3524 }
3525 } else {
3526 if (GPUVMEnable == true && DCCEnable != true) {
3527 min_row_time = dpte_row_height * LineTime / VRatio;
3528 } else if (GPUVMEnable != true && DCCEnable == true) {
3529 min_row_time = meta_row_height * LineTime / VRatio;
3530 } else {
3531 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3532 }
3533 }
3534
3535 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3536 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3537 *ImmediateFlipSupportedForPipe = false;
3538 } else {
3539 *ImmediateFlipSupportedForPipe = true;
3540 }
3541 }
3542
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3543 static double TruncToValidBPP(
3544 double LinkBitRate,
3545 int Lanes,
3546 long HTotal,
3547 long HActive,
3548 double PixelClock,
3549 double DesiredBPP,
3550 bool DSCEnable,
3551 enum output_encoder_class Output,
3552 enum output_format_class Format,
3553 unsigned int DSCInputBitPerComponent,
3554 int DSCSlices,
3555 int AudioRate,
3556 int AudioLayout,
3557 enum odm_combine_mode ODMCombine)
3558 {
3559 double MaxLinkBPP = 0;
3560 int MinDSCBPP = 0;
3561 double MaxDSCBPP = 0;
3562 int NonDSCBPP0 = 0;
3563 int NonDSCBPP1 = 0;
3564 int NonDSCBPP2 = 0;
3565
3566 if (Format == dm_420) {
3567 NonDSCBPP0 = 12;
3568 NonDSCBPP1 = 15;
3569 NonDSCBPP2 = 18;
3570 MinDSCBPP = 6;
3571 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3572 } else if (Format == dm_444) {
3573 NonDSCBPP0 = 24;
3574 NonDSCBPP1 = 30;
3575 NonDSCBPP2 = 36;
3576 MinDSCBPP = 8;
3577 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3578 } else {
3579 NonDSCBPP0 = 16;
3580 NonDSCBPP1 = 20;
3581 NonDSCBPP2 = 24;
3582
3583 if (Format == dm_n422) {
3584 MinDSCBPP = 7;
3585 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3586 }
3587 else {
3588 MinDSCBPP = 8;
3589 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3590 }
3591 }
3592
3593 if (DSCEnable && Output == dm_dp) {
3594 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3595 } else {
3596 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3597 }
3598
3599 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3600 MaxLinkBPP = 16;
3601 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3602 MaxLinkBPP = 32;
3603 }
3604
3605
3606 if (DesiredBPP == 0) {
3607 if (DSCEnable) {
3608 if (MaxLinkBPP < MinDSCBPP) {
3609 return BPP_INVALID;
3610 } else if (MaxLinkBPP >= MaxDSCBPP) {
3611 return MaxDSCBPP;
3612 } else {
3613 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3614 }
3615 } else {
3616 if (MaxLinkBPP >= NonDSCBPP2) {
3617 return NonDSCBPP2;
3618 } else if (MaxLinkBPP >= NonDSCBPP1) {
3619 return NonDSCBPP1;
3620 } else if (MaxLinkBPP >= NonDSCBPP0) {
3621 return NonDSCBPP0;
3622 } else {
3623 return BPP_INVALID;
3624 }
3625 }
3626 } else {
3627 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3628 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3629 return BPP_INVALID;
3630 } else {
3631 return DesiredBPP;
3632 }
3633 }
3634 return BPP_INVALID;
3635 }
3636
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3637 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3638 {
3639 struct vba_vars_st *v = &mode_lib->vba;
3640 int MinPrefetchMode, MaxPrefetchMode;
3641 int i;
3642 unsigned int j, k, m;
3643 bool EnoughWritebackUnits = true;
3644 bool WritebackModeSupport = true;
3645 bool ViewportExceedsSurface = false;
3646 double MaxTotalVActiveRDBandwidth = 0;
3647 long ReorderingBytes = 0;
3648 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3649
3650 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3651
3652 CalculateMinAndMaxPrefetchMode(
3653 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3654 &MinPrefetchMode, &MaxPrefetchMode);
3655
3656 /*Scale Ratio, taps Support Check*/
3657
3658 v->ScaleRatioAndTapsSupport = true;
3659 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3660 if (v->ScalerEnabled[k] == false
3661 && ((v->SourcePixelFormat[k] != dm_444_64
3662 && v->SourcePixelFormat[k] != dm_444_32
3663 && v->SourcePixelFormat[k] != dm_444_16
3664 && v->SourcePixelFormat[k] != dm_mono_16
3665 && v->SourcePixelFormat[k] != dm_mono_8
3666 && v->SourcePixelFormat[k] != dm_rgbe
3667 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3668 || v->HRatio[k] != 1.0
3669 || v->htaps[k] != 1.0
3670 || v->VRatio[k] != 1.0
3671 || v->vtaps[k] != 1.0)) {
3672 v->ScaleRatioAndTapsSupport = false;
3673 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3674 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3675 || (v->htaps[k] > 1.0
3676 && (v->htaps[k] % 2) == 1)
3677 || v->HRatio[k] > v->MaxHSCLRatio
3678 || v->VRatio[k] > v->MaxVSCLRatio
3679 || v->HRatio[k] > v->htaps[k]
3680 || v->VRatio[k] > v->vtaps[k]
3681 || (v->SourcePixelFormat[k] != dm_444_64
3682 && v->SourcePixelFormat[k] != dm_444_32
3683 && v->SourcePixelFormat[k] != dm_444_16
3684 && v->SourcePixelFormat[k] != dm_mono_16
3685 && v->SourcePixelFormat[k] != dm_mono_8
3686 && v->SourcePixelFormat[k] != dm_rgbe
3687 && (v->VTAPsChroma[k] < 1
3688 || v->VTAPsChroma[k] > 8
3689 || v->HTAPsChroma[k] < 1
3690 || v->HTAPsChroma[k] > 8
3691 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3692 || v->HRatioChroma[k] > v->MaxHSCLRatio
3693 || v->VRatioChroma[k] > v->MaxVSCLRatio
3694 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3695 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3696 v->ScaleRatioAndTapsSupport = false;
3697 }
3698 }
3699 /*Source Format, Pixel Format and Scan Support Check*/
3700
3701 v->SourceFormatPixelAndScanSupport = true;
3702 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3703 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3704 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3705 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3706 v->SourceFormatPixelAndScanSupport = false;
3707 }
3708 }
3709 /*Bandwidth Support Check*/
3710
3711 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3712 CalculateBytePerPixelAnd256BBlockSizes(
3713 v->SourcePixelFormat[k],
3714 v->SurfaceTiling[k],
3715 &v->BytePerPixelY[k],
3716 &v->BytePerPixelC[k],
3717 &v->BytePerPixelInDETY[k],
3718 &v->BytePerPixelInDETC[k],
3719 &v->Read256BlockHeightY[k],
3720 &v->Read256BlockHeightC[k],
3721 &v->Read256BlockWidthY[k],
3722 &v->Read256BlockWidthC[k]);
3723 }
3724 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3725 if (v->SourceScan[k] != dm_vert) {
3726 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3727 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3728 } else {
3729 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3730 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3731 }
3732 }
3733 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3734 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3735 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3736 }
3737 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3738 if (v->WritebackEnable[k] == true
3739 && v->WritebackPixelFormat[k] == dm_444_64) {
3740 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3741 * v->WritebackDestinationHeight[k]
3742 / (v->WritebackSourceHeight[k]
3743 * v->HTotal[k]
3744 / v->PixelClock[k]) * 8.0;
3745 } else if (v->WritebackEnable[k] == true) {
3746 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3747 * v->WritebackDestinationHeight[k]
3748 / (v->WritebackSourceHeight[k]
3749 * v->HTotal[k]
3750 / v->PixelClock[k]) * 4.0;
3751 } else {
3752 v->WriteBandwidth[k] = 0.0;
3753 }
3754 }
3755
3756 /*Writeback Latency support check*/
3757
3758 v->WritebackLatencySupport = true;
3759 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3760 if (v->WritebackEnable[k] == true) {
3761 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3762 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3763 if (v->WriteBandwidth[k]
3764 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3765 / v->WritebackLatency) {
3766 v->WritebackLatencySupport = false;
3767 }
3768 } else {
3769 if (v->WriteBandwidth[k]
3770 > v->WritebackInterfaceBufferSize * 1024
3771 / v->WritebackLatency) {
3772 v->WritebackLatencySupport = false;
3773 }
3774 }
3775 }
3776 }
3777
3778 /*Writeback Mode Support Check*/
3779
3780 v->TotalNumberOfActiveWriteback = 0;
3781 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3782 if (v->WritebackEnable[k] == true) {
3783 v->TotalNumberOfActiveWriteback =
3784 v->TotalNumberOfActiveWriteback + 1;
3785 }
3786 }
3787
3788 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3789 EnoughWritebackUnits = false;
3790 }
3791 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3792 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3793 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3794
3795 WritebackModeSupport = false;
3796 }
3797 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3798 WritebackModeSupport = false;
3799 }
3800
3801 /*Writeback Scale Ratio and Taps Support Check*/
3802
3803 v->WritebackScaleRatioAndTapsSupport = true;
3804 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3805 if (v->WritebackEnable[k] == true) {
3806 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3807 || v->WritebackVRatio[k]
3808 > v->WritebackMaxVSCLRatio
3809 || v->WritebackHRatio[k]
3810 < v->WritebackMinHSCLRatio
3811 || v->WritebackVRatio[k]
3812 < v->WritebackMinVSCLRatio
3813 || v->WritebackHTaps[k]
3814 > v->WritebackMaxHSCLTaps
3815 || v->WritebackVTaps[k]
3816 > v->WritebackMaxVSCLTaps
3817 || v->WritebackHRatio[k]
3818 > v->WritebackHTaps[k]
3819 || v->WritebackVRatio[k]
3820 > v->WritebackVTaps[k]
3821 || (v->WritebackHTaps[k] > 2.0
3822 && ((v->WritebackHTaps[k] % 2)
3823 == 1))) {
3824 v->WritebackScaleRatioAndTapsSupport = false;
3825 }
3826 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3827 v->WritebackScaleRatioAndTapsSupport = false;
3828 }
3829 }
3830 }
3831 /*Maximum DISPCLK/DPPCLK Support check*/
3832
3833 v->WritebackRequiredDISPCLK = 0.0;
3834 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3835 if (v->WritebackEnable[k] == true) {
3836 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3837 dml30_CalculateWriteBackDISPCLK(
3838 v->WritebackPixelFormat[k],
3839 v->PixelClock[k],
3840 v->WritebackHRatio[k],
3841 v->WritebackVRatio[k],
3842 v->WritebackHTaps[k],
3843 v->WritebackVTaps[k],
3844 v->WritebackSourceWidth[k],
3845 v->WritebackDestinationWidth[k],
3846 v->HTotal[k],
3847 v->WritebackLineBufferSize));
3848 }
3849 }
3850 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3851 if (v->HRatio[k] > 1.0) {
3852 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3853 } else {
3854 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3855 }
3856 if (v->BytePerPixelC[k] == 0.0) {
3857 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3858 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3859 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3860 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3861 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3862 }
3863 } else {
3864 if (v->HRatioChroma[k] > 1.0) {
3865 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3866 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3867 } else {
3868 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3869 }
3870 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3871 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3872 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3873 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3874 1.0);
3875 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3876 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3877 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3878 }
3879 }
3880 }
3881 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3882 int MaximumSwathWidthSupportLuma = 0;
3883 int MaximumSwathWidthSupportChroma = 0;
3884
3885 if (v->SurfaceTiling[k] == dm_sw_linear) {
3886 MaximumSwathWidthSupportLuma = 8192.0;
3887 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3888 MaximumSwathWidthSupportLuma = 2880.0;
3889 } else {
3890 MaximumSwathWidthSupportLuma = 5760.0;
3891 }
3892
3893 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3894 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3895 } else {
3896 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3897 }
3898 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3899 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3900 if (v->BytePerPixelC[k] == 0.0) {
3901 v->MaximumSwathWidthInLineBufferChroma = 0;
3902 } else {
3903 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3904 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3905 }
3906 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3907 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3908 }
3909
3910 CalculateSwathAndDETConfiguration(
3911 true,
3912 v->NumberOfActivePlanes,
3913 v->DETBufferSizeInKByte[0],
3914 v->MaximumSwathWidthLuma,
3915 v->MaximumSwathWidthChroma,
3916 v->SourceScan,
3917 v->SourcePixelFormat,
3918 v->SurfaceTiling,
3919 v->ViewportWidth,
3920 v->ViewportHeight,
3921 v->SurfaceWidthY,
3922 v->SurfaceWidthC,
3923 v->SurfaceHeightY,
3924 v->SurfaceHeightC,
3925 v->Read256BlockHeightY,
3926 v->Read256BlockHeightC,
3927 v->Read256BlockWidthY,
3928 v->Read256BlockWidthC,
3929 v->odm_combine_dummy,
3930 v->BlendingAndTiming,
3931 v->BytePerPixelY,
3932 v->BytePerPixelC,
3933 v->BytePerPixelInDETY,
3934 v->BytePerPixelInDETC,
3935 v->HActive,
3936 v->HRatio,
3937 v->HRatioChroma,
3938 v->DPPPerPlane,
3939 v->swath_width_luma_ub,
3940 v->swath_width_chroma_ub,
3941 v->SwathWidthY,
3942 v->SwathWidthC,
3943 v->SwathHeightY,
3944 v->SwathHeightC,
3945 v->DETBufferSizeY,
3946 v->DETBufferSizeC,
3947 v->SingleDPPViewportSizeSupportPerPlane,
3948 &v->ViewportSizeSupport[0][0]);
3949
3950 for (i = 0; i < v->soc.num_states; i++) {
3951 for (j = 0; j < 2; j++) {
3952 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3953 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3954 v->RequiredDISPCLK[i][j] = 0.0;
3955 v->DISPCLK_DPPCLK_Support[i][j] = true;
3956 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3957 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3958 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3959 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3960 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3961 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3962 }
3963 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3964 * (1 + v->DISPCLKRampingMargin / 100.0);
3965 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3966 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3967 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3968 }
3969 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3970 * (1 + v->DISPCLKRampingMargin / 100.0);
3971 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3972 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3973 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3974 }
3975
3976 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3977 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3978 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3979 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3980 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3981 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3982 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3983 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3984 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3985 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3986 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3987 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3988 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3989 } else {
3990 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3991 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3992 }
3993 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3994 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3995 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3996 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3997 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3998 } else {
3999 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4000 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4001 }
4002 }
4003 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
4004 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4005 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
4006 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4007 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4008 } else {
4009 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4010 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4011 }
4012 }
4013 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4014 v->MPCCombine[i][j][k] = false;
4015 v->NoOfDPP[i][j][k] = 4;
4016 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4017 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4018 v->MPCCombine[i][j][k] = false;
4019 v->NoOfDPP[i][j][k] = 2;
4020 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4021 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4022 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
4023 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4024 v->MPCCombine[i][j][k] = false;
4025 v->NoOfDPP[i][j][k] = 1;
4026 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4027 } else {
4028 v->MPCCombine[i][j][k] = true;
4029 v->NoOfDPP[i][j][k] = 2;
4030 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4031 }
4032 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4033 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4034 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4035 v->DISPCLK_DPPCLK_Support[i][j] = false;
4036 }
4037 }
4038 v->TotalNumberOfActiveDPP[i][j] = 0;
4039 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4040 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4041 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4042 if (v->NoOfDPP[i][j][k] == 1)
4043 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4044 }
4045 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
4046 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4047 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4048 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4049 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4050 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4051 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4052 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4053 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4054 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4055 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4056 }
4057 }
4058 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4059 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4060 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4061 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4062 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4063 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4064 }
4065 }
4066 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4067 v->RequiredDISPCLK[i][j] = 0.0;
4068 v->DISPCLK_DPPCLK_Support[i][j] = true;
4069 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4070 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4071 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4072 v->MPCCombine[i][j][k] = true;
4073 v->NoOfDPP[i][j][k] = 2;
4074 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4075 } else {
4076 v->MPCCombine[i][j][k] = false;
4077 v->NoOfDPP[i][j][k] = 1;
4078 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4079 }
4080 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4081 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4082 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4083 } else {
4084 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4085 }
4086 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4087 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4088 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4089 v->DISPCLK_DPPCLK_Support[i][j] = false;
4090 }
4091 }
4092 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4093 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4094 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4095 }
4096 }
4097 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4098 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4099 v->DISPCLK_DPPCLK_Support[i][j] = false;
4100 }
4101 }
4102 }
4103
4104 /*Total Available Pipes Support Check*/
4105
4106 for (i = 0; i < v->soc.num_states; i++) {
4107 for (j = 0; j < 2; j++) {
4108 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4109 v->TotalAvailablePipesSupport[i][j] = true;
4110 } else {
4111 v->TotalAvailablePipesSupport[i][j] = false;
4112 }
4113 }
4114 }
4115 /*Display IO and DSC Support Check*/
4116
4117 v->NonsupportedDSCInputBPC = false;
4118 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4119 if (!(v->DSCInputBitPerComponent[k] == 12.0
4120 || v->DSCInputBitPerComponent[k] == 10.0
4121 || v->DSCInputBitPerComponent[k] == 8.0)) {
4122 v->NonsupportedDSCInputBPC = true;
4123 }
4124 }
4125
4126 /*Number Of DSC Slices*/
4127 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4128 if (v->BlendingAndTiming[k] == k) {
4129 if (v->PixelClockBackEnd[k] > 3200) {
4130 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4131 } else if (v->PixelClockBackEnd[k] > 1360) {
4132 v->NumberOfDSCSlices[k] = 8;
4133 } else if (v->PixelClockBackEnd[k] > 680) {
4134 v->NumberOfDSCSlices[k] = 4;
4135 } else if (v->PixelClockBackEnd[k] > 340) {
4136 v->NumberOfDSCSlices[k] = 2;
4137 } else {
4138 v->NumberOfDSCSlices[k] = 1;
4139 }
4140 } else {
4141 v->NumberOfDSCSlices[k] = 0;
4142 }
4143 }
4144
4145 for (i = 0; i < v->soc.num_states; i++) {
4146 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4147 v->RequiresDSC[i][k] = false;
4148 v->RequiresFEC[i][k] = false;
4149 if (v->BlendingAndTiming[k] == k) {
4150 if (v->Output[k] == dm_hdmi) {
4151 v->RequiresDSC[i][k] = false;
4152 v->RequiresFEC[i][k] = false;
4153 v->OutputBppPerState[i][k] = TruncToValidBPP(
4154 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4155 3,
4156 v->HTotal[k],
4157 v->HActive[k],
4158 v->PixelClockBackEnd[k],
4159 v->ForcedOutputLinkBPP[k],
4160 false,
4161 v->Output[k],
4162 v->OutputFormat[k],
4163 v->DSCInputBitPerComponent[k],
4164 v->NumberOfDSCSlices[k],
4165 v->AudioSampleRate[k],
4166 v->AudioSampleLayout[k],
4167 v->ODMCombineEnablePerState[i][k]);
4168 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4169 if (v->DSCEnable[k] == true) {
4170 v->RequiresDSC[i][k] = true;
4171 v->LinkDSCEnable = true;
4172 if (v->Output[k] == dm_dp) {
4173 v->RequiresFEC[i][k] = true;
4174 } else {
4175 v->RequiresFEC[i][k] = false;
4176 }
4177 } else {
4178 v->RequiresDSC[i][k] = false;
4179 v->LinkDSCEnable = false;
4180 v->RequiresFEC[i][k] = false;
4181 }
4182
4183 v->Outbpp = BPP_INVALID;
4184 if (v->PHYCLKPerState[i] >= 270.0) {
4185 v->Outbpp = TruncToValidBPP(
4186 (1.0 - v->Downspreading / 100.0) * 2700,
4187 v->OutputLinkDPLanes[k],
4188 v->HTotal[k],
4189 v->HActive[k],
4190 v->PixelClockBackEnd[k],
4191 v->ForcedOutputLinkBPP[k],
4192 v->LinkDSCEnable,
4193 v->Output[k],
4194 v->OutputFormat[k],
4195 v->DSCInputBitPerComponent[k],
4196 v->NumberOfDSCSlices[k],
4197 v->AudioSampleRate[k],
4198 v->AudioSampleLayout[k],
4199 v->ODMCombineEnablePerState[i][k]);
4200 v->OutputBppPerState[i][k] = v->Outbpp;
4201 // TODO: Need some other way to handle this nonsense
4202 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4203 }
4204 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4205 v->Outbpp = TruncToValidBPP(
4206 (1.0 - v->Downspreading / 100.0) * 5400,
4207 v->OutputLinkDPLanes[k],
4208 v->HTotal[k],
4209 v->HActive[k],
4210 v->PixelClockBackEnd[k],
4211 v->ForcedOutputLinkBPP[k],
4212 v->LinkDSCEnable,
4213 v->Output[k],
4214 v->OutputFormat[k],
4215 v->DSCInputBitPerComponent[k],
4216 v->NumberOfDSCSlices[k],
4217 v->AudioSampleRate[k],
4218 v->AudioSampleLayout[k],
4219 v->ODMCombineEnablePerState[i][k]);
4220 v->OutputBppPerState[i][k] = v->Outbpp;
4221 // TODO: Need some other way to handle this nonsense
4222 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4223 }
4224 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4225 v->Outbpp = TruncToValidBPP(
4226 (1.0 - v->Downspreading / 100.0) * 8100,
4227 v->OutputLinkDPLanes[k],
4228 v->HTotal[k],
4229 v->HActive[k],
4230 v->PixelClockBackEnd[k],
4231 v->ForcedOutputLinkBPP[k],
4232 v->LinkDSCEnable,
4233 v->Output[k],
4234 v->OutputFormat[k],
4235 v->DSCInputBitPerComponent[k],
4236 v->NumberOfDSCSlices[k],
4237 v->AudioSampleRate[k],
4238 v->AudioSampleLayout[k],
4239 v->ODMCombineEnablePerState[i][k]);
4240 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4241 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4242 v->RequiresDSC[i][k] = true;
4243 v->LinkDSCEnable = true;
4244 if (v->Output[k] == dm_dp) {
4245 v->RequiresFEC[i][k] = true;
4246 }
4247 v->Outbpp = TruncToValidBPP(
4248 (1.0 - v->Downspreading / 100.0) * 8100,
4249 v->OutputLinkDPLanes[k],
4250 v->HTotal[k],
4251 v->HActive[k],
4252 v->PixelClockBackEnd[k],
4253 v->ForcedOutputLinkBPP[k],
4254 v->LinkDSCEnable,
4255 v->Output[k],
4256 v->OutputFormat[k],
4257 v->DSCInputBitPerComponent[k],
4258 v->NumberOfDSCSlices[k],
4259 v->AudioSampleRate[k],
4260 v->AudioSampleLayout[k],
4261 v->ODMCombineEnablePerState[i][k]);
4262 }
4263 v->OutputBppPerState[i][k] = v->Outbpp;
4264 // TODO: Need some other way to handle this nonsense
4265 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4266 }
4267 }
4268 } else {
4269 v->OutputBppPerState[i][k] = 0;
4270 }
4271 }
4272 }
4273 for (i = 0; i < v->soc.num_states; i++) {
4274 v->DIOSupport[i] = true;
4275 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4276 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4277 && (v->OutputBppPerState[i][k] == 0
4278 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4279 v->DIOSupport[i] = false;
4280 }
4281 }
4282 }
4283
4284 for (i = 0; i < v->soc.num_states; ++i) {
4285 v->ODMCombine4To1SupportCheckOK[i] = true;
4286 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4287 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4288 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4289 v->ODMCombine4To1SupportCheckOK[i] = false;
4290 }
4291 }
4292 }
4293
4294 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4295
4296 for (i = 0; i < v->soc.num_states; i++) {
4297 v->NotEnoughDSCUnits[i] = false;
4298 v->TotalDSCUnitsRequired = 0.0;
4299 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4300 if (v->RequiresDSC[i][k] == true) {
4301 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4302 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4303 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4304 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4305 } else {
4306 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4307 }
4308 }
4309 }
4310 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4311 v->NotEnoughDSCUnits[i] = true;
4312 }
4313 }
4314 /*DSC Delay per state*/
4315
4316 for (i = 0; i < v->soc.num_states; i++) {
4317 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4318 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4319 v->BPP = 0.0;
4320 } else {
4321 v->BPP = v->OutputBppPerState[i][k];
4322 }
4323 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4324 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4325 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4326 v->DSCInputBitPerComponent[k],
4327 v->BPP,
4328 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4329 v->NumberOfDSCSlices[k],
4330 v->OutputFormat[k],
4331 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4332 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4333 v->DSCDelayPerState[i][k] = 2.0
4334 * dscceComputeDelay(
4335 v->DSCInputBitPerComponent[k],
4336 v->BPP,
4337 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4338 v->NumberOfDSCSlices[k] / 2,
4339 v->OutputFormat[k],
4340 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4341 } else {
4342 v->DSCDelayPerState[i][k] = 4.0
4343 * (dscceComputeDelay(
4344 v->DSCInputBitPerComponent[k],
4345 v->BPP,
4346 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4347 v->NumberOfDSCSlices[k] / 4,
4348 v->OutputFormat[k],
4349 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4350 }
4351 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4352 } else {
4353 v->DSCDelayPerState[i][k] = 0.0;
4354 }
4355 }
4356 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4357 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4358 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4359 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4360 }
4361 }
4362 }
4363 }
4364
4365 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4366 //
4367 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4368 for (j = 0; j <= 1; ++j) {
4369 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4370 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4371 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4372 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4373 }
4374
4375 CalculateSwathAndDETConfiguration(
4376 false,
4377 v->NumberOfActivePlanes,
4378 v->DETBufferSizeInKByte[0],
4379 v->MaximumSwathWidthLuma,
4380 v->MaximumSwathWidthChroma,
4381 v->SourceScan,
4382 v->SourcePixelFormat,
4383 v->SurfaceTiling,
4384 v->ViewportWidth,
4385 v->ViewportHeight,
4386 v->SurfaceWidthY,
4387 v->SurfaceWidthC,
4388 v->SurfaceHeightY,
4389 v->SurfaceHeightC,
4390 v->Read256BlockHeightY,
4391 v->Read256BlockHeightC,
4392 v->Read256BlockWidthY,
4393 v->Read256BlockWidthC,
4394 v->ODMCombineEnableThisState,
4395 v->BlendingAndTiming,
4396 v->BytePerPixelY,
4397 v->BytePerPixelC,
4398 v->BytePerPixelInDETY,
4399 v->BytePerPixelInDETC,
4400 v->HActive,
4401 v->HRatio,
4402 v->HRatioChroma,
4403 v->NoOfDPPThisState,
4404 v->swath_width_luma_ub_this_state,
4405 v->swath_width_chroma_ub_this_state,
4406 v->SwathWidthYThisState,
4407 v->SwathWidthCThisState,
4408 v->SwathHeightYThisState,
4409 v->SwathHeightCThisState,
4410 v->DETBufferSizeYThisState,
4411 v->DETBufferSizeCThisState,
4412 v->dummystring,
4413 &v->ViewportSizeSupport[i][j]);
4414
4415 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4416 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4417 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4418 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4419 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4420 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4421 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4422 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4423 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4424 }
4425
4426 }
4427 }
4428 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4429 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4430 }
4431
4432 for (i = 0; i < v->soc.num_states; i++) {
4433 for (j = 0; j < 2; j++) {
4434 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4435 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4436 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4437 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4438 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4439 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4440 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4441 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4442 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4443 }
4444
4445 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4446 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4447 if (v->DCCEnable[k] == true) {
4448 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4449 }
4450 }
4451
4452 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4453 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4454 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4455
4456 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4457 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4458 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4459 } else {
4460 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4461 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4462 }
4463
4464 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4465 mode_lib,
4466 v->DCCEnable[k],
4467 v->Read256BlockHeightC[k],
4468 v->Read256BlockWidthY[k],
4469 v->SourcePixelFormat[k],
4470 v->SurfaceTiling[k],
4471 v->BytePerPixelC[k],
4472 v->SourceScan[k],
4473 v->SwathWidthCThisState[k],
4474 v->ViewportHeightChroma[k],
4475 v->GPUVMEnable,
4476 v->HostVMEnable,
4477 v->HostVMMaxNonCachedPageTableLevels,
4478 v->GPUVMMinPageSize,
4479 v->HostVMMinPageSize,
4480 v->PTEBufferSizeInRequestsForChroma,
4481 v->PitchC[k],
4482 0.0,
4483 &v->MacroTileWidthC[k],
4484 &v->MetaRowBytesC,
4485 &v->DPTEBytesPerRowC,
4486 &v->PTEBufferSizeNotExceededC[i][j][k],
4487 &v->dummyinteger7,
4488 &v->dpte_row_height_chroma[k],
4489 &v->dummyinteger28,
4490 &v->dummyinteger26,
4491 &v->dummyinteger23,
4492 &v->meta_row_height_chroma[k],
4493 &v->dummyinteger8,
4494 &v->dummyinteger9,
4495 &v->dummyinteger19,
4496 &v->dummyinteger20,
4497 &v->dummyinteger17,
4498 &v->dummyinteger10,
4499 &v->dummyinteger11);
4500
4501 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4502 mode_lib,
4503 v->VRatioChroma[k],
4504 v->VTAPsChroma[k],
4505 v->Interlace[k],
4506 v->ProgressiveToInterlaceUnitInOPP,
4507 v->SwathHeightCThisState[k],
4508 v->ViewportYStartC[k],
4509 &v->PrefillC[k],
4510 &v->MaxNumSwC[k]);
4511 } else {
4512 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4513 v->PTEBufferSizeInRequestsForChroma = 0;
4514 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4515 v->MetaRowBytesC = 0.0;
4516 v->DPTEBytesPerRowC = 0.0;
4517 v->PrefetchLinesC[i][j][k] = 0.0;
4518 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4519 }
4520 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4521 mode_lib,
4522 v->DCCEnable[k],
4523 v->Read256BlockHeightY[k],
4524 v->Read256BlockWidthY[k],
4525 v->SourcePixelFormat[k],
4526 v->SurfaceTiling[k],
4527 v->BytePerPixelY[k],
4528 v->SourceScan[k],
4529 v->SwathWidthYThisState[k],
4530 v->ViewportHeight[k],
4531 v->GPUVMEnable,
4532 v->HostVMEnable,
4533 v->HostVMMaxNonCachedPageTableLevels,
4534 v->GPUVMMinPageSize,
4535 v->HostVMMinPageSize,
4536 v->PTEBufferSizeInRequestsForLuma,
4537 v->PitchY[k],
4538 v->DCCMetaPitchY[k],
4539 &v->MacroTileWidthY[k],
4540 &v->MetaRowBytesY,
4541 &v->DPTEBytesPerRowY,
4542 &v->PTEBufferSizeNotExceededY[i][j][k],
4543 v->dummyinteger4,
4544 &v->dpte_row_height[k],
4545 &v->dummyinteger29,
4546 &v->dummyinteger27,
4547 &v->dummyinteger24,
4548 &v->meta_row_height[k],
4549 &v->dummyinteger25,
4550 &v->dpte_group_bytes[k],
4551 &v->dummyinteger21,
4552 &v->dummyinteger22,
4553 &v->dummyinteger18,
4554 &v->dummyinteger5,
4555 &v->dummyinteger6);
4556 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4557 mode_lib,
4558 v->VRatio[k],
4559 v->vtaps[k],
4560 v->Interlace[k],
4561 v->ProgressiveToInterlaceUnitInOPP,
4562 v->SwathHeightYThisState[k],
4563 v->ViewportYStartY[k],
4564 &v->PrefillY[k],
4565 &v->MaxNumSwY[k]);
4566 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4567 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4568 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4569
4570 CalculateRowBandwidth(
4571 v->GPUVMEnable,
4572 v->SourcePixelFormat[k],
4573 v->VRatio[k],
4574 v->VRatioChroma[k],
4575 v->DCCEnable[k],
4576 v->HTotal[k] / v->PixelClock[k],
4577 v->MetaRowBytesY,
4578 v->MetaRowBytesC,
4579 v->meta_row_height[k],
4580 v->meta_row_height_chroma[k],
4581 v->DPTEBytesPerRowY,
4582 v->DPTEBytesPerRowC,
4583 v->dpte_row_height[k],
4584 v->dpte_row_height_chroma[k],
4585 &v->meta_row_bandwidth[i][j][k],
4586 &v->dpte_row_bandwidth[i][j][k]);
4587 }
4588 v->UrgLatency[i] = CalculateUrgentLatency(
4589 v->UrgentLatencyPixelDataOnly,
4590 v->UrgentLatencyPixelMixedWithVMData,
4591 v->UrgentLatencyVMDataOnly,
4592 v->DoUrgentLatencyAdjustment,
4593 v->UrgentLatencyAdjustmentFabricClockComponent,
4594 v->UrgentLatencyAdjustmentFabricClockReference,
4595 v->FabricClockPerState[i]);
4596
4597 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4598 CalculateUrgentBurstFactor(
4599 v->swath_width_luma_ub_this_state[k],
4600 v->swath_width_chroma_ub_this_state[k],
4601 v->DETBufferSizeInKByte[0],
4602 v->SwathHeightYThisState[k],
4603 v->SwathHeightCThisState[k],
4604 v->HTotal[k] / v->PixelClock[k],
4605 v->UrgLatency[i],
4606 v->CursorBufferSize,
4607 v->CursorWidth[k][0],
4608 v->CursorBPP[k][0],
4609 v->VRatio[k],
4610 v->VRatioChroma[k],
4611 v->BytePerPixelInDETY[k],
4612 v->BytePerPixelInDETC[k],
4613 v->DETBufferSizeYThisState[k],
4614 v->DETBufferSizeCThisState[k],
4615 &v->UrgentBurstFactorCursor[k],
4616 &v->UrgentBurstFactorLuma[k],
4617 &v->UrgentBurstFactorChroma[k],
4618 &NotUrgentLatencyHiding[k]);
4619 }
4620
4621 v->NotUrgentLatencyHiding[i][j] = false;
4622 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4623 if (NotUrgentLatencyHiding[k]) {
4624 v->NotUrgentLatencyHiding[i][j] = true;
4625 }
4626 }
4627
4628 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4629 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4630 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4631 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4632 }
4633
4634 v->TotalVActivePixelBandwidth[i][j] = 0;
4635 v->TotalVActiveCursorBandwidth[i][j] = 0;
4636 v->TotalMetaRowBandwidth[i][j] = 0;
4637 v->TotalDPTERowBandwidth[i][j] = 0;
4638 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4639 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4640 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4641 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4642 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4643 }
4644
4645 CalculateDCFCLKDeepSleep(
4646 mode_lib,
4647 v->NumberOfActivePlanes,
4648 v->BytePerPixelY,
4649 v->BytePerPixelC,
4650 v->VRatio,
4651 v->VRatioChroma,
4652 v->SwathWidthYThisState,
4653 v->SwathWidthCThisState,
4654 v->NoOfDPPThisState,
4655 v->HRatio,
4656 v->HRatioChroma,
4657 v->PixelClock,
4658 v->PSCL_FACTOR,
4659 v->PSCL_FACTOR_CHROMA,
4660 v->RequiredDPPCLKThisState,
4661 v->ReadBandwidthLuma,
4662 v->ReadBandwidthChroma,
4663 v->ReturnBusWidth,
4664 &v->ProjectedDCFCLKDeepSleep[i][j]);
4665 }
4666 }
4667
4668 //Calculate Return BW
4669
4670 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4671 for (j = 0; j <= 1; ++j) {
4672 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4673 if (v->BlendingAndTiming[k] == k) {
4674 if (v->WritebackEnable[k] == true) {
4675 v->WritebackDelayTime[k] = v->WritebackLatency
4676 + CalculateWriteBackDelay(
4677 v->WritebackPixelFormat[k],
4678 v->WritebackHRatio[k],
4679 v->WritebackVRatio[k],
4680 v->WritebackVTaps[k],
4681 v->WritebackDestinationWidth[k],
4682 v->WritebackDestinationHeight[k],
4683 v->WritebackSourceHeight[k],
4684 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4685 } else {
4686 v->WritebackDelayTime[k] = 0.0;
4687 }
4688 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4689 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4690 v->WritebackDelayTime[k] = dml_max(
4691 v->WritebackDelayTime[k],
4692 v->WritebackLatency
4693 + CalculateWriteBackDelay(
4694 v->WritebackPixelFormat[m],
4695 v->WritebackHRatio[m],
4696 v->WritebackVRatio[m],
4697 v->WritebackVTaps[m],
4698 v->WritebackDestinationWidth[m],
4699 v->WritebackDestinationHeight[m],
4700 v->WritebackSourceHeight[m],
4701 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4702 }
4703 }
4704 }
4705 }
4706 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4707 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4708 if (v->BlendingAndTiming[k] == m) {
4709 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4710 }
4711 }
4712 }
4713 v->MaxMaxVStartup[i][j] = 0;
4714 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4715 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4716 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4717 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4718 }
4719 }
4720 }
4721
4722 ReorderingBytes = v->NumberOfChannels
4723 * dml_max3(
4724 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4725 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4726 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4727 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4728
4729 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4730 for (j = 0; j <= 1; ++j) {
4731 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4732 }
4733 }
4734
4735 if (v->UseMinimumRequiredDCFCLK == true) {
4736 UseMinimumDCFCLK(
4737 mode_lib,
4738 v->MaxInterDCNTileRepeaters,
4739 MaxPrefetchMode,
4740 v->FinalDRAMClockChangeLatency,
4741 v->SREnterPlusExitTime,
4742 v->ReturnBusWidth,
4743 v->RoundTripPingLatencyCycles,
4744 ReorderingBytes,
4745 v->PixelChunkSizeInKByte,
4746 v->MetaChunkSize,
4747 v->GPUVMEnable,
4748 v->GPUVMMaxPageTableLevels,
4749 v->HostVMEnable,
4750 v->NumberOfActivePlanes,
4751 v->HostVMMinPageSize,
4752 v->HostVMMaxNonCachedPageTableLevels,
4753 v->DynamicMetadataVMEnabled,
4754 v->ImmediateFlipRequirement[0],
4755 v->ProgressiveToInterlaceUnitInOPP,
4756 v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
4757 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4758 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4759 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
4760 v->VTotal,
4761 v->VActive,
4762 v->DynamicMetadataTransmittedBytes,
4763 v->DynamicMetadataLinesBeforeActiveRequired,
4764 v->Interlace,
4765 v->RequiredDPPCLK,
4766 v->RequiredDISPCLK,
4767 v->UrgLatency,
4768 v->NoOfDPP,
4769 v->ProjectedDCFCLKDeepSleep,
4770 v->MaximumVStartup,
4771 v->TotalVActivePixelBandwidth,
4772 v->TotalVActiveCursorBandwidth,
4773 v->TotalMetaRowBandwidth,
4774 v->TotalDPTERowBandwidth,
4775 v->TotalNumberOfActiveDPP,
4776 v->TotalNumberOfDCCActiveDPP,
4777 v->dpte_group_bytes,
4778 v->PrefetchLinesY,
4779 v->PrefetchLinesC,
4780 v->swath_width_luma_ub_all_states,
4781 v->swath_width_chroma_ub_all_states,
4782 v->BytePerPixelY,
4783 v->BytePerPixelC,
4784 v->HTotal,
4785 v->PixelClock,
4786 v->PDEAndMetaPTEBytesPerFrame,
4787 v->DPTEBytesPerRow,
4788 v->MetaRowBytes,
4789 v->DynamicMetadataEnable,
4790 v->VActivePixelBandwidth,
4791 v->VActiveCursorBandwidth,
4792 v->ReadBandwidthLuma,
4793 v->ReadBandwidthChroma,
4794 v->DCFCLKPerState,
4795 v->DCFCLKState);
4796
4797 if (v->ClampMinDCFCLK) {
4798 /* Clamp calculated values to actual minimum */
4799 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4800 for (j = 0; j <= 1; ++j) {
4801 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4802 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4803 }
4804 }
4805 }
4806 }
4807 }
4808
4809 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4810 for (j = 0; j <= 1; ++j) {
4811 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4812 v->ReturnBusWidth * v->DCFCLKState[i][j],
4813 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4814 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4815 if (v->HostVMEnable != true) {
4816 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4817 / 100;
4818 } else {
4819 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4820 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4821 }
4822 }
4823 }
4824
4825 //Re-ordering Buffer Support Check
4826
4827 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4828 for (j = 0; j <= 1; ++j) {
4829 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4830 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4831 v->ROBSupport[i][j] = true;
4832 } else {
4833 v->ROBSupport[i][j] = false;
4834 }
4835 }
4836 }
4837
4838 //Vertical Active BW support check
4839
4840 MaxTotalVActiveRDBandwidth = 0;
4841 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4842 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4843 }
4844
4845 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4846 for (j = 0; j <= 1; ++j) {
4847 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4848 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4849 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4850 / 100);
4851 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4852 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4853 } else {
4854 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4855 }
4856 }
4857 }
4858
4859 //Prefetch Check
4860
4861 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4862 for (j = 0; j <= 1; ++j) {
4863 int NextPrefetchModeState = MinPrefetchMode;
4864
4865 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4866
4867 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4868 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4869 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4870 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4871 }
4872
4873 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4874 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4875 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4876 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4877 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4878 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4879 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4880 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4881 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4882 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4883 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4884 }
4885
4886 v->ExtraLatency = CalculateExtraLatency(
4887 v->RoundTripPingLatencyCycles,
4888 ReorderingBytes,
4889 v->DCFCLKState[i][j],
4890 v->TotalNumberOfActiveDPP[i][j],
4891 v->PixelChunkSizeInKByte,
4892 v->TotalNumberOfDCCActiveDPP[i][j],
4893 v->MetaChunkSize,
4894 v->ReturnBWPerState[i][j],
4895 v->GPUVMEnable,
4896 v->HostVMEnable,
4897 v->NumberOfActivePlanes,
4898 v->NoOfDPPThisState,
4899 v->dpte_group_bytes,
4900 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4901 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4902 v->HostVMMinPageSize,
4903 v->HostVMMaxNonCachedPageTableLevels);
4904
4905 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4906 do {
4907 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4908 v->MaxVStartup = v->NextMaxVStartup;
4909
4910 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4911
4912 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4913 Pipe myPipe = { 0 };
4914
4915 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4916 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4917 myPipe.PixelClock = v->PixelClock[k];
4918 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4919 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4920 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4921 myPipe.SourceScan = v->SourceScan[k];
4922 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4923 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4924 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4925 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4926 myPipe.InterlaceEnable = v->Interlace[k];
4927 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4928 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4929 myPipe.HTotal = v->HTotal[k];
4930 myPipe.DCCEnable = v->DCCEnable[k];
4931 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4932
4933 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4934 mode_lib,
4935 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4936 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4937 &myPipe,
4938 v->DSCDelayPerState[i][k],
4939 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4940 v->DPPCLKDelaySCL,
4941 v->DPPCLKDelaySCLLBOnly,
4942 v->DPPCLKDelayCNVCCursor,
4943 v->DISPCLKDelaySubtotal,
4944 v->SwathWidthYThisState[k] / v->HRatio[k],
4945 v->OutputFormat[k],
4946 v->MaxInterDCNTileRepeaters,
4947 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4948 v->MaximumVStartup[i][j][k],
4949 v->GPUVMMaxPageTableLevels,
4950 v->GPUVMEnable,
4951 v->HostVMEnable,
4952 v->HostVMMaxNonCachedPageTableLevels,
4953 v->HostVMMinPageSize,
4954 v->DynamicMetadataEnable[k],
4955 v->DynamicMetadataVMEnabled,
4956 v->DynamicMetadataLinesBeforeActiveRequired[k],
4957 v->DynamicMetadataTransmittedBytes[k],
4958 v->UrgLatency[i],
4959 v->ExtraLatency,
4960 v->TimeCalc,
4961 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4962 v->MetaRowBytes[i][j][k],
4963 v->DPTEBytesPerRow[i][j][k],
4964 v->PrefetchLinesY[i][j][k],
4965 v->SwathWidthYThisState[k],
4966 v->BytePerPixelY[k],
4967 v->PrefillY[k],
4968 v->MaxNumSwY[k],
4969 v->PrefetchLinesC[i][j][k],
4970 v->SwathWidthCThisState[k],
4971 v->BytePerPixelC[k],
4972 v->PrefillC[k],
4973 v->MaxNumSwC[k],
4974 v->swath_width_luma_ub_this_state[k],
4975 v->swath_width_chroma_ub_this_state[k],
4976 v->SwathHeightYThisState[k],
4977 v->SwathHeightCThisState[k],
4978 v->TWait,
4979 v->ProgressiveToInterlaceUnitInOPP,
4980 &v->DSTXAfterScaler[k],
4981 &v->DSTYAfterScaler[k],
4982 &v->LineTimesForPrefetch[k],
4983 &v->PrefetchBW[k],
4984 &v->LinesForMetaPTE[k],
4985 &v->LinesForMetaAndDPTERow[k],
4986 &v->VRatioPreY[i][j][k],
4987 &v->VRatioPreC[i][j][k],
4988 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4989 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4990 &v->NoTimeForDynamicMetadata[i][j][k],
4991 &v->Tno_bw[k],
4992 &v->prefetch_vmrow_bw[k],
4993 &v->Tdmdl_vm[k],
4994 &v->Tdmdl[k],
4995 &v->VUpdateOffsetPix[k],
4996 &v->VUpdateWidthPix[k],
4997 &v->VReadyOffsetPix[k]);
4998 }
4999
5000 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5001 CalculateUrgentBurstFactor(
5002 v->swath_width_luma_ub_this_state[k],
5003 v->swath_width_chroma_ub_this_state[k],
5004 v->DETBufferSizeInKByte[0],
5005 v->SwathHeightYThisState[k],
5006 v->SwathHeightCThisState[k],
5007 v->HTotal[k] / v->PixelClock[k],
5008 v->UrgentLatency,
5009 v->CursorBufferSize,
5010 v->CursorWidth[k][0],
5011 v->CursorBPP[k][0],
5012 v->VRatioPreY[i][j][k],
5013 v->VRatioPreC[i][j][k],
5014 v->BytePerPixelInDETY[k],
5015 v->BytePerPixelInDETC[k],
5016 v->DETBufferSizeYThisState[k],
5017 v->DETBufferSizeCThisState[k],
5018 &v->UrgentBurstFactorCursorPre[k],
5019 &v->UrgentBurstFactorLumaPre[k],
5020 &v->UrgentBurstFactorChroma[k],
5021 &v->NoUrgentLatencyHidingPre[k]);
5022 }
5023
5024 v->MaximumReadBandwidthWithPrefetch = 0.0;
5025 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5026 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
5027 * v->VRatioPreY[i][j][k];
5028
5029 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
5030 + dml_max4(
5031 v->VActivePixelBandwidth[i][j][k],
5032 v->VActiveCursorBandwidth[i][j][k]
5033 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
5034 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5035 v->NoOfDPP[i][j][k]
5036 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5037 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5038 * v->UrgentBurstFactorChromaPre[k])
5039 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5040 }
5041
5042 v->NotEnoughUrgentLatencyHidingPre = false;
5043 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5044 if (v->NoUrgentLatencyHidingPre[k] == true) {
5045 v->NotEnoughUrgentLatencyHidingPre = true;
5046 }
5047 }
5048
5049 v->PrefetchSupported[i][j] = true;
5050 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5051 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5052 v->PrefetchSupported[i][j] = false;
5053 }
5054 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5055 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5056 || v->NoTimeForPrefetch[i][j][k] == true) {
5057 v->PrefetchSupported[i][j] = false;
5058 }
5059 }
5060
5061 v->DynamicMetadataSupported[i][j] = true;
5062 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5063 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5064 v->DynamicMetadataSupported[i][j] = false;
5065 }
5066 }
5067
5068 v->VRatioInPrefetchSupported[i][j] = true;
5069 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5070 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5071 v->VRatioInPrefetchSupported[i][j] = false;
5072 }
5073 }
5074 v->AnyLinesForVMOrRowTooLarge = false;
5075 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5076 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5077 v->AnyLinesForVMOrRowTooLarge = true;
5078 }
5079 }
5080
5081 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5082 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5083 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5084 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5085 - dml_max(
5086 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5087 v->NoOfDPP[i][j][k]
5088 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
5089 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5090 * v->UrgentBurstFactorChromaPre[k])
5091 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5092 }
5093 v->TotImmediateFlipBytes = 0.0;
5094 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5095 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k]
5096 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k];
5097 }
5098
5099 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5100 CalculateFlipSchedule(
5101 mode_lib,
5102 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
5103 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
5104 v->ExtraLatency,
5105 v->UrgLatency[i],
5106 v->GPUVMMaxPageTableLevels,
5107 v->HostVMEnable,
5108 v->HostVMMaxNonCachedPageTableLevels,
5109 v->GPUVMEnable,
5110 v->HostVMMinPageSize,
5111 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5112 v->MetaRowBytes[i][j][k],
5113 v->DPTEBytesPerRow[i][j][k],
5114 v->BandwidthAvailableForImmediateFlip,
5115 v->TotImmediateFlipBytes,
5116 v->SourcePixelFormat[k],
5117 v->HTotal[k] / v->PixelClock[k],
5118 v->VRatio[k],
5119 v->VRatioChroma[k],
5120 v->Tno_bw[k],
5121 v->DCCEnable[k],
5122 v->dpte_row_height[k],
5123 v->meta_row_height[k],
5124 v->dpte_row_height_chroma[k],
5125 v->meta_row_height_chroma[k],
5126 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5127 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5128 &v->final_flip_bw[k],
5129 &v->ImmediateFlipSupportedForPipe[k]);
5130 }
5131 v->total_dcn_read_bw_with_flip = 0.0;
5132 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5133 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5134 + dml_max3(
5135 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5136 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5137 + v->VActiveCursorBandwidth[i][j][k],
5138 v->NoOfDPP[i][j][k]
5139 * (v->final_flip_bw[k]
5140 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5141 * v->UrgentBurstFactorLumaPre[k]
5142 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5143 * v->UrgentBurstFactorChromaPre[k])
5144 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5145 }
5146 v->ImmediateFlipSupportedForState[i][j] = true;
5147 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5148 v->ImmediateFlipSupportedForState[i][j] = false;
5149 }
5150 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5151 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5152 v->ImmediateFlipSupportedForState[i][j] = false;
5153 }
5154 }
5155 } else {
5156 v->ImmediateFlipSupportedForState[i][j] = false;
5157 }
5158 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5159 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5160 NextPrefetchModeState = NextPrefetchModeState + 1;
5161 } else {
5162 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5163 }
5164 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5165 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5166 || v->ImmediateFlipSupportedForState[i][j] == true))
5167 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5168
5169 CalculateWatermarksAndDRAMSpeedChangeSupport(
5170 mode_lib,
5171 v->PrefetchModePerState[i][j],
5172 v->NumberOfActivePlanes,
5173 v->MaxLineBufferLines,
5174 v->LineBufferSize,
5175 v->DPPOutputBufferPixels,
5176 v->DETBufferSizeInKByte[0],
5177 v->WritebackInterfaceBufferSize,
5178 v->DCFCLKState[i][j],
5179 v->ReturnBWPerState[i][j],
5180 v->GPUVMEnable,
5181 v->dpte_group_bytes,
5182 v->MetaChunkSize,
5183 v->UrgLatency[i],
5184 v->ExtraLatency,
5185 v->WritebackLatency,
5186 v->WritebackChunkSize,
5187 v->SOCCLKPerState[i],
5188 v->FinalDRAMClockChangeLatency,
5189 v->SRExitTime,
5190 v->SREnterPlusExitTime,
5191 v->ProjectedDCFCLKDeepSleep[i][j],
5192 v->NoOfDPPThisState,
5193 v->DCCEnable,
5194 v->RequiredDPPCLKThisState,
5195 v->DETBufferSizeYThisState,
5196 v->DETBufferSizeCThisState,
5197 v->SwathHeightYThisState,
5198 v->SwathHeightCThisState,
5199 v->LBBitPerPixel,
5200 v->SwathWidthYThisState,
5201 v->SwathWidthCThisState,
5202 v->HRatio,
5203 v->HRatioChroma,
5204 v->vtaps,
5205 v->VTAPsChroma,
5206 v->VRatio,
5207 v->VRatioChroma,
5208 v->HTotal,
5209 v->PixelClock,
5210 v->BlendingAndTiming,
5211 v->BytePerPixelInDETY,
5212 v->BytePerPixelInDETC,
5213 v->DSTXAfterScaler,
5214 v->DSTYAfterScaler,
5215 v->WritebackEnable,
5216 v->WritebackPixelFormat,
5217 v->WritebackDestinationWidth,
5218 v->WritebackDestinationHeight,
5219 v->WritebackSourceHeight,
5220 &v->DRAMClockChangeSupport[i][j],
5221 &v->UrgentWatermark,
5222 &v->WritebackUrgentWatermark,
5223 &v->DRAMClockChangeWatermark,
5224 &v->WritebackDRAMClockChangeWatermark,
5225 &v->StutterExitWatermark,
5226 &v->StutterEnterPlusExitWatermark,
5227 &v->MinActiveDRAMClockChangeLatencySupported);
5228 }
5229 }
5230
5231 /*PTE Buffer Size Check*/
5232
5233 for (i = 0; i < v->soc.num_states; i++) {
5234 for (j = 0; j < 2; j++) {
5235 v->PTEBufferSizeNotExceeded[i][j] = true;
5236 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5237 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5238 v->PTEBufferSizeNotExceeded[i][j] = false;
5239 }
5240 }
5241 }
5242 }
5243 /*Cursor Support Check*/
5244
5245 v->CursorSupport = true;
5246 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5247 if (v->CursorWidth[k][0] > 0.0) {
5248 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5249 v->CursorSupport = false;
5250 }
5251 }
5252 }
5253 /*Valid Pitch Check*/
5254
5255 v->PitchSupport = true;
5256 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5257 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5258 if (v->DCCEnable[k] == true) {
5259 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5260 } else {
5261 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5262 }
5263 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5264 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5265 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5266 if (v->DCCEnable[k] == true) {
5267 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5268 } else {
5269 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5270 }
5271 } else {
5272 v->AlignedCPitch[k] = v->PitchC[k];
5273 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5274 }
5275 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5276 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5277 v->PitchSupport = false;
5278 }
5279 }
5280
5281 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5282 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5283 ViewportExceedsSurface = true;
5284
5285 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5286 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5287 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5288 ViewportExceedsSurface = true;
5289 }
5290 }
5291 }
5292 /*Mode Support, Voltage State and SOC Configuration*/
5293
5294 for (i = v->soc.num_states - 1; i >= 0; i--) {
5295 for (j = 0; j < 2; j++) {
5296 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5297 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5298 && v->NotEnoughDSCUnits[i] == 0
5299 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5300 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5301 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5302 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5303 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5304 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5305 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5306 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5307 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5308 v->ModeSupport[i][j] = true;
5309 } else {
5310 v->ModeSupport[i][j] = false;
5311 }
5312 }
5313 }
5314 {
5315 unsigned int MaximumMPCCombine = 0;
5316 for (i = v->soc.num_states; i >= 0; i--) {
5317 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5318 v->VoltageLevel = i;
5319 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5320 if (v->ModeSupport[i][1] == true) {
5321 MaximumMPCCombine = 1;
5322 } else {
5323 MaximumMPCCombine = 0;
5324 }
5325 }
5326 }
5327 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5328 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5329 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5330 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5331 }
5332 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5333 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5334 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5335 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5336 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5337 v->maxMpcComb = MaximumMPCCombine;
5338 }
5339 }
5340
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceBufferSize,double DCFCLK,double ReturnBW,bool GPUVMEnable,unsigned int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int LBBitPerPixel[],double SwathWidthY[],double SwathWidthC[],double HRatio[],double HRatioChroma[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],double VRatioChroma[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5341 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5342 struct display_mode_lib *mode_lib,
5343 unsigned int PrefetchMode,
5344 unsigned int NumberOfActivePlanes,
5345 unsigned int MaxLineBufferLines,
5346 unsigned int LineBufferSize,
5347 unsigned int DPPOutputBufferPixels,
5348 unsigned int DETBufferSizeInKByte,
5349 unsigned int WritebackInterfaceBufferSize,
5350 double DCFCLK,
5351 double ReturnBW,
5352 bool GPUVMEnable,
5353 unsigned int dpte_group_bytes[],
5354 unsigned int MetaChunkSize,
5355 double UrgentLatency,
5356 double ExtraLatency,
5357 double WritebackLatency,
5358 double WritebackChunkSize,
5359 double SOCCLK,
5360 double DRAMClockChangeLatency,
5361 double SRExitTime,
5362 double SREnterPlusExitTime,
5363 double DCFCLKDeepSleep,
5364 unsigned int DPPPerPlane[],
5365 bool DCCEnable[],
5366 double DPPCLK[],
5367 unsigned int DETBufferSizeY[],
5368 unsigned int DETBufferSizeC[],
5369 unsigned int SwathHeightY[],
5370 unsigned int SwathHeightC[],
5371 unsigned int LBBitPerPixel[],
5372 double SwathWidthY[],
5373 double SwathWidthC[],
5374 double HRatio[],
5375 double HRatioChroma[],
5376 unsigned int vtaps[],
5377 unsigned int VTAPsChroma[],
5378 double VRatio[],
5379 double VRatioChroma[],
5380 unsigned int HTotal[],
5381 double PixelClock[],
5382 unsigned int BlendingAndTiming[],
5383 double BytePerPixelDETY[],
5384 double BytePerPixelDETC[],
5385 double DSTXAfterScaler[],
5386 double DSTYAfterScaler[],
5387 bool WritebackEnable[],
5388 enum source_format_class WritebackPixelFormat[],
5389 double WritebackDestinationWidth[],
5390 double WritebackDestinationHeight[],
5391 double WritebackSourceHeight[],
5392 enum clock_change_support *DRAMClockChangeSupport,
5393 double *UrgentWatermark,
5394 double *WritebackUrgentWatermark,
5395 double *DRAMClockChangeWatermark,
5396 double *WritebackDRAMClockChangeWatermark,
5397 double *StutterExitWatermark,
5398 double *StutterEnterPlusExitWatermark,
5399 double *MinActiveDRAMClockChangeLatencySupported)
5400 {
5401 double EffectiveLBLatencyHidingY = 0;
5402 double EffectiveLBLatencyHidingC = 0;
5403 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5404 double LinesInDETC = 0;
5405 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5406 unsigned int LinesInDETCRoundedDownToSwath = 0;
5407 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5408 double FullDETBufferingTimeC = 0;
5409 double ActiveDRAMClockChangeLatencyMarginY = 0;
5410 double ActiveDRAMClockChangeLatencyMarginC = 0;
5411 double WritebackDRAMClockChangeLatencyMargin = 0;
5412 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5413 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5414 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5415 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5416 double WritebackDRAMClockChangeLatencyHiding = 0;
5417 unsigned int k, j;
5418
5419 mode_lib->vba.TotalActiveDPP = 0;
5420 mode_lib->vba.TotalDCCActiveDPP = 0;
5421 for (k = 0; k < NumberOfActivePlanes; ++k) {
5422 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5423 if (DCCEnable[k] == true) {
5424 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5425 }
5426 }
5427
5428 *UrgentWatermark = UrgentLatency + ExtraLatency;
5429
5430 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5431
5432 mode_lib->vba.TotalActiveWriteback = 0;
5433 for (k = 0; k < NumberOfActivePlanes; ++k) {
5434 if (WritebackEnable[k] == true) {
5435 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5436 }
5437 }
5438
5439 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5440 *WritebackUrgentWatermark = WritebackLatency;
5441 } else {
5442 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5443 }
5444
5445 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5446 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5447 } else {
5448 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5449 }
5450
5451 for (k = 0; k < NumberOfActivePlanes; ++k) {
5452
5453 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5454
5455 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5456
5457 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5458
5459 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5460
5461 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5462 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5463 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5464 if (BytePerPixelDETC[k] > 0) {
5465 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5466 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5467 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5468 } else {
5469 LinesInDETC = 0;
5470 FullDETBufferingTimeC = 999999;
5471 }
5472
5473 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5474
5475 if (NumberOfActivePlanes > 1) {
5476 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5477 }
5478
5479 if (BytePerPixelDETC[k] > 0) {
5480 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5481
5482 if (NumberOfActivePlanes > 1) {
5483 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5484 }
5485 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5486 } else {
5487 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5488 }
5489
5490 if (WritebackEnable[k] == true) {
5491
5492 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5493 if (WritebackPixelFormat[k] == dm_444_64) {
5494 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5495 }
5496 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5497 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5498 }
5499 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5500 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5501 }
5502 }
5503
5504 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5505 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5506 for (k = 0; k < NumberOfActivePlanes; ++k) {
5507 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5508 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5509 if (BlendingAndTiming[k] == k) {
5510 PlaneWithMinActiveDRAMClockChangeMargin = k;
5511 } else {
5512 for (j = 0; j < NumberOfActivePlanes; ++j) {
5513 if (BlendingAndTiming[k] == j) {
5514 PlaneWithMinActiveDRAMClockChangeMargin = j;
5515 }
5516 }
5517 }
5518 }
5519 }
5520
5521 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5522
5523 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5524 for (k = 0; k < NumberOfActivePlanes; ++k) {
5525 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5526 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5527 }
5528 }
5529
5530 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5531 for (k = 0; k < NumberOfActivePlanes; ++k) {
5532 if (BlendingAndTiming[k] == k) {
5533 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5534 }
5535 }
5536
5537 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5538 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5539 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5540 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5541 } else {
5542 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5543 }
5544
5545 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5546 for (k = 0; k < NumberOfActivePlanes; ++k) {
5547 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5548 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5549 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5550 }
5551 }
5552
5553 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5554 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5555
5556 }
5557
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5558 static void CalculateDCFCLKDeepSleep(
5559 struct display_mode_lib *mode_lib,
5560 unsigned int NumberOfActivePlanes,
5561 int BytePerPixelY[],
5562 int BytePerPixelC[],
5563 double VRatio[],
5564 double VRatioChroma[],
5565 double SwathWidthY[],
5566 double SwathWidthC[],
5567 unsigned int DPPPerPlane[],
5568 double HRatio[],
5569 double HRatioChroma[],
5570 double PixelClock[],
5571 double PSCL_THROUGHPUT[],
5572 double PSCL_THROUGHPUT_CHROMA[],
5573 double DPPCLK[],
5574 double ReadBandwidthLuma[],
5575 double ReadBandwidthChroma[],
5576 int ReturnBusWidth,
5577 double *DCFCLKDeepSleep)
5578 {
5579 double DisplayPipeLineDeliveryTimeLuma = 0;
5580 double DisplayPipeLineDeliveryTimeChroma = 0;
5581 unsigned int k;
5582 double ReadBandwidth = 0.0;
5583
5584 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5585 for (k = 0; k < NumberOfActivePlanes; ++k) {
5586
5587 if (VRatio[k] <= 1) {
5588 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5589 } else {
5590 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5591 }
5592 if (BytePerPixelC[k] == 0) {
5593 DisplayPipeLineDeliveryTimeChroma = 0;
5594 } else {
5595 if (VRatioChroma[k] <= 1) {
5596 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5597 } else {
5598 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5599 }
5600 }
5601
5602 if (BytePerPixelC[k] > 0) {
5603 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5604 } else {
5605 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5606 }
5607 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5608
5609 }
5610
5611 for (k = 0; k < NumberOfActivePlanes; ++k) {
5612 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5613 }
5614
5615 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5616
5617 for (k = 0; k < NumberOfActivePlanes; ++k) {
5618 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5619 }
5620 }
5621
/*
 * CalculateUrgentBurstFactor - compute the urgent burst factors of one plane
 * for its cursor buffer and for the luma and chroma parts of its DET buffer.
 *
 * For each buffer the time it can cover is
 *	BufferTime = lines held in the buffer * LineTime / vertical ratio
 * and the factor is BufferTime / (BufferTime - UrgentLatency).  When
 * BufferTime does not exceed UrgentLatency the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to true.  When the vertical ratio is
 * not greater than 0 the factor is set to 1.
 *
 * The cursor and luma calculations use VRatio; the chroma calculation uses
 * VRatioC.
 *
 * *NotEnoughUrgentLatencyHiding and *UrgentBurstFactorLuma are always written.
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0 and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; otherwise they are
 * left untouched.
 *
 * DETBufferSizeInKByte is not used; it is kept so that the signature stays
 * unchanged for callers.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma = 0;
	double LinesInDETChroma = 0;
	unsigned int LinesInCursorBuffer = 0;
	double CursorBufferSizeInTime = 0;
	double DETBufferSizeInTimeLuma = 0;
	double DETBufferSizeInTimeChroma = 0;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/* Cursor lines held by the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the time the buffer covers. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so
		 * the chroma buffer time must be derived from VRatioC and not
		 * from the luma ratio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5696
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5697 static void CalculatePixelDeliveryTimes(
5698 unsigned int NumberOfActivePlanes,
5699 double VRatio[],
5700 double VRatioChroma[],
5701 double VRatioPrefetchY[],
5702 double VRatioPrefetchC[],
5703 unsigned int swath_width_luma_ub[],
5704 unsigned int swath_width_chroma_ub[],
5705 unsigned int DPPPerPlane[],
5706 double HRatio[],
5707 double HRatioChroma[],
5708 double PixelClock[],
5709 double PSCL_THROUGHPUT[],
5710 double PSCL_THROUGHPUT_CHROMA[],
5711 double DPPCLK[],
5712 int BytePerPixelC[],
5713 enum scan_direction_class SourceScan[],
5714 unsigned int NumberOfCursors[],
5715 unsigned int CursorWidth[][2],
5716 unsigned int CursorBPP[][2],
5717 unsigned int BlockWidth256BytesY[],
5718 unsigned int BlockHeight256BytesY[],
5719 unsigned int BlockWidth256BytesC[],
5720 unsigned int BlockHeight256BytesC[],
5721 double DisplayPipeLineDeliveryTimeLuma[],
5722 double DisplayPipeLineDeliveryTimeChroma[],
5723 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5724 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5725 double DisplayPipeRequestDeliveryTimeLuma[],
5726 double DisplayPipeRequestDeliveryTimeChroma[],
5727 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5728 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5729 double CursorRequestDeliveryTime[],
5730 double CursorRequestDeliveryTimePrefetch[])
5731 {
5732 double req_per_swath_ub = 0;
5733 unsigned int k;
5734
5735 for (k = 0; k < NumberOfActivePlanes; ++k) {
5736 if (VRatio[k] <= 1) {
5737 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5738 } else {
5739 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5740 }
5741
5742 if (BytePerPixelC[k] == 0) {
5743 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5744 } else {
5745 if (VRatioChroma[k] <= 1) {
5746 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5747 } else {
5748 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5749 }
5750 }
5751
5752 if (VRatioPrefetchY[k] <= 1) {
5753 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5754 } else {
5755 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5756 }
5757
5758 if (BytePerPixelC[k] == 0) {
5759 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5760 } else {
5761 if (VRatioPrefetchC[k] <= 1) {
5762 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5763 } else {
5764 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5765 }
5766 }
5767 }
5768
5769 for (k = 0; k < NumberOfActivePlanes; ++k) {
5770 if (SourceScan[k] != dm_vert) {
5771 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5772 } else {
5773 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5774 }
5775 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5776 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5777 if (BytePerPixelC[k] == 0) {
5778 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5779 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5780 } else {
5781 if (SourceScan[k] != dm_vert) {
5782 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5783 } else {
5784 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5785 }
5786 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5787 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5788 }
5789 }
5790
5791 for (k = 0; k < NumberOfActivePlanes; ++k) {
5792 int cursor_req_per_width = 0;
5793 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5794 if (NumberOfCursors[k] > 0) {
5795 if (VRatio[k] <= 1) {
5796 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5797 } else {
5798 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5799 }
5800 if (VRatioPrefetchY[k] <= 1) {
5801 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5802 } else {
5803 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5804 }
5805 } else {
5806 CursorRequestDeliveryTime[k] = 0;
5807 CursorRequestDeliveryTimePrefetch[k] = 0;
5808 }
5809 }
5810 }
5811
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5812 static void CalculateMetaAndPTETimes(
5813 int NumberOfActivePlanes,
5814 bool GPUVMEnable,
5815 int MetaChunkSize,
5816 int MinMetaChunkSizeBytes,
5817 int HTotal[],
5818 double VRatio[],
5819 double VRatioChroma[],
5820 double DestinationLinesToRequestRowInVBlank[],
5821 double DestinationLinesToRequestRowInImmediateFlip[],
5822 bool DCCEnable[],
5823 double PixelClock[],
5824 int BytePerPixelY[],
5825 int BytePerPixelC[],
5826 enum scan_direction_class SourceScan[],
5827 int dpte_row_height[],
5828 int dpte_row_height_chroma[],
5829 int meta_row_width[],
5830 int meta_row_width_chroma[],
5831 int meta_row_height[],
5832 int meta_row_height_chroma[],
5833 int meta_req_width[],
5834 int meta_req_width_chroma[],
5835 int meta_req_height[],
5836 int meta_req_height_chroma[],
5837 int dpte_group_bytes[],
5838 int PTERequestSizeY[],
5839 int PTERequestSizeC[],
5840 int PixelPTEReqWidthY[],
5841 int PixelPTEReqHeightY[],
5842 int PixelPTEReqWidthC[],
5843 int PixelPTEReqHeightC[],
5844 int dpte_row_width_luma_ub[],
5845 int dpte_row_width_chroma_ub[],
5846 double DST_Y_PER_PTE_ROW_NOM_L[],
5847 double DST_Y_PER_PTE_ROW_NOM_C[],
5848 double DST_Y_PER_META_ROW_NOM_L[],
5849 double DST_Y_PER_META_ROW_NOM_C[],
5850 double TimePerMetaChunkNominal[],
5851 double TimePerChromaMetaChunkNominal[],
5852 double TimePerMetaChunkVBlank[],
5853 double TimePerChromaMetaChunkVBlank[],
5854 double TimePerMetaChunkFlip[],
5855 double TimePerChromaMetaChunkFlip[],
5856 double time_per_pte_group_nom_luma[],
5857 double time_per_pte_group_vblank_luma[],
5858 double time_per_pte_group_flip_luma[],
5859 double time_per_pte_group_nom_chroma[],
5860 double time_per_pte_group_vblank_chroma[],
5861 double time_per_pte_group_flip_chroma[])
5862 {
5863 unsigned int meta_chunk_width = 0;
5864 unsigned int min_meta_chunk_width = 0;
5865 unsigned int meta_chunk_per_row_int = 0;
5866 unsigned int meta_row_remainder = 0;
5867 unsigned int meta_chunk_threshold = 0;
5868 unsigned int meta_chunks_per_row_ub = 0;
5869 unsigned int meta_chunk_width_chroma = 0;
5870 unsigned int min_meta_chunk_width_chroma = 0;
5871 unsigned int meta_chunk_per_row_int_chroma = 0;
5872 unsigned int meta_row_remainder_chroma = 0;
5873 unsigned int meta_chunk_threshold_chroma = 0;
5874 unsigned int meta_chunks_per_row_ub_chroma = 0;
5875 unsigned int dpte_group_width_luma = 0;
5876 unsigned int dpte_groups_per_row_luma_ub = 0;
5877 unsigned int dpte_group_width_chroma = 0;
5878 unsigned int dpte_groups_per_row_chroma_ub = 0;
5879 unsigned int k;
5880
5881 for (k = 0; k < NumberOfActivePlanes; ++k) {
5882 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5883 if (BytePerPixelC[k] == 0) {
5884 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5885 } else {
5886 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5887 }
5888 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5889 if (BytePerPixelC[k] == 0) {
5890 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5891 } else {
5892 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5893 }
5894 }
5895
5896 for (k = 0; k < NumberOfActivePlanes; ++k) {
5897 if (DCCEnable[k] == true) {
5898 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5899 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5900 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5901 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5902 if (SourceScan[k] != dm_vert) {
5903 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5904 } else {
5905 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5906 }
5907 if (meta_row_remainder <= meta_chunk_threshold) {
5908 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5909 } else {
5910 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5911 }
5912 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5913 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5914 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5915 if (BytePerPixelC[k] == 0) {
5916 TimePerChromaMetaChunkNominal[k] = 0;
5917 TimePerChromaMetaChunkVBlank[k] = 0;
5918 TimePerChromaMetaChunkFlip[k] = 0;
5919 } else {
5920 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5921 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5922 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5923 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5924 if (SourceScan[k] != dm_vert) {
5925 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5926 } else {
5927 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5928 }
5929 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5930 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5931 } else {
5932 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5933 }
5934 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5935 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5936 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5937 }
5938 } else {
5939 TimePerMetaChunkNominal[k] = 0;
5940 TimePerMetaChunkVBlank[k] = 0;
5941 TimePerMetaChunkFlip[k] = 0;
5942 TimePerChromaMetaChunkNominal[k] = 0;
5943 TimePerChromaMetaChunkVBlank[k] = 0;
5944 TimePerChromaMetaChunkFlip[k] = 0;
5945 }
5946 }
5947
5948 for (k = 0; k < NumberOfActivePlanes; ++k) {
5949 if (GPUVMEnable == true) {
5950 if (SourceScan[k] != dm_vert) {
5951 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5952 } else {
5953 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5954 }
5955 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5956 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5957 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5958 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5959 if (BytePerPixelC[k] == 0) {
5960 time_per_pte_group_nom_chroma[k] = 0;
5961 time_per_pte_group_vblank_chroma[k] = 0;
5962 time_per_pte_group_flip_chroma[k] = 0;
5963 } else {
5964 if (SourceScan[k] != dm_vert) {
5965 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5966 } else {
5967 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5968 }
5969 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5970 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5971 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5972 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5973 }
5974 } else {
5975 time_per_pte_group_nom_luma[k] = 0;
5976 time_per_pte_group_vblank_luma[k] = 0;
5977 time_per_pte_group_flip_luma[k] = 0;
5978 time_per_pte_group_nom_chroma[k] = 0;
5979 time_per_pte_group_vblank_chroma[k] = 0;
5980 time_per_pte_group_flip_chroma[k] = 0;
5981 }
5982 }
5983 }
5984
/*
 * CalculateVMGroupAndRequestTimes() - per-plane time per VM group and per VM
 * request, for the vblank and immediate-flip cases.
 *
 * A plane gets non-zero results only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that plane are written as 0.
 *
 * Two counts are derived for a qualifying plane:
 *   groups   - sum of dml_ceil(bytes / vm_group_bytes[k], 1) terms
 *   requests - sum of (bytes / 64) terms
 * The "bytes" inputs used are:
 *   DCC off:                 dpde0 bytes
 *   DCC on, 1 table level:   meta PTE bytes
 *   DCC on, otherwise:       dpde0 bytes and meta PTE bytes; the group count
 *                            additionally gets +2 (with chroma) or +1
 * Chroma terms are included only when BytePerPixelC[k] > 0.
 *
 * Each output is DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] *
 * HTotal[k] / PixelClock[k] divided by the relevant count, and is halved when
 * GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read here.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int plane;

	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		const bool has_chroma = BytePerPixelC[plane] > 0;
		int group_count = 0;
		int request_count = 0;
		double group_vblank = 0;
		double group_flip = 0;
		double request_vblank = 0;
		double request_flip = 0;

		if (!(GPUVMEnable == true && (DCCEnable[plane] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[plane] = 0;
			TimePerVMGroupFlip[plane] = 0;
			TimePerVMRequestVBlank[plane] = 0;
			TimePerVMRequestFlip[plane] = 0;
			continue;
		}

		if (DCCEnable[plane] == false) {
			/* Only the dpde0 bytes contribute. */
			group_count = has_chroma
					? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			request_count = has_chroma
					? dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
					: dpde0_bytes_per_frame_ub_l[plane] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Only the meta PTE bytes contribute. */
			group_count = has_chroma
					? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			request_count = has_chroma
					? meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64
					: meta_pte_bytes_per_frame_ub_l[plane] / 64;
		} else {
			/* Both dpde0 and meta PTE bytes contribute. */
			group_count = has_chroma
					? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[plane]) / (double) (vm_group_bytes[plane]), 1)
					: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[plane]) / (double) (vm_group_bytes[plane]), 1);
			request_count = has_chroma
					? dpde0_bytes_per_frame_ub_l[plane] / 64 + dpde0_bytes_per_frame_ub_c[plane] / 64
						+ meta_pte_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_c[plane] / 64
					: dpde0_bytes_per_frame_ub_l[plane] / 64 + meta_pte_bytes_per_frame_ub_l[plane] / 64;
		}

		group_vblank = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane] / group_count;
		group_flip = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane] / group_count;
		request_vblank = DestinationLinesToRequestVMInVBlank[plane] * HTotal[plane] / PixelClock[plane] / request_count;
		request_flip = DestinationLinesToRequestVMInImmediateFlip[plane] * HTotal[plane] / PixelClock[plane] / request_count;

		/* More than two page table levels: every time is halved. */
		if (GPUVMMaxPageTableLevels > 2) {
			group_vblank = group_vblank / 2;
			group_flip = group_flip / 2;
			request_vblank = request_vblank / 2;
			request_flip = request_flip / 2;
		}

		TimePerVMGroupVBlank[plane] = group_vblank;
		TimePerVMGroupFlip[plane] = group_flip;
		TimePerVMRequestVBlank[plane] = request_vblank;
		TimePerVMRequestFlip[plane] = request_flip;
	}
}
6095
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)6096 static void CalculateStutterEfficiency(
6097 int NumberOfActivePlanes,
6098 long ROBBufferSizeInKByte,
6099 double TotalDataReadBandwidth,
6100 double DCFCLK,
6101 double ReturnBW,
6102 double SRExitTime,
6103 bool SynchronizedVBlank,
6104 int DPPPerPlane[],
6105 unsigned int DETBufferSizeY[],
6106 int BytePerPixelY[],
6107 double BytePerPixelDETY[],
6108 double SwathWidthY[],
6109 int SwathHeightY[],
6110 int SwathHeightC[],
6111 double DCCRateLuma[],
6112 double DCCRateChroma[],
6113 int HTotal[],
6114 int VTotal[],
6115 double PixelClock[],
6116 double VRatio[],
6117 enum scan_direction_class SourceScan[],
6118 int BlockHeight256BytesY[],
6119 int BlockWidth256BytesY[],
6120 int BlockHeight256BytesC[],
6121 int BlockWidth256BytesC[],
6122 int DCCYMaxUncompressedBlock[],
6123 int DCCCMaxUncompressedBlock[],
6124 int VActive[],
6125 bool DCCEnable[],
6126 bool WritebackEnable[],
6127 double ReadBandwidthPlaneLuma[],
6128 double ReadBandwidthPlaneChroma[],
6129 double meta_row_bw[],
6130 double dpte_row_bw[],
6131 double *StutterEfficiencyNotIncludingVBlank,
6132 double *StutterEfficiency,
6133 double *StutterPeriodOut)
6134 {
6135 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
6136 double FrameTimeForMinFullDETBufferingTime = 0;
6137 double StutterPeriod = 0;
6138 double AverageReadBandwidth = 0;
6139 double TotalRowReadBandwidth = 0;
6140 double AverageDCCCompressionRate = 0;
6141 double PartOfBurstThatFitsInROB = 0;
6142 double StutterBurstTime = 0;
6143 int TotalActiveWriteback = 0;
6144 double VBlankTime = 0;
6145 double SmallestVBlank = 0;
6146 int BytePerPixelYCriticalPlane = 0;
6147 double SwathWidthYCriticalPlane = 0;
6148 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
6149 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
6150 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6151 double MaximumEffectiveCompressionLuma = 0;
6152 double MaximumEffectiveCompressionChroma = 0;
6153 unsigned int k;
6154
6155 for (k = 0; k < NumberOfActivePlanes; ++k) {
6156 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6157 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6158 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
6159 }
6160
6161 StutterPeriod = FullDETBufferingTimeY[0];
6162 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6163 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6164 SwathWidthYCriticalPlane = SwathWidthY[0];
6165 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6166 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6167
6168 for (k = 0; k < NumberOfActivePlanes; ++k) {
6169 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6170 StutterPeriod = FullDETBufferingTimeY[k];
6171 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6172 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6173 SwathWidthYCriticalPlane = SwathWidthY[k];
6174 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6175 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6176 }
6177 }
6178
6179 AverageReadBandwidth = 0;
6180 TotalRowReadBandwidth = 0;
6181 for (k = 0; k < NumberOfActivePlanes; ++k) {
6182 if (DCCEnable[k] == true) {
6183 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6184 || (SourceScan[k] != dm_vert
6185 && BlockHeight256BytesY[k] > SwathHeightY[k])
6186 || DCCYMaxUncompressedBlock[k] < 256) {
6187 MaximumEffectiveCompressionLuma = 2;
6188 } else {
6189 MaximumEffectiveCompressionLuma = 4;
6190 }
6191 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6192
6193 if (ReadBandwidthPlaneChroma[k] > 0) {
6194 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6195 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6196 || DCCCMaxUncompressedBlock[k] < 256) {
6197 MaximumEffectiveCompressionChroma = 2;
6198 } else {
6199 MaximumEffectiveCompressionChroma = 4;
6200 }
6201 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6202 }
6203 } else {
6204 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6205 }
6206 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6207 }
6208
6209 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6210 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6211 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6212 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6213 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6214
6215 TotalActiveWriteback = 0;
6216 for (k = 0; k < NumberOfActivePlanes; ++k) {
6217 if (WritebackEnable[k] == true) {
6218 TotalActiveWriteback = TotalActiveWriteback + 1;
6219 }
6220 }
6221
6222 if (TotalActiveWriteback == 0) {
6223 *StutterEfficiencyNotIncludingVBlank = (1
6224 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6225 } else {
6226 *StutterEfficiencyNotIncludingVBlank = 0;
6227 }
6228
6229 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6230 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6231 } else {
6232 SmallestVBlank = 0;
6233 }
6234 for (k = 0; k < NumberOfActivePlanes; ++k) {
6235 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6236 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6237 } else {
6238 VBlankTime = 0;
6239 }
6240 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6241 }
6242
6243 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
6244
6245 if (StutterPeriodOut)
6246 *StutterPeriodOut = StutterPeriod;
6247 }
6248
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)6249 static void CalculateSwathAndDETConfiguration(
6250 bool ForceSingleDPP,
6251 int NumberOfActivePlanes,
6252 unsigned int DETBufferSizeInKByte,
6253 double MaximumSwathWidthLuma[],
6254 double MaximumSwathWidthChroma[],
6255 enum scan_direction_class SourceScan[],
6256 enum source_format_class SourcePixelFormat[],
6257 enum dm_swizzle_mode SurfaceTiling[],
6258 int ViewportWidth[],
6259 int ViewportHeight[],
6260 int SurfaceWidthY[],
6261 int SurfaceWidthC[],
6262 int SurfaceHeightY[],
6263 int SurfaceHeightC[],
6264 int Read256BytesBlockHeightY[],
6265 int Read256BytesBlockHeightC[],
6266 int Read256BytesBlockWidthY[],
6267 int Read256BytesBlockWidthC[],
6268 enum odm_combine_mode ODMCombineEnabled[],
6269 int BlendingAndTiming[],
6270 int BytePerPixY[],
6271 int BytePerPixC[],
6272 double BytePerPixDETY[],
6273 double BytePerPixDETC[],
6274 int HActive[],
6275 double HRatio[],
6276 double HRatioChroma[],
6277 int DPPPerPlane[],
6278 int swath_width_luma_ub[],
6279 int swath_width_chroma_ub[],
6280 double SwathWidth[],
6281 double SwathWidthChroma[],
6282 int SwathHeightY[],
6283 int SwathHeightC[],
6284 unsigned int DETBufferSizeY[],
6285 unsigned int DETBufferSizeC[],
6286 bool ViewportSizeSupportPerPlane[],
6287 bool *ViewportSizeSupport)
6288 {
6289 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6290 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6291 int MinimumSwathHeightY = 0;
6292 int MinimumSwathHeightC = 0;
6293 long RoundedUpMaxSwathSizeBytesY = 0;
6294 long RoundedUpMaxSwathSizeBytesC = 0;
6295 long RoundedUpMinSwathSizeBytesY = 0;
6296 long RoundedUpMinSwathSizeBytesC = 0;
6297 long RoundedUpSwathSizeBytesY = 0;
6298 long RoundedUpSwathSizeBytesC = 0;
6299 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6300 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
6301 int k;
6302
6303 CalculateSwathWidth(
6304 ForceSingleDPP,
6305 NumberOfActivePlanes,
6306 SourcePixelFormat,
6307 SourceScan,
6308 ViewportWidth,
6309 ViewportHeight,
6310 SurfaceWidthY,
6311 SurfaceWidthC,
6312 SurfaceHeightY,
6313 SurfaceHeightC,
6314 ODMCombineEnabled,
6315 BytePerPixY,
6316 BytePerPixC,
6317 Read256BytesBlockHeightY,
6318 Read256BytesBlockHeightC,
6319 Read256BytesBlockWidthY,
6320 Read256BytesBlockWidthC,
6321 BlendingAndTiming,
6322 HActive,
6323 HRatio,
6324 DPPPerPlane,
6325 SwathWidthSingleDPP,
6326 SwathWidthSingleDPPChroma,
6327 SwathWidth,
6328 SwathWidthChroma,
6329 MaximumSwathHeightY,
6330 MaximumSwathHeightC,
6331 swath_width_luma_ub,
6332 swath_width_chroma_ub);
6333
6334 *ViewportSizeSupport = true;
6335 for (k = 0; k < NumberOfActivePlanes; ++k) {
6336 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6337 || SourcePixelFormat[k] == dm_444_16
6338 || SourcePixelFormat[k] == dm_mono_16
6339 || SourcePixelFormat[k] == dm_mono_8
6340 || SourcePixelFormat[k] == dm_rgbe)) {
6341 if (SurfaceTiling[k] == dm_sw_linear
6342 || (SourcePixelFormat[k] == dm_444_64
6343 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6344 && SourceScan[k] != dm_vert)) {
6345 MinimumSwathHeightY = MaximumSwathHeightY[k];
6346 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6347 MinimumSwathHeightY = MaximumSwathHeightY[k];
6348 } else {
6349 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6350 }
6351 MinimumSwathHeightC = MaximumSwathHeightC[k];
6352 } else {
6353 if (SurfaceTiling[k] == dm_sw_linear) {
6354 MinimumSwathHeightY = MaximumSwathHeightY[k];
6355 MinimumSwathHeightC = MaximumSwathHeightC[k];
6356 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6357 && SourceScan[k] == dm_vert) {
6358 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6359 MinimumSwathHeightC = MaximumSwathHeightC[k];
6360 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6361 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6362 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6363 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6364 MinimumSwathHeightY = MaximumSwathHeightY[k];
6365 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6366 } else {
6367 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6368 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6369 }
6370 }
6371
6372 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6373 * MaximumSwathHeightY[k];
6374 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6375 * MinimumSwathHeightY;
6376 if (SourcePixelFormat[k] == dm_420_10) {
6377 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6378 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6379 }
6380 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6381 * MaximumSwathHeightC[k];
6382 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6383 * MinimumSwathHeightC;
6384 if (SourcePixelFormat[k] == dm_420_10) {
6385 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6386 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6387 }
6388
6389 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6390 <= DETBufferSizeInKByte * 1024 / 2) {
6391 SwathHeightY[k] = MaximumSwathHeightY[k];
6392 SwathHeightC[k] = MaximumSwathHeightC[k];
6393 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6394 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6395 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6396 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6397 <= DETBufferSizeInKByte * 1024 / 2) {
6398 SwathHeightY[k] = MinimumSwathHeightY;
6399 SwathHeightC[k] = MaximumSwathHeightC[k];
6400 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6401 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6402 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6403 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6404 <= DETBufferSizeInKByte * 1024 / 2) {
6405 SwathHeightY[k] = MaximumSwathHeightY[k];
6406 SwathHeightC[k] = MinimumSwathHeightC;
6407 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6408 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6409 } else {
6410 SwathHeightY[k] = MinimumSwathHeightY;
6411 SwathHeightC[k] = MinimumSwathHeightC;
6412 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6413 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6414 }
6415
6416 if (SwathHeightC[k] == 0) {
6417 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6418 DETBufferSizeC[k] = 0;
6419 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6420 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6421 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6422 } else {
6423 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6424 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
6425 }
6426
6427 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6428 > DETBufferSizeInKByte * 1024 / 2
6429 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6430 || (SwathHeightC[k] > 0
6431 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6432 *ViewportSizeSupport = false;
6433 ViewportSizeSupportPerPlane[k] = false;
6434 } else {
6435 ViewportSizeSupportPerPlane[k] = true;
6436 }
6437 }
6438 }
6439
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6440 static void CalculateSwathWidth(
6441 bool ForceSingleDPP,
6442 int NumberOfActivePlanes,
6443 enum source_format_class SourcePixelFormat[],
6444 enum scan_direction_class SourceScan[],
6445 unsigned int ViewportWidth[],
6446 unsigned int ViewportHeight[],
6447 unsigned int SurfaceWidthY[],
6448 unsigned int SurfaceWidthC[],
6449 unsigned int SurfaceHeightY[],
6450 unsigned int SurfaceHeightC[],
6451 enum odm_combine_mode ODMCombineEnabled[],
6452 int BytePerPixY[],
6453 int BytePerPixC[],
6454 int Read256BytesBlockHeightY[],
6455 int Read256BytesBlockHeightC[],
6456 int Read256BytesBlockWidthY[],
6457 int Read256BytesBlockWidthC[],
6458 int BlendingAndTiming[],
6459 unsigned int HActive[],
6460 double HRatio[],
6461 int DPPPerPlane[],
6462 double SwathWidthSingleDPPY[],
6463 double SwathWidthSingleDPPC[],
6464 double SwathWidthY[],
6465 double SwathWidthC[],
6466 int MaximumSwathHeightY[],
6467 int MaximumSwathHeightC[],
6468 unsigned int swath_width_luma_ub[],
6469 unsigned int swath_width_chroma_ub[])
6470 {
6471 unsigned int k, j;
6472 long surface_width_ub_l;
6473 long surface_height_ub_l;
6474 long surface_width_ub_c;
6475 long surface_height_ub_c;
6476
6477 for (k = 0; k < NumberOfActivePlanes; ++k) {
6478 enum odm_combine_mode MainPlaneODMCombine = 0;
6479 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6480 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6481 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6482 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6483
6484 if (SourceScan[k] != dm_vert) {
6485 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6486 } else {
6487 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6488 }
6489
6490 MainPlaneODMCombine = ODMCombineEnabled[k];
6491 for (j = 0; j < NumberOfActivePlanes; ++j) {
6492 if (BlendingAndTiming[k] == j) {
6493 MainPlaneODMCombine = ODMCombineEnabled[j];
6494 }
6495 }
6496
6497 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6498 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6499 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6500 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6501 } else if (DPPPerPlane[k] == 2) {
6502 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6503 } else {
6504 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6505 }
6506
6507 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6508 SwathWidthC[k] = SwathWidthY[k] / 2;
6509 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6510 } else {
6511 SwathWidthC[k] = SwathWidthY[k];
6512 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6513 }
6514
6515 if (ForceSingleDPP == true) {
6516 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6517 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6518 }
6519
6520 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6521 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6522 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6523 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6524
6525 if (SourceScan[k] != dm_vert) {
6526 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6527 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6528 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6529 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6530 if (BytePerPixC[k] > 0) {
6531 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6532 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6533 } else {
6534 swath_width_chroma_ub[k] = 0;
6535 }
6536 } else {
6537 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6538 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6539 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6540 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6541 if (BytePerPixC[k] > 0) {
6542 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6543 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6544 } else {
6545 swath_width_chroma_ub[k] = 0;
6546 }
6547 }
6548 }
6549 }
6550
/*
 * Return the extra latency as the sum of two terms:
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK
 *   CalculateExtraLatencyBytes(...) / ReturnBW
 *
 * All remaining arguments are forwarded unchanged to
 * CalculateExtraLatencyBytes().
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double LatencyBytes = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);
	const double RoundTripTerm = (RoundTripPingLatencyCycles + 32) / DCFCLK;
	const double BytesTerm = LatencyBytes / ReturnBW;

	return RoundTripTerm + BytesTerm;
}
6589
/*
 * Return the number of extra-latency bytes:
 *
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each active plane:
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * factor
 *
 * "levels" and "factor" are 0 and 1 unless both GPUVMEnable and HostVMEnable
 * are set. In that case factor is the ratio of the two percentage arguments,
 * and levels is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * depending on HostVMMinPageSize (clamped at 0 when reduced).
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double InefficiencyFactor = 1;
	int DynamicLevels = 0;
	int PerGroupMultiplier;
	double TotalBytes;
	unsigned int k;

	if (GPUVMEnable && HostVMEnable) {
		InefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		if (HostVMMinPageSize < 2048) {
			/* Smallest page-size range: the level count is used unclamped. */
			DynamicLevels = HostVMMaxNonCachedPageTableLevels;
		} else {
			const int LevelsDropped =
				(HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) ? 1 : 2;

			DynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - LevelsDropped);
		}
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
			+ TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable)
		return TotalBytes;

	PerGroupMultiplier = 1 + 8 * DynamicLevels;
	for (k = 0; k < NumberOfActivePlanes; ++k)
		TotalBytes += NumberOfDPP[k] * dpte_group_bytes[k] * PerGroupMultiplier * InefficiencyFactor;

	return TotalBytes;
}
6634
6635
/*
 * Return the largest of the three urgent-latency inputs. When
 * DoUrgentLatencyAdjustment is set, add
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * to that maximum.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCase = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCase;

	return WorstCase + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6653
6654
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,int MaxInterDCNTileRepeaters,int MaxPrefetchMode,double FinalDRAMClockChangeLatency,double SREnterPlusExitTime,int ReturnBusWidth,int RoundTripPingLatencyCycles,int ReorderingBytes,int PixelChunkSizeInKByte,int MetaChunkSize,bool GPUVMEnable,int GPUVMMaxPageTableLevels,bool HostVMEnable,int NumberOfActivePlanes,double HostVMMinPageSize,int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,enum immediate_flip_requirement ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,int VTotal[],int VActive[],int DynamicMetadataTransmittedBytes[],int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFCLKDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],double TotalVActivePixelBandwidth[][2],double TotalVActiveCursorBandwidth[][2],double TotalMetaRowBandwidth[][2],double TotalDPTERowBandwidth[][2],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],int BytePerPixelY[],int BytePerPixelC[],int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],double 
VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double DCFCLKState[][2])6655 static void UseMinimumDCFCLK(
6656 struct display_mode_lib *mode_lib,
6657 int MaxInterDCNTileRepeaters,
6658 int MaxPrefetchMode,
6659 double FinalDRAMClockChangeLatency,
6660 double SREnterPlusExitTime,
6661 int ReturnBusWidth,
6662 int RoundTripPingLatencyCycles,
6663 int ReorderingBytes,
6664 int PixelChunkSizeInKByte,
6665 int MetaChunkSize,
6666 bool GPUVMEnable,
6667 int GPUVMMaxPageTableLevels,
6668 bool HostVMEnable,
6669 int NumberOfActivePlanes,
6670 double HostVMMinPageSize,
6671 int HostVMMaxNonCachedPageTableLevels,
6672 bool DynamicMetadataVMEnabled,
6673 enum immediate_flip_requirement ImmediateFlipRequirement,
6674 bool ProgressiveToInterlaceUnitInOPP,
6675 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
6676 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
6677 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6678 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly,
6679 int VTotal[],
6680 int VActive[],
6681 int DynamicMetadataTransmittedBytes[],
6682 int DynamicMetadataLinesBeforeActiveRequired[],
6683 bool Interlace[],
6684 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
6685 double RequiredDISPCLK[][2],
6686 double UrgLatency[],
6687 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
6688 double ProjectedDCFCLKDeepSleep[][2],
6689 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
6690 double TotalVActivePixelBandwidth[][2],
6691 double TotalVActiveCursorBandwidth[][2],
6692 double TotalMetaRowBandwidth[][2],
6693 double TotalDPTERowBandwidth[][2],
6694 unsigned int TotalNumberOfActiveDPP[][2],
6695 unsigned int TotalNumberOfDCCActiveDPP[][2],
6696 int dpte_group_bytes[],
6697 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
6698 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
6699 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
6700 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
6701 int BytePerPixelY[],
6702 int BytePerPixelC[],
6703 int HTotal[],
6704 double PixelClock[],
6705 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
6706 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
6707 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
6708 bool DynamicMetadataEnable[],
6709 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
6710 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
6711 double ReadBandwidthLuma[],
6712 double ReadBandwidthChroma[],
6713 double DCFCLKPerState[],
6714 double DCFCLKState[][2])
6715 {
6716 double NormalEfficiency = 0;
6717 double PTEEfficiency = 0;
6718 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6719 unsigned int i, j, k;
6720
6721 NormalEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6722 : PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6723 PTEEfficiency = (HostVMEnable == true ? PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6724 / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6725 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6726 for (j = 0; j <= 1; ++j) {
6727 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6728 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6729 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6730 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6731 double MinimumTWait = 0;
6732 double NonDPTEBandwidth = 0;
6733 double DPTEBandwidth = 0;
6734 double DCFCLKRequiredForAverageBandwidth = 0;
6735 double ExtraLatencyBytes = 0;
6736 double ExtraLatencyCycles = 0;
6737 double DCFCLKRequiredForPeakBandwidth = 0;
6738 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6739 double MinimumTvmPlus2Tr0 = 0;
6740
6741 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6742 for (k = 0; k < NumberOfActivePlanes; ++k) {
6743 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6744 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
6745 }
6746
6747 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6748 NoOfDPPState[k] = NoOfDPP[i][j][k];
6749 }
6750
6751 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
6752 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
6753 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
6754 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
6755 DCFCLKRequiredForAverageBandwidth = dml_max3(ProjectedDCFCLKDeepSleep[i][j],
6756 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth / (MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6757 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / ReturnBusWidth);
6758
6759 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP[i][j],
6760 MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NoOfDPPState, dpte_group_bytes,
6761 PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6762 HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels);
6763 ExtraLatencyCycles = RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
6764 for (k = 0; k < NumberOfActivePlanes; ++k) {
6765 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6766 double ExpectedPrefetchBWAcceleration = { 0 };
6767 double PrefetchTime = { 0 };
6768
6769 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
6770 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
6771 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6772 / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * DPTEBytesPerRow[i][j][k] / PTEEfficiency
6773 / NormalEfficiency / ReturnBusWidth + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6774 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
6775 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
6776 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
6777 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6778 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait - UrgLatency[i] * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels
6779 : GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6780
6781 if (PrefetchTime > 0) {
6782 double ExpectedVRatioPrefetch = { 0 };
6783 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6784 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6785 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6786 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
6787 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6788 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / ReturnBusWidth;
6789 }
6790 } else {
6791 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
6792 }
6793 if (DynamicMetadataEnable[k] == true) {
6794 double TsetupPipe = { 0 };
6795 double TdmbfPipe = { 0 };
6796 double TdmsksPipe = { 0 };
6797 double TdmecPipe = { 0 };
6798 double AllowedTimeForUrgentExtraLatency = { 0 };
6799
6800 CalculateDynamicMetadataParameters(
6801 MaxInterDCNTileRepeaters,
6802 RequiredDPPCLK[i][j][k],
6803 RequiredDISPCLK[i][j],
6804 ProjectedDCFCLKDeepSleep[i][j],
6805 PixelClock[k],
6806 HTotal[k],
6807 VTotal[k] - VActive[k],
6808 DynamicMetadataTransmittedBytes[k],
6809 DynamicMetadataLinesBeforeActiveRequired[k],
6810 Interlace[k],
6811 ProgressiveToInterlaceUnitInOPP,
6812 &TsetupPipe,
6813 &TdmbfPipe,
6814 &TdmecPipe,
6815 &TdmsksPipe);
6816 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TsetupPipe
6817 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6818 if (AllowedTimeForUrgentExtraLatency > 0) {
6819 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6820 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6821 } else {
6822 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
6823 }
6824 }
6825 }
6826 DCFCLKRequiredForPeakBandwidth = 0;
6827 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
6828 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6829 }
6830 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? (HostVMEnable == true ?
6831 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
6832 for (k = 0; k < NumberOfActivePlanes; ++k) {
6833 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6834 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6835 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6836 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
6837 } else {
6838 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6839 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6840 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6841 }
6842 }
6843 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6844 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6845 }
6846 }
6847 }
6848
6849 #endif /* CONFIG_DRM_AMD_DC_DCN */
6850