1 /*
2 * Copyright 2020 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "display_mode_vba_30.h"
29 #include "../dml_inline_defs.h"
30
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41
/*
 * Pipe - group of per-pipe values passed to CalculatePrefetchSchedule()
 * through its myPipe argument.
 *
 * In the part of CalculatePrefetchSchedule() visible in this file, DPPCLK,
 * DISPCLK, DCFCLKDeepSleep, PixelClock, HTotal, VBlank and InterlaceEnable
 * are forwarded to CalculateDynamicMetadataParameters(), and
 * HTotal / PixelClock is used as the line time.
 *
 * NOTE(review): units of the clock fields are not stated here - confirm
 * against the caller that fills this struct.
 * NOTE(review): ODMCombineEnabled is a bool here, while the swath helpers
 * declared below take an enum odm_combine_mode array of the same name.
 */
42 typedef struct {
43 double DPPCLK;
44 double DISPCLK;
45 double PixelClock;
46 double DCFCLKDeepSleep;
47 unsigned int DPPPerPlane;
48 bool ScalerEnabled;
49 enum scan_direction_class SourceScan;
50 unsigned int BlockWidth256BytesY;
51 unsigned int BlockHeight256BytesY;
52 unsigned int BlockWidth256BytesC;
53 unsigned int BlockHeight256BytesC;
54 unsigned int InterlaceEnable;
55 unsigned int NumberOfCursors;
56 unsigned int VBlank;
57 unsigned int HTotal;
58 unsigned int DCCEnable;
59 bool ODMCombineEnabled;
60 } Pipe;
61
/*
 * File-local constants. Their users are outside the visible part of this file.
 * NOTE(review): BPP_INVALID / BPP_BLENDED_PIPE look like sentinel values for a
 * bits-per-pixel result - confirm at the use sites.
 * 5184 and 4096 are the same limits quoted for sliceWidth (times numSlices) in
 * the parameter notes of dscceComputeDelay(), the latter for 420 mode.
 */
62 #define BPP_INVALID 0
63 #define BPP_BLENDED_PIPE 0xffffffff
64 #define DCN30_MAX_DSC_IMAGE_WIDTH 5184
65 #define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
66
/*
 * Forward declarations of file-local (static) functions, so that they can be
 * called before their definitions.
 */
/* The next two are called, in this order, at the end of dml30_recalculate(). */
67 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
68 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
69 struct display_mode_lib *mode_lib);
/* DSC encoder delay; the definition's final value is named "pixels". */
70 static unsigned int dscceComputeDelay(
71 unsigned int bpc,
72 double BPP,
73 unsigned int sliceWidth,
74 unsigned int numSlices,
75 enum output_format_class pixelFormat,
76 enum output_encoder_class Output);
/* Sum of fixed per-stage DSC delays, selected by pixelFormat. */
77 static unsigned int dscComputeDelay(
78 enum output_format_class pixelFormat,
79 enum output_encoder_class Output);
80 // Super monster function: 64 parameters, the last 18 of which are pointers
/*
 * In the visible part of the definition, results are written through
 * Tdmdl and NotEnoughTimeForDynamicMetadata.
 * NOTE(review): the remaining pointer parameters are presumably outputs as
 * well, and the meaning of the bool return value is set in the part of the
 * definition not visible here - confirm.
 */
81 static bool CalculatePrefetchSchedule(
82 struct display_mode_lib *mode_lib,
83 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
84 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
85 Pipe *myPipe,
86 unsigned int DSCDelay,
87 double DPPCLKDelaySubtotalPlusCNVCFormater,
88 double DPPCLKDelaySCL,
89 double DPPCLKDelaySCLLBOnly,
90 double DPPCLKDelayCNVCCursor,
91 double DISPCLKDelaySubtotal,
92 unsigned int DPP_RECOUT_WIDTH,
93 enum output_format_class OutputFormat,
94 unsigned int MaxInterDCNTileRepeaters,
95 unsigned int VStartup,
96 unsigned int MaxVStartup,
97 unsigned int GPUVMPageTableLevels,
98 bool GPUVMEnable,
99 bool HostVMEnable,
100 unsigned int HostVMMaxNonCachedPageTableLevels,
101 double HostVMMinPageSize,
102 bool DynamicMetadataEnable,
103 bool DynamicMetadataVMEnabled,
104 int DynamicMetadataLinesBeforeActiveRequired,
105 unsigned int DynamicMetadataTransmittedBytes,
106 double UrgentLatency,
107 double UrgentExtraLatency,
108 double TCalc,
109 unsigned int PDEAndMetaPTEBytesFrame,
110 unsigned int MetaRowByte,
111 unsigned int PixelPTEBytesPerRow,
112 double PrefetchSourceLinesY,
113 unsigned int SwathWidthY,
114 int BytePerPixelY,
115 double VInitPreFillY,
116 unsigned int MaxNumSwathY,
117 double PrefetchSourceLinesC,
118 unsigned int SwathWidthC,
119 int BytePerPixelC,
120 double VInitPreFillC,
121 unsigned int MaxNumSwathC,
122 long swath_width_luma_ub,
123 long swath_width_chroma_ub,
124 unsigned int SwathHeightY,
125 unsigned int SwathHeightC,
126 double TWait,
127 bool ProgressiveToInterlaceUnitInOPP,
128 double *DSTXAfterScaler,
129 double *DSTYAfterScaler,
130 double *DestinationLinesForPrefetch,
131 double *PrefetchBandwidth,
132 double *DestinationLinesToRequestVMInVBlank,
133 double *DestinationLinesToRequestRowInVBlank,
134 double *VRatioPrefetchY,
135 double *VRatioPrefetchC,
136 double *RequiredPrefetchPixDataBWLuma,
137 double *RequiredPrefetchPixDataBWChroma,
138 bool *NotEnoughTimeForDynamicMetadata,
139 double *Tno_bw,
140 double *prefetch_vmrow_bw,
141 double *Tdmdl_vm,
142 double *Tdmdl,
143 unsigned int *VUpdateOffsetPix,
144 double *VUpdateWidthPix,
145 double *VReadyOffsetPix);
/*
 * Forward declarations of file-local (static) helpers; the definitions are
 * outside the visible part of this file.
 */
/* Pair of rounding helpers taking a clock and a VCO speed (up / down variants). */
146 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
147 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
/* 16 value parameters followed by 6 unsigned int pointer parameters. */
148 static void CalculateDCCConfiguration(
149 bool DCCEnabled,
150 bool DCCProgrammingAssumesScanDirectionUnknown,
151 enum source_format_class SourcePixelFormat,
152 unsigned int ViewportWidthLuma,
153 unsigned int ViewportWidthChroma,
154 unsigned int ViewportHeightLuma,
155 unsigned int ViewportHeightChroma,
156 double DETBufferSize,
157 unsigned int RequestHeight256ByteLuma,
158 unsigned int RequestHeight256ByteChroma,
159 enum dm_swizzle_mode TilingFormat,
160 unsigned int BytePerPixelY,
161 unsigned int BytePerPixelC,
162 double BytePerPixelDETY,
163 double BytePerPixelDETC,
164 enum scan_direction_class ScanOrientation,
165 unsigned int *MaxUncompressedBlockLuma,
166 unsigned int *MaxUncompressedBlockChroma,
167 unsigned int *MaxCompressedBlockLuma,
168 unsigned int *MaxCompressedBlockChroma,
169 unsigned int *IndependentBlockLuma,
170 unsigned int *IndependentBlockChroma);
/* Returns a double and also takes two pointer parameters (VInitPreFill, MaxNumSwath). */
171 static double CalculatePrefetchSourceLines(
172 struct display_mode_lib *mode_lib,
173 double VRatio,
174 double vtaps,
175 bool Interlace,
176 bool ProgressiveToInterlaceUnitInOPP,
177 unsigned int SwathHeight,
178 unsigned int ViewportYStart,
179 double *VInitPreFill,
180 unsigned int *MaxNumSwath);
/* Returns an unsigned int; the trailing 17 parameters are pointers. */
181 static unsigned int CalculateVMAndRowBytes(
182 struct display_mode_lib *mode_lib,
183 bool DCCEnable,
184 unsigned int BlockHeight256Bytes,
185 unsigned int BlockWidth256Bytes,
186 enum source_format_class SourcePixelFormat,
187 unsigned int SurfaceTiling,
188 unsigned int BytePerPixel,
189 enum scan_direction_class ScanDirection,
190 unsigned int SwathWidth,
191 unsigned int ViewportHeight,
192 bool GPUVMEnable,
193 bool HostVMEnable,
194 unsigned int HostVMMaxNonCachedPageTableLevels,
195 unsigned int GPUVMMinPageSize,
196 unsigned int HostVMMinPageSize,
197 unsigned int PTEBufferSizeInRequests,
198 unsigned int Pitch,
199 unsigned int DCCMetaPitch,
200 unsigned int *MacroTileWidth,
201 unsigned int *MetaRowByte,
202 unsigned int *PixelPTEBytesPerRow,
203 bool *PTEBufferSizeNotExceeded,
204 unsigned int *dpte_row_width_ub,
205 unsigned int *dpte_row_height,
206 unsigned int *MetaRequestWidth,
207 unsigned int *MetaRequestHeight,
208 unsigned int *meta_row_width,
209 unsigned int *meta_row_height,
210 unsigned int *vm_group_bytes,
211 unsigned int *dpte_group_bytes,
212 unsigned int *PixelPTEReqWidth,
213 unsigned int *PixelPTEReqHeight,
214 unsigned int *PTERequestSize,
215 unsigned int *DPDE0BytesFrame,
216 unsigned int *MetaPTEBytesFrame);
/* Pure value-in / double-out signature: no pointer parameters. */
217 static double CalculateTWait(
218 unsigned int PrefetchMode,
219 double DRAMClockChangeLatency,
220 double UrgentLatency,
221 double SREnterPlusExitTime);
/* The last two parameters (meta_row_bw, dpte_row_bw) are double pointers. */
222 static void CalculateRowBandwidth(
223 bool GPUVMEnable,
224 enum source_format_class SourcePixelFormat,
225 double VRatio,
226 double VRatioChroma,
227 bool DCCEnable,
228 double LineTime,
229 unsigned int MetaRowByteLuma,
230 unsigned int MetaRowByteChroma,
231 unsigned int meta_row_height_luma,
232 unsigned int meta_row_height_chroma,
233 unsigned int PixelPTEBytesPerRowLuma,
234 unsigned int PixelPTEBytesPerRowChroma,
235 unsigned int dpte_row_height_luma,
236 unsigned int dpte_row_height_chroma,
237 double *meta_row_bw,
238 double *dpte_row_bw);
/*
 * Forward declarations of file-local (static) helpers; apart from the call
 * noted below, their definitions and callers are outside the visible part of
 * this file.
 */
/* The last four parameters are pointers. */
239 static void CalculateFlipSchedule(
240 struct display_mode_lib *mode_lib,
241 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
242 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
243 double UrgentExtraLatency,
244 double UrgentLatency,
245 unsigned int GPUVMMaxPageTableLevels,
246 bool HostVMEnable,
247 unsigned int HostVMMaxNonCachedPageTableLevels,
248 bool GPUVMEnable,
249 double HostVMMinPageSize,
250 double PDEAndMetaPTEBytesPerFrame,
251 double MetaRowBytes,
252 double DPTEBytesPerRow,
253 double BandwidthAvailableForImmediateFlip,
254 unsigned int TotImmediateFlipBytes,
255 enum source_format_class SourcePixelFormat,
256 double LineTime,
257 double VRatio,
258 double VRatioChroma,
259 double Tno_bw,
260 bool DCCEnable,
261 unsigned int dpte_row_height,
262 unsigned int meta_row_height,
263 unsigned int dpte_row_height_chroma,
264 unsigned int meta_row_height_chroma,
265 double *DestinationLinesToRequestVMInImmediateFlip,
266 double *DestinationLinesToRequestRowInImmediateFlip,
267 double *final_flip_bw,
268 bool *ImmediateFlipSupportedForPipe);
/* Pure value-in / double-out signature: no pointer parameters. */
269 static double CalculateWriteBackDelay(
270 enum source_format_class WritebackPixelFormat,
271 double WritebackHRatio,
272 double WritebackVRatio,
273 unsigned int WritebackVTaps,
274 long WritebackDestinationWidth,
275 long WritebackDestinationHeight,
276 long WritebackSourceHeight,
277 unsigned int HTotal);
/*
 * Called from CalculatePrefetchSchedule() with the clock/timing fields of its
 * Pipe argument; that caller passes the addresses of its local Tsetup, Tdmbf,
 * Tdmec and Tdmsks for the four pointer parameters.
 */
278 static void CalculateDynamicMetadataParameters(
279 int MaxInterDCNTileRepeaters,
280 double DPPCLK,
281 double DISPCLK,
282 double DCFClkDeepSleep,
283 double PixelClock,
284 long HTotal,
285 long VBlank,
286 long DynamicMetadataTransmittedBytes,
287 long DynamicMetadataLinesBeforeActiveRequired,
288 int InterlaceEnable,
289 bool ProgressiveToInterlaceUnitInOPP,
290 double *Tsetup,
291 double *Tdmbf,
292 double *Tdmec,
293 double *Tdmsks);
/*
 * Scalar parameters first, then arrays, then 8 pointer parameters.
 * NOTE(review): the arrays are presumably indexed per plane up to
 * NumberOfActivePlanes - confirm in the definition.
 */
294 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
295 struct display_mode_lib *mode_lib,
296 unsigned int PrefetchMode,
297 unsigned int NumberOfActivePlanes,
298 unsigned int MaxLineBufferLines,
299 unsigned int LineBufferSize,
300 unsigned int DPPOutputBufferPixels,
301 unsigned int DETBufferSizeInKByte,
302 unsigned int WritebackInterfaceBufferSize,
303 double DCFCLK,
304 double ReturnBW,
305 bool GPUVMEnable,
306 unsigned int dpte_group_bytes[],
307 unsigned int MetaChunkSize,
308 double UrgentLatency,
309 double ExtraLatency,
310 double WritebackLatency,
311 double WritebackChunkSize,
312 double SOCCLK,
313 double DRAMClockChangeLatency,
314 double SRExitTime,
315 double SREnterPlusExitTime,
316 double DCFCLKDeepSleep,
317 unsigned int DPPPerPlane[],
318 bool DCCEnable[],
319 double DPPCLK[],
320 unsigned int DETBufferSizeY[],
321 unsigned int DETBufferSizeC[],
322 unsigned int SwathHeightY[],
323 unsigned int SwathHeightC[],
324 unsigned int LBBitPerPixel[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 double HRatio[],
328 double HRatioChroma[],
329 unsigned int vtaps[],
330 unsigned int VTAPsChroma[],
331 double VRatio[],
332 double VRatioChroma[],
333 unsigned int HTotal[],
334 double PixelClock[],
335 unsigned int BlendingAndTiming[],
336 double BytePerPixelDETY[],
337 double BytePerPixelDETC[],
338 double DSTXAfterScaler[],
339 double DSTYAfterScaler[],
340 bool WritebackEnable[],
341 enum source_format_class WritebackPixelFormat[],
342 double WritebackDestinationWidth[],
343 double WritebackDestinationHeight[],
344 double WritebackSourceHeight[],
345 enum clock_change_support *DRAMClockChangeSupport,
346 double *UrgentWatermark,
347 double *WritebackUrgentWatermark,
348 double *DRAMClockChangeWatermark,
349 double *WritebackDRAMClockChangeWatermark,
350 double *StutterExitWatermark,
351 double *StutterEnterPlusExitWatermark,
352 double *MinActiveDRAMClockChangeLatencySupported);
/* The only pointer (non-array) parameter is the trailing DCFCLKDeepSleep. */
353 static void CalculateDCFCLKDeepSleep(
354 struct display_mode_lib *mode_lib,
355 unsigned int NumberOfActivePlanes,
356 int BytePerPixelY[],
357 int BytePerPixelC[],
358 double VRatio[],
359 double VRatioChroma[],
360 double SwathWidthY[],
361 double SwathWidthC[],
362 unsigned int DPPPerPlane[],
363 double HRatio[],
364 double HRatioChroma[],
365 double PixelClock[],
366 double PSCL_THROUGHPUT[],
367 double PSCL_THROUGHPUT_CHROMA[],
368 double DPPCLK[],
369 double ReadBandwidthLuma[],
370 double ReadBandwidthChroma[],
371 int ReturnBusWidth,
372 double *DCFCLKDeepSleep);
/* The last four parameters are pointers (three burst factors and one bool). */
373 static void CalculateUrgentBurstFactor(
374 long swath_width_luma_ub,
375 long swath_width_chroma_ub,
376 unsigned int DETBufferSizeInKByte,
377 unsigned int SwathHeightY,
378 unsigned int SwathHeightC,
379 double LineTime,
380 double UrgentLatency,
381 double CursorBufferSize,
382 unsigned int CursorWidth,
383 unsigned int CursorBPP,
384 double VRatio,
385 double VRatioC,
386 double BytePerPixelInDETY,
387 double BytePerPixelInDETC,
388 double DETBufferSizeY,
389 double DETBufferSizeC,
390 double *UrgentBurstFactorCursor,
391 double *UrgentBurstFactorLuma,
392 double *UrgentBurstFactorChroma,
393 bool *NotEnoughUrgentLatencyHiding);
394
/*
 * Forward declarations of file-local (static) helpers; the definitions are
 * outside the visible part of this file.
 */
/* Takes both the display_mode_lib and a vba_vars_st pointer. */
395 static void UseMinimumDCFCLK(
396 struct display_mode_lib *mode_lib,
397 struct vba_vars_st *v,
398 int MaxPrefetchMode,
399 int ReorderingBytes);
400
/*
 * All parameters after NumberOfActivePlanes are arrays; CursorWidth and
 * CursorBPP are two-dimensional with an inner dimension of 2.
 * NOTE(review): the trailing double arrays (DisplayPipe*DeliveryTime*,
 * CursorRequestDeliveryTime*) are presumably outputs - confirm.
 */
401 static void CalculatePixelDeliveryTimes(
402 unsigned int NumberOfActivePlanes,
403 double VRatio[],
404 double VRatioChroma[],
405 double VRatioPrefetchY[],
406 double VRatioPrefetchC[],
407 unsigned int swath_width_luma_ub[],
408 unsigned int swath_width_chroma_ub[],
409 unsigned int DPPPerPlane[],
410 double HRatio[],
411 double HRatioChroma[],
412 double PixelClock[],
413 double PSCL_THROUGHPUT[],
414 double PSCL_THROUGHPUT_CHROMA[],
415 double DPPCLK[],
416 int BytePerPixelC[],
417 enum scan_direction_class SourceScan[],
418 unsigned int NumberOfCursors[],
419 unsigned int CursorWidth[][2],
420 unsigned int CursorBPP[][2],
421 unsigned int BlockWidth256BytesY[],
422 unsigned int BlockHeight256BytesY[],
423 unsigned int BlockWidth256BytesC[],
424 unsigned int BlockHeight256BytesC[],
425 double DisplayPipeLineDeliveryTimeLuma[],
426 double DisplayPipeLineDeliveryTimeChroma[],
427 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
428 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
429 double DisplayPipeRequestDeliveryTimeLuma[],
430 double DisplayPipeRequestDeliveryTimeChroma[],
431 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
432 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
433 double CursorRequestDeliveryTime[],
434 double CursorRequestDeliveryTimePrefetch[]);
435
/*
 * NOTE(review): this prototype uses signed int for counts and arrays
 * (NumberOfActivePlanes, HTotal[], dpte_group_bytes[], ...) where several
 * neighbouring prototypes use unsigned int for parameters of the same name.
 */
436 static void CalculateMetaAndPTETimes(
437 int NumberOfActivePlanes,
438 bool GPUVMEnable,
439 int MetaChunkSize,
440 int MinMetaChunkSizeBytes,
441 int HTotal[],
442 double VRatio[],
443 double VRatioChroma[],
444 double DestinationLinesToRequestRowInVBlank[],
445 double DestinationLinesToRequestRowInImmediateFlip[],
446 bool DCCEnable[],
447 double PixelClock[],
448 int BytePerPixelY[],
449 int BytePerPixelC[],
450 enum scan_direction_class SourceScan[],
451 int dpte_row_height[],
452 int dpte_row_height_chroma[],
453 int meta_row_width[],
454 int meta_row_width_chroma[],
455 int meta_row_height[],
456 int meta_row_height_chroma[],
457 int meta_req_width[],
458 int meta_req_width_chroma[],
459 int meta_req_height[],
460 int meta_req_height_chroma[],
461 int dpte_group_bytes[],
462 int PTERequestSizeY[],
463 int PTERequestSizeC[],
464 int PixelPTEReqWidthY[],
465 int PixelPTEReqHeightY[],
466 int PixelPTEReqWidthC[],
467 int PixelPTEReqHeightC[],
468 int dpte_row_width_luma_ub[],
469 int dpte_row_width_chroma_ub[],
470 double DST_Y_PER_PTE_ROW_NOM_L[],
471 double DST_Y_PER_PTE_ROW_NOM_C[],
472 double DST_Y_PER_META_ROW_NOM_L[],
473 double DST_Y_PER_META_ROW_NOM_C[],
474 double TimePerMetaChunkNominal[],
475 double TimePerChromaMetaChunkNominal[],
476 double TimePerMetaChunkVBlank[],
477 double TimePerChromaMetaChunkVBlank[],
478 double TimePerMetaChunkFlip[],
479 double TimePerChromaMetaChunkFlip[],
480 double time_per_pte_group_nom_luma[],
481 double time_per_pte_group_vblank_luma[],
482 double time_per_pte_group_flip_luma[],
483 double time_per_pte_group_nom_chroma[],
484 double time_per_pte_group_vblank_chroma[],
485 double time_per_pte_group_flip_chroma[]);
486
/* The last four parameters are double arrays named TimePerVM{Group,Request}{VBlank,Flip}. */
487 static void CalculateVMGroupAndRequestTimes(
488 unsigned int NumberOfActivePlanes,
489 bool GPUVMEnable,
490 unsigned int GPUVMMaxPageTableLevels,
491 unsigned int HTotal[],
492 int BytePerPixelC[],
493 double DestinationLinesToRequestVMInVBlank[],
494 double DestinationLinesToRequestVMInImmediateFlip[],
495 bool DCCEnable[],
496 double PixelClock[],
497 int dpte_row_width_luma_ub[],
498 int dpte_row_width_chroma_ub[],
499 int vm_group_bytes[],
500 unsigned int dpde0_bytes_per_frame_ub_l[],
501 unsigned int dpde0_bytes_per_frame_ub_c[],
502 int meta_pte_bytes_per_frame_ub_l[],
503 int meta_pte_bytes_per_frame_ub_c[],
504 double TimePerVMGroupVBlank[],
505 double TimePerVMGroupFlip[],
506 double TimePerVMRequestVBlank[],
507 double TimePerVMRequestFlip[]);
508
/* The last three parameters are double pointers. */
509 static void CalculateStutterEfficiency(
510 int NumberOfActivePlanes,
511 long ROBBufferSizeInKByte,
512 double TotalDataReadBandwidth,
513 double DCFCLK,
514 double ReturnBW,
515 double SRExitTime,
516 bool SynchronizedVBlank,
517 int DPPPerPlane[],
518 unsigned int DETBufferSizeY[],
519 int BytePerPixelY[],
520 double BytePerPixelDETY[],
521 double SwathWidthY[],
522 int SwathHeightY[],
523 int SwathHeightC[],
524 double DCCRateLuma[],
525 double DCCRateChroma[],
526 int HTotal[],
527 int VTotal[],
528 double PixelClock[],
529 double VRatio[],
530 enum scan_direction_class SourceScan[],
531 int BlockHeight256BytesY[],
532 int BlockWidth256BytesY[],
533 int BlockHeight256BytesC[],
534 int BlockWidth256BytesC[],
535 int DCCYMaxUncompressedBlock[],
536 int DCCCMaxUncompressedBlock[],
537 int VActive[],
538 bool DCCEnable[],
539 bool WritebackEnable[],
540 double ReadBandwidthPlaneLuma[],
541 double ReadBandwidthPlaneChroma[],
542 double meta_row_bw[],
543 double dpte_row_bw[],
544 double *StutterEfficiencyNotIncludingVBlank,
545 double *StutterEfficiency,
546 double *StutterPeriodOut);
547
/*
 * Forward declarations of file-local (static) helpers; the definitions are
 * outside the visible part of this file.
 */
/*
 * NOTE(review): several array parameters here are signed int[]
 * (ViewportWidth, ViewportHeight, SurfaceWidthY/C, SurfaceHeightY/C, HActive,
 * swath_width_luma_ub, swath_width_chroma_ub) while the parameters of the same
 * name in CalculateSwathWidth() below are unsigned int[] - confirm this is
 * intended before passing one through to the other.
 */
548 static void CalculateSwathAndDETConfiguration(
549 bool ForceSingleDPP,
550 int NumberOfActivePlanes,
551 unsigned int DETBufferSizeInKByte,
552 double MaximumSwathWidthLuma[],
553 double MaximumSwathWidthChroma[],
554 enum scan_direction_class SourceScan[],
555 enum source_format_class SourcePixelFormat[],
556 enum dm_swizzle_mode SurfaceTiling[],
557 int ViewportWidth[],
558 int ViewportHeight[],
559 int SurfaceWidthY[],
560 int SurfaceWidthC[],
561 int SurfaceHeightY[],
562 int SurfaceHeightC[],
563 int Read256BytesBlockHeightY[],
564 int Read256BytesBlockHeightC[],
565 int Read256BytesBlockWidthY[],
566 int Read256BytesBlockWidthC[],
567 enum odm_combine_mode ODMCombineEnabled[],
568 int BlendingAndTiming[],
569 int BytePerPixY[],
570 int BytePerPixC[],
571 double BytePerPixDETY[],
572 double BytePerPixDETC[],
573 int HActive[],
574 double HRatio[],
575 double HRatioChroma[],
576 int DPPPerPlane[],
577 int swath_width_luma_ub[],
578 int swath_width_chroma_ub[],
579 double SwathWidth[],
580 double SwathWidthChroma[],
581 int SwathHeightY[],
582 int SwathHeightC[],
583 unsigned int DETBufferSizeY[],
584 unsigned int DETBufferSizeC[],
585 bool ViewportSizeSupportPerPlane[],
586 bool *ViewportSizeSupport);
/* Shares many parameter names with CalculateSwathAndDETConfiguration() above. */
587 static void CalculateSwathWidth(
588 bool ForceSingleDPP,
589 int NumberOfActivePlanes,
590 enum source_format_class SourcePixelFormat[],
591 enum scan_direction_class SourceScan[],
592 unsigned int ViewportWidth[],
593 unsigned int ViewportHeight[],
594 unsigned int SurfaceWidthY[],
595 unsigned int SurfaceWidthC[],
596 unsigned int SurfaceHeightY[],
597 unsigned int SurfaceHeightC[],
598 enum odm_combine_mode ODMCombineEnabled[],
599 int BytePerPixY[],
600 int BytePerPixC[],
601 int Read256BytesBlockHeightY[],
602 int Read256BytesBlockHeightC[],
603 int Read256BytesBlockWidthY[],
604 int Read256BytesBlockWidthC[],
605 int BlendingAndTiming[],
606 unsigned int HActive[],
607 double HRatio[],
608 int DPPPerPlane[],
609 double SwathWidthSingleDPPY[],
610 double SwathWidthSingleDPPC[],
611 double SwathWidthY[],
612 double SwathWidthC[],
613 int MaximumSwathHeightY[],
614 int MaximumSwathHeightC[],
615 unsigned int swath_width_luma_ub[],
616 unsigned int swath_width_chroma_ub[]);
/*
 * Takes every parameter of CalculateExtraLatencyBytes() below plus
 * RoundTripPingLatencyCycles, DCFCLK and ReturnBW.
 */
617 static double CalculateExtraLatency(
618 long RoundTripPingLatencyCycles,
619 long ReorderingBytes,
620 double DCFCLK,
621 int TotalNumberOfActiveDPP,
622 int PixelChunkSizeInKByte,
623 int TotalNumberOfDCCActiveDPP,
624 int MetaChunkSize,
625 double ReturnBW,
626 bool GPUVMEnable,
627 bool HostVMEnable,
628 int NumberOfActivePlanes,
629 int NumberOfDPP[],
630 int dpte_group_bytes[],
631 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
632 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
/* Same parameter list as CalculateExtraLatency() minus the clock/bandwidth inputs. */
635 static double CalculateExtraLatencyBytes(
636 long ReorderingBytes,
637 int TotalNumberOfActiveDPP,
638 int PixelChunkSizeInKByte,
639 int TotalNumberOfDCCActiveDPP,
640 int MetaChunkSize,
641 bool GPUVMEnable,
642 bool HostVMEnable,
643 int NumberOfActivePlanes,
644 int NumberOfDPP[],
645 int dpte_group_bytes[],
646 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
647 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
648 double HostVMMinPageSize,
649 int HostVMMaxNonCachedPageTableLevels);
/* Pure value-in / double-out signature: no pointer parameters. */
650 static double CalculateUrgentLatency(
651 double UrgentLatencyPixelDataOnly,
652 double UrgentLatencyPixelMixedWithVMData,
653 double UrgentLatencyVMDataOnly,
654 bool DoUrgentLatencyAdjustment,
655 double UrgentLatencyAdjustmentFabricClockComponent,
656 double UrgentLatencyAdjustmentFabricClockReference,
657 double FabricClockSingle);
658
/*
 * dml30_recalculate - non-static entry point that runs four calculation
 * stages on mode_lib, strictly in this order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Stages 3 and 4 are static functions of this file; stages 1 and 2 are not
 * declared in the visible part of this file.
 * NOTE(review): they presumably come from one of the included headers - confirm.
 * Nothing is returned; any results are left in *mode_lib by the stages.
 */
dml30_recalculate(struct display_mode_lib * mode_lib)659 void dml30_recalculate(struct display_mode_lib *mode_lib)
660 {
661 ModeSupportAndSystemConfiguration(mode_lib);
662 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
663 DisplayPipeConfiguration(mode_lib);
664 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
665 }
666
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)667 static unsigned int dscceComputeDelay(
668 unsigned int bpc,
669 double BPP,
670 unsigned int sliceWidth,
671 unsigned int numSlices,
672 enum output_format_class pixelFormat,
673 enum output_encoder_class Output)
674 {
675 // valid bpc = source bits per component in the set of {8, 10, 12}
676 // valid bpp = increments of 1/16 of a bit
677 // min = 6/7/8 in N420/N422/444, respectively
678 // max = such that compression is 1:1
679 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
680 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
681 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
682
683 // fixed value
684 unsigned int rcModelSize = 8192;
685
686 // N422/N420 operate at 2 pixels per clock
687 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L,
688 Delay, pixels;
689
690 if (pixelFormat == dm_420)
691 pixelsPerClock = 2;
692 // #all other modes operate at 1 pixel per clock
693 else if (pixelFormat == dm_444)
694 pixelsPerClock = 1;
695 else if (pixelFormat == dm_n422)
696 pixelsPerClock = 2;
697 else
698 pixelsPerClock = 1;
699
700 //initial transmit delay as per PPS
701 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
702
703 //compute ssm delay
704 if (bpc == 8)
705 D = 81;
706 else if (bpc == 10)
707 D = 89;
708 else
709 D = 113;
710
711 //divide by pixel per cycle to compute slice width as seen by DSC
712 w = sliceWidth / pixelsPerClock;
713
714 //422 mode has an additional cycle of delay
715 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
716 s = 0;
717 else
718 s = 1;
719
720 //main calculation for the dscce
721 ix = initalXmitDelay + 45;
722 wx = (w + 2) / 3;
723 P = 3 * wx - w;
724 l0 = ix / w;
725 a = ix + P * l0;
726 ax = (a + 2) / 3 + D + 6 + 1;
727 L = (ax + wx - 1) / wx;
728 if ((ix % w) == 0 && P != 0)
729 lstall = 1;
730 else
731 lstall = 0;
732 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
733
734 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
735 pixels = Delay * 3 * pixelsPerClock;
736 return pixels;
737 }
738
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)739 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
740 {
741 unsigned int Delay = 0;
742
743 if (pixelFormat == dm_420) {
744 // sfr
745 Delay = Delay + 2;
746 // dsccif
747 Delay = Delay + 0;
748 // dscc - input deserializer
749 Delay = Delay + 3;
750 // dscc gets pixels every other cycle
751 Delay = Delay + 2;
752 // dscc - input cdc fifo
753 Delay = Delay + 12;
754 // dscc gets pixels every other cycle
755 Delay = Delay + 13;
756 // dscc - cdc uncertainty
757 Delay = Delay + 2;
758 // dscc - output cdc fifo
759 Delay = Delay + 7;
760 // dscc gets pixels every other cycle
761 Delay = Delay + 3;
762 // dscc - cdc uncertainty
763 Delay = Delay + 2;
764 // dscc - output serializer
765 Delay = Delay + 1;
766 // sft
767 Delay = Delay + 1;
768 } else if (pixelFormat == dm_n422) {
769 // sfr
770 Delay = Delay + 2;
771 // dsccif
772 Delay = Delay + 1;
773 // dscc - input deserializer
774 Delay = Delay + 5;
775 // dscc - input cdc fifo
776 Delay = Delay + 25;
777 // dscc - cdc uncertainty
778 Delay = Delay + 2;
779 // dscc - output cdc fifo
780 Delay = Delay + 10;
781 // dscc - cdc uncertainty
782 Delay = Delay + 2;
783 // dscc - output serializer
784 Delay = Delay + 1;
785 // sft
786 Delay = Delay + 1;
787 } else {
788 // sfr
789 Delay = Delay + 2;
790 // dsccif
791 Delay = Delay + 0;
792 // dscc - input deserializer
793 Delay = Delay + 3;
794 // dscc - input cdc fifo
795 Delay = Delay + 12;
796 // dscc - cdc uncertainty
797 Delay = Delay + 2;
798 // dscc - output cdc fifo
799 Delay = Delay + 7;
800 // dscc - output serializer
801 Delay = Delay + 1;
802 // dscc - cdc uncertainty
803 Delay = Delay + 2;
804 // sft
805 Delay = Delay + 1;
806 }
807
808 return Delay;
809 }
810
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,int BytePerPixelY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,int BytePerPixelC,double VInitPreFillC,unsigned int MaxNumSwathC,long swath_width_luma_ub,long swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,bool ProgressiveToInterlaceUnitInOPP,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)811 static bool CalculatePrefetchSchedule(
812 struct display_mode_lib *mode_lib,
813 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
814 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
815 Pipe *myPipe,
816 unsigned int DSCDelay,
817 double DPPCLKDelaySubtotalPlusCNVCFormater,
818 double DPPCLKDelaySCL,
819 double DPPCLKDelaySCLLBOnly,
820 double DPPCLKDelayCNVCCursor,
821 double DISPCLKDelaySubtotal,
822 unsigned int DPP_RECOUT_WIDTH,
823 enum output_format_class OutputFormat,
824 unsigned int MaxInterDCNTileRepeaters,
825 unsigned int VStartup,
826 unsigned int MaxVStartup,
827 unsigned int GPUVMPageTableLevels,
828 bool GPUVMEnable,
829 bool HostVMEnable,
830 unsigned int HostVMMaxNonCachedPageTableLevels,
831 double HostVMMinPageSize,
832 bool DynamicMetadataEnable,
833 bool DynamicMetadataVMEnabled,
834 int DynamicMetadataLinesBeforeActiveRequired,
835 unsigned int DynamicMetadataTransmittedBytes,
836 double UrgentLatency,
837 double UrgentExtraLatency,
838 double TCalc,
839 unsigned int PDEAndMetaPTEBytesFrame,
840 unsigned int MetaRowByte,
841 unsigned int PixelPTEBytesPerRow,
842 double PrefetchSourceLinesY,
843 unsigned int SwathWidthY,
844 int BytePerPixelY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 int BytePerPixelC,
850 double VInitPreFillC,
851 unsigned int MaxNumSwathC,
852 long swath_width_luma_ub,
853 long swath_width_chroma_ub,
854 unsigned int SwathHeightY,
855 unsigned int SwathHeightC,
856 double TWait,
857 bool ProgressiveToInterlaceUnitInOPP,
858 double *DSTXAfterScaler,
859 double *DSTYAfterScaler,
860 double *DestinationLinesForPrefetch,
861 double *PrefetchBandwidth,
862 double *DestinationLinesToRequestVMInVBlank,
863 double *DestinationLinesToRequestRowInVBlank,
864 double *VRatioPrefetchY,
865 double *VRatioPrefetchC,
866 double *RequiredPrefetchPixDataBWLuma,
867 double *RequiredPrefetchPixDataBWChroma,
868 bool *NotEnoughTimeForDynamicMetadata,
869 double *Tno_bw,
870 double *prefetch_vmrow_bw,
871 double *Tdmdl_vm,
872 double *Tdmdl,
873 unsigned int *VUpdateOffsetPix,
874 double *VUpdateWidthPix,
875 double *VReadyOffsetPix)
876 {
877 bool MyError = false;
878 unsigned int DPPCycles = 0, DISPCLKCycles = 0;
879 double DSTTotalPixelsAfterScaler = 0;
880 double LineTime = 0, Tsetup = 0;
881 double dst_y_prefetch_equ = 0;
882 double Tsw_oto = 0;
883 double prefetch_bw_oto = 0;
884 double Tvm_oto = 0;
885 double Tr0_oto = 0;
886 double Tvm_oto_lines = 0;
887 double Tr0_oto_lines = 0;
888 double dst_y_prefetch_oto = 0;
889 double TimeForFetchingMetaPTE = 0;
890 double TimeForFetchingRowInVBlank = 0;
891 double LinesToRequestPrefetchPixelData = 0;
892 double HostVMInefficiencyFactor = 0;
893 unsigned int HostVMDynamicLevelsTrips = 0;
894 double trip_to_mem = 0;
895 double Tvm_trips = 0;
896 double Tr0_trips = 0;
897 double Tvm_trips_rounded = 0;
898 double Tr0_trips_rounded = 0;
899 double Lsw_oto = 0;
900 double Tpre_rounded = 0;
901 double prefetch_bw_equ = 0;
902 double Tvm_equ = 0;
903 double Tr0_equ = 0;
904 double Tdmbf = 0;
905 double Tdmec = 0;
906 double Tdmsks = 0;
907
908 if (GPUVMEnable == true && HostVMEnable == true) {
909 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
910 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
911 } else {
912 HostVMInefficiencyFactor = 1;
913 HostVMDynamicLevelsTrips = 0;
914 }
915
916 CalculateDynamicMetadataParameters(
917 MaxInterDCNTileRepeaters,
918 myPipe->DPPCLK,
919 myPipe->DISPCLK,
920 myPipe->DCFCLKDeepSleep,
921 myPipe->PixelClock,
922 myPipe->HTotal,
923 myPipe->VBlank,
924 DynamicMetadataTransmittedBytes,
925 DynamicMetadataLinesBeforeActiveRequired,
926 myPipe->InterlaceEnable,
927 ProgressiveToInterlaceUnitInOPP,
928 &Tsetup,
929 &Tdmbf,
930 &Tdmec,
931 &Tdmsks);
932
933 LineTime = myPipe->HTotal / myPipe->PixelClock;
934 trip_to_mem = UrgentLatency;
935 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
936
937 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
938 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
939 } else {
940 *Tdmdl = TWait + UrgentExtraLatency;
941 }
942
943 if (DynamicMetadataEnable == true) {
944 if (VStartup * LineTime < Tsetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
945 *NotEnoughTimeForDynamicMetadata = true;
946 } else {
947 *NotEnoughTimeForDynamicMetadata = false;
948 dml_print("DML: Not Enough Time for Dynamic Meta!\n");
949 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
950 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
951 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
952 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
953 }
954 } else {
955 *NotEnoughTimeForDynamicMetadata = false;
956 }
957
958 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
959
960 if (myPipe->ScalerEnabled)
961 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
962 else
963 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
964
965 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
966
967 DISPCLKCycles = DISPCLKDelaySubtotal;
968
969 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
970 return true;
971
972 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK
973 + DSCDelay;
974
975 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineEnabled)?18:0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
976
977 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP))
978 *DSTYAfterScaler = 1;
979 else
980 *DSTYAfterScaler = 0;
981
982 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
983 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
984 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
985
986 MyError = false;
987
988
989 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
990 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
991 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
992
993 if (GPUVMEnable) {
994 if (GPUVMPageTableLevels >= 3) {
995 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
996 } else
997 *Tno_bw = 0;
998 } else if (!myPipe->DCCEnable)
999 *Tno_bw = LineTime;
1000 else
1001 *Tno_bw = LineTime / 4;
1002
1003 dst_y_prefetch_equ = VStartup - (Tsetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime
1004 - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1005
1006 Lsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC);
1007 Tsw_oto = Lsw_oto * LineTime;
1008
1009 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC) / Tsw_oto;
1010
1011 if (GPUVMEnable == true) {
1012 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
1013 Tvm_trips,
1014 LineTime / 4.0);
1015 } else
1016 Tvm_oto = LineTime / 4.0;
1017
1018 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1019 Tr0_oto = dml_max3(
1020 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
1021 LineTime - Tvm_oto, LineTime / 4);
1022 } else
1023 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1024
1025 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1026 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1027 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1028
1029 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1030 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1031
1032 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1033 dml_print("DML: dst_y_prefetch_equ: %f\n", dst_y_prefetch_equ);
1034
1035 dml_print("DML: LineTime: %f\n", LineTime);
1036 dml_print("DML: VStartup: %d\n", VStartup);
1037 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1038 dml_print("DML: Tsetup: %fus - time from vstartup to vready\n", Tsetup);
1039 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1040 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1041 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1042 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1043 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1044 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1045 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1046 dml_print("DML: dst_x_after_scl: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1047 dml_print("DML: dst_y_after_scl: %d lines - number of lines of pipeline and buffer delay after scaler \n", (int)*DSTYAfterScaler);
1048
1049 *PrefetchBandwidth = 0;
1050 *DestinationLinesToRequestVMInVBlank = 0;
1051 *DestinationLinesToRequestRowInVBlank = 0;
1052 *VRatioPrefetchY = 0;
1053 *VRatioPrefetchC = 0;
1054 *RequiredPrefetchPixDataBWLuma = 0;
1055 if (dst_y_prefetch_equ > 1) {
1056 double PrefetchBandwidth1 = 0;
1057 double PrefetchBandwidth2 = 0;
1058 double PrefetchBandwidth3 = 0;
1059 double PrefetchBandwidth4 = 0;
1060
1061 if (Tpre_rounded - *Tno_bw > 0)
1062 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
1063 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1064 + PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY
1065 + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1066 / (Tpre_rounded - *Tno_bw);
1067 else
1068 PrefetchBandwidth1 = 0;
1069
1070 if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw) > 0) {
1071 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - *Tno_bw);
1072 }
1073
1074 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1075 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame *
1076 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1077 swath_width_luma_ub * BytePerPixelY +
1078 PrefetchSourceLinesC * swath_width_chroma_ub *
1079 BytePerPixelC) /
1080 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1081 else
1082 PrefetchBandwidth2 = 0;
1083
1084 if (Tpre_rounded - Tvm_trips_rounded > 0)
1085 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow *
1086 HostVMInefficiencyFactor + PrefetchSourceLinesY *
1087 swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC *
1088 swath_width_chroma_ub * BytePerPixelC) / (Tpre_rounded -
1089 Tvm_trips_rounded);
1090 else
1091 PrefetchBandwidth3 = 0;
1092
1093 if (VStartup == MaxVStartup && (PrefetchBandwidth3 > 4 * prefetch_bw_oto) && Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1094 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - Tsw_oto / 4 - 0.75 * LineTime - Tvm_trips_rounded);
1095 }
1096
1097 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1098 PrefetchBandwidth4 = (PrefetchSourceLinesY * swath_width_luma_ub * BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * BytePerPixelC)
1099 / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1100 else
1101 PrefetchBandwidth4 = 0;
1102
1103 {
1104 bool Case1OK;
1105 bool Case2OK;
1106 bool Case3OK;
1107
1108 if (PrefetchBandwidth1 > 0) {
1109 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
1110 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1111 Case1OK = true;
1112 } else {
1113 Case1OK = false;
1114 }
1115 } else {
1116 Case1OK = false;
1117 }
1118
1119 if (PrefetchBandwidth2 > 0) {
1120 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
1121 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1122 Case2OK = true;
1123 } else {
1124 Case2OK = false;
1125 }
1126 } else {
1127 Case2OK = false;
1128 }
1129
1130 if (PrefetchBandwidth3 > 0) {
1131 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3
1132 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1133 Case3OK = true;
1134 } else {
1135 Case3OK = false;
1136 }
1137 } else {
1138 Case3OK = false;
1139 }
1140
1141 if (Case1OK) {
1142 prefetch_bw_equ = PrefetchBandwidth1;
1143 } else if (Case2OK) {
1144 prefetch_bw_equ = PrefetchBandwidth2;
1145 } else if (Case3OK) {
1146 prefetch_bw_equ = PrefetchBandwidth3;
1147 } else {
1148 prefetch_bw_equ = PrefetchBandwidth4;
1149 }
1150
1151 dml_print("DML: prefetch_bw_equ: %f\n", prefetch_bw_equ);
1152
1153 if (prefetch_bw_equ > 0) {
1154 if (GPUVMEnable) {
1155 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1156 } else {
1157 Tvm_equ = LineTime / 4;
1158 }
1159
1160 if ((GPUVMEnable || myPipe->DCCEnable)) {
1161 Tr0_equ = dml_max4(
1162 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1163 Tr0_trips,
1164 (LineTime - Tvm_equ) / 2,
1165 LineTime / 4);
1166 } else {
1167 Tr0_equ = (LineTime - Tvm_equ) / 2;
1168 }
1169 } else {
1170 Tvm_equ = 0;
1171 Tr0_equ = 0;
1172 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1173 }
1174 }
1175
1176 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1177 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1178 TimeForFetchingMetaPTE = Tvm_oto;
1179 TimeForFetchingRowInVBlank = Tr0_oto;
1180 *PrefetchBandwidth = prefetch_bw_oto;
1181 } else {
1182 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1183 TimeForFetchingMetaPTE = Tvm_equ;
1184 TimeForFetchingRowInVBlank = Tr0_equ;
1185 *PrefetchBandwidth = prefetch_bw_equ;
1186 }
1187
1188 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1189
1190 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1191
1192
1193 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank
1194 - 2 * *DestinationLinesToRequestRowInVBlank;
1195
1196 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1197
1198 *VRatioPrefetchY = (double) PrefetchSourceLinesY
1199 / LinesToRequestPrefetchPixelData;
1200 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1201 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1202 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1203 *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1204 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1205 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1206 } else {
1207 MyError = true;
1208 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1209 *VRatioPrefetchY = 0;
1210 }
1211 }
1212
1213 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1214 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1215
1216 if ((SwathHeightC > 4)) {
1217 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1218 *VRatioPrefetchC = dml_max(*VRatioPrefetchC,
1219 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1220 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1221 } else {
1222 MyError = true;
1223 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1224 *VRatioPrefetchC = 0;
1225 }
1226 }
1227
1228 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * BytePerPixelY * swath_width_luma_ub / LineTime;
1229 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * BytePerPixelC * swath_width_chroma_ub / LineTime;
1230 } else {
1231 MyError = true;
1232 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1233 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1234 *VRatioPrefetchY = 0;
1235 *VRatioPrefetchC = 0;
1236 *RequiredPrefetchPixDataBWLuma = 0;
1237 *RequiredPrefetchPixDataBWChroma = 0;
1238 }
1239
1240 dml_print("DML: Tpre: %fus - sum of tim to request meta pte, 2 x data pte + meta data, swaths\n", (double)LinesToRequestPrefetchPixelData * LineTime + 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1241 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1242 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1243 dml_print("DML: Tr1: %fus - time to fetch second row of data pagetables and second row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1244 dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)LinesToRequestPrefetchPixelData * LineTime);
1245 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1246 dml_print("DML: Tvstartup - Tsetup - Tcalc - Twait - Tpre - To > 0\n");
1247 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - Tsetup);
1248 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1249
1250 } else {
1251 MyError = true;
1252 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1253 }
1254
1255 {
1256 double prefetch_vm_bw = 0;
1257 double prefetch_row_bw = 0;
1258
1259 if (PDEAndMetaPTEBytesFrame == 0) {
1260 prefetch_vm_bw = 0;
1261 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1262 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1263 } else {
1264 prefetch_vm_bw = 0;
1265 MyError = true;
1266 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1267 }
1268 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1269 prefetch_row_bw = 0;
1270 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1271 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1272 } else {
1273 prefetch_row_bw = 0;
1274 MyError = true;
1275 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1276 }
1277
1278 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1279 }
1280
1281 if (MyError) {
1282 *PrefetchBandwidth = 0;
1283 TimeForFetchingMetaPTE = 0;
1284 TimeForFetchingRowInVBlank = 0;
1285 *DestinationLinesToRequestVMInVBlank = 0;
1286 *DestinationLinesToRequestRowInVBlank = 0;
1287 *DestinationLinesForPrefetch = 0;
1288 LinesToRequestPrefetchPixelData = 0;
1289 *VRatioPrefetchY = 0;
1290 *VRatioPrefetchC = 0;
1291 *RequiredPrefetchPixDataBWLuma = 0;
1292 *RequiredPrefetchPixDataBWChroma = 0;
1293 }
1294
1295 return MyError;
1296 }
1297
/*
 * Returns VCOSpeed * 4 divided by a whole-number divider, where the divider
 * is (VCOSpeed * 4 / Clock) passed through dml_floor(..., 1).
 *
 * NOTE(review): assuming dml_floor(x, 1) rounds down to an integer, the
 * divider never exceeds the exact ratio, so the result is not below Clock
 * for positive inputs. No guard is applied for a zero divider.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1302
/*
 * Returns VCOSpeed * 4 divided by a whole-number divider, where the divider
 * is (VCOSpeed * 4 / Clock) passed through dml_ceil(..., 1).
 *
 * NOTE(review): assuming dml_ceil(x, 1) rounds up to an integer, the divider
 * is never smaller than the exact ratio, so the result does not exceed Clock
 * for positive inputs.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1307
/*
 * Compares the full-swath byte counts of the luma and chroma planes (for one
 * scan direction) against det_buffer_size and reports, per plane, whether
 * 128-byte requests are needed: 0 keeps 256-byte requests, 1 selects
 * 128-byte requests.
 *
 * The ladder is tried in order: both planes at double size fit -> neither
 * plane is flagged; otherwise only one plane is flagged when that is enough,
 * the choice depending on whether luma is at least 1.5x chroma; otherwise
 * both planes are flagged.
 */
static void dcc_pick_req128(
		long swath_bytes_l,
		long swath_bytes_c,
		double det_buffer_size,
		int *req128_l,
		int *req128_c)
{
	if (2 * swath_bytes_l + 2 * swath_bytes_c <= det_buffer_size) {
		*req128_l = 0;
		*req128_c = 0;
	} else if (swath_bytes_l < 1.5 * swath_bytes_c
			&& 2 * swath_bytes_l + swath_bytes_c <= det_buffer_size) {
		*req128_l = 0;
		*req128_c = 1;
	} else if (swath_bytes_l >= 1.5 * swath_bytes_c
			&& swath_bytes_l + 2 * swath_bytes_c <= det_buffer_size) {
		*req128_l = 1;
		*req128_c = 0;
	} else {
		*req128_l = 1;
		*req128_c = 1;
	}
}

/*
 * Derives the DCC block-size settings for the luma and chroma planes of a
 * surface.
 *
 * For each plane the function decides between three request types and
 * writes the matching (MaxUncompressedBlock, MaxCompressedBlock,
 * IndependentBlock) triple:
 *   256-byte requests            -> 256, 256,   0
 *   128-byte, contiguous         -> 256, 128, 128
 *   128-byte, non-contiguous     -> 256,  64,  64
 *
 * The chroma triple is forced to zero when DCC is disabled or when
 * BytePerPixelC is 0; the luma triple is forced to zero when DCC is
 * disabled.
 *
 * When DCCProgrammingAssumesScanDirectionUnknown is set, both the horizontal
 * and the vertical worst cases are taken into account; otherwise only the
 * one selected by ScanOrientation (vertical for dm_vert, horizontal for
 * anything else) is used.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are accepted but not read by this function.
 */
static void CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		double DETBufferSize,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	typedef enum {
		REQ_256Bytes,
		REQ_128BytesNonContiguous,
		REQ_128BytesContiguous,
		REQ_NA
	} RequestType;

	/* Tiling-format groups that the divider and segment-order rules key on. */
	const bool tiling_64kb_s = (TilingFormat == dm_sw_64kb_s
			|| TilingFormat == dm_sw_64kb_s_t
			|| TilingFormat == dm_sw_64kb_s_x);
	const bool tiling_64kb_d_or_r_x = (TilingFormat == dm_sw_64kb_d
			|| TilingFormat == dm_sw_64kb_d_x
			|| TilingFormat == dm_sw_64kb_d_t
			|| TilingFormat == dm_sw_64kb_r_x);
	const bool tiling_64kb_r_x = (TilingFormat == dm_sw_64kb_r_x);

	/* 1 for the 4:2:0 formats, 0 otherwise; used as an extra divider on chroma. */
	const int yuv420 = (SourcePixelFormat == dm_420_8
			|| SourcePixelFormat == dm_420_10
			|| SourcePixelFormat == dm_420_12) ? 1 : 0;

	/* Dividers default to 1 and drop to 0 for the cases listed below. */
	const int horz_div_l = (BytePerPixelY == 8 && tiling_64kb_s) ? 0 : 1;
	const int horz_div_c = (BytePerPixelC == 8 && tiling_64kb_s) ? 0 : 1;
	const int vert_div_l = (BytePerPixelY == 1) ? 0 : 1;
	const int vert_div_c = (BytePerPixelC == 1) ? 0 : 1;

	/* 1 when the 128-byte segments are contiguous for that plane/direction. */
	const int seg_horz_contig_l =
			(BytePerPixelY == 2 || (BytePerPixelY == 4 && !tiling_64kb_r_x)) ? 0 : 1;
	const int seg_vert_contig_l =
			((BytePerPixelY == 8 && tiling_64kb_d_or_r_x)
			|| (BytePerPixelY == 4 && tiling_64kb_r_x)) ? 0 : 1;
	const int seg_horz_contig_c =
			(BytePerPixelC == 2 || (BytePerPixelC == 4 && !tiling_64kb_r_x)) ? 0 : 1;
	const int seg_vert_contig_c =
			((BytePerPixelC == 8 && tiling_64kb_d_or_r_x)
			|| (BytePerPixelC == 4 && tiling_64kb_r_x)) ? 0 : 1;

	/* Which worst cases (horizontal / vertical scan) feed the decision. */
	const bool use_horz = DCCProgrammingAssumesScanDirectionUnknown
			|| ScanOrientation != dm_vert;
	const bool use_vert = DCCProgrammingAssumesScanDirectionUnknown
			|| ScanOrientation == dm_vert;

	const long MAS_vp_horz_limit = 5760;
	const long MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);

	long swath_buf_size;
	double vp_horz_limit;
	double vp_vert_limit;
	long max_vp_horz_width;
	long max_vp_vert_height;
	long eff_width_l;
	long eff_width_c;
	long eff_height_l;
	long eff_height_c;
	long swath_horz_l;
	long swath_horz_c = 0;
	long swath_vert_l;
	long swath_vert_c = 0;
	int req128_horz_l;
	int req128_horz_c;
	int req128_vert_l;
	int req128_vert_c;
	bool need128;
	bool noncontig;
	RequestType RequestLuma;
	RequestType RequestChroma;

	/*
	 * Viewport limits imposed by the detile buffer. With a chroma plane the
	 * reserved space doubles and both planes share the remaining bytes.
	 */
	if (BytePerPixelC > 0) {
		swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
		vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY
						/ (1 + horz_div_l)
						+ (double) RequestHeight256ByteChroma
								* BytePerPixelC / (1 + horz_div_c)
								/ (1 + yuv420));
		vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)
						+ 256.0 / RequestHeight256ByteChroma
								/ (1 + vert_div_c) / (1 + yuv420));
	} else {
		swath_buf_size = DETBufferSize / 2 - 2 * 256;
		vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY
						/ (1 + horz_div_l));
		vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
	}

	if (SourcePixelFormat == dm_420_10) {
		vp_horz_limit *= 1.5;
		vp_vert_limit *= 1.5;
	}

	vp_horz_limit = dml_floor(vp_horz_limit - 1, 16);
	vp_vert_limit = dml_floor(vp_vert_limit - 1, 16);

	/* Clamp the surface to the smaller of the fixed and detile-buffer limits. */
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, vp_vert_limit);

	eff_width_l = (SurfaceWidthLuma > max_vp_horz_width ?
			max_vp_horz_width : SurfaceWidthLuma);
	eff_width_c = eff_width_l / (1 + yuv420);
	eff_height_l = (SurfaceHeightLuma > max_vp_vert_height ?
			max_vp_vert_height : SurfaceHeightLuma);
	eff_height_c = eff_height_l / (1 + yuv420);

	/* Full-swath byte counts; the chroma counts stay 0 without a chroma plane. */
	swath_horz_l = eff_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	swath_vert_l = eff_height_l * 256 / RequestHeight256ByteLuma;
	if (BytePerPixelC > 0) {
		swath_horz_c = eff_width_c * RequestHeight256ByteChroma
				* BytePerPixelC;
		swath_vert_c = eff_height_c * 256 / RequestHeight256ByteChroma;
	}

	if (SourcePixelFormat == dm_420_10) {
		swath_horz_l = dml_ceil(swath_horz_l * 2 / 3, 256);
		swath_horz_c = dml_ceil(swath_horz_c * 2 / 3, 256);
		swath_vert_l = dml_ceil(swath_vert_l * 2 / 3, 256);
		swath_vert_c = dml_ceil(swath_vert_c * 2 / 3, 256);
	}

	dcc_pick_req128(swath_horz_l, swath_horz_c, DETBufferSize,
			&req128_horz_l, &req128_horz_c);
	dcc_pick_req128(swath_vert_l, swath_vert_c, DETBufferSize,
			&req128_vert_l, &req128_vert_c);

	/*
	 * Luma request type: 256-byte unless a considered direction needs
	 * 128-byte requests; non-contiguous if any such direction has a
	 * non-contiguous segment order.
	 */
	need128 = (use_horz && req128_horz_l != 0)
			|| (use_vert && req128_vert_l != 0);
	noncontig = (use_horz && req128_horz_l == 1 && seg_horz_contig_l == 0)
			|| (use_vert && req128_vert_l == 1 && seg_vert_contig_l == 0);
	if (!need128)
		RequestLuma = REQ_256Bytes;
	else if (noncontig)
		RequestLuma = REQ_128BytesNonContiguous;
	else
		RequestLuma = REQ_128BytesContiguous;

	/* Chroma request type, same rule applied to the chroma flags. */
	need128 = (use_horz && req128_horz_c != 0)
			|| (use_vert && req128_vert_c != 0);
	noncontig = (use_horz && req128_horz_c == 1 && seg_horz_contig_c == 0)
			|| (use_vert && req128_vert_c == 1 && seg_vert_contig_c == 0);
	if (!need128)
		RequestChroma = REQ_256Bytes;
	else if (noncontig)
		RequestChroma = REQ_128BytesNonContiguous;
	else
		RequestChroma = REQ_128BytesContiguous;

	switch (RequestLuma) {
	case REQ_256Bytes:
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
		break;
	case REQ_128BytesContiguous:
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
		break;
	default:
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
		break;
	}

	switch (RequestChroma) {
	case REQ_256Bytes:
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
		break;
	case REQ_128BytesContiguous:
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
		break;
	default:
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
		break;
	}

	/* No DCC, or no chroma plane: chroma settings are zeroed. */
	if (!DCCEnabled || BytePerPixelC == 0) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	/* No DCC: luma settings are zeroed as well. */
	if (!DCCEnabled) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}
}
1608
1609
/*
 * Computes the number of source lines covered by the prefetch.
 *
 * Outputs:
 *   *VInitPreFill - (VRatio + vtaps + 1 [+ Interlace * 0.5 * VRatio]) / 2
 *                   passed through dml_floor(..., 1); the interlace term is
 *                   left out when ProgressiveToInterlaceUnitInOPP is set.
 *   *MaxNumSwath  - swath count derived from *VInitPreFill and SwathHeight.
 *
 * Returns *MaxNumSwath * SwathHeight plus a partial-swath line count.
 *
 * When mode_lib->vba.IgnoreViewportPositioning is set, ViewportYStart is not
 * used in the calculation (a warning is printed if it is non-zero) and the
 * swath/partial-swath formulas use an offset of 1 instead of 2, with no
 * lower bound on the partial swath.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	unsigned int partial_swath;
	double prefill;

	prefill = ProgressiveToInterlaceUnitInOPP ?
			dml_floor((VRatio + vtaps + 1) / 2.0, 1) :
			dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
	*VInitPreFill = prefill;

	if (mode_lib->vba.IgnoreViewportPositioning) {
		if (ViewportYStart != 0) {
			dml_print(
					"WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
		}

		*MaxNumSwath = dml_ceil(prefill / SwathHeight, 1);

		/* The cast applies to the whole parenthesised value, then the modulo. */
		partial_swath = (unsigned int) (prefill > 1.0 ?
				prefill - 1 : prefill + SwathHeight - 1) % SwathHeight;
	} else {
		*MaxNumSwath = dml_ceil((prefill - 1.0) / SwathHeight, 1) + 1.0;

		partial_swath = (unsigned int) (prefill > 1.0 ?
				prefill - 2 : prefill + SwathHeight - 2) % SwathHeight;
		/* At least one partial-swath line is always counted here. */
		partial_swath = dml_max(1U, partial_swath);
	}

	return *MaxNumSwath * SwathHeight + partial_swath;
}
1656
CalculateVMAndRowBytes(struct display_mode_lib * mode_lib,bool DCCEnable,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum scan_direction_class ScanDirection,unsigned int SwathWidth,unsigned int ViewportHeight,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMinPageSize,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int * MacroTileWidth,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,bool * PTEBufferSizeNotExceeded,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * vm_group_bytes,unsigned int * dpte_group_bytes,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)1657 static unsigned int CalculateVMAndRowBytes(
1658 struct display_mode_lib *mode_lib,
1659 bool DCCEnable,
1660 unsigned int BlockHeight256Bytes,
1661 unsigned int BlockWidth256Bytes,
1662 enum source_format_class SourcePixelFormat,
1663 unsigned int SurfaceTiling,
1664 unsigned int BytePerPixel,
1665 enum scan_direction_class ScanDirection,
1666 unsigned int SwathWidth,
1667 unsigned int ViewportHeight,
1668 bool GPUVMEnable,
1669 bool HostVMEnable,
1670 unsigned int HostVMMaxNonCachedPageTableLevels,
1671 unsigned int GPUVMMinPageSize,
1672 unsigned int HostVMMinPageSize,
1673 unsigned int PTEBufferSizeInRequests,
1674 unsigned int Pitch,
1675 unsigned int DCCMetaPitch,
1676 unsigned int *MacroTileWidth,
1677 unsigned int *MetaRowByte,
1678 unsigned int *PixelPTEBytesPerRow,
1679 bool *PTEBufferSizeNotExceeded,
1680 unsigned int *dpte_row_width_ub,
1681 unsigned int *dpte_row_height,
1682 unsigned int *MetaRequestWidth,
1683 unsigned int *MetaRequestHeight,
1684 unsigned int *meta_row_width,
1685 unsigned int *meta_row_height,
1686 unsigned int *vm_group_bytes,
1687 unsigned int *dpte_group_bytes,
1688 unsigned int *PixelPTEReqWidth,
1689 unsigned int *PixelPTEReqHeight,
1690 unsigned int *PTERequestSize,
1691 unsigned int *DPDE0BytesFrame,
1692 unsigned int *MetaPTEBytesFrame)
1693 {
1694 unsigned int MPDEBytesFrame = 0;
1695 unsigned int DCCMetaSurfaceBytes = 0;
1696 unsigned int MacroTileSizeBytes = 0;
1697 unsigned int MacroTileHeight = 0;
1698 unsigned int ExtraDPDEBytesFrame = 0;
1699 unsigned int PDEAndMetaPTEBytesFrame = 0;
1700 unsigned int PixelPTEReqHeightPTEs = 0;
1701 unsigned int HostVMDynamicLevels = 0;
1702
1703 double FractionOfPTEReturnDrop;
1704
1705 if (GPUVMEnable == true && HostVMEnable == true) {
1706 if (HostVMMinPageSize < 2048) {
1707 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1708 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1709 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1710 } else {
1711 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1712 }
1713 }
1714
1715 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1716 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1717 if (ScanDirection != dm_vert) {
1718 *meta_row_height = *MetaRequestHeight;
1719 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth)
1720 + *MetaRequestWidth;
1721 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1722 } else {
1723 *meta_row_height = *MetaRequestWidth;
1724 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight)
1725 + *MetaRequestHeight;
1726 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1727 }
1728 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes)
1729 + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1730 if (GPUVMEnable == true) {
1731 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1732 MPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 1);
1733 } else {
1734 *MetaPTEBytesFrame = 0;
1735 MPDEBytesFrame = 0;
1736 }
1737
1738 if (DCCEnable != true) {
1739 *MetaPTEBytesFrame = 0;
1740 MPDEBytesFrame = 0;
1741 *MetaRowByte = 0;
1742 }
1743
1744 if (SurfaceTiling == dm_sw_linear) {
1745 MacroTileSizeBytes = 256;
1746 MacroTileHeight = BlockHeight256Bytes;
1747 } else {
1748 MacroTileSizeBytes = 65536;
1749 MacroTileHeight = 16 * BlockHeight256Bytes;
1750 }
1751 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1752
1753 if (GPUVMEnable == true && mode_lib->vba.GPUVMMaxPageTableLevels > 1) {
1754 if (ScanDirection != dm_vert) {
1755 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1756 } else {
1757 *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1);
1758 }
1759 ExtraDPDEBytesFrame = 128 * (mode_lib->vba.GPUVMMaxPageTableLevels - 2);
1760 } else {
1761 *DPDE0BytesFrame = 0;
1762 ExtraDPDEBytesFrame = 0;
1763 }
1764
1765 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame
1766 + ExtraDPDEBytesFrame;
1767
1768 if (HostVMEnable == true) {
1769 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1770 }
1771
1772 if (SurfaceTiling == dm_sw_linear) {
1773 PixelPTEReqHeightPTEs = 1;
1774 *PixelPTEReqHeight = 1;
1775 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1776 *PTERequestSize = 64;
1777 FractionOfPTEReturnDrop = 0;
1778 } else if (MacroTileSizeBytes == 4096) {
1779 PixelPTEReqHeightPTEs = 1;
1780 *PixelPTEReqHeight = MacroTileHeight;
1781 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1782 *PTERequestSize = 64;
1783 if (ScanDirection != dm_vert)
1784 FractionOfPTEReturnDrop = 0;
1785 else
1786 FractionOfPTEReturnDrop = 7 / 8;
1787 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1788 PixelPTEReqHeightPTEs = 16;
1789 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1790 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1791 *PTERequestSize = 128;
1792 FractionOfPTEReturnDrop = 0;
1793 } else {
1794 PixelPTEReqHeightPTEs = 1;
1795 *PixelPTEReqHeight = MacroTileHeight;
1796 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1797 *PTERequestSize = 64;
1798 FractionOfPTEReturnDrop = 0;
1799 }
1800
1801 if (SurfaceTiling == dm_sw_linear) {
1802 if (PTEBufferSizeInRequests == 0)
1803 *dpte_row_height = 1;
1804 else
1805 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1806 *dpte_row_width_ub = (dml_ceil(((double) SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1807 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1808 } else if (ScanDirection != dm_vert) {
1809 *dpte_row_height = *PixelPTEReqHeight;
1810 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1811 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1812 } else {
1813 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1814 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1815 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1816 }
1817 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop)
1818 <= 64 * PTEBufferSizeInRequests) {
1819 *PTEBufferSizeNotExceeded = true;
1820 } else {
1821 *PTEBufferSizeNotExceeded = false;
1822 }
1823
1824 if (GPUVMEnable != true) {
1825 *PixelPTEBytesPerRow = 0;
1826 *PTEBufferSizeNotExceeded = true;
1827 }
1828 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1829
1830 if (HostVMEnable == true) {
1831 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1832 }
1833
1834 if (HostVMEnable == true) {
1835 *vm_group_bytes = 512;
1836 *dpte_group_bytes = 512;
1837 } else if (GPUVMEnable == true) {
1838 *vm_group_bytes = 2048;
1839 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1840 *dpte_group_bytes = 512;
1841 } else {
1842 *dpte_group_bytes = 2048;
1843 }
1844 } else {
1845 *vm_group_bytes = 0;
1846 *dpte_group_bytes = 0;
1847 }
1848
1849 return PDEAndMetaPTEBytesFrame;
1850 }
1851
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib * mode_lib)1852 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
1853 struct display_mode_lib *mode_lib)
1854 {
1855 struct vba_vars_st *v = &mode_lib->vba;
1856 unsigned int j, k;
1857 long ReorderBytes = 0;
1858 unsigned int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1859 double MaxTotalRDBandwidth = 0;
1860 double MaxTotalRDBandwidthNoUrgentBurst = 0;
1861 bool DestinationLineTimesForPrefetchLessThan2 = false;
1862 bool VRatioPrefetchMoreThan4 = false;
1863 double TWait;
1864
1865 v->WritebackDISPCLK = 0.0;
1866 v->DISPCLKWithRamping = 0;
1867 v->DISPCLKWithoutRamping = 0;
1868 v->GlobalDPPCLK = 0.0;
1869 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
1870 v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] = dml_min3(
1871 v->ReturnBusWidth * v->DCFCLK,
1872 v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth,
1873 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
1874 if (v->HostVMEnable != true) {
1875 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly / 100;
1876 } else {
1877 v->ReturnBW = v->IdealSDPPortBandwidthPerState[v->VoltageLevel][v->maxMpcComb] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
1878 }
1879 /* End DAL custom code */
1880
1881 // DISPCLK and DPPCLK Calculation
1882 //
1883 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1884 if (v->WritebackEnable[k]) {
1885 v->WritebackDISPCLK = dml_max(v->WritebackDISPCLK,
1886 dml30_CalculateWriteBackDISPCLK(
1887 v->WritebackPixelFormat[k],
1888 v->PixelClock[k],
1889 v->WritebackHRatio[k],
1890 v->WritebackVRatio[k],
1891 v->WritebackHTaps[k],
1892 v->WritebackVTaps[k],
1893 v->WritebackSourceWidth[k],
1894 v->WritebackDestinationWidth[k],
1895 v->HTotal[k],
1896 v->WritebackLineBufferSize));
1897 }
1898 }
1899
1900 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1901 if (v->HRatio[k] > 1) {
1902 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1903 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
1904 } else {
1905 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
1906 v->MaxDCHUBToPSCLThroughput,
1907 v->MaxPSCLToLBThroughput);
1908 }
1909
1910 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
1911 * dml_max(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
1912 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
1913
1914 if ((v->htaps[k] > 6 || v->vtaps[k] > 6)
1915 && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
1916 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
1917 }
1918
1919 if ((v->SourcePixelFormat[k] != dm_420_8
1920 && v->SourcePixelFormat[k] != dm_420_10
1921 && v->SourcePixelFormat[k] != dm_420_12
1922 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
1923 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
1924 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
1925 } else {
1926 if (v->HRatioChroma[k] > 1) {
1927 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
1928 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
1929 } else {
1930 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
1931 v->MaxDCHUBToPSCLThroughput,
1932 v->MaxPSCLToLBThroughput);
1933 }
1934 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
1935 * dml_max3(v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
1936 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0);
1937
1938 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6)
1939 && v->DPPCLKUsingSingleDPPChroma
1940 < 2 * v->PixelClock[k]) {
1941 v->DPPCLKUsingSingleDPPChroma = 2
1942 * v->PixelClock[k];
1943 }
1944
1945 v->DPPCLKUsingSingleDPP[k] = dml_max(
1946 v->DPPCLKUsingSingleDPPLuma,
1947 v->DPPCLKUsingSingleDPPChroma);
1948 }
1949 }
1950
1951 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
1952 if (v->BlendingAndTiming[k] != k)
1953 continue;
1954 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
1955 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1956 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1957 * (1 + v->DISPCLKRampingMargin / 100));
1958 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1959 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1960 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
1961 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1962 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1963 * (1 + v->DISPCLKRampingMargin / 100));
1964 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1965 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1966 } else {
1967 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping,
1968 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1969 * (1 + v->DISPCLKRampingMargin / 100));
1970 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping,
1971 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
1972 }
1973 }
1974
1975 v->DISPCLKWithRamping = dml_max(
1976 v->DISPCLKWithRamping,
1977 v->WritebackDISPCLK);
1978 v->DISPCLKWithoutRamping = dml_max(
1979 v->DISPCLKWithoutRamping,
1980 v->WritebackDISPCLK);
1981
1982 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
1983 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1984 v->DISPCLKWithRamping,
1985 v->DISPCLKDPPCLKVCOSpeed);
1986 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(
1987 v->DISPCLKWithoutRamping,
1988 v->DISPCLKDPPCLKVCOSpeed);
1989 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
1990 v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz,
1991 v->DISPCLKDPPCLKVCOSpeed);
1992 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity
1993 > v->MaxDispclkRoundedToDFSGranularity) {
1994 v->DISPCLK_calculated =
1995 v->DISPCLKWithoutRampingRoundedToDFSGranularity;
1996 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity
1997 > v->MaxDispclkRoundedToDFSGranularity) {
1998 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
1999 } else {
2000 v->DISPCLK_calculated =
2001 v->DISPCLKWithRampingRoundedToDFSGranularity;
2002 }
2003 v->DISPCLK = v->DISPCLK_calculated;
2004 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2005
2006 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2007 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k]
2008 / v->DPPPerPlane[k]
2009 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2010 v->GlobalDPPCLK = dml_max(
2011 v->GlobalDPPCLK,
2012 v->DPPCLK_calculated[k]);
2013 }
2014 v->GlobalDPPCLK = RoundToDFSGranularityUp(
2015 v->GlobalDPPCLK,
2016 v->DISPCLKDPPCLKVCOSpeed);
2017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2018 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255
2019 * dml_ceil(
2020 v->DPPCLK_calculated[k] * 255.0
2021 / v->GlobalDPPCLK,
2022 1);
2023 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2024 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2025 }
2026
2027 // Urgent and B P-State/DRAM Clock Change Watermark
2028 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2029 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2030
2031 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2032 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2033 v->SourcePixelFormat[k],
2034 v->SurfaceTiling[k],
2035 &v->BytePerPixelY[k],
2036 &v->BytePerPixelC[k],
2037 &v->BytePerPixelDETY[k],
2038 &v->BytePerPixelDETC[k],
2039 &v->BlockHeight256BytesY[k],
2040 &v->BlockHeight256BytesC[k],
2041 &v->BlockWidth256BytesY[k],
2042 &v->BlockWidth256BytesC[k]);
2043 }
2044
2045 CalculateSwathWidth(
2046 false,
2047 v->NumberOfActivePlanes,
2048 v->SourcePixelFormat,
2049 v->SourceScan,
2050 v->ViewportWidth,
2051 v->ViewportHeight,
2052 v->SurfaceWidthY,
2053 v->SurfaceWidthC,
2054 v->SurfaceHeightY,
2055 v->SurfaceHeightC,
2056 v->ODMCombineEnabled,
2057 v->BytePerPixelY,
2058 v->BytePerPixelC,
2059 v->BlockHeight256BytesY,
2060 v->BlockHeight256BytesC,
2061 v->BlockWidth256BytesY,
2062 v->BlockWidth256BytesC,
2063 v->BlendingAndTiming,
2064 v->HActive,
2065 v->HRatio,
2066 v->DPPPerPlane,
2067 v->SwathWidthSingleDPPY,
2068 v->SwathWidthSingleDPPC,
2069 v->SwathWidthY,
2070 v->SwathWidthC,
2071 v->dummyinteger3,
2072 v->dummyinteger4,
2073 v->swath_width_luma_ub,
2074 v->swath_width_chroma_ub);
2075
2076
2077 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2078 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2079 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k];
2080 DTRACE("read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2081 }
2082
2083
2084 // DCFCLK Deep Sleep
2085 CalculateDCFCLKDeepSleep(
2086 mode_lib,
2087 v->NumberOfActivePlanes,
2088 v->BytePerPixelY,
2089 v->BytePerPixelC,
2090 v->VRatio,
2091 v->VRatioChroma,
2092 v->SwathWidthY,
2093 v->SwathWidthC,
2094 v->DPPPerPlane,
2095 v->HRatio,
2096 v->HRatioChroma,
2097 v->PixelClock,
2098 v->PSCL_THROUGHPUT_LUMA,
2099 v->PSCL_THROUGHPUT_CHROMA,
2100 v->DPPCLK,
2101 v->ReadBandwidthPlaneLuma,
2102 v->ReadBandwidthPlaneChroma,
2103 v->ReturnBusWidth,
2104 &v->DCFCLKDeepSleep);
2105
2106 // DSCCLK
2107 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2108 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2109 v->DSCCLK_calculated[k] = 0.0;
2110 } else {
2111 if (v->OutputFormat[k] == dm_420)
2112 v->DSCFormatFactor = 2;
2113 else if (v->OutputFormat[k] == dm_444)
2114 v->DSCFormatFactor = 1;
2115 else if (v->OutputFormat[k] == dm_n422)
2116 v->DSCFormatFactor = 2;
2117 else
2118 v->DSCFormatFactor = 1;
2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2120 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12
2121 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2122 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2123 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6
2124 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2125 else
2126 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3
2127 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2128 }
2129 }
2130
2131 // DSC Delay
2132 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2133 double BPP = v->OutputBppPerState[k][v->VoltageLevel];
2134
2135 if (v->DSCEnabled[k] && BPP != 0) {
2136 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2137 v->DSCDelay[k] = dscceComputeDelay(v->DSCInputBitPerComponent[k],
2138 BPP,
2139 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2140 v->NumberOfDSCSlices[k],
2141 v->OutputFormat[k],
2142 v->Output[k])
2143 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2144 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2145 v->DSCDelay[k] = 2 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2146 BPP,
2147 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2148 v->NumberOfDSCSlices[k] / 2.0,
2149 v->OutputFormat[k],
2150 v->Output[k])
2151 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2152 } else {
2153 v->DSCDelay[k] = 4 * dscceComputeDelay(v->DSCInputBitPerComponent[k],
2154 BPP,
2155 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2156 v->NumberOfDSCSlices[k] / 4.0,
2157 v->OutputFormat[k],
2158 v->Output[k])
2159 + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2160 }
2161 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2162 } else {
2163 v->DSCDelay[k] = 0;
2164 }
2165 }
2166
2167 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2168 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2169 if (j != k && v->BlendingAndTiming[k] == j
2170 && v->DSCEnabled[j])
2171 v->DSCDelay[k] = v->DSCDelay[j];
2172
2173 // Prefetch
2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2175 unsigned int PDEAndMetaPTEBytesFrameY = 0;
2176 unsigned int PixelPTEBytesPerRowY = 0;
2177 unsigned int MetaRowByteY = 0;
2178 unsigned int MetaRowByteC = 0;
2179 unsigned int PDEAndMetaPTEBytesFrameC = 0;
2180 unsigned int PixelPTEBytesPerRowC = 0;
2181 bool PTEBufferSizeNotExceededY = 0;
2182 bool PTEBufferSizeNotExceededC = 0;
2183
2184
2185 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2186 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2187 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2188 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2189 } else {
2190 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2191 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2192
2193 }
2194 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2195 mode_lib,
2196 v->DCCEnable[k],
2197 v->BlockHeight256BytesC[k],
2198 v->BlockWidth256BytesC[k],
2199 v->SourcePixelFormat[k],
2200 v->SurfaceTiling[k],
2201 v->BytePerPixelC[k],
2202 v->SourceScan[k],
2203 v->SwathWidthC[k],
2204 v->ViewportHeightChroma[k],
2205 v->GPUVMEnable,
2206 v->HostVMEnable,
2207 v->HostVMMaxNonCachedPageTableLevels,
2208 v->GPUVMMinPageSize,
2209 v->HostVMMinPageSize,
2210 v->PTEBufferSizeInRequestsForChroma,
2211 v->PitchC[k],
2212 v->DCCMetaPitchC[k],
2213 &v->MacroTileWidthC[k],
2214 &MetaRowByteC,
2215 &PixelPTEBytesPerRowC,
2216 &PTEBufferSizeNotExceededC,
2217 &v->dpte_row_width_chroma_ub[k],
2218 &v->dpte_row_height_chroma[k],
2219 &v->meta_req_width_chroma[k],
2220 &v->meta_req_height_chroma[k],
2221 &v->meta_row_width_chroma[k],
2222 &v->meta_row_height_chroma[k],
2223 &v->dummyinteger1,
2224 &v->dummyinteger2,
2225 &v->PixelPTEReqWidthC[k],
2226 &v->PixelPTEReqHeightC[k],
2227 &v->PTERequestSizeC[k],
2228 &v->dpde0_bytes_per_frame_ub_c[k],
2229 &v->meta_pte_bytes_per_frame_ub_c[k]);
2230
2231 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2232 mode_lib,
2233 v->VRatioChroma[k],
2234 v->VTAPsChroma[k],
2235 v->Interlace[k],
2236 v->ProgressiveToInterlaceUnitInOPP,
2237 v->SwathHeightC[k],
2238 v->ViewportYStartC[k],
2239 &v->VInitPreFillC[k],
2240 &v->MaxNumSwathC[k]);
2241 } else {
2242 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2243 v->PTEBufferSizeInRequestsForChroma = 0;
2244 PixelPTEBytesPerRowC = 0;
2245 PDEAndMetaPTEBytesFrameC = 0;
2246 MetaRowByteC = 0;
2247 v->MaxNumSwathC[k] = 0;
2248 v->PrefetchSourceLinesC[k] = 0;
2249 }
2250
2251 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2252 mode_lib,
2253 v->DCCEnable[k],
2254 v->BlockHeight256BytesY[k],
2255 v->BlockWidth256BytesY[k],
2256 v->SourcePixelFormat[k],
2257 v->SurfaceTiling[k],
2258 v->BytePerPixelY[k],
2259 v->SourceScan[k],
2260 v->SwathWidthY[k],
2261 v->ViewportHeight[k],
2262 v->GPUVMEnable,
2263 v->HostVMEnable,
2264 v->HostVMMaxNonCachedPageTableLevels,
2265 v->GPUVMMinPageSize,
2266 v->HostVMMinPageSize,
2267 v->PTEBufferSizeInRequestsForLuma,
2268 v->PitchY[k],
2269 v->DCCMetaPitchY[k],
2270 &v->MacroTileWidthY[k],
2271 &MetaRowByteY,
2272 &PixelPTEBytesPerRowY,
2273 &PTEBufferSizeNotExceededY,
2274 &v->dpte_row_width_luma_ub[k],
2275 &v->dpte_row_height[k],
2276 &v->meta_req_width[k],
2277 &v->meta_req_height[k],
2278 &v->meta_row_width[k],
2279 &v->meta_row_height[k],
2280 &v->vm_group_bytes[k],
2281 &v->dpte_group_bytes[k],
2282 &v->PixelPTEReqWidthY[k],
2283 &v->PixelPTEReqHeightY[k],
2284 &v->PTERequestSizeY[k],
2285 &v->dpde0_bytes_per_frame_ub_l[k],
2286 &v->meta_pte_bytes_per_frame_ub_l[k]);
2287
2288 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2289 mode_lib,
2290 v->VRatio[k],
2291 v->vtaps[k],
2292 v->Interlace[k],
2293 v->ProgressiveToInterlaceUnitInOPP,
2294 v->SwathHeightY[k],
2295 v->ViewportYStartY[k],
2296 &v->VInitPreFillY[k],
2297 &v->MaxNumSwathY[k]);
2298 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2299 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY
2300 + PDEAndMetaPTEBytesFrameC;
2301 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2302
2303 CalculateRowBandwidth(
2304 v->GPUVMEnable,
2305 v->SourcePixelFormat[k],
2306 v->VRatio[k],
2307 v->VRatioChroma[k],
2308 v->DCCEnable[k],
2309 v->HTotal[k] / v->PixelClock[k],
2310 MetaRowByteY,
2311 MetaRowByteC,
2312 v->meta_row_height[k],
2313 v->meta_row_height_chroma[k],
2314 PixelPTEBytesPerRowY,
2315 PixelPTEBytesPerRowC,
2316 v->dpte_row_height[k],
2317 v->dpte_row_height_chroma[k],
2318 &v->meta_row_bw[k],
2319 &v->dpte_row_bw[k]);
2320 }
2321
2322 v->TotalDCCActiveDPP = 0;
2323 v->TotalActiveDPP = 0;
2324 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2325 v->TotalActiveDPP = v->TotalActiveDPP
2326 + v->DPPPerPlane[k];
2327 if (v->DCCEnable[k])
2328 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP
2329 + v->DPPPerPlane[k];
2330 }
2331
2332
2333 ReorderBytes = v->NumberOfChannels * dml_max3(
2334 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2335 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2336 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2337
2338 v->UrgentExtraLatency = CalculateExtraLatency(
2339 v->RoundTripPingLatencyCycles,
2340 ReorderBytes,
2341 v->DCFCLK,
2342 v->TotalActiveDPP,
2343 v->PixelChunkSizeInKByte,
2344 v->TotalDCCActiveDPP,
2345 v->MetaChunkSize,
2346 v->ReturnBW,
2347 v->GPUVMEnable,
2348 v->HostVMEnable,
2349 v->NumberOfActivePlanes,
2350 v->DPPPerPlane,
2351 v->dpte_group_bytes,
2352 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2353 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2354 v->HostVMMinPageSize,
2355 v->HostVMMaxNonCachedPageTableLevels);
2356
2357 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2358
2359 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2360 if (v->BlendingAndTiming[k] == k) {
2361 if (v->WritebackEnable[k] == true) {
2362 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency +
2363 CalculateWriteBackDelay(v->WritebackPixelFormat[k],
2364 v->WritebackHRatio[k],
2365 v->WritebackVRatio[k],
2366 v->WritebackVTaps[k],
2367 v->WritebackDestinationWidth[k],
2368 v->WritebackDestinationHeight[k],
2369 v->WritebackSourceHeight[k],
2370 v->HTotal[k]) / v->DISPCLK;
2371 } else
2372 v->WritebackDelay[v->VoltageLevel][k] = 0;
2373 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2374 if (v->BlendingAndTiming[j] == k
2375 && v->WritebackEnable[j] == true) {
2376 v->WritebackDelay[v->VoltageLevel][k] = dml_max(v->WritebackDelay[v->VoltageLevel][k],
2377 v->WritebackLatency + CalculateWriteBackDelay(
2378 v->WritebackPixelFormat[j],
2379 v->WritebackHRatio[j],
2380 v->WritebackVRatio[j],
2381 v->WritebackVTaps[j],
2382 v->WritebackDestinationWidth[j],
2383 v->WritebackDestinationHeight[j],
2384 v->WritebackSourceHeight[j],
2385 v->HTotal[k]) / v->DISPCLK);
2386 }
2387 }
2388 }
2389 }
2390
2391 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2392 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2393 if (v->BlendingAndTiming[k] == j)
2394 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2395
2396 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2397 v->MaxVStartupLines[k] = v->VTotal[k] - v->VActive[k] - dml_max(1.0, dml_ceil((double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1));
2398 }
2399
2400 v->MaximumMaxVStartupLines = 0;
2401 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2402 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2403
2404 if (v->DRAMClockChangeLatencyOverride > 0.0) {
2405 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatencyOverride;
2406 } else {
2407 v->FinalDRAMClockChangeLatency = v->DRAMClockChangeLatency;
2408 }
2409 v->UrgentLatency = CalculateUrgentLatency(v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock);
2410
2411
2412 v->FractionOfUrgentBandwidth = 0.0;
2413 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2414
2415 v->VStartupLines = 13;
2416
2417 do {
2418 MaxTotalRDBandwidth = 0;
2419 MaxTotalRDBandwidthNoUrgentBurst = 0;
2420 DestinationLineTimesForPrefetchLessThan2 = false;
2421 VRatioPrefetchMoreThan4 = false;
2422 TWait = CalculateTWait(
2423 PrefetchMode,
2424 v->FinalDRAMClockChangeLatency,
2425 v->UrgentLatency,
2426 v->SREnterPlusExitTime);
2427
2428 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2429 Pipe myPipe = { 0 };
2430
2431 myPipe.DPPCLK = v->DPPCLK[k];
2432 myPipe.DISPCLK = v->DISPCLK;
2433 myPipe.PixelClock = v->PixelClock[k];
2434 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2435 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2436 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2437 myPipe.SourceScan = v->SourceScan[k];
2438 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2439 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2440 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2441 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2442 myPipe.InterlaceEnable = v->Interlace[k];
2443 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2444 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2445 myPipe.HTotal = v->HTotal[k];
2446 myPipe.DCCEnable = v->DCCEnable[k];
2447 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
2448
2449 v->ErrorResult[k] = CalculatePrefetchSchedule(
2450 mode_lib,
2451 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2452 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2453 &myPipe,
2454 v->DSCDelay[k],
2455 v->DPPCLKDelaySubtotal
2456 + v->DPPCLKDelayCNVCFormater,
2457 v->DPPCLKDelaySCL,
2458 v->DPPCLKDelaySCLLBOnly,
2459 v->DPPCLKDelayCNVCCursor,
2460 v->DISPCLKDelaySubtotal,
2461 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2462 v->OutputFormat[k],
2463 v->MaxInterDCNTileRepeaters,
2464 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2465 v->MaxVStartupLines[k],
2466 v->GPUVMMaxPageTableLevels,
2467 v->GPUVMEnable,
2468 v->HostVMEnable,
2469 v->HostVMMaxNonCachedPageTableLevels,
2470 v->HostVMMinPageSize,
2471 v->DynamicMetadataEnable[k],
2472 v->DynamicMetadataVMEnabled,
2473 v->DynamicMetadataLinesBeforeActiveRequired[k],
2474 v->DynamicMetadataTransmittedBytes[k],
2475 v->UrgentLatency,
2476 v->UrgentExtraLatency,
2477 v->TCalc,
2478 v->PDEAndMetaPTEBytesFrame[k],
2479 v->MetaRowByte[k],
2480 v->PixelPTEBytesPerRow[k],
2481 v->PrefetchSourceLinesY[k],
2482 v->SwathWidthY[k],
2483 v->BytePerPixelY[k],
2484 v->VInitPreFillY[k],
2485 v->MaxNumSwathY[k],
2486 v->PrefetchSourceLinesC[k],
2487 v->SwathWidthC[k],
2488 v->BytePerPixelC[k],
2489 v->VInitPreFillC[k],
2490 v->MaxNumSwathC[k],
2491 v->swath_width_luma_ub[k],
2492 v->swath_width_chroma_ub[k],
2493 v->SwathHeightY[k],
2494 v->SwathHeightC[k],
2495 TWait,
2496 v->ProgressiveToInterlaceUnitInOPP,
2497 &v->DSTXAfterScaler[k],
2498 &v->DSTYAfterScaler[k],
2499 &v->DestinationLinesForPrefetch[k],
2500 &v->PrefetchBandwidth[k],
2501 &v->DestinationLinesToRequestVMInVBlank[k],
2502 &v->DestinationLinesToRequestRowInVBlank[k],
2503 &v->VRatioPrefetchY[k],
2504 &v->VRatioPrefetchC[k],
2505 &v->RequiredPrefetchPixDataBWLuma[k],
2506 &v->RequiredPrefetchPixDataBWChroma[k],
2507 &v->NotEnoughTimeForDynamicMetadata[k],
2508 &v->Tno_bw[k],
2509 &v->prefetch_vmrow_bw[k],
2510 &v->Tdmdl_vm[k],
2511 &v->Tdmdl[k],
2512 &v->VUpdateOffsetPix[k],
2513 &v->VUpdateWidthPix[k],
2514 &v->VReadyOffsetPix[k]);
2515 if (v->BlendingAndTiming[k] == k) {
2516 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2517 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[k];
2518 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[k];
2519 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[k] / 4.0, 1);
2520 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2521 } else {
2522 int x = v->BlendingAndTiming[k];
2523 double TotalRepeaterDelayTime = v->MaxInterDCNTileRepeaters * (2 / v->DPPCLK[k] + 3 / v->DISPCLK);
2524 v->VUpdateWidthPix[k] = (14 / v->DCFCLKDeepSleep + 12 / v->DPPCLK[k] + TotalRepeaterDelayTime) * v->PixelClock[x];
2525 v->VReadyOffsetPix[k] = dml_max(150.0 / v->DPPCLK[k], TotalRepeaterDelayTime + 20 / v->DCFCLKDeepSleep + 10 / v->DPPCLK[k]) * v->PixelClock[x];
2526 v->VUpdateOffsetPix[k] = dml_ceil(v->HTotal[x] / 4.0, 1);
2527 if (!v->MaxVStartupLines[x])
2528 v->MaxVStartupLines[x] = v->MaxVStartupLines[k];
2529 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[x]);
2530 }
2531 }
2532
2533 v->NotEnoughUrgentLatencyHiding[0][0] = false;
2534 v->NotEnoughUrgentLatencyHidingPre = false;
2535
2536 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2537 v->cursor_bw[k] = v->NumberOfCursors[k]
2538 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2539 / 8.0
2540 / (v->HTotal[k] / v->PixelClock[k])
2541 * v->VRatio[k];
2542 v->cursor_bw_pre[k] = v->NumberOfCursors[k]
2543 * v->CursorWidth[k][0] * v->CursorBPP[k][0]
2544 / 8.0
2545 / (v->HTotal[k] / v->PixelClock[k])
2546 * v->VRatioPrefetchY[k];
2547
2548 CalculateUrgentBurstFactor(
2549 v->swath_width_luma_ub[k],
2550 v->swath_width_chroma_ub[k],
2551 v->DETBufferSizeInKByte[0],
2552 v->SwathHeightY[k],
2553 v->SwathHeightC[k],
2554 v->HTotal[k] / v->PixelClock[k],
2555 v->UrgentLatency,
2556 v->CursorBufferSize,
2557 v->CursorWidth[k][0],
2558 v->CursorBPP[k][0],
2559 v->VRatio[k],
2560 v->VRatioChroma[k],
2561 v->BytePerPixelDETY[k],
2562 v->BytePerPixelDETC[k],
2563 v->DETBufferSizeY[k],
2564 v->DETBufferSizeC[k],
2565 &v->UrgentBurstFactorCursor[k],
2566 &v->UrgentBurstFactorLuma[k],
2567 &v->UrgentBurstFactorChroma[k],
2568 &v->NoUrgentLatencyHiding[k]);
2569
2570 CalculateUrgentBurstFactor(
2571 v->swath_width_luma_ub[k],
2572 v->swath_width_chroma_ub[k],
2573 v->DETBufferSizeInKByte[0],
2574 v->SwathHeightY[k],
2575 v->SwathHeightC[k],
2576 v->HTotal[k] / v->PixelClock[k],
2577 v->UrgentLatency,
2578 v->CursorBufferSize,
2579 v->CursorWidth[k][0],
2580 v->CursorBPP[k][0],
2581 v->VRatioPrefetchY[k],
2582 v->VRatioPrefetchC[k],
2583 v->BytePerPixelDETY[k],
2584 v->BytePerPixelDETC[k],
2585 v->DETBufferSizeY[k],
2586 v->DETBufferSizeC[k],
2587 &v->UrgentBurstFactorCursorPre[k],
2588 &v->UrgentBurstFactorLumaPre[k],
2589 &v->UrgentBurstFactorChromaPre[k],
2590 &v->NoUrgentLatencyHidingPre[k]);
2591
2592 MaxTotalRDBandwidth = MaxTotalRDBandwidth +
2593 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2594 v->ReadBandwidthPlaneLuma[k] *
2595 v->UrgentBurstFactorLuma[k] +
2596 v->ReadBandwidthPlaneChroma[k] *
2597 v->UrgentBurstFactorChroma[k] +
2598 v->cursor_bw[k] *
2599 v->UrgentBurstFactorCursor[k] +
2600 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2601 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2602 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] *
2603 v->UrgentBurstFactorCursorPre[k]);
2604
2605 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst +
2606 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2607 v->ReadBandwidthPlaneLuma[k] +
2608 v->ReadBandwidthPlaneChroma[k] +
2609 v->cursor_bw[k] +
2610 v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2611 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2612
2613 if (v->DestinationLinesForPrefetch[k] < 2)
2614 DestinationLineTimesForPrefetchLessThan2 = true;
2615 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2616 VRatioPrefetchMoreThan4 = true;
2617 if (v->NoUrgentLatencyHiding[k] == true)
2618 v->NotEnoughUrgentLatencyHiding[0][0] = true;
2619
2620 if (v->NoUrgentLatencyHidingPre[k] == true)
2621 v->NotEnoughUrgentLatencyHidingPre = true;
2622 }
2623 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2624
2625
2626 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NotEnoughUrgentLatencyHiding[0][0] == 0
2627 && v->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4
2628 && !DestinationLineTimesForPrefetchLessThan2)
2629 v->PrefetchModeSupported = true;
2630 else {
2631 v->PrefetchModeSupported = false;
2632 dml_print("DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n");
2633 dml_print("DML: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", MaxTotalRDBandwidth, v->ReturnBW);
2634 dml_print("DML: VRatioPrefetch %s more than 4\n", (VRatioPrefetchMoreThan4) ? "is" : "is not");
2635 dml_print("DML: DestinationLines for Prefetch %s less than 2\n", (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2636 }
2637
2638 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2639 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2640 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2641 v->BandwidthAvailableForImmediateFlip =
2642 v->BandwidthAvailableForImmediateFlip
2643 - dml_max(
2644 v->ReadBandwidthPlaneLuma[k] * v->UrgentBurstFactorLuma[k]
2645 + v->ReadBandwidthPlaneChroma[k] * v->UrgentBurstFactorChroma[k]
2646 + v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2647 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2648 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2649 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2650 }
2651
2652 v->TotImmediateFlipBytes = 0;
2653 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2654 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2655 }
2656 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2657 CalculateFlipSchedule(
2658 mode_lib,
2659 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
2660 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
2661 v->UrgentExtraLatency,
2662 v->UrgentLatency,
2663 v->GPUVMMaxPageTableLevels,
2664 v->HostVMEnable,
2665 v->HostVMMaxNonCachedPageTableLevels,
2666 v->GPUVMEnable,
2667 v->HostVMMinPageSize,
2668 v->PDEAndMetaPTEBytesFrame[k],
2669 v->MetaRowByte[k],
2670 v->PixelPTEBytesPerRow[k],
2671 v->BandwidthAvailableForImmediateFlip,
2672 v->TotImmediateFlipBytes,
2673 v->SourcePixelFormat[k],
2674 v->HTotal[k] / v->PixelClock[k],
2675 v->VRatio[k],
2676 v->VRatioChroma[k],
2677 v->Tno_bw[k],
2678 v->DCCEnable[k],
2679 v->dpte_row_height[k],
2680 v->meta_row_height[k],
2681 v->dpte_row_height_chroma[k],
2682 v->meta_row_height_chroma[k],
2683 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2684 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2685 &v->final_flip_bw[k],
2686 &v->ImmediateFlipSupportedForPipe[k]);
2687 }
2688 v->total_dcn_read_bw_with_flip = 0.0;
2689 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2690 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2691 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3(
2692 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2693 v->DPPPerPlane[k] * v->final_flip_bw[k] +
2694 v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] +
2695 v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k] +
2696 v->cursor_bw[k] * v->UrgentBurstFactorCursor[k],
2697 v->DPPPerPlane[k] * (v->final_flip_bw[k] +
2698 v->RequiredPrefetchPixDataBWLuma[k] * v->UrgentBurstFactorLumaPre[k] +
2699 v->RequiredPrefetchPixDataBWChroma[k] * v->UrgentBurstFactorChromaPre[k]) +
2700 v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
2701 v->total_dcn_read_bw_with_flip_no_urgent_burst =
2702 v->total_dcn_read_bw_with_flip_no_urgent_burst +
2703 dml_max3(v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2704 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2705 v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2706
2707 }
2708 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2709
2710 v->ImmediateFlipSupported = true;
2711 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2712 v->ImmediateFlipSupported = false;
2713 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2714 }
2715 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2716 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2717 v->ImmediateFlipSupported = false;
2718 }
2719 }
2720 } else {
2721 v->ImmediateFlipSupported = false;
2722 }
2723
2724 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2725 if (v->ErrorResult[k] || v->NotEnoughTimeForDynamicMetadata[k]) {
2726 v->PrefetchModeSupported = false;
2727 dml_print("DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n");
2728 }
2729 }
2730
2731 v->VStartupLines = v->VStartupLines + 1;
2732 v->PrefetchModeSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport &&
2733 !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2734 v->ImmediateFlipSupported)) ? true : false;
2735 } while (!v->PrefetchModeSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2736 ASSERT(v->PrefetchModeSupported);
2737
2738 //Watermarks and NB P-State/DRAM Clock Change Support
2739 {
2740 enum clock_change_support DRAMClockChangeSupport = 0; // dummy
2741 CalculateWatermarksAndDRAMSpeedChangeSupport(
2742 mode_lib,
2743 PrefetchMode,
2744 v->NumberOfActivePlanes,
2745 v->MaxLineBufferLines,
2746 v->LineBufferSize,
2747 v->DPPOutputBufferPixels,
2748 v->DETBufferSizeInKByte[0],
2749 v->WritebackInterfaceBufferSize,
2750 v->DCFCLK,
2751 v->ReturnBW,
2752 v->GPUVMEnable,
2753 v->dpte_group_bytes,
2754 v->MetaChunkSize,
2755 v->UrgentLatency,
2756 v->UrgentExtraLatency,
2757 v->WritebackLatency,
2758 v->WritebackChunkSize,
2759 v->SOCCLK,
2760 v->FinalDRAMClockChangeLatency,
2761 v->SRExitTime,
2762 v->SREnterPlusExitTime,
2763 v->DCFCLKDeepSleep,
2764 v->DPPPerPlane,
2765 v->DCCEnable,
2766 v->DPPCLK,
2767 v->DETBufferSizeY,
2768 v->DETBufferSizeC,
2769 v->SwathHeightY,
2770 v->SwathHeightC,
2771 v->LBBitPerPixel,
2772 v->SwathWidthY,
2773 v->SwathWidthC,
2774 v->HRatio,
2775 v->HRatioChroma,
2776 v->vtaps,
2777 v->VTAPsChroma,
2778 v->VRatio,
2779 v->VRatioChroma,
2780 v->HTotal,
2781 v->PixelClock,
2782 v->BlendingAndTiming,
2783 v->BytePerPixelDETY,
2784 v->BytePerPixelDETC,
2785 v->DSTXAfterScaler,
2786 v->DSTYAfterScaler,
2787 v->WritebackEnable,
2788 v->WritebackPixelFormat,
2789 v->WritebackDestinationWidth,
2790 v->WritebackDestinationHeight,
2791 v->WritebackSourceHeight,
2792 &DRAMClockChangeSupport,
2793 &v->UrgentWatermark,
2794 &v->WritebackUrgentWatermark,
2795 &v->DRAMClockChangeWatermark,
2796 &v->WritebackDRAMClockChangeWatermark,
2797 &v->StutterExitWatermark,
2798 &v->StutterEnterPlusExitWatermark,
2799 &v->MinActiveDRAMClockChangeLatencySupported);
2800
2801 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2802 if (v->WritebackEnable[k] == true) {
2803 if (v->BlendingAndTiming[k] == k) {
2804 v->ThisVStartup = v->VStartup[k];
2805 } else {
2806 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2807 if (v->BlendingAndTiming[k] == j) {
2808 v->ThisVStartup = v->VStartup[j];
2809 }
2810 }
2811 }
2812 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(0,
2813 v->ThisVStartup * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2814 } else {
2815 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2816 }
2817 }
2818
2819 }
2820
2821
2822 //Display Pipeline Delivery Time in Prefetch, Groups
2823 CalculatePixelDeliveryTimes(
2824 v->NumberOfActivePlanes,
2825 v->VRatio,
2826 v->VRatioChroma,
2827 v->VRatioPrefetchY,
2828 v->VRatioPrefetchC,
2829 v->swath_width_luma_ub,
2830 v->swath_width_chroma_ub,
2831 v->DPPPerPlane,
2832 v->HRatio,
2833 v->HRatioChroma,
2834 v->PixelClock,
2835 v->PSCL_THROUGHPUT_LUMA,
2836 v->PSCL_THROUGHPUT_CHROMA,
2837 v->DPPCLK,
2838 v->BytePerPixelC,
2839 v->SourceScan,
2840 v->NumberOfCursors,
2841 v->CursorWidth,
2842 v->CursorBPP,
2843 v->BlockWidth256BytesY,
2844 v->BlockHeight256BytesY,
2845 v->BlockWidth256BytesC,
2846 v->BlockHeight256BytesC,
2847 v->DisplayPipeLineDeliveryTimeLuma,
2848 v->DisplayPipeLineDeliveryTimeChroma,
2849 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
2850 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
2851 v->DisplayPipeRequestDeliveryTimeLuma,
2852 v->DisplayPipeRequestDeliveryTimeChroma,
2853 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
2854 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
2855 v->CursorRequestDeliveryTime,
2856 v->CursorRequestDeliveryTimePrefetch);
2857
2858 CalculateMetaAndPTETimes(
2859 v->NumberOfActivePlanes,
2860 v->GPUVMEnable,
2861 v->MetaChunkSize,
2862 v->MinMetaChunkSizeBytes,
2863 v->HTotal,
2864 v->VRatio,
2865 v->VRatioChroma,
2866 v->DestinationLinesToRequestRowInVBlank,
2867 v->DestinationLinesToRequestRowInImmediateFlip,
2868 v->DCCEnable,
2869 v->PixelClock,
2870 v->BytePerPixelY,
2871 v->BytePerPixelC,
2872 v->SourceScan,
2873 v->dpte_row_height,
2874 v->dpte_row_height_chroma,
2875 v->meta_row_width,
2876 v->meta_row_width_chroma,
2877 v->meta_row_height,
2878 v->meta_row_height_chroma,
2879 v->meta_req_width,
2880 v->meta_req_width_chroma,
2881 v->meta_req_height,
2882 v->meta_req_height_chroma,
2883 v->dpte_group_bytes,
2884 v->PTERequestSizeY,
2885 v->PTERequestSizeC,
2886 v->PixelPTEReqWidthY,
2887 v->PixelPTEReqHeightY,
2888 v->PixelPTEReqWidthC,
2889 v->PixelPTEReqHeightC,
2890 v->dpte_row_width_luma_ub,
2891 v->dpte_row_width_chroma_ub,
2892 v->DST_Y_PER_PTE_ROW_NOM_L,
2893 v->DST_Y_PER_PTE_ROW_NOM_C,
2894 v->DST_Y_PER_META_ROW_NOM_L,
2895 v->DST_Y_PER_META_ROW_NOM_C,
2896 v->TimePerMetaChunkNominal,
2897 v->TimePerChromaMetaChunkNominal,
2898 v->TimePerMetaChunkVBlank,
2899 v->TimePerChromaMetaChunkVBlank,
2900 v->TimePerMetaChunkFlip,
2901 v->TimePerChromaMetaChunkFlip,
2902 v->time_per_pte_group_nom_luma,
2903 v->time_per_pte_group_vblank_luma,
2904 v->time_per_pte_group_flip_luma,
2905 v->time_per_pte_group_nom_chroma,
2906 v->time_per_pte_group_vblank_chroma,
2907 v->time_per_pte_group_flip_chroma);
2908
2909 CalculateVMGroupAndRequestTimes(
2910 v->NumberOfActivePlanes,
2911 v->GPUVMEnable,
2912 v->GPUVMMaxPageTableLevels,
2913 v->HTotal,
2914 v->BytePerPixelC,
2915 v->DestinationLinesToRequestVMInVBlank,
2916 v->DestinationLinesToRequestVMInImmediateFlip,
2917 v->DCCEnable,
2918 v->PixelClock,
2919 v->dpte_row_width_luma_ub,
2920 v->dpte_row_width_chroma_ub,
2921 v->vm_group_bytes,
2922 v->dpde0_bytes_per_frame_ub_l,
2923 v->dpde0_bytes_per_frame_ub_c,
2924 v->meta_pte_bytes_per_frame_ub_l,
2925 v->meta_pte_bytes_per_frame_ub_c,
2926 v->TimePerVMGroupVBlank,
2927 v->TimePerVMGroupFlip,
2928 v->TimePerVMRequestVBlank,
2929 v->TimePerVMRequestFlip);
2930
2931
2932 // Min TTUVBlank
2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2934 if (PrefetchMode == 0) {
2935 v->AllowDRAMClockChangeDuringVBlank[k] = true;
2936 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2937 v->MinTTUVBlank[k] = dml_max(
2938 v->DRAMClockChangeWatermark,
2939 dml_max(
2940 v->StutterEnterPlusExitWatermark,
2941 v->UrgentWatermark));
2942 } else if (PrefetchMode == 1) {
2943 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2944 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
2945 v->MinTTUVBlank[k] = dml_max(
2946 v->StutterEnterPlusExitWatermark,
2947 v->UrgentWatermark);
2948 } else {
2949 v->AllowDRAMClockChangeDuringVBlank[k] = false;
2950 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
2951 v->MinTTUVBlank[k] = v->UrgentWatermark;
2952 }
2953 if (!v->DynamicMetadataEnable[k])
2954 v->MinTTUVBlank[k] = v->TCalc
2955 + v->MinTTUVBlank[k];
2956 }
2957
2958 // DCC Configuration
2959 v->ActiveDPPs = 0;
2960 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2961 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
2962 v->SourcePixelFormat[k],
2963 v->SurfaceWidthY[k],
2964 v->SurfaceWidthC[k],
2965 v->SurfaceHeightY[k],
2966 v->SurfaceHeightC[k],
2967 v->DETBufferSizeInKByte[0] * 1024,
2968 v->BlockHeight256BytesY[k],
2969 v->BlockHeight256BytesC[k],
2970 v->SurfaceTiling[k],
2971 v->BytePerPixelY[k],
2972 v->BytePerPixelC[k],
2973 v->BytePerPixelDETY[k],
2974 v->BytePerPixelDETC[k],
2975 v->SourceScan[k],
2976 &v->DCCYMaxUncompressedBlock[k],
2977 &v->DCCCMaxUncompressedBlock[k],
2978 &v->DCCYMaxCompressedBlock[k],
2979 &v->DCCCMaxCompressedBlock[k],
2980 &v->DCCYIndependentBlock[k],
2981 &v->DCCCIndependentBlock[k]);
2982 }
2983
2984 {
2985 //Maximum Bandwidth Used
2986 v->TotalDataReadBandwidth = 0;
2987 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2988 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth
2989 + v->ReadBandwidthPlaneLuma[k]
2990 + v->ReadBandwidthPlaneChroma[k];
2991 }
2992 }
2993
2994 // VStartup Margin
2995 v->VStartupMargin = 0;
2996 v->FirstMainPlane = true;
2997 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2998 if (v->BlendingAndTiming[k] == k) {
2999 double margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k]
3000 / v->PixelClock[k];
3001 if (v->FirstMainPlane == true) {
3002 v->VStartupMargin = margin;
3003 v->FirstMainPlane = false;
3004 } else {
3005 v->VStartupMargin = dml_min(v->VStartupMargin, margin);
3006 }
3007 }
3008 }
3009
3010 // Stutter Efficiency
3011 CalculateStutterEfficiency(
3012 v->NumberOfActivePlanes,
3013 v->ROBBufferSizeInKByte,
3014 v->TotalDataReadBandwidth,
3015 v->DCFCLK,
3016 v->ReturnBW,
3017 v->SRExitTime,
3018 v->SynchronizedVBlank,
3019 v->DPPPerPlane,
3020 v->DETBufferSizeY,
3021 v->BytePerPixelY,
3022 v->BytePerPixelDETY,
3023 v->SwathWidthY,
3024 v->SwathHeightY,
3025 v->SwathHeightC,
3026 v->DCCRateLuma,
3027 v->DCCRateChroma,
3028 v->HTotal,
3029 v->VTotal,
3030 v->PixelClock,
3031 v->VRatio,
3032 v->SourceScan,
3033 v->BlockHeight256BytesY,
3034 v->BlockWidth256BytesY,
3035 v->BlockHeight256BytesC,
3036 v->BlockWidth256BytesC,
3037 v->DCCYMaxUncompressedBlock,
3038 v->DCCCMaxUncompressedBlock,
3039 v->VActive,
3040 v->DCCEnable,
3041 v->WritebackEnable,
3042 v->ReadBandwidthPlaneLuma,
3043 v->ReadBandwidthPlaneChroma,
3044 v->meta_row_bw,
3045 v->dpte_row_bw,
3046 &v->StutterEfficiencyNotIncludingVBlank,
3047 &v->StutterEfficiency,
3048 &v->StutterPeriod);
3049 }
3050
DisplayPipeConfiguration(struct display_mode_lib * mode_lib)3051 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3052 {
3053 // Display Pipe Configuration
3054 double BytePerPixDETY[DC__NUM_DPP__MAX] = { 0 };
3055 double BytePerPixDETC[DC__NUM_DPP__MAX] = { 0 };
3056 int BytePerPixY[DC__NUM_DPP__MAX] = { 0 };
3057 int BytePerPixC[DC__NUM_DPP__MAX] = { 0 };
3058 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX] = { 0 };
3059 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX] = { 0 };
3060 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX] = { 0 };
3061 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX] = { 0 };
3062 double dummy1[DC__NUM_DPP__MAX] = { 0 };
3063 double dummy2[DC__NUM_DPP__MAX] = { 0 };
3064 double dummy3[DC__NUM_DPP__MAX] = { 0 };
3065 double dummy4[DC__NUM_DPP__MAX] = { 0 };
3066 int dummy5[DC__NUM_DPP__MAX] = { 0 };
3067 int dummy6[DC__NUM_DPP__MAX] = { 0 };
3068 bool dummy7[DC__NUM_DPP__MAX] = { 0 };
3069 bool dummysinglestring = 0;
3070 unsigned int k;
3071
3072 for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
3073
3074 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3075 mode_lib->vba.SourcePixelFormat[k],
3076 mode_lib->vba.SurfaceTiling[k],
3077 &BytePerPixY[k],
3078 &BytePerPixC[k],
3079 &BytePerPixDETY[k],
3080 &BytePerPixDETC[k],
3081 &Read256BytesBlockHeightY[k],
3082 &Read256BytesBlockHeightC[k],
3083 &Read256BytesBlockWidthY[k],
3084 &Read256BytesBlockWidthC[k]);
3085 }
3086 CalculateSwathAndDETConfiguration(
3087 false,
3088 mode_lib->vba.NumberOfActivePlanes,
3089 mode_lib->vba.DETBufferSizeInKByte[0],
3090 dummy1,
3091 dummy2,
3092 mode_lib->vba.SourceScan,
3093 mode_lib->vba.SourcePixelFormat,
3094 mode_lib->vba.SurfaceTiling,
3095 mode_lib->vba.ViewportWidth,
3096 mode_lib->vba.ViewportHeight,
3097 mode_lib->vba.SurfaceWidthY,
3098 mode_lib->vba.SurfaceWidthC,
3099 mode_lib->vba.SurfaceHeightY,
3100 mode_lib->vba.SurfaceHeightC,
3101 Read256BytesBlockHeightY,
3102 Read256BytesBlockHeightC,
3103 Read256BytesBlockWidthY,
3104 Read256BytesBlockWidthC,
3105 mode_lib->vba.ODMCombineEnabled,
3106 mode_lib->vba.BlendingAndTiming,
3107 BytePerPixY,
3108 BytePerPixC,
3109 BytePerPixDETY,
3110 BytePerPixDETC,
3111 mode_lib->vba.HActive,
3112 mode_lib->vba.HRatio,
3113 mode_lib->vba.HRatioChroma,
3114 mode_lib->vba.DPPPerPlane,
3115 dummy5,
3116 dummy6,
3117 dummy3,
3118 dummy4,
3119 mode_lib->vba.SwathHeightY,
3120 mode_lib->vba.SwathHeightC,
3121 mode_lib->vba.DETBufferSizeY,
3122 mode_lib->vba.DETBufferSizeC,
3123 dummy7,
3124 &dummysinglestring);
3125 }
3126
dml30_CalculateBytePerPixelAnd256BBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC)3127 void dml30_CalculateBytePerPixelAnd256BBlockSizes(
3128 enum source_format_class SourcePixelFormat,
3129 enum dm_swizzle_mode SurfaceTiling,
3130 unsigned int *BytePerPixelY,
3131 unsigned int *BytePerPixelC,
3132 double *BytePerPixelDETY,
3133 double *BytePerPixelDETC,
3134 unsigned int *BlockHeight256BytesY,
3135 unsigned int *BlockHeight256BytesC,
3136 unsigned int *BlockWidth256BytesY,
3137 unsigned int *BlockWidth256BytesC)
3138 {
3139 if (SourcePixelFormat == dm_444_64) {
3140 *BytePerPixelDETY = 8;
3141 *BytePerPixelDETC = 0;
3142 *BytePerPixelY = 8;
3143 *BytePerPixelC = 0;
3144 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3145 *BytePerPixelDETY = 4;
3146 *BytePerPixelDETC = 0;
3147 *BytePerPixelY = 4;
3148 *BytePerPixelC = 0;
3149 } else if (SourcePixelFormat == dm_444_16) {
3150 *BytePerPixelDETY = 2;
3151 *BytePerPixelDETC = 0;
3152 *BytePerPixelY = 2;
3153 *BytePerPixelC = 0;
3154 } else if (SourcePixelFormat == dm_444_8) {
3155 *BytePerPixelDETY = 1;
3156 *BytePerPixelDETC = 0;
3157 *BytePerPixelY = 1;
3158 *BytePerPixelC = 0;
3159 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3160 *BytePerPixelDETY = 4;
3161 *BytePerPixelDETC = 1;
3162 *BytePerPixelY = 4;
3163 *BytePerPixelC = 1;
3164 } else if (SourcePixelFormat == dm_420_8) {
3165 *BytePerPixelDETY = 1;
3166 *BytePerPixelDETC = 2;
3167 *BytePerPixelY = 1;
3168 *BytePerPixelC = 2;
3169 } else if (SourcePixelFormat == dm_420_12) {
3170 *BytePerPixelDETY = 2;
3171 *BytePerPixelDETC = 4;
3172 *BytePerPixelY = 2;
3173 *BytePerPixelC = 4;
3174 } else {
3175 *BytePerPixelDETY = 4.0 / 3;
3176 *BytePerPixelDETC = 8.0 / 3;
3177 *BytePerPixelY = 2;
3178 *BytePerPixelC = 4;
3179 }
3180
3181 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
3182 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8
3183 || SourcePixelFormat == dm_mono_16 || SourcePixelFormat == dm_mono_8
3184 || SourcePixelFormat == dm_rgbe)) {
3185 if (SurfaceTiling == dm_sw_linear) {
3186 *BlockHeight256BytesY = 1;
3187 } else if (SourcePixelFormat == dm_444_64) {
3188 *BlockHeight256BytesY = 4;
3189 } else if (SourcePixelFormat == dm_444_8) {
3190 *BlockHeight256BytesY = 16;
3191 } else {
3192 *BlockHeight256BytesY = 8;
3193 }
3194 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3195 *BlockHeight256BytesC = 0;
3196 *BlockWidth256BytesC = 0;
3197 } else {
3198 if (SurfaceTiling == dm_sw_linear) {
3199 *BlockHeight256BytesY = 1;
3200 *BlockHeight256BytesC = 1;
3201 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3202 *BlockHeight256BytesY = 8;
3203 *BlockHeight256BytesC = 16;
3204 } else if (SourcePixelFormat == dm_420_8) {
3205 *BlockHeight256BytesY = 16;
3206 *BlockHeight256BytesC = 8;
3207 } else {
3208 *BlockHeight256BytesY = 8;
3209 *BlockHeight256BytesC = 8;
3210 }
3211 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3212 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3213 }
3214 }
3215
/*
 * CalculateTWait - pick the wait time that applies to a prefetch mode.
 *
 * @PrefetchMode: 0, 1, or anything else (treated as the third mode).
 * @DRAMClockChangeLatency: only considered in mode 0.
 * @UrgentLatency: always considered; it is the result for modes > 1.
 * @SREnterPlusExitTime: considered in modes 0 and 1.
 *
 * Returns:
 *   mode 0     - max(DRAMClockChangeLatency + UrgentLatency,
 *                    SREnterPlusExitTime, UrgentLatency)
 *   mode 1     - max(SREnterPlusExitTime, UrgentLatency)
 *   otherwise  - UrgentLatency
 */
static double CalculateTWait(
		unsigned int PrefetchMode,
		double DRAMClockChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3231
/*
 * dml30_CalculateWriteBackDISPCLK - DISPCLK needed by a writeback pipe.
 *
 * Three candidate clocks are derived from PixelClock and the result is the
 * largest of them:
 *   - one scaled by ceil(WritebackHTaps / 8) and divided by WritebackHRatio,
 *   - one scaled by WritebackVTaps * ceil(WritebackDestinationWidth / 6) + 8
 *     and divided by HTotal,
 *   - one built from WritebackVTaps, WritebackDestinationWidth and
 *     WritebackLineBufferSize / 57, divided by 6 and WritebackSourceWidth.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used by the
 * computation.
 */
double dml30_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		long WritebackSourceWidth,
		long WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize)
{
	/* Expressions are kept verbatim so operand order and rounding match. */
	const double dispclk_htaps =
		PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double dispclk_vtaps =
		PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	const double dispclk_linebuf =
		PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;

	return dml_max3(dispclk_htaps, dispclk_vtaps, dispclk_linebuf);
}
3251
/*
 * CalculateWriteBackDelay - delay contributed by the writeback scaler.
 *
 * The number of output lines left after the last source line is
 *   WritebackDestinationHeight - 1
 *     - ceil((WritebackSourceHeight - VInit) / WritebackVRatio)
 * with VInit = (WritebackVRatio + WritebackVTaps + 1) / 2.
 *
 * Returns 0 when that count is negative; otherwise the count multiplied by
 * the line length, plus (HTotal - WritebackDestinationWidth), plus 80.
 * The line length is the larger of WritebackDestinationWidth and
 * ceil(WritebackDestinationWidth / 6) * WritebackVTaps.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 */
static double CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		long WritebackDestinationWidth,
		long WritebackDestinationHeight,
		long WritebackSourceHeight,
		unsigned int HTotal)
{
	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double line_len = dml_max((double) WritebackDestinationWidth,
			dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
	const double trailing_lines = WritebackDestinationHeight - 1
			- dml_ceil((WritebackSourceHeight - v_init) / WritebackVRatio, 1);

	/* A negative line count is clamped to "no delay". */
	if (trailing_lines < 0)
		return 0;

	return trailing_lines * line_len + (HTotal - WritebackDestinationWidth) + 80;
}
3277
3278
/*
 * CalculateDynamicMetadataParameters - timing terms for dynamic metadata.
 *
 * Outputs:
 * @Tsetup: (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock,
 *	where the three pixel counts are derived from the clocks, HTotal and
 *	MaxInterDCNTileRepeaters exactly as written below.
 * @Tdmbf:  DynamicMetadataTransmittedBytes / 4 / DISPCLK.
 * @Tdmec:  HTotal / PixelClock (one line time).
 * @Tdmsks: half of the vertical blank time when
 *	DynamicMetadataLinesBeforeActiveRequired is 0, otherwise that many
 *	line times; halved again when InterlaceEnable is 1 and
 *	ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateDynamicMetadataParameters(int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK,
		double DCFClkDeepSleep, double PixelClock, long HTotal, long VBlank, long DynamicMetadataTransmittedBytes,
		long DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP,
		double *Tsetup, double *Tdmbf, double *Tdmec, double *Tdmsks)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	const double vupdate_width_pix = (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater_delay) * PixelClock;
	const double vready_offset_pix = dml_max(150.0 / DPPCLK, repeater_delay + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock;
	const double vupdate_offset_pix = dml_ceil(HTotal / 4.0, 1);
	double skew;

	*Tsetup = (vupdate_offset_pix + vupdate_width_pix + vready_offset_pix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;

	/* The line count products are formed in integer arithmetic first. */
	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		skew = VBlank * HTotal / PixelClock / 2.0;
	else
		skew = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		skew = skew / 2;

	*Tdmsks = skew;
}
3304
CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)3305 static void CalculateRowBandwidth(
3306 bool GPUVMEnable,
3307 enum source_format_class SourcePixelFormat,
3308 double VRatio,
3309 double VRatioChroma,
3310 bool DCCEnable,
3311 double LineTime,
3312 unsigned int MetaRowByteLuma,
3313 unsigned int MetaRowByteChroma,
3314 unsigned int meta_row_height_luma,
3315 unsigned int meta_row_height_chroma,
3316 unsigned int PixelPTEBytesPerRowLuma,
3317 unsigned int PixelPTEBytesPerRowChroma,
3318 unsigned int dpte_row_height_luma,
3319 unsigned int dpte_row_height_chroma,
3320 double *meta_row_bw,
3321 double *dpte_row_bw)
3322 {
3323 if (DCCEnable != true) {
3324 *meta_row_bw = 0;
3325 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3326 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime)
3327 + VRatioChroma * MetaRowByteChroma
3328 / (meta_row_height_chroma * LineTime);
3329 } else {
3330 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3331 }
3332
3333 if (GPUVMEnable != true) {
3334 *dpte_row_bw = 0;
3335 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3336 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3337 + VRatioChroma * PixelPTEBytesPerRowChroma
3338 / (dpte_row_height_chroma * LineTime);
3339 } else {
3340 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3341 }
3342 }
3343
CalculateFlipSchedule(struct display_mode_lib * mode_lib,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)3344 static void CalculateFlipSchedule(
3345 struct display_mode_lib *mode_lib,
3346 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
3347 double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
3348 double UrgentExtraLatency,
3349 double UrgentLatency,
3350 unsigned int GPUVMMaxPageTableLevels,
3351 bool HostVMEnable,
3352 unsigned int HostVMMaxNonCachedPageTableLevels,
3353 bool GPUVMEnable,
3354 double HostVMMinPageSize,
3355 double PDEAndMetaPTEBytesPerFrame,
3356 double MetaRowBytes,
3357 double DPTEBytesPerRow,
3358 double BandwidthAvailableForImmediateFlip,
3359 unsigned int TotImmediateFlipBytes,
3360 enum source_format_class SourcePixelFormat,
3361 double LineTime,
3362 double VRatio,
3363 double VRatioChroma,
3364 double Tno_bw,
3365 bool DCCEnable,
3366 unsigned int dpte_row_height,
3367 unsigned int meta_row_height,
3368 unsigned int dpte_row_height_chroma,
3369 unsigned int meta_row_height_chroma,
3370 double *DestinationLinesToRequestVMInImmediateFlip,
3371 double *DestinationLinesToRequestRowInImmediateFlip,
3372 double *final_flip_bw,
3373 bool *ImmediateFlipSupportedForPipe)
3374 {
3375 double min_row_time = 0.0;
3376 unsigned int HostVMDynamicLevelsTrips = 0;
3377 double TimeForFetchingMetaPTEImmediateFlip = 0;
3378 double TimeForFetchingRowInVBlankImmediateFlip = 0;
3379 double ImmediateFlipBW = 0;
3380 double HostVMInefficiencyFactor = 0;
3381
3382 if (GPUVMEnable == true && HostVMEnable == true) {
3383 HostVMInefficiencyFactor = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;
3384 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3385 } else {
3386 HostVMInefficiencyFactor = 1;
3387 HostVMDynamicLevelsTrips = 0;
3388 }
3389
3390 if (GPUVMEnable == true || DCCEnable == true) {
3391 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
3392 }
3393
3394 if (GPUVMEnable == true) {
3395 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3396 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), LineTime / 4.0);
3397 } else {
3398 TimeForFetchingMetaPTEImmediateFlip = 0;
3399 }
3400
3401 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3402 if ((GPUVMEnable == true || DCCEnable == true)) {
3403 TimeForFetchingRowInVBlankImmediateFlip = dml_max3((MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3404 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4);
3405 } else {
3406 TimeForFetchingRowInVBlankImmediateFlip = 0;
3407 }
3408
3409 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3410
3411 if (GPUVMEnable == true) {
3412 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3413 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
3414 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3415 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
3416 } else {
3417 *final_flip_bw = 0;
3418 }
3419
3420
3421 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3422 if (GPUVMEnable == true && DCCEnable != true) {
3423 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3424 } else if (GPUVMEnable != true && DCCEnable == true) {
3425 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3426 } else {
3427 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio,
3428 dpte_row_height_chroma * LineTime / VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
3429 }
3430 } else {
3431 if (GPUVMEnable == true && DCCEnable != true) {
3432 min_row_time = dpte_row_height * LineTime / VRatio;
3433 } else if (GPUVMEnable != true && DCCEnable == true) {
3434 min_row_time = meta_row_height * LineTime / VRatio;
3435 } else {
3436 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
3437 }
3438 }
3439
3440 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3441 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3442 *ImmediateFlipSupportedForPipe = false;
3443 } else {
3444 *ImmediateFlipSupportedForPipe = true;
3445 }
3446 }
3447
TruncToValidBPP(double LinkBitRate,int Lanes,long HTotal,long HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,int DSCSlices,int AudioRate,int AudioLayout,enum odm_combine_mode ODMCombine)3448 static double TruncToValidBPP(
3449 double LinkBitRate,
3450 int Lanes,
3451 long HTotal,
3452 long HActive,
3453 double PixelClock,
3454 double DesiredBPP,
3455 bool DSCEnable,
3456 enum output_encoder_class Output,
3457 enum output_format_class Format,
3458 unsigned int DSCInputBitPerComponent,
3459 int DSCSlices,
3460 int AudioRate,
3461 int AudioLayout,
3462 enum odm_combine_mode ODMCombine)
3463 {
3464 double MaxLinkBPP = 0;
3465 int MinDSCBPP = 0;
3466 double MaxDSCBPP = 0;
3467 int NonDSCBPP0 = 0;
3468 int NonDSCBPP1 = 0;
3469 int NonDSCBPP2 = 0;
3470
3471 if (Format == dm_420) {
3472 NonDSCBPP0 = 12;
3473 NonDSCBPP1 = 15;
3474 NonDSCBPP2 = 18;
3475 MinDSCBPP = 6;
3476 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3477 } else if (Format == dm_444) {
3478 NonDSCBPP0 = 24;
3479 NonDSCBPP1 = 30;
3480 NonDSCBPP2 = 36;
3481 MinDSCBPP = 8;
3482 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3483 } else {
3484 NonDSCBPP0 = 16;
3485 NonDSCBPP1 = 20;
3486 NonDSCBPP2 = 24;
3487
3488 if (Format == dm_n422) {
3489 MinDSCBPP = 7;
3490 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3491 } else {
3492 MinDSCBPP = 8;
3493 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3494 }
3495 }
3496
3497 if (DSCEnable && Output == dm_dp) {
3498 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3499 } else {
3500 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3501 }
3502
3503 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3504 MaxLinkBPP = 16;
3505 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3506 MaxLinkBPP = 32;
3507 }
3508
3509
3510 if (DesiredBPP == 0) {
3511 if (DSCEnable) {
3512 if (MaxLinkBPP < MinDSCBPP) {
3513 return BPP_INVALID;
3514 } else if (MaxLinkBPP >= MaxDSCBPP) {
3515 return MaxDSCBPP;
3516 } else {
3517 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3518 }
3519 } else {
3520 if (MaxLinkBPP >= NonDSCBPP2) {
3521 return NonDSCBPP2;
3522 } else if (MaxLinkBPP >= NonDSCBPP1) {
3523 return NonDSCBPP1;
3524 } else if (MaxLinkBPP >= NonDSCBPP0) {
3525 return NonDSCBPP0;
3526 } else {
3527 return BPP_INVALID;
3528 }
3529 }
3530 } else {
3531 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0 || DesiredBPP == 18)) ||
3532 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3533 return BPP_INVALID;
3534 } else {
3535 return DesiredBPP;
3536 }
3537 }
3538 return BPP_INVALID;
3539 }
3540
dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib * mode_lib)3541 void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3542 {
3543 struct vba_vars_st *v = &mode_lib->vba;
3544 int MinPrefetchMode, MaxPrefetchMode;
3545 int i;
3546 unsigned int j, k, m;
3547 bool EnoughWritebackUnits = true;
3548 bool WritebackModeSupport = true;
3549 bool ViewportExceedsSurface = false;
3550 double MaxTotalVActiveRDBandwidth = 0;
3551 long ReorderingBytes = 0;
3552 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX] = { 0 };
3553
3554 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3555
3556 CalculateMinAndMaxPrefetchMode(
3557 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3558 &MinPrefetchMode, &MaxPrefetchMode);
3559
3560 /*Scale Ratio, taps Support Check*/
3561
3562 v->ScaleRatioAndTapsSupport = true;
3563 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3564 if (v->ScalerEnabled[k] == false
3565 && ((v->SourcePixelFormat[k] != dm_444_64
3566 && v->SourcePixelFormat[k] != dm_444_32
3567 && v->SourcePixelFormat[k] != dm_444_16
3568 && v->SourcePixelFormat[k] != dm_mono_16
3569 && v->SourcePixelFormat[k] != dm_mono_8
3570 && v->SourcePixelFormat[k] != dm_rgbe
3571 && v->SourcePixelFormat[k] != dm_rgbe_alpha)
3572 || v->HRatio[k] != 1.0
3573 || v->htaps[k] != 1.0
3574 || v->VRatio[k] != 1.0
3575 || v->vtaps[k] != 1.0)) {
3576 v->ScaleRatioAndTapsSupport = false;
3577 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0
3578 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3579 || (v->htaps[k] > 1.0
3580 && (v->htaps[k] % 2) == 1)
3581 || v->HRatio[k] > v->MaxHSCLRatio
3582 || v->VRatio[k] > v->MaxVSCLRatio
3583 || v->HRatio[k] > v->htaps[k]
3584 || v->VRatio[k] > v->vtaps[k]
3585 || (v->SourcePixelFormat[k] != dm_444_64
3586 && v->SourcePixelFormat[k] != dm_444_32
3587 && v->SourcePixelFormat[k] != dm_444_16
3588 && v->SourcePixelFormat[k] != dm_mono_16
3589 && v->SourcePixelFormat[k] != dm_mono_8
3590 && v->SourcePixelFormat[k] != dm_rgbe
3591 && (v->VTAPsChroma[k] < 1
3592 || v->VTAPsChroma[k] > 8
3593 || v->HTAPsChroma[k] < 1
3594 || v->HTAPsChroma[k] > 8
3595 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3596 || v->HRatioChroma[k] > v->MaxHSCLRatio
3597 || v->VRatioChroma[k] > v->MaxVSCLRatio
3598 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3599 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3600 v->ScaleRatioAndTapsSupport = false;
3601 }
3602 }
3603 /*Source Format, Pixel Format and Scan Support Check*/
3604
3605 v->SourceFormatPixelAndScanSupport = true;
3606 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3607 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3608 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x)
3609 && !(v->SourcePixelFormat[k] == dm_444_64))) {
3610 v->SourceFormatPixelAndScanSupport = false;
3611 }
3612 }
3613 /*Bandwidth Support Check*/
3614
3615 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3616 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3617 v->SourcePixelFormat[k],
3618 v->SurfaceTiling[k],
3619 &v->BytePerPixelY[k],
3620 &v->BytePerPixelC[k],
3621 &v->BytePerPixelInDETY[k],
3622 &v->BytePerPixelInDETC[k],
3623 &v->Read256BlockHeightY[k],
3624 &v->Read256BlockHeightC[k],
3625 &v->Read256BlockWidthY[k],
3626 &v->Read256BlockWidthC[k]);
3627 }
3628 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3629 if (v->SourceScan[k] != dm_vert) {
3630 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3631 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3632 } else {
3633 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3634 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3635 }
3636 }
3637 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3638 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3639 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3640 }
3641 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3642 if (v->WritebackEnable[k] == true
3643 && v->WritebackPixelFormat[k] == dm_444_64) {
3644 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3645 * v->WritebackDestinationHeight[k]
3646 / (v->WritebackSourceHeight[k]
3647 * v->HTotal[k]
3648 / v->PixelClock[k]) * 8.0;
3649 } else if (v->WritebackEnable[k] == true) {
3650 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k]
3651 * v->WritebackDestinationHeight[k]
3652 / (v->WritebackSourceHeight[k]
3653 * v->HTotal[k]
3654 / v->PixelClock[k]) * 4.0;
3655 } else {
3656 v->WriteBandwidth[k] = 0.0;
3657 }
3658 }
3659
3660 /*Writeback Latency support check*/
3661
3662 v->WritebackLatencySupport = true;
3663 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3664 if (v->WritebackEnable[k] == true) {
3665 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave ||
3666 v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
3667 if (v->WriteBandwidth[k]
3668 > 2.0 * v->WritebackInterfaceBufferSize * 1024
3669 / v->WritebackLatency) {
3670 v->WritebackLatencySupport = false;
3671 }
3672 } else {
3673 if (v->WriteBandwidth[k]
3674 > v->WritebackInterfaceBufferSize * 1024
3675 / v->WritebackLatency) {
3676 v->WritebackLatencySupport = false;
3677 }
3678 }
3679 }
3680 }
3681
3682 /*Writeback Mode Support Check*/
3683
3684 v->TotalNumberOfActiveWriteback = 0;
3685 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3686 if (v->WritebackEnable[k] == true) {
3687 v->TotalNumberOfActiveWriteback =
3688 v->TotalNumberOfActiveWriteback + 1;
3689 }
3690 }
3691
3692 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3693 EnoughWritebackUnits = false;
3694 }
3695 if (!v->WritebackSupportInterleaveAndUsingWholeBufferForASingleStream
3696 && (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave
3697 || v->WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave)) {
3698
3699 WritebackModeSupport = false;
3700 }
3701 if (v->WritebackConfiguration == dm_whole_buffer_for_single_stream_no_interleave && v->TotalNumberOfActiveWriteback > 1) {
3702 WritebackModeSupport = false;
3703 }
3704
3705 /*Writeback Scale Ratio and Taps Support Check*/
3706
3707 v->WritebackScaleRatioAndTapsSupport = true;
3708 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3709 if (v->WritebackEnable[k] == true) {
3710 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio
3711 || v->WritebackVRatio[k]
3712 > v->WritebackMaxVSCLRatio
3713 || v->WritebackHRatio[k]
3714 < v->WritebackMinHSCLRatio
3715 || v->WritebackVRatio[k]
3716 < v->WritebackMinVSCLRatio
3717 || v->WritebackHTaps[k]
3718 > v->WritebackMaxHSCLTaps
3719 || v->WritebackVTaps[k]
3720 > v->WritebackMaxVSCLTaps
3721 || v->WritebackHRatio[k]
3722 > v->WritebackHTaps[k]
3723 || v->WritebackVRatio[k]
3724 > v->WritebackVTaps[k]
3725 || (v->WritebackHTaps[k] > 2.0
3726 && ((v->WritebackHTaps[k] % 2)
3727 == 1))) {
3728 v->WritebackScaleRatioAndTapsSupport = false;
3729 }
3730 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3731 v->WritebackScaleRatioAndTapsSupport = false;
3732 }
3733 }
3734 }
3735 /*Maximum DISPCLK/DPPCLK Support check*/
3736
3737 v->WritebackRequiredDISPCLK = 0.0;
3738 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3739 if (v->WritebackEnable[k] == true) {
3740 v->WritebackRequiredDISPCLK = dml_max(v->WritebackRequiredDISPCLK,
3741 dml30_CalculateWriteBackDISPCLK(
3742 v->WritebackPixelFormat[k],
3743 v->PixelClock[k],
3744 v->WritebackHRatio[k],
3745 v->WritebackVRatio[k],
3746 v->WritebackHTaps[k],
3747 v->WritebackVTaps[k],
3748 v->WritebackSourceWidth[k],
3749 v->WritebackDestinationWidth[k],
3750 v->HTotal[k],
3751 v->WritebackLineBufferSize));
3752 }
3753 }
3754 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3755 if (v->HRatio[k] > 1.0) {
3756 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3757 } else {
3758 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3759 }
3760 if (v->BytePerPixelC[k] == 0.0) {
3761 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3762 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3763 * dml_max3(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0);
3764 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3765 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3766 }
3767 } else {
3768 if (v->HRatioChroma[k] > 1.0) {
3769 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput,
3770 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3771 } else {
3772 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3773 }
3774 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5(v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3775 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3776 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3777 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3778 1.0);
3779 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3780 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3781 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3782 }
3783 }
3784 }
3785 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3786 int MaximumSwathWidthSupportLuma = 0;
3787 int MaximumSwathWidthSupportChroma = 0;
3788
3789 if (v->SurfaceTiling[k] == dm_sw_linear) {
3790 MaximumSwathWidthSupportLuma = 8192.0;
3791 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3792 MaximumSwathWidthSupportLuma = 2880.0;
3793 } else {
3794 MaximumSwathWidthSupportLuma = 5760.0;
3795 }
3796
3797 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
3798 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
3799 } else {
3800 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
3801 }
3802 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
3803 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
3804 if (v->BytePerPixelC[k] == 0.0) {
3805 v->MaximumSwathWidthInLineBufferChroma = 0;
3806 } else {
3807 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
3808 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
3809 }
3810 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
3811 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
3812 }
3813
3814 CalculateSwathAndDETConfiguration(
3815 true,
3816 v->NumberOfActivePlanes,
3817 v->DETBufferSizeInKByte[0],
3818 v->MaximumSwathWidthLuma,
3819 v->MaximumSwathWidthChroma,
3820 v->SourceScan,
3821 v->SourcePixelFormat,
3822 v->SurfaceTiling,
3823 v->ViewportWidth,
3824 v->ViewportHeight,
3825 v->SurfaceWidthY,
3826 v->SurfaceWidthC,
3827 v->SurfaceHeightY,
3828 v->SurfaceHeightC,
3829 v->Read256BlockHeightY,
3830 v->Read256BlockHeightC,
3831 v->Read256BlockWidthY,
3832 v->Read256BlockWidthC,
3833 v->odm_combine_dummy,
3834 v->BlendingAndTiming,
3835 v->BytePerPixelY,
3836 v->BytePerPixelC,
3837 v->BytePerPixelInDETY,
3838 v->BytePerPixelInDETC,
3839 v->HActive,
3840 v->HRatio,
3841 v->HRatioChroma,
3842 v->DPPPerPlane,
3843 v->swath_width_luma_ub,
3844 v->swath_width_chroma_ub,
3845 v->SwathWidthY,
3846 v->SwathWidthC,
3847 v->SwathHeightY,
3848 v->SwathHeightC,
3849 v->DETBufferSizeY,
3850 v->DETBufferSizeC,
3851 v->SingleDPPViewportSizeSupportPerPlane,
3852 &v->ViewportSizeSupport[0][0]);
3853
3854 for (i = 0; i < v->soc.num_states; i++) {
3855 for (j = 0; j < 2; j++) {
3856 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
3857 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
3858 v->RequiredDISPCLK[i][j] = 0.0;
3859 v->DISPCLK_DPPCLK_Support[i][j] = true;
3860 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3861 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3862 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3863 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3864 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3865 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3866 }
3867 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3868 * (1 + v->DISPCLKRampingMargin / 100.0);
3869 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3870 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3871 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3872 }
3873 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3874 * (1 + v->DISPCLKRampingMargin / 100.0);
3875 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1]
3876 && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) {
3877 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3878 }
3879
3880 if (v->ODMCombinePolicy == dm_odm_combine_policy_none) {
3881 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3882 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3883 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
3884 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3885 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3886 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
3887 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
3888 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3889 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3890 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
3891 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3892 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3893 } else {
3894 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3895 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
3896 }
3897 if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH
3898 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3899 if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) {
3900 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3901 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3902 } else {
3903 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3904 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3905 }
3906 }
3907 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH
3908 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
3909 if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) {
3910 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
3911 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
3912 } else {
3913 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
3914 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
3915 }
3916 }
3917 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
3918 v->MPCCombine[i][j][k] = false;
3919 v->NoOfDPP[i][j][k] = 4;
3920 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
3921 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
3922 v->MPCCombine[i][j][k] = false;
3923 v->NoOfDPP[i][j][k] = 2;
3924 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
3925 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
3926 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity
3927 && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
3928 v->MPCCombine[i][j][k] = false;
3929 v->NoOfDPP[i][j][k] = 1;
3930 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3931 } else {
3932 v->MPCCombine[i][j][k] = true;
3933 v->NoOfDPP[i][j][k] = 2;
3934 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3935 }
3936 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3937 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3938 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3939 v->DISPCLK_DPPCLK_Support[i][j] = false;
3940 }
3941 }
3942 v->TotalNumberOfActiveDPP[i][j] = 0;
3943 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
3944 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3945 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3946 if (v->NoOfDPP[i][j][k] == 1)
3947 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
3948 }
3949 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never) {
3950 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
3951 double BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3952 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3953 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
3954 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
3955 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3956 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
3957 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
3958 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
3959 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
3960 }
3961 }
3962 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
3963 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
3964 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
3965 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
3966 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
3967 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
3968 }
3969 }
3970 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
3971 v->RequiredDISPCLK[i][j] = 0.0;
3972 v->DISPCLK_DPPCLK_Support[i][j] = true;
3973 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3974 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
3975 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
3976 v->MPCCombine[i][j][k] = true;
3977 v->NoOfDPP[i][j][k] = 2;
3978 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
3979 } else {
3980 v->MPCCombine[i][j][k] = false;
3981 v->NoOfDPP[i][j][k] = 1;
3982 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3983 }
3984 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
3985 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3986 * (1.0 + v->DISPCLKRampingMargin / 100.0);
3987 } else {
3988 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
3989 }
3990 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
3991 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
3992 > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
3993 v->DISPCLK_DPPCLK_Support[i][j] = false;
3994 }
3995 }
3996 v->TotalNumberOfActiveDPP[i][j] = 0.0;
3997 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
3998 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
3999 }
4000 }
4001 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4002 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4003 v->DISPCLK_DPPCLK_Support[i][j] = false;
4004 }
4005 }
4006 }
4007
4008 /*Total Available Pipes Support Check*/
4009
4010 for (i = 0; i < v->soc.num_states; i++) {
4011 for (j = 0; j < 2; j++) {
4012 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4013 v->TotalAvailablePipesSupport[i][j] = true;
4014 } else {
4015 v->TotalAvailablePipesSupport[i][j] = false;
4016 }
4017 }
4018 }
4019 /*Display IO and DSC Support Check*/
4020
4021 v->NonsupportedDSCInputBPC = false;
4022 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4023 if (!(v->DSCInputBitPerComponent[k] == 12.0
4024 || v->DSCInputBitPerComponent[k] == 10.0
4025 || v->DSCInputBitPerComponent[k] == 8.0)) {
4026 v->NonsupportedDSCInputBPC = true;
4027 }
4028 }
4029
4030 /*Number Of DSC Slices*/
4031 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4032 if (v->BlendingAndTiming[k] == k) {
4033 if (v->PixelClockBackEnd[k] > 3200) {
4034 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4035 } else if (v->PixelClockBackEnd[k] > 1360) {
4036 v->NumberOfDSCSlices[k] = 8;
4037 } else if (v->PixelClockBackEnd[k] > 680) {
4038 v->NumberOfDSCSlices[k] = 4;
4039 } else if (v->PixelClockBackEnd[k] > 340) {
4040 v->NumberOfDSCSlices[k] = 2;
4041 } else {
4042 v->NumberOfDSCSlices[k] = 1;
4043 }
4044 } else {
4045 v->NumberOfDSCSlices[k] = 0;
4046 }
4047 }
4048
4049 for (i = 0; i < v->soc.num_states; i++) {
4050 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4051 v->RequiresDSC[i][k] = false;
4052 v->RequiresFEC[i][k] = false;
4053 if (v->BlendingAndTiming[k] == k) {
4054 if (v->Output[k] == dm_hdmi) {
4055 v->RequiresDSC[i][k] = false;
4056 v->RequiresFEC[i][k] = false;
4057 v->OutputBppPerState[i][k] = TruncToValidBPP(
4058 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4059 3,
4060 v->HTotal[k],
4061 v->HActive[k],
4062 v->PixelClockBackEnd[k],
4063 v->ForcedOutputLinkBPP[k],
4064 false,
4065 v->Output[k],
4066 v->OutputFormat[k],
4067 v->DSCInputBitPerComponent[k],
4068 v->NumberOfDSCSlices[k],
4069 v->AudioSampleRate[k],
4070 v->AudioSampleLayout[k],
4071 v->ODMCombineEnablePerState[i][k]);
4072 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4073 if (v->DSCEnable[k] == true) {
4074 v->RequiresDSC[i][k] = true;
4075 v->LinkDSCEnable = true;
4076 if (v->Output[k] == dm_dp) {
4077 v->RequiresFEC[i][k] = true;
4078 } else {
4079 v->RequiresFEC[i][k] = false;
4080 }
4081 } else {
4082 v->RequiresDSC[i][k] = false;
4083 v->LinkDSCEnable = false;
4084 v->RequiresFEC[i][k] = false;
4085 }
4086
4087 v->Outbpp = BPP_INVALID;
4088 if (v->PHYCLKPerState[i] >= 270.0) {
4089 v->Outbpp = TruncToValidBPP(
4090 (1.0 - v->Downspreading / 100.0) * 2700,
4091 v->OutputLinkDPLanes[k],
4092 v->HTotal[k],
4093 v->HActive[k],
4094 v->PixelClockBackEnd[k],
4095 v->ForcedOutputLinkBPP[k],
4096 v->LinkDSCEnable,
4097 v->Output[k],
4098 v->OutputFormat[k],
4099 v->DSCInputBitPerComponent[k],
4100 v->NumberOfDSCSlices[k],
4101 v->AudioSampleRate[k],
4102 v->AudioSampleLayout[k],
4103 v->ODMCombineEnablePerState[i][k]);
4104 v->OutputBppPerState[i][k] = v->Outbpp;
4105 // TODO: Need some other way to handle this nonsense
4106 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4107 }
4108 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4109 v->Outbpp = TruncToValidBPP(
4110 (1.0 - v->Downspreading / 100.0) * 5400,
4111 v->OutputLinkDPLanes[k],
4112 v->HTotal[k],
4113 v->HActive[k],
4114 v->PixelClockBackEnd[k],
4115 v->ForcedOutputLinkBPP[k],
4116 v->LinkDSCEnable,
4117 v->Output[k],
4118 v->OutputFormat[k],
4119 v->DSCInputBitPerComponent[k],
4120 v->NumberOfDSCSlices[k],
4121 v->AudioSampleRate[k],
4122 v->AudioSampleLayout[k],
4123 v->ODMCombineEnablePerState[i][k]);
4124 v->OutputBppPerState[i][k] = v->Outbpp;
4125 // TODO: Need some other way to handle this nonsense
4126 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4127 }
4128 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4129 v->Outbpp = TruncToValidBPP(
4130 (1.0 - v->Downspreading / 100.0) * 8100,
4131 v->OutputLinkDPLanes[k],
4132 v->HTotal[k],
4133 v->HActive[k],
4134 v->PixelClockBackEnd[k],
4135 v->ForcedOutputLinkBPP[k],
4136 v->LinkDSCEnable,
4137 v->Output[k],
4138 v->OutputFormat[k],
4139 v->DSCInputBitPerComponent[k],
4140 v->NumberOfDSCSlices[k],
4141 v->AudioSampleRate[k],
4142 v->AudioSampleLayout[k],
4143 v->ODMCombineEnablePerState[i][k]);
4144 if (v->Outbpp == BPP_INVALID && v->ForcedOutputLinkBPP[k] == 0) {
4145 //if (v->Outbpp == BPP_INVALID && v->DSCEnabled[k] == dm_dsc_enable_only_if_necessary && v->ForcedOutputLinkBPP[k] == 0) {
4146 v->RequiresDSC[i][k] = true;
4147 v->LinkDSCEnable = true;
4148 if (v->Output[k] == dm_dp) {
4149 v->RequiresFEC[i][k] = true;
4150 }
4151 v->Outbpp = TruncToValidBPP(
4152 (1.0 - v->Downspreading / 100.0) * 8100,
4153 v->OutputLinkDPLanes[k],
4154 v->HTotal[k],
4155 v->HActive[k],
4156 v->PixelClockBackEnd[k],
4157 v->ForcedOutputLinkBPP[k],
4158 v->LinkDSCEnable,
4159 v->Output[k],
4160 v->OutputFormat[k],
4161 v->DSCInputBitPerComponent[k],
4162 v->NumberOfDSCSlices[k],
4163 v->AudioSampleRate[k],
4164 v->AudioSampleLayout[k],
4165 v->ODMCombineEnablePerState[i][k]);
4166 }
4167 v->OutputBppPerState[i][k] = v->Outbpp;
4168 // TODO: Need some other way to handle this nonsense
4169 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4170 }
4171 }
4172 } else {
4173 v->OutputBppPerState[i][k] = 0;
4174 }
4175 }
4176 }
4177 for (i = 0; i < v->soc.num_states; i++) {
4178 v->DIOSupport[i] = true;
4179 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4180 if (!v->skip_dio_check[k] && v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)
4181 && (v->OutputBppPerState[i][k] == 0
4182 || (v->OutputFormat[k] == dm_420 && v->Interlace[k] == true && v->ProgressiveToInterlaceUnitInOPP == true))) {
4183 v->DIOSupport[i] = false;
4184 }
4185 }
4186 }
4187
4188 for (i = 0; i < v->soc.num_states; ++i) {
4189 v->ODMCombine4To1SupportCheckOK[i] = true;
4190 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4191 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4192 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) {
4193 v->ODMCombine4To1SupportCheckOK[i] = false;
4194 }
4195 }
4196 }
4197
4198 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4199
4200 for (i = 0; i < v->soc.num_states; i++) {
4201 v->NotEnoughDSCUnits[i] = false;
4202 v->TotalDSCUnitsRequired = 0.0;
4203 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4204 if (v->RequiresDSC[i][k] == true) {
4205 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4206 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4207 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4208 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4209 } else {
4210 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4211 }
4212 }
4213 }
4214 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4215 v->NotEnoughDSCUnits[i] = true;
4216 }
4217 }
4218 /*DSC Delay per state*/
4219
4220 for (i = 0; i < v->soc.num_states; i++) {
4221 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4222 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4223 v->BPP = 0.0;
4224 } else {
4225 v->BPP = v->OutputBppPerState[i][k];
4226 }
4227 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4228 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4229 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4230 v->DSCInputBitPerComponent[k],
4231 v->BPP,
4232 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4233 v->NumberOfDSCSlices[k],
4234 v->OutputFormat[k],
4235 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4236 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4237 v->DSCDelayPerState[i][k] = 2.0
4238 * dscceComputeDelay(
4239 v->DSCInputBitPerComponent[k],
4240 v->BPP,
4241 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4242 v->NumberOfDSCSlices[k] / 2,
4243 v->OutputFormat[k],
4244 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4245 } else {
4246 v->DSCDelayPerState[i][k] = 4.0
4247 * (dscceComputeDelay(
4248 v->DSCInputBitPerComponent[k],
4249 v->BPP,
4250 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4251 v->NumberOfDSCSlices[k] / 4,
4252 v->OutputFormat[k],
4253 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4254 }
4255 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4256 } else {
4257 v->DSCDelayPerState[i][k] = 0.0;
4258 }
4259 }
4260 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4261 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4262 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4263 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4264 }
4265 }
4266 }
4267 }
4268
4269 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4270 //
4271 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4272 for (j = 0; j <= 1; ++j) {
4273 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4274 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4275 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4276 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4277 }
4278
4279 CalculateSwathAndDETConfiguration(
4280 false,
4281 v->NumberOfActivePlanes,
4282 v->DETBufferSizeInKByte[0],
4283 v->MaximumSwathWidthLuma,
4284 v->MaximumSwathWidthChroma,
4285 v->SourceScan,
4286 v->SourcePixelFormat,
4287 v->SurfaceTiling,
4288 v->ViewportWidth,
4289 v->ViewportHeight,
4290 v->SurfaceWidthY,
4291 v->SurfaceWidthC,
4292 v->SurfaceHeightY,
4293 v->SurfaceHeightC,
4294 v->Read256BlockHeightY,
4295 v->Read256BlockHeightC,
4296 v->Read256BlockWidthY,
4297 v->Read256BlockWidthC,
4298 v->ODMCombineEnableThisState,
4299 v->BlendingAndTiming,
4300 v->BytePerPixelY,
4301 v->BytePerPixelC,
4302 v->BytePerPixelInDETY,
4303 v->BytePerPixelInDETC,
4304 v->HActive,
4305 v->HRatio,
4306 v->HRatioChroma,
4307 v->NoOfDPPThisState,
4308 v->swath_width_luma_ub_this_state,
4309 v->swath_width_chroma_ub_this_state,
4310 v->SwathWidthYThisState,
4311 v->SwathWidthCThisState,
4312 v->SwathHeightYThisState,
4313 v->SwathHeightCThisState,
4314 v->DETBufferSizeYThisState,
4315 v->DETBufferSizeCThisState,
4316 v->dummystring,
4317 &v->ViewportSizeSupport[i][j]);
4318
4319 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4320 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4321 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4322 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4323 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4324 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4325 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4326 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4327 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4328 }
4329
4330 }
4331 }
4332 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4333 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4334 }
4335
4336 for (i = 0; i < v->soc.num_states; i++) {
4337 for (j = 0; j < 2; j++) {
4338 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4339 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4340 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4341 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4342 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4343 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4344 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4345 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4346 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4347 }
4348
4349 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4350 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4351 if (v->DCCEnable[k] == true) {
4352 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4353 }
4354 }
4355
4356 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4357 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
4358 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4359
4360 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
4361 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
4362 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4363 } else {
4364 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4365 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4366 }
4367
4368 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4369 mode_lib,
4370 v->DCCEnable[k],
4371 v->Read256BlockHeightC[k],
4372 v->Read256BlockWidthY[k],
4373 v->SourcePixelFormat[k],
4374 v->SurfaceTiling[k],
4375 v->BytePerPixelC[k],
4376 v->SourceScan[k],
4377 v->SwathWidthCThisState[k],
4378 v->ViewportHeightChroma[k],
4379 v->GPUVMEnable,
4380 v->HostVMEnable,
4381 v->HostVMMaxNonCachedPageTableLevels,
4382 v->GPUVMMinPageSize,
4383 v->HostVMMinPageSize,
4384 v->PTEBufferSizeInRequestsForChroma,
4385 v->PitchC[k],
4386 0.0,
4387 &v->MacroTileWidthC[k],
4388 &v->MetaRowBytesC,
4389 &v->DPTEBytesPerRowC,
4390 &v->PTEBufferSizeNotExceededC[i][j][k],
4391 &v->dummyinteger7,
4392 &v->dpte_row_height_chroma[k],
4393 &v->dummyinteger28,
4394 &v->dummyinteger26,
4395 &v->dummyinteger23,
4396 &v->meta_row_height_chroma[k],
4397 &v->dummyinteger8,
4398 &v->dummyinteger9,
4399 &v->dummyinteger19,
4400 &v->dummyinteger20,
4401 &v->dummyinteger17,
4402 &v->dummyinteger10,
4403 &v->dummyinteger11);
4404
4405 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4406 mode_lib,
4407 v->VRatioChroma[k],
4408 v->VTAPsChroma[k],
4409 v->Interlace[k],
4410 v->ProgressiveToInterlaceUnitInOPP,
4411 v->SwathHeightCThisState[k],
4412 v->ViewportYStartC[k],
4413 &v->PrefillC[k],
4414 &v->MaxNumSwC[k]);
4415 } else {
4416 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4417 v->PTEBufferSizeInRequestsForChroma = 0;
4418 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4419 v->MetaRowBytesC = 0.0;
4420 v->DPTEBytesPerRowC = 0.0;
4421 v->PrefetchLinesC[i][j][k] = 0.0;
4422 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4423 }
4424 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4425 mode_lib,
4426 v->DCCEnable[k],
4427 v->Read256BlockHeightY[k],
4428 v->Read256BlockWidthY[k],
4429 v->SourcePixelFormat[k],
4430 v->SurfaceTiling[k],
4431 v->BytePerPixelY[k],
4432 v->SourceScan[k],
4433 v->SwathWidthYThisState[k],
4434 v->ViewportHeight[k],
4435 v->GPUVMEnable,
4436 v->HostVMEnable,
4437 v->HostVMMaxNonCachedPageTableLevels,
4438 v->GPUVMMinPageSize,
4439 v->HostVMMinPageSize,
4440 v->PTEBufferSizeInRequestsForLuma,
4441 v->PitchY[k],
4442 v->DCCMetaPitchY[k],
4443 &v->MacroTileWidthY[k],
4444 &v->MetaRowBytesY,
4445 &v->DPTEBytesPerRowY,
4446 &v->PTEBufferSizeNotExceededY[i][j][k],
4447 v->dummyinteger4,
4448 &v->dpte_row_height[k],
4449 &v->dummyinteger29,
4450 &v->dummyinteger27,
4451 &v->dummyinteger24,
4452 &v->meta_row_height[k],
4453 &v->dummyinteger25,
4454 &v->dpte_group_bytes[k],
4455 &v->dummyinteger21,
4456 &v->dummyinteger22,
4457 &v->dummyinteger18,
4458 &v->dummyinteger5,
4459 &v->dummyinteger6);
4460 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4461 mode_lib,
4462 v->VRatio[k],
4463 v->vtaps[k],
4464 v->Interlace[k],
4465 v->ProgressiveToInterlaceUnitInOPP,
4466 v->SwathHeightYThisState[k],
4467 v->ViewportYStartY[k],
4468 &v->PrefillY[k],
4469 &v->MaxNumSwY[k]);
4470 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4471 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4472 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4473
4474 CalculateRowBandwidth(
4475 v->GPUVMEnable,
4476 v->SourcePixelFormat[k],
4477 v->VRatio[k],
4478 v->VRatioChroma[k],
4479 v->DCCEnable[k],
4480 v->HTotal[k] / v->PixelClock[k],
4481 v->MetaRowBytesY,
4482 v->MetaRowBytesC,
4483 v->meta_row_height[k],
4484 v->meta_row_height_chroma[k],
4485 v->DPTEBytesPerRowY,
4486 v->DPTEBytesPerRowC,
4487 v->dpte_row_height[k],
4488 v->dpte_row_height_chroma[k],
4489 &v->meta_row_bandwidth[i][j][k],
4490 &v->dpte_row_bandwidth[i][j][k]);
4491 }
4492 v->UrgLatency[i] = CalculateUrgentLatency(
4493 v->UrgentLatencyPixelDataOnly,
4494 v->UrgentLatencyPixelMixedWithVMData,
4495 v->UrgentLatencyVMDataOnly,
4496 v->DoUrgentLatencyAdjustment,
4497 v->UrgentLatencyAdjustmentFabricClockComponent,
4498 v->UrgentLatencyAdjustmentFabricClockReference,
4499 v->FabricClockPerState[i]);
4500
4501 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4502 CalculateUrgentBurstFactor(
4503 v->swath_width_luma_ub_this_state[k],
4504 v->swath_width_chroma_ub_this_state[k],
4505 v->DETBufferSizeInKByte[0],
4506 v->SwathHeightYThisState[k],
4507 v->SwathHeightCThisState[k],
4508 v->HTotal[k] / v->PixelClock[k],
4509 v->UrgLatency[i],
4510 v->CursorBufferSize,
4511 v->CursorWidth[k][0],
4512 v->CursorBPP[k][0],
4513 v->VRatio[k],
4514 v->VRatioChroma[k],
4515 v->BytePerPixelInDETY[k],
4516 v->BytePerPixelInDETC[k],
4517 v->DETBufferSizeYThisState[k],
4518 v->DETBufferSizeCThisState[k],
4519 &v->UrgentBurstFactorCursor[k],
4520 &v->UrgentBurstFactorLuma[k],
4521 &v->UrgentBurstFactorChroma[k],
4522 &NotUrgentLatencyHiding[k]);
4523 }
4524
4525 v->NotUrgentLatencyHiding[i][j] = false;
4526 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4527 if (NotUrgentLatencyHiding[k]) {
4528 v->NotUrgentLatencyHiding[i][j] = true;
4529 }
4530 }
4531
4532 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4533 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4534 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4535 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4536 }
4537
4538 v->TotalVActivePixelBandwidth[i][j] = 0;
4539 v->TotalVActiveCursorBandwidth[i][j] = 0;
4540 v->TotalMetaRowBandwidth[i][j] = 0;
4541 v->TotalDPTERowBandwidth[i][j] = 0;
4542 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4543 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4544 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4545 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4546 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4547 }
4548
4549 CalculateDCFCLKDeepSleep(
4550 mode_lib,
4551 v->NumberOfActivePlanes,
4552 v->BytePerPixelY,
4553 v->BytePerPixelC,
4554 v->VRatio,
4555 v->VRatioChroma,
4556 v->SwathWidthYThisState,
4557 v->SwathWidthCThisState,
4558 v->NoOfDPPThisState,
4559 v->HRatio,
4560 v->HRatioChroma,
4561 v->PixelClock,
4562 v->PSCL_FACTOR,
4563 v->PSCL_FACTOR_CHROMA,
4564 v->RequiredDPPCLKThisState,
4565 v->ReadBandwidthLuma,
4566 v->ReadBandwidthChroma,
4567 v->ReturnBusWidth,
4568 &v->ProjectedDCFCLKDeepSleep[i][j]);
4569 }
4570 }
4571
4572 //Calculate Return BW
4573
4574 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4575 for (j = 0; j <= 1; ++j) {
4576 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4577 if (v->BlendingAndTiming[k] == k) {
4578 if (v->WritebackEnable[k] == true) {
4579 v->WritebackDelayTime[k] = v->WritebackLatency
4580 + CalculateWriteBackDelay(
4581 v->WritebackPixelFormat[k],
4582 v->WritebackHRatio[k],
4583 v->WritebackVRatio[k],
4584 v->WritebackVTaps[k],
4585 v->WritebackDestinationWidth[k],
4586 v->WritebackDestinationHeight[k],
4587 v->WritebackSourceHeight[k],
4588 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4589 } else {
4590 v->WritebackDelayTime[k] = 0.0;
4591 }
4592 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4593 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4594 v->WritebackDelayTime[k] = dml_max(
4595 v->WritebackDelayTime[k],
4596 v->WritebackLatency
4597 + CalculateWriteBackDelay(
4598 v->WritebackPixelFormat[m],
4599 v->WritebackHRatio[m],
4600 v->WritebackVRatio[m],
4601 v->WritebackVTaps[m],
4602 v->WritebackDestinationWidth[m],
4603 v->WritebackDestinationHeight[m],
4604 v->WritebackSourceHeight[m],
4605 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4606 }
4607 }
4608 }
4609 }
4610 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4611 for (m = 0; m <= v->NumberOfActivePlanes - 1; m++) {
4612 if (v->BlendingAndTiming[k] == m) {
4613 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4614 }
4615 }
4616 }
4617 v->MaxMaxVStartup[i][j] = 0;
4618 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4619 v->MaximumVStartup[i][j][k] = v->VTotal[k] - v->VActive[k]
4620 - dml_max(1.0, dml_ceil(1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0));
4621 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4622 }
4623 }
4624 }
4625
4626 ReorderingBytes = v->NumberOfChannels
4627 * dml_max3(
4628 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4629 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4630 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4631 v->FinalDRAMClockChangeLatency = (v->DRAMClockChangeLatencyOverride > 0 ? v->DRAMClockChangeLatencyOverride : v->DRAMClockChangeLatency);
4632
4633 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4634 for (j = 0; j <= 1; ++j) {
4635 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4636 }
4637 }
4638
4639 if (v->UseMinimumRequiredDCFCLK == true) {
4640 UseMinimumDCFCLK(mode_lib, v, MaxPrefetchMode, ReorderingBytes);
4641
4642 if (v->ClampMinDCFCLK) {
4643 /* Clamp calculated values to actual minimum */
4644 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4645 for (j = 0; j <= 1; ++j) {
4646 if (v->DCFCLKState[i][j] < mode_lib->soc.min_dcfclk) {
4647 v->DCFCLKState[i][j] = mode_lib->soc.min_dcfclk;
4648 }
4649 }
4650 }
4651 }
4652 }
4653
4654 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4655 for (j = 0; j <= 1; ++j) {
4656 v->IdealSDPPortBandwidthPerState[i][j] = dml_min3(
4657 v->ReturnBusWidth * v->DCFCLKState[i][j],
4658 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth,
4659 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4660 if (v->HostVMEnable != true) {
4661 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j] * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
4662 / 100;
4663 } else {
4664 v->ReturnBWPerState[i][j] = v->IdealSDPPortBandwidthPerState[i][j]
4665 * v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100;
4666 }
4667 }
4668 }
4669
4670 //Re-ordering Buffer Support Check
4671
4672 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4673 for (j = 0; j <= 1; ++j) {
4674 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4675 > (v->RoundTripPingLatencyCycles + 32) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4676 v->ROBSupport[i][j] = true;
4677 } else {
4678 v->ROBSupport[i][j] = false;
4679 }
4680 }
4681 }
4682
4683 //Vertical Active BW support check
4684
4685 MaxTotalVActiveRDBandwidth = 0;
4686 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4687 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4688 }
4689
4690 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4691 for (j = 0; j <= 1; ++j) {
4692 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4693 v->IdealSDPPortBandwidthPerState[i][j] * v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4694 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation
4695 / 100);
4696 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4697 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4698 } else {
4699 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4700 }
4701 }
4702 }
4703
4704 //Prefetch Check
4705
4706 for (i = 0; i < mode_lib->soc.num_states; ++i) {
4707 for (j = 0; j <= 1; ++j) {
4708 int NextPrefetchModeState = MinPrefetchMode;
4709
4710 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
4711
4712 v->BandwidthWithoutPrefetchSupported[i][j] = true;
4713 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j]
4714 > v->ReturnBWPerState[i][j] || v->NotUrgentLatencyHiding[i][j]) {
4715 v->BandwidthWithoutPrefetchSupported[i][j] = false;
4716 }
4717
4718 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4719 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4720 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4721 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4722 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4723 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4724 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4725 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4726 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4727 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4728 v->ODMCombineEnabled[k] = v->ODMCombineEnablePerState[i][k];
4729 }
4730
4731 v->ExtraLatency = CalculateExtraLatency(
4732 v->RoundTripPingLatencyCycles,
4733 ReorderingBytes,
4734 v->DCFCLKState[i][j],
4735 v->TotalNumberOfActiveDPP[i][j],
4736 v->PixelChunkSizeInKByte,
4737 v->TotalNumberOfDCCActiveDPP[i][j],
4738 v->MetaChunkSize,
4739 v->ReturnBWPerState[i][j],
4740 v->GPUVMEnable,
4741 v->HostVMEnable,
4742 v->NumberOfActivePlanes,
4743 v->NoOfDPPThisState,
4744 v->dpte_group_bytes,
4745 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4746 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4747 v->HostVMMinPageSize,
4748 v->HostVMMaxNonCachedPageTableLevels);
4749
4750 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
4751 do {
4752 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
4753 v->MaxVStartup = v->NextMaxVStartup;
4754
4755 v->TWait = CalculateTWait(v->PrefetchModePerState[i][j], v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
4756
4757 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4758 Pipe myPipe = { 0 };
4759
4760 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
4761 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
4762 myPipe.PixelClock = v->PixelClock[k];
4763 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
4764 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
4765 myPipe.ScalerEnabled = v->ScalerEnabled[k];
4766 myPipe.SourceScan = v->SourceScan[k];
4767 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
4768 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
4769 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
4770 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
4771 myPipe.InterlaceEnable = v->Interlace[k];
4772 myPipe.NumberOfCursors = v->NumberOfCursors[k];
4773 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
4774 myPipe.HTotal = v->HTotal[k];
4775 myPipe.DCCEnable = v->DCCEnable[k];
4776 myPipe.ODMCombineEnabled = !!v->ODMCombineEnabled[k];
4777
4778 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
4779 mode_lib,
4780 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4781 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4782 &myPipe,
4783 v->DSCDelayPerState[i][k],
4784 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
4785 v->DPPCLKDelaySCL,
4786 v->DPPCLKDelaySCLLBOnly,
4787 v->DPPCLKDelayCNVCCursor,
4788 v->DISPCLKDelaySubtotal,
4789 v->SwathWidthYThisState[k] / v->HRatio[k],
4790 v->OutputFormat[k],
4791 v->MaxInterDCNTileRepeaters,
4792 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
4793 v->MaximumVStartup[i][j][k],
4794 v->GPUVMMaxPageTableLevels,
4795 v->GPUVMEnable,
4796 v->HostVMEnable,
4797 v->HostVMMaxNonCachedPageTableLevels,
4798 v->HostVMMinPageSize,
4799 v->DynamicMetadataEnable[k],
4800 v->DynamicMetadataVMEnabled,
4801 v->DynamicMetadataLinesBeforeActiveRequired[k],
4802 v->DynamicMetadataTransmittedBytes[k],
4803 v->UrgLatency[i],
4804 v->ExtraLatency,
4805 v->TimeCalc,
4806 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4807 v->MetaRowBytes[i][j][k],
4808 v->DPTEBytesPerRow[i][j][k],
4809 v->PrefetchLinesY[i][j][k],
4810 v->SwathWidthYThisState[k],
4811 v->BytePerPixelY[k],
4812 v->PrefillY[k],
4813 v->MaxNumSwY[k],
4814 v->PrefetchLinesC[i][j][k],
4815 v->SwathWidthCThisState[k],
4816 v->BytePerPixelC[k],
4817 v->PrefillC[k],
4818 v->MaxNumSwC[k],
4819 v->swath_width_luma_ub_this_state[k],
4820 v->swath_width_chroma_ub_this_state[k],
4821 v->SwathHeightYThisState[k],
4822 v->SwathHeightCThisState[k],
4823 v->TWait,
4824 v->ProgressiveToInterlaceUnitInOPP,
4825 &v->DSTXAfterScaler[k],
4826 &v->DSTYAfterScaler[k],
4827 &v->LineTimesForPrefetch[k],
4828 &v->PrefetchBW[k],
4829 &v->LinesForMetaPTE[k],
4830 &v->LinesForMetaAndDPTERow[k],
4831 &v->VRatioPreY[i][j][k],
4832 &v->VRatioPreC[i][j][k],
4833 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
4834 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
4835 &v->NoTimeForDynamicMetadata[i][j][k],
4836 &v->Tno_bw[k],
4837 &v->prefetch_vmrow_bw[k],
4838 &v->Tdmdl_vm[k],
4839 &v->Tdmdl[k],
4840 &v->VUpdateOffsetPix[k],
4841 &v->VUpdateWidthPix[k],
4842 &v->VReadyOffsetPix[k]);
4843 }
4844
4845 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4846 CalculateUrgentBurstFactor(
4847 v->swath_width_luma_ub_this_state[k],
4848 v->swath_width_chroma_ub_this_state[k],
4849 v->DETBufferSizeInKByte[0],
4850 v->SwathHeightYThisState[k],
4851 v->SwathHeightCThisState[k],
4852 v->HTotal[k] / v->PixelClock[k],
4853 v->UrgLatency[i],
4854 v->CursorBufferSize,
4855 v->CursorWidth[k][0],
4856 v->CursorBPP[k][0],
4857 v->VRatioPreY[i][j][k],
4858 v->VRatioPreC[i][j][k],
4859 v->BytePerPixelInDETY[k],
4860 v->BytePerPixelInDETC[k],
4861 v->DETBufferSizeYThisState[k],
4862 v->DETBufferSizeCThisState[k],
4863 &v->UrgentBurstFactorCursorPre[k],
4864 &v->UrgentBurstFactorLumaPre[k],
4865 &v->UrgentBurstFactorChromaPre[k],
4866 &v->NoUrgentLatencyHidingPre[k]);
4867 }
4868
4869 v->MaximumReadBandwidthWithPrefetch = 0.0;
4870 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4871 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k])
4872 * v->VRatioPreY[i][j][k];
4873
4874 v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch
4875 + dml_max4(
4876 v->VActivePixelBandwidth[i][j][k],
4877 v->VActiveCursorBandwidth[i][j][k]
4878 + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]),
4879 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4880 v->NoOfDPP[i][j][k]
4881 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4882 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4883 * v->UrgentBurstFactorChromaPre[k])
4884 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4885 }
4886
4887 v->NotEnoughUrgentLatencyHidingPre = false;
4888 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4889 if (v->NoUrgentLatencyHidingPre[k] == true) {
4890 v->NotEnoughUrgentLatencyHidingPre = true;
4891 }
4892 }
4893
4894 v->PrefetchSupported[i][j] = true;
4895 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
4896 || v->NotEnoughUrgentLatencyHidingPre == 1) {
4897 v->PrefetchSupported[i][j] = false;
4898 }
4899 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4900 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
4901 || v->NoTimeForPrefetch[i][j][k] == true) {
4902 v->PrefetchSupported[i][j] = false;
4903 }
4904 }
4905
4906 v->DynamicMetadataSupported[i][j] = true;
4907 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4908 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
4909 v->DynamicMetadataSupported[i][j] = false;
4910 }
4911 }
4912
4913 v->VRatioInPrefetchSupported[i][j] = true;
4914 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4915 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
4916 v->VRatioInPrefetchSupported[i][j] = false;
4917 }
4918 }
4919 v->AnyLinesForVMOrRowTooLarge = false;
4920 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4921 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
4922 v->AnyLinesForVMOrRowTooLarge = true;
4923 }
4924 }
4925
4926 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
4927 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
4928 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4929 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
4930 - dml_max(
4931 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
4932 v->NoOfDPP[i][j][k]
4933 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k]
4934 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4935 * v->UrgentBurstFactorChromaPre[k])
4936 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4937 }
4938 v->TotImmediateFlipBytes = 0.0;
4939 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4940 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k]
4941 + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k]);
4942 }
4943
4944 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4945 CalculateFlipSchedule(
4946 mode_lib,
4947 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
4948 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
4949 v->ExtraLatency,
4950 v->UrgLatency[i],
4951 v->GPUVMMaxPageTableLevels,
4952 v->HostVMEnable,
4953 v->HostVMMaxNonCachedPageTableLevels,
4954 v->GPUVMEnable,
4955 v->HostVMMinPageSize,
4956 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
4957 v->MetaRowBytes[i][j][k],
4958 v->DPTEBytesPerRow[i][j][k],
4959 v->BandwidthAvailableForImmediateFlip,
4960 v->TotImmediateFlipBytes,
4961 v->SourcePixelFormat[k],
4962 v->HTotal[k] / v->PixelClock[k],
4963 v->VRatio[k],
4964 v->VRatioChroma[k],
4965 v->Tno_bw[k],
4966 v->DCCEnable[k],
4967 v->dpte_row_height[k],
4968 v->meta_row_height[k],
4969 v->dpte_row_height_chroma[k],
4970 v->meta_row_height_chroma[k],
4971 &v->DestinationLinesToRequestVMInImmediateFlip[k],
4972 &v->DestinationLinesToRequestRowInImmediateFlip[k],
4973 &v->final_flip_bw[k],
4974 &v->ImmediateFlipSupportedForPipe[k]);
4975 }
4976 v->total_dcn_read_bw_with_flip = 0.0;
4977 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4978 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
4979 + dml_max3(
4980 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
4981 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
4982 + v->VActiveCursorBandwidth[i][j][k],
4983 v->NoOfDPP[i][j][k]
4984 * (v->final_flip_bw[k]
4985 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
4986 * v->UrgentBurstFactorLumaPre[k]
4987 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
4988 * v->UrgentBurstFactorChromaPre[k])
4989 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
4990 }
4991 v->ImmediateFlipSupportedForState[i][j] = true;
4992 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
4993 v->ImmediateFlipSupportedForState[i][j] = false;
4994 }
4995 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
4996 if (v->ImmediateFlipSupportedForPipe[k] == false) {
4997 v->ImmediateFlipSupportedForState[i][j] = false;
4998 }
4999 }
5000 } else {
5001 v->ImmediateFlipSupportedForState[i][j] = false;
5002 }
5003 if (v->MaxVStartup <= 13 || v->AnyLinesForVMOrRowTooLarge == false) {
5004 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5005 NextPrefetchModeState = NextPrefetchModeState + 1;
5006 } else {
5007 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5008 }
5009 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5010 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5011 || v->ImmediateFlipSupportedForState[i][j] == true))
5012 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5013
5014 CalculateWatermarksAndDRAMSpeedChangeSupport(
5015 mode_lib,
5016 v->PrefetchModePerState[i][j],
5017 v->NumberOfActivePlanes,
5018 v->MaxLineBufferLines,
5019 v->LineBufferSize,
5020 v->DPPOutputBufferPixels,
5021 v->DETBufferSizeInKByte[0],
5022 v->WritebackInterfaceBufferSize,
5023 v->DCFCLKState[i][j],
5024 v->ReturnBWPerState[i][j],
5025 v->GPUVMEnable,
5026 v->dpte_group_bytes,
5027 v->MetaChunkSize,
5028 v->UrgLatency[i],
5029 v->ExtraLatency,
5030 v->WritebackLatency,
5031 v->WritebackChunkSize,
5032 v->SOCCLKPerState[i],
5033 v->FinalDRAMClockChangeLatency,
5034 v->SRExitTime,
5035 v->SREnterPlusExitTime,
5036 v->ProjectedDCFCLKDeepSleep[i][j],
5037 v->NoOfDPPThisState,
5038 v->DCCEnable,
5039 v->RequiredDPPCLKThisState,
5040 v->DETBufferSizeYThisState,
5041 v->DETBufferSizeCThisState,
5042 v->SwathHeightYThisState,
5043 v->SwathHeightCThisState,
5044 v->LBBitPerPixel,
5045 v->SwathWidthYThisState,
5046 v->SwathWidthCThisState,
5047 v->HRatio,
5048 v->HRatioChroma,
5049 v->vtaps,
5050 v->VTAPsChroma,
5051 v->VRatio,
5052 v->VRatioChroma,
5053 v->HTotal,
5054 v->PixelClock,
5055 v->BlendingAndTiming,
5056 v->BytePerPixelInDETY,
5057 v->BytePerPixelInDETC,
5058 v->DSTXAfterScaler,
5059 v->DSTYAfterScaler,
5060 v->WritebackEnable,
5061 v->WritebackPixelFormat,
5062 v->WritebackDestinationWidth,
5063 v->WritebackDestinationHeight,
5064 v->WritebackSourceHeight,
5065 &v->DRAMClockChangeSupport[i][j],
5066 &v->UrgentWatermark,
5067 &v->WritebackUrgentWatermark,
5068 &v->DRAMClockChangeWatermark,
5069 &v->WritebackDRAMClockChangeWatermark,
5070 &v->StutterExitWatermark,
5071 &v->StutterEnterPlusExitWatermark,
5072 &v->MinActiveDRAMClockChangeLatencySupported);
5073 }
5074 }
5075
5076 /*PTE Buffer Size Check*/
5077
5078 for (i = 0; i < v->soc.num_states; i++) {
5079 for (j = 0; j < 2; j++) {
5080 v->PTEBufferSizeNotExceeded[i][j] = true;
5081 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5082 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5083 v->PTEBufferSizeNotExceeded[i][j] = false;
5084 }
5085 }
5086 }
5087 }
5088 /*Cursor Support Check*/
5089
5090 v->CursorSupport = true;
5091 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5092 if (v->CursorWidth[k][0] > 0.0) {
5093 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5094 v->CursorSupport = false;
5095 }
5096 }
5097 }
5098 /*Valid Pitch Check*/
5099
5100 v->PitchSupport = true;
5101 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5102 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5103 if (v->DCCEnable[k] == true) {
5104 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5105 } else {
5106 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5107 }
5108 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
5109 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) {
5110 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5111 if (v->DCCEnable[k] == true) {
5112 v->AlignedDCCMetaPitchC[k] = dml_ceil(dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]);
5113 } else {
5114 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5115 }
5116 } else {
5117 v->AlignedCPitch[k] = v->PitchC[k];
5118 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5119 }
5120 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k]
5121 || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5122 v->PitchSupport = false;
5123 }
5124 }
5125
5126 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5127 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k])
5128 ViewportExceedsSurface = true;
5129
5130 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5131 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) {
5132 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5133 ViewportExceedsSurface = true;
5134 }
5135 }
5136 }
5137 /*Mode Support, Voltage State and SOC Configuration*/
5138
5139 for (i = v->soc.num_states - 1; i >= 0; i--) {
5140 for (j = 0; j < 2; j++) {
5141 if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1
5142 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1
5143 && v->NotEnoughDSCUnits[i] == 0
5144 && v->DTBCLKRequiredMoreThanSupported[i] == 0
5145 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1
5146 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
5147 && v->WritebackLatencySupport == 1 && v->WritebackScaleRatioAndTapsSupport == 1 && v->CursorSupport == 1 && v->PitchSupport == 1
5148 && ViewportExceedsSurface == 0 && v->PrefetchSupported[i][j] == 1 && v->DynamicMetadataSupported[i][j] == 1
5149 && v->TotalVerticalActiveBandwidthSupport[i][j] == 1 && v->VRatioInPrefetchSupported[i][j] == 1
5150 && v->PTEBufferSizeNotExceeded[i][j] == 1 && v->NonsupportedDSCInputBPC == 0
5151 && ((v->HostVMEnable == 0 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5152 || v->ImmediateFlipSupportedForState[i][j] == true)) {
5153 v->ModeSupport[i][j] = true;
5154 } else {
5155 v->ModeSupport[i][j] = false;
5156 }
5157 }
5158 }
5159 {
5160 unsigned int MaximumMPCCombine = 0;
5161 for (i = v->soc.num_states; i >= 0; i--) {
5162 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5163 v->VoltageLevel = i;
5164 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5165 if (v->ModeSupport[i][1] == true) {
5166 MaximumMPCCombine = 1;
5167 } else {
5168 MaximumMPCCombine = 0;
5169 }
5170 }
5171 }
5172 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5173 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5174 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5175 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5176 }
5177 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5178 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5179 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5180 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5181 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5182 v->maxMpcComb = MaximumMPCCombine;
5183 }
5184 }
5185
CalculateWatermarksAndDRAMSpeedChangeSupport(struct display_mode_lib * mode_lib,unsigned int PrefetchMode,unsigned int NumberOfActivePlanes,unsigned int MaxLineBufferLines,unsigned int LineBufferSize,unsigned int DPPOutputBufferPixels,unsigned int DETBufferSizeInKByte,unsigned int WritebackInterfaceBufferSize,double DCFCLK,double ReturnBW,bool GPUVMEnable,unsigned int dpte_group_bytes[],unsigned int MetaChunkSize,double UrgentLatency,double ExtraLatency,double WritebackLatency,double WritebackChunkSize,double SOCCLK,double DRAMClockChangeLatency,double SRExitTime,double SREnterPlusExitTime,double DCFCLKDeepSleep,unsigned int DPPPerPlane[],bool DCCEnable[],double DPPCLK[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int LBBitPerPixel[],double SwathWidthY[],double SwathWidthC[],double HRatio[],double HRatioChroma[],unsigned int vtaps[],unsigned int VTAPsChroma[],double VRatio[],double VRatioChroma[],unsigned int HTotal[],double PixelClock[],unsigned int BlendingAndTiming[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool WritebackEnable[],enum source_format_class WritebackPixelFormat[],double WritebackDestinationWidth[],double WritebackDestinationHeight[],double WritebackSourceHeight[],enum clock_change_support * DRAMClockChangeSupport,double * UrgentWatermark,double * WritebackUrgentWatermark,double * DRAMClockChangeWatermark,double * WritebackDRAMClockChangeWatermark,double * StutterExitWatermark,double * StutterEnterPlusExitWatermark,double * MinActiveDRAMClockChangeLatencySupported)5186 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5187 struct display_mode_lib *mode_lib,
5188 unsigned int PrefetchMode,
5189 unsigned int NumberOfActivePlanes,
5190 unsigned int MaxLineBufferLines,
5191 unsigned int LineBufferSize,
5192 unsigned int DPPOutputBufferPixels,
5193 unsigned int DETBufferSizeInKByte,
5194 unsigned int WritebackInterfaceBufferSize,
5195 double DCFCLK,
5196 double ReturnBW,
5197 bool GPUVMEnable,
5198 unsigned int dpte_group_bytes[],
5199 unsigned int MetaChunkSize,
5200 double UrgentLatency,
5201 double ExtraLatency,
5202 double WritebackLatency,
5203 double WritebackChunkSize,
5204 double SOCCLK,
5205 double DRAMClockChangeLatency,
5206 double SRExitTime,
5207 double SREnterPlusExitTime,
5208 double DCFCLKDeepSleep,
5209 unsigned int DPPPerPlane[],
5210 bool DCCEnable[],
5211 double DPPCLK[],
5212 unsigned int DETBufferSizeY[],
5213 unsigned int DETBufferSizeC[],
5214 unsigned int SwathHeightY[],
5215 unsigned int SwathHeightC[],
5216 unsigned int LBBitPerPixel[],
5217 double SwathWidthY[],
5218 double SwathWidthC[],
5219 double HRatio[],
5220 double HRatioChroma[],
5221 unsigned int vtaps[],
5222 unsigned int VTAPsChroma[],
5223 double VRatio[],
5224 double VRatioChroma[],
5225 unsigned int HTotal[],
5226 double PixelClock[],
5227 unsigned int BlendingAndTiming[],
5228 double BytePerPixelDETY[],
5229 double BytePerPixelDETC[],
5230 double DSTXAfterScaler[],
5231 double DSTYAfterScaler[],
5232 bool WritebackEnable[],
5233 enum source_format_class WritebackPixelFormat[],
5234 double WritebackDestinationWidth[],
5235 double WritebackDestinationHeight[],
5236 double WritebackSourceHeight[],
5237 enum clock_change_support *DRAMClockChangeSupport,
5238 double *UrgentWatermark,
5239 double *WritebackUrgentWatermark,
5240 double *DRAMClockChangeWatermark,
5241 double *WritebackDRAMClockChangeWatermark,
5242 double *StutterExitWatermark,
5243 double *StutterEnterPlusExitWatermark,
5244 double *MinActiveDRAMClockChangeLatencySupported)
5245 {
5246 double EffectiveLBLatencyHidingY = 0;
5247 double EffectiveLBLatencyHidingC = 0;
5248 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5249 double LinesInDETC = 0;
5250 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5251 unsigned int LinesInDETCRoundedDownToSwath = 0;
5252 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5253 double FullDETBufferingTimeC = 0;
5254 double ActiveDRAMClockChangeLatencyMarginY = 0;
5255 double ActiveDRAMClockChangeLatencyMarginC = 0;
5256 double WritebackDRAMClockChangeLatencyMargin = 0;
5257 double PlaneWithMinActiveDRAMClockChangeMargin = 0;
5258 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 0;
5259 double FullDETBufferingTimeYStutterCriticalPlane = 0;
5260 double TimeToFinishSwathTransferStutterCriticalPlane = 0;
5261 double WritebackDRAMClockChangeLatencyHiding = 0;
5262 unsigned int k, j;
5263
5264 mode_lib->vba.TotalActiveDPP = 0;
5265 mode_lib->vba.TotalDCCActiveDPP = 0;
5266 for (k = 0; k < NumberOfActivePlanes; ++k) {
5267 mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP + DPPPerPlane[k];
5268 if (DCCEnable[k] == true) {
5269 mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP + DPPPerPlane[k];
5270 }
5271 }
5272
5273 *UrgentWatermark = UrgentLatency + ExtraLatency;
5274
5275 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5276
5277 mode_lib->vba.TotalActiveWriteback = 0;
5278 for (k = 0; k < NumberOfActivePlanes; ++k) {
5279 if (WritebackEnable[k] == true) {
5280 mode_lib->vba.TotalActiveWriteback = mode_lib->vba.TotalActiveWriteback + 1;
5281 }
5282 }
5283
5284 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5285 *WritebackUrgentWatermark = WritebackLatency;
5286 } else {
5287 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5288 }
5289
5290 if (mode_lib->vba.TotalActiveWriteback <= 1) {
5291 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5292 } else {
5293 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5294 }
5295
5296 for (k = 0; k < NumberOfActivePlanes; ++k) {
5297
5298 mode_lib->vba.LBLatencyHidingSourceLinesY = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5299
5300 mode_lib->vba.LBLatencyHidingSourceLinesC = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
5301
5302 EffectiveLBLatencyHidingY = mode_lib->vba.LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5303
5304 EffectiveLBLatencyHidingC = mode_lib->vba.LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
5305
5306 LinesInDETY[k] = (double) DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
5307 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5308 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5309 if (BytePerPixelDETC[k] > 0) {
5310 LinesInDETC = mode_lib->vba.DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5311 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5312 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
5313 } else {
5314 LinesInDETC = 0;
5315 FullDETBufferingTimeC = 999999;
5316 }
5317
5318 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY[k] - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5319
5320 if (NumberOfActivePlanes > 1) {
5321 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
5322 }
5323
5324 if (BytePerPixelDETC[k] > 0) {
5325 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC - *UrgentWatermark - (HTotal[k] / PixelClock[k]) * (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) - *DRAMClockChangeWatermark;
5326
5327 if (NumberOfActivePlanes > 1) {
5328 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5329 }
5330 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5331 } else {
5332 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5333 }
5334
5335 if (WritebackEnable[k] == true) {
5336
5337 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5338 if (WritebackPixelFormat[k] == dm_444_64) {
5339 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5340 }
5341 if (mode_lib->vba.WritebackConfiguration == dm_whole_buffer_for_single_stream_interleave) {
5342 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding * 2;
5343 }
5344 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - mode_lib->vba.WritebackDRAMClockChangeWatermark;
5345 mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] = dml_min(mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5346 }
5347 }
5348
5349 mode_lib->vba.MinActiveDRAMClockChangeMargin = 999999;
5350 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5351 for (k = 0; k < NumberOfActivePlanes; ++k) {
5352 if (mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < mode_lib->vba.MinActiveDRAMClockChangeMargin) {
5353 mode_lib->vba.MinActiveDRAMClockChangeMargin = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5354 if (BlendingAndTiming[k] == k) {
5355 PlaneWithMinActiveDRAMClockChangeMargin = k;
5356 } else {
5357 for (j = 0; j < NumberOfActivePlanes; ++j) {
5358 if (BlendingAndTiming[k] == j) {
5359 PlaneWithMinActiveDRAMClockChangeMargin = j;
5360 }
5361 }
5362 }
5363 }
5364 }
5365
5366 *MinActiveDRAMClockChangeLatencySupported = mode_lib->vba.MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
5367
5368 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5369 for (k = 0; k < NumberOfActivePlanes; ++k) {
5370 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) && mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5371 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = mode_lib->vba.ActiveDRAMClockChangeLatencyMargin[k];
5372 }
5373 }
5374
5375 mode_lib->vba.TotalNumberOfActiveOTG = 0;
5376 for (k = 0; k < NumberOfActivePlanes; ++k) {
5377 if (BlendingAndTiming[k] == k) {
5378 mode_lib->vba.TotalNumberOfActiveOTG = mode_lib->vba.TotalNumberOfActiveOTG + 1;
5379 }
5380 }
5381
5382 if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
5383 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5384 } else if (((mode_lib->vba.SynchronizedVBlank == true || mode_lib->vba.TotalNumberOfActiveOTG == 1 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0)) {
5385 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5386 } else {
5387 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5388 }
5389
5390 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[0];
5391 for (k = 0; k < NumberOfActivePlanes; ++k) {
5392 if (FullDETBufferingTimeY[k] <= FullDETBufferingTimeYStutterCriticalPlane) {
5393 FullDETBufferingTimeYStutterCriticalPlane = FullDETBufferingTimeY[k];
5394 TimeToFinishSwathTransferStutterCriticalPlane = (SwathHeightY[k] - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k])) * (HTotal[k] / PixelClock[k]) / VRatio[k];
5395 }
5396 }
5397
5398 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5399 *StutterEnterPlusExitWatermark = dml_max(SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep, TimeToFinishSwathTransferStutterCriticalPlane);
5400
5401 }
5402
CalculateDCFCLKDeepSleep(struct display_mode_lib * mode_lib,unsigned int NumberOfActivePlanes,int BytePerPixelY[],int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],int ReturnBusWidth,double * DCFCLKDeepSleep)5403 static void CalculateDCFCLKDeepSleep(
5404 struct display_mode_lib *mode_lib,
5405 unsigned int NumberOfActivePlanes,
5406 int BytePerPixelY[],
5407 int BytePerPixelC[],
5408 double VRatio[],
5409 double VRatioChroma[],
5410 double SwathWidthY[],
5411 double SwathWidthC[],
5412 unsigned int DPPPerPlane[],
5413 double HRatio[],
5414 double HRatioChroma[],
5415 double PixelClock[],
5416 double PSCL_THROUGHPUT[],
5417 double PSCL_THROUGHPUT_CHROMA[],
5418 double DPPCLK[],
5419 double ReadBandwidthLuma[],
5420 double ReadBandwidthChroma[],
5421 int ReturnBusWidth,
5422 double *DCFCLKDeepSleep)
5423 {
5424 double DisplayPipeLineDeliveryTimeLuma = 0;
5425 double DisplayPipeLineDeliveryTimeChroma = 0;
5426 unsigned int k;
5427 double ReadBandwidth = 0.0;
5428
5429 //double DCFCLKDeepSleepPerPlane[DC__NUM_DPP__MAX];
5430 for (k = 0; k < NumberOfActivePlanes; ++k) {
5431
5432 if (VRatio[k] <= 1) {
5433 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5434 } else {
5435 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5436 }
5437 if (BytePerPixelC[k] == 0) {
5438 DisplayPipeLineDeliveryTimeChroma = 0;
5439 } else {
5440 if (VRatioChroma[k] <= 1) {
5441 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5442 } else {
5443 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5444 }
5445 }
5446
5447 if (BytePerPixelC[k] > 0) {
5448 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(1.1 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 1.1 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5449 } else {
5450 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = 1.1 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5451 }
5452 mode_lib->vba.DCFCLKDeepSleepPerPlane[k] = dml_max(mode_lib->vba.DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5453
5454 }
5455
5456 for (k = 0; k < NumberOfActivePlanes; ++k) {
5457 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5458 }
5459
5460 *DCFCLKDeepSleep = dml_max(8.0, ReadBandwidth / ReturnBusWidth);
5461
5462 for (k = 0; k < NumberOfActivePlanes; ++k) {
5463 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, mode_lib->vba.DCFCLKDeepSleepPerPlane[k]);
5464 }
5465 }
5466
/*
 * CalculateUrgentBurstFactor - derive the urgent burst factors for the
 * cursor, luma and chroma streams of one plane.
 *
 * For each stream the amount of buffering is expressed as a time (lines held
 * in the buffer * LineTime / vertical ratio). The burst factor is
 * time / (time - UrgentLatency). When the buffer time does not exceed
 * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * set. When the relevant vertical ratio is not positive the factor is 1.
 *
 * Outputs:
 *   *UrgentBurstFactorCursor  written only when CursorWidth > 0
 *   *UrgentBurstFactorLuma    always written
 *   *UrgentBurstFactorChroma  written only when BytePerPixelInDETC > 0
 *   *NotEnoughUrgentLatencyHiding  always written
 *
 * DETBufferSizeInKByte is accepted but not read here.
 */
static void CalculateUrgentBurstFactor(
		long swath_width_luma_ub,
		long swath_width_chroma_ub,
		unsigned int DETBufferSizeInKByte,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma = 0;
	double LinesInDETChroma = 0;
	unsigned int LinesInCursorBuffer = 0;
	double CursorBufferSizeInTime = 0;
	double DETBufferSizeInTimeLuma = 0;
	double DETBufferSizeInTimeChroma = 0;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines that fit in the buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines drain at the chroma vertical ratio, so the buffer
		 * time must be scaled by VRatioC rather than the luma VRatio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5541
CalculatePixelDeliveryTimes(unsigned int NumberOfActivePlanes,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerPlane[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double DPPCLK[],int BytePerPixelC[],enum scan_direction_class SourceScan[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][2],unsigned int CursorBPP[][2],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])5542 static void CalculatePixelDeliveryTimes(
5543 unsigned int NumberOfActivePlanes,
5544 double VRatio[],
5545 double VRatioChroma[],
5546 double VRatioPrefetchY[],
5547 double VRatioPrefetchC[],
5548 unsigned int swath_width_luma_ub[],
5549 unsigned int swath_width_chroma_ub[],
5550 unsigned int DPPPerPlane[],
5551 double HRatio[],
5552 double HRatioChroma[],
5553 double PixelClock[],
5554 double PSCL_THROUGHPUT[],
5555 double PSCL_THROUGHPUT_CHROMA[],
5556 double DPPCLK[],
5557 int BytePerPixelC[],
5558 enum scan_direction_class SourceScan[],
5559 unsigned int NumberOfCursors[],
5560 unsigned int CursorWidth[][2],
5561 unsigned int CursorBPP[][2],
5562 unsigned int BlockWidth256BytesY[],
5563 unsigned int BlockHeight256BytesY[],
5564 unsigned int BlockWidth256BytesC[],
5565 unsigned int BlockHeight256BytesC[],
5566 double DisplayPipeLineDeliveryTimeLuma[],
5567 double DisplayPipeLineDeliveryTimeChroma[],
5568 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5569 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5570 double DisplayPipeRequestDeliveryTimeLuma[],
5571 double DisplayPipeRequestDeliveryTimeChroma[],
5572 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5573 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5574 double CursorRequestDeliveryTime[],
5575 double CursorRequestDeliveryTimePrefetch[])
5576 {
5577 double req_per_swath_ub = 0;
5578 unsigned int k;
5579
5580 for (k = 0; k < NumberOfActivePlanes; ++k) {
5581 if (VRatio[k] <= 1) {
5582 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5583 } else {
5584 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5585 }
5586
5587 if (BytePerPixelC[k] == 0) {
5588 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5589 } else {
5590 if (VRatioChroma[k] <= 1) {
5591 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5592 } else {
5593 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5594 }
5595 }
5596
5597 if (VRatioPrefetchY[k] <= 1) {
5598 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5599 } else {
5600 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5601 }
5602
5603 if (BytePerPixelC[k] == 0) {
5604 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5605 } else {
5606 if (VRatioPrefetchC[k] <= 1) {
5607 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5608 } else {
5609 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5610 }
5611 }
5612 }
5613
5614 for (k = 0; k < NumberOfActivePlanes; ++k) {
5615 if (SourceScan[k] != dm_vert) {
5616 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5617 } else {
5618 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5619 }
5620 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5621 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5622 if (BytePerPixelC[k] == 0) {
5623 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5624 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5625 } else {
5626 if (SourceScan[k] != dm_vert) {
5627 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5628 } else {
5629 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5630 }
5631 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5632 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5633 }
5634 }
5635
5636 for (k = 0; k < NumberOfActivePlanes; ++k) {
5637 int cursor_req_per_width = 0;
5638 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5639 if (NumberOfCursors[k] > 0) {
5640 if (VRatio[k] <= 1) {
5641 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5642 } else {
5643 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5644 }
5645 if (VRatioPrefetchY[k] <= 1) {
5646 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5647 } else {
5648 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5649 }
5650 } else {
5651 CursorRequestDeliveryTime[k] = 0;
5652 CursorRequestDeliveryTimePrefetch[k] = 0;
5653 }
5654 }
5655 }
5656
CalculateMetaAndPTETimes(int NumberOfActivePlanes,bool GPUVMEnable,int MetaChunkSize,int MinMetaChunkSizeBytes,int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],int BytePerPixelY[],int BytePerPixelC[],enum scan_direction_class SourceScan[],int dpte_row_height[],int dpte_row_height_chroma[],int meta_row_width[],int meta_row_width_chroma[],int meta_row_height[],int meta_row_height_chroma[],int meta_req_width[],int meta_req_width_chroma[],int meta_req_height[],int meta_req_height_chroma[],int dpte_group_bytes[],int PTERequestSizeY[],int PTERequestSizeC[],int PixelPTEReqWidthY[],int PixelPTEReqHeightY[],int PixelPTEReqWidthC[],int PixelPTEReqHeightC[],int dpte_row_width_luma_ub[],int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])5657 static void CalculateMetaAndPTETimes(
5658 int NumberOfActivePlanes,
5659 bool GPUVMEnable,
5660 int MetaChunkSize,
5661 int MinMetaChunkSizeBytes,
5662 int HTotal[],
5663 double VRatio[],
5664 double VRatioChroma[],
5665 double DestinationLinesToRequestRowInVBlank[],
5666 double DestinationLinesToRequestRowInImmediateFlip[],
5667 bool DCCEnable[],
5668 double PixelClock[],
5669 int BytePerPixelY[],
5670 int BytePerPixelC[],
5671 enum scan_direction_class SourceScan[],
5672 int dpte_row_height[],
5673 int dpte_row_height_chroma[],
5674 int meta_row_width[],
5675 int meta_row_width_chroma[],
5676 int meta_row_height[],
5677 int meta_row_height_chroma[],
5678 int meta_req_width[],
5679 int meta_req_width_chroma[],
5680 int meta_req_height[],
5681 int meta_req_height_chroma[],
5682 int dpte_group_bytes[],
5683 int PTERequestSizeY[],
5684 int PTERequestSizeC[],
5685 int PixelPTEReqWidthY[],
5686 int PixelPTEReqHeightY[],
5687 int PixelPTEReqWidthC[],
5688 int PixelPTEReqHeightC[],
5689 int dpte_row_width_luma_ub[],
5690 int dpte_row_width_chroma_ub[],
5691 double DST_Y_PER_PTE_ROW_NOM_L[],
5692 double DST_Y_PER_PTE_ROW_NOM_C[],
5693 double DST_Y_PER_META_ROW_NOM_L[],
5694 double DST_Y_PER_META_ROW_NOM_C[],
5695 double TimePerMetaChunkNominal[],
5696 double TimePerChromaMetaChunkNominal[],
5697 double TimePerMetaChunkVBlank[],
5698 double TimePerChromaMetaChunkVBlank[],
5699 double TimePerMetaChunkFlip[],
5700 double TimePerChromaMetaChunkFlip[],
5701 double time_per_pte_group_nom_luma[],
5702 double time_per_pte_group_vblank_luma[],
5703 double time_per_pte_group_flip_luma[],
5704 double time_per_pte_group_nom_chroma[],
5705 double time_per_pte_group_vblank_chroma[],
5706 double time_per_pte_group_flip_chroma[])
5707 {
5708 unsigned int meta_chunk_width = 0;
5709 unsigned int min_meta_chunk_width = 0;
5710 unsigned int meta_chunk_per_row_int = 0;
5711 unsigned int meta_row_remainder = 0;
5712 unsigned int meta_chunk_threshold = 0;
5713 unsigned int meta_chunks_per_row_ub = 0;
5714 unsigned int meta_chunk_width_chroma = 0;
5715 unsigned int min_meta_chunk_width_chroma = 0;
5716 unsigned int meta_chunk_per_row_int_chroma = 0;
5717 unsigned int meta_row_remainder_chroma = 0;
5718 unsigned int meta_chunk_threshold_chroma = 0;
5719 unsigned int meta_chunks_per_row_ub_chroma = 0;
5720 unsigned int dpte_group_width_luma = 0;
5721 unsigned int dpte_groups_per_row_luma_ub = 0;
5722 unsigned int dpte_group_width_chroma = 0;
5723 unsigned int dpte_groups_per_row_chroma_ub = 0;
5724 unsigned int k;
5725
5726 for (k = 0; k < NumberOfActivePlanes; ++k) {
5727 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5728 if (BytePerPixelC[k] == 0) {
5729 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5730 } else {
5731 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5732 }
5733 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5734 if (BytePerPixelC[k] == 0) {
5735 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5736 } else {
5737 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5738 }
5739 }
5740
5741 for (k = 0; k < NumberOfActivePlanes; ++k) {
5742 if (DCCEnable[k] == true) {
5743 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5744 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5745 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5746 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5747 if (SourceScan[k] != dm_vert) {
5748 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5749 } else {
5750 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5751 }
5752 if (meta_row_remainder <= meta_chunk_threshold) {
5753 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5754 } else {
5755 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5756 }
5757 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5758 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5759 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5760 if (BytePerPixelC[k] == 0) {
5761 TimePerChromaMetaChunkNominal[k] = 0;
5762 TimePerChromaMetaChunkVBlank[k] = 0;
5763 TimePerChromaMetaChunkFlip[k] = 0;
5764 } else {
5765 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5766 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5767 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5768 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5769 if (SourceScan[k] != dm_vert) {
5770 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5771 } else {
5772 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5773 }
5774 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5775 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5776 } else {
5777 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5778 }
5779 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5780 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5781 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5782 }
5783 } else {
5784 TimePerMetaChunkNominal[k] = 0;
5785 TimePerMetaChunkVBlank[k] = 0;
5786 TimePerMetaChunkFlip[k] = 0;
5787 TimePerChromaMetaChunkNominal[k] = 0;
5788 TimePerChromaMetaChunkVBlank[k] = 0;
5789 TimePerChromaMetaChunkFlip[k] = 0;
5790 }
5791 }
5792
5793 for (k = 0; k < NumberOfActivePlanes; ++k) {
5794 if (GPUVMEnable == true) {
5795 if (SourceScan[k] != dm_vert) {
5796 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5797 } else {
5798 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5799 }
5800 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
5801 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5802 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5803 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5804 if (BytePerPixelC[k] == 0) {
5805 time_per_pte_group_nom_chroma[k] = 0;
5806 time_per_pte_group_vblank_chroma[k] = 0;
5807 time_per_pte_group_flip_chroma[k] = 0;
5808 } else {
5809 if (SourceScan[k] != dm_vert) {
5810 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5811 } else {
5812 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5813 }
5814 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
5815 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5816 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5817 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5818 }
5819 } else {
5820 time_per_pte_group_nom_luma[k] = 0;
5821 time_per_pte_group_vblank_luma[k] = 0;
5822 time_per_pte_group_flip_luma[k] = 0;
5823 time_per_pte_group_nom_chroma[k] = 0;
5824 time_per_pte_group_vblank_chroma[k] = 0;
5825 time_per_pte_group_flip_chroma[k] = 0;
5826 }
5827 }
5828 }
5829
/*
 * CalculateVMGroupAndRequestTimes() - per-plane VM group / request time budget.
 *
 * For every plane k in [0, NumberOfActivePlanes) the four output arrays are
 * filled. A plane is processed only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels is greater than 1;
 * otherwise all four outputs for that plane are 0.
 *
 * For a processed plane two divisors are derived:
 *  - a group count: sum of ceil(bytes / vm_group_bytes[k]) over the
 *    contributing byte counts (plus a constant 1 or 2 in the DCC,
 *    levels != 1 case);
 *  - a request count: sum of (bytes / 64) using integer division.
 * Which byte counts contribute depends on DCCEnable[k],
 * GPUVMMaxPageTableLevels and whether BytePerPixelC[k] > 0.
 *
 * Each output is then lines * HTotal[k] / PixelClock[k] / divisor, halved
 * when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int groups_per_stage;
		int reqs_per_stage;
		bool has_chroma;
		double group_bytes;

		/* Nothing to schedule for this plane: report zero budgets. */
		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;
		group_bytes = (double) (vm_group_bytes[k]);

		if (!DCCEnable[k]) {
			/* Only the dpde0 byte counts contribute. */
			groups_per_stage = has_chroma
					? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1)
					: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			reqs_per_stage = has_chroma
					? dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
					: dpde0_bytes_per_frame_ub_l[k] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Only the meta PTE byte counts contribute. */
			groups_per_stage = has_chroma
					? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1)
					: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			reqs_per_stage = has_chroma
					? meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64
					: meta_pte_bytes_per_frame_ub_l[k] / 64;
		} else {
			/* Both dpde0 and meta PTE byte counts contribute. */
			groups_per_stage = has_chroma
					? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1)
					: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			reqs_per_stage = has_chroma
					? dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64
					: dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
				/ groups_per_stage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
				/ groups_per_stage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k]
				/ reqs_per_stage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k]
				/ reqs_per_stage;

		/* With more than two page-table levels every budget is halved. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
5940
CalculateStutterEfficiency(int NumberOfActivePlanes,long ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,double SRExitTime,bool SynchronizedVBlank,int DPPPerPlane[],unsigned int DETBufferSizeY[],int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],int SwathHeightY[],int SwathHeightC[],double DCCRateLuma[],double DCCRateChroma[],int HTotal[],int VTotal[],double PixelClock[],double VRatio[],enum scan_direction_class SourceScan[],int BlockHeight256BytesY[],int BlockWidth256BytesY[],int BlockHeight256BytesC[],int BlockWidth256BytesC[],int DCCYMaxUncompressedBlock[],int DCCCMaxUncompressedBlock[],int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthPlaneLuma[],double ReadBandwidthPlaneChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,double * StutterPeriodOut)5941 static void CalculateStutterEfficiency(
5942 int NumberOfActivePlanes,
5943 long ROBBufferSizeInKByte,
5944 double TotalDataReadBandwidth,
5945 double DCFCLK,
5946 double ReturnBW,
5947 double SRExitTime,
5948 bool SynchronizedVBlank,
5949 int DPPPerPlane[],
5950 unsigned int DETBufferSizeY[],
5951 int BytePerPixelY[],
5952 double BytePerPixelDETY[],
5953 double SwathWidthY[],
5954 int SwathHeightY[],
5955 int SwathHeightC[],
5956 double DCCRateLuma[],
5957 double DCCRateChroma[],
5958 int HTotal[],
5959 int VTotal[],
5960 double PixelClock[],
5961 double VRatio[],
5962 enum scan_direction_class SourceScan[],
5963 int BlockHeight256BytesY[],
5964 int BlockWidth256BytesY[],
5965 int BlockHeight256BytesC[],
5966 int BlockWidth256BytesC[],
5967 int DCCYMaxUncompressedBlock[],
5968 int DCCCMaxUncompressedBlock[],
5969 int VActive[],
5970 bool DCCEnable[],
5971 bool WritebackEnable[],
5972 double ReadBandwidthPlaneLuma[],
5973 double ReadBandwidthPlaneChroma[],
5974 double meta_row_bw[],
5975 double dpte_row_bw[],
5976 double *StutterEfficiencyNotIncludingVBlank,
5977 double *StutterEfficiency,
5978 double *StutterPeriodOut)
5979 {
5980 double FullDETBufferingTimeY[DC__NUM_DPP__MAX] = { 0 };
5981 double FrameTimeForMinFullDETBufferingTime = 0;
5982 double StutterPeriod = 0;
5983 double AverageReadBandwidth = 0;
5984 double TotalRowReadBandwidth = 0;
5985 double AverageDCCCompressionRate = 0;
5986 double PartOfBurstThatFitsInROB = 0;
5987 double StutterBurstTime = 0;
5988 int TotalActiveWriteback = 0;
5989 double VBlankTime = 0;
5990 double SmallestVBlank = 0;
5991 int BytePerPixelYCriticalPlane = 0;
5992 double SwathWidthYCriticalPlane = 0;
5993 double LinesInDETY[DC__NUM_DPP__MAX] = { 0 };
5994 double LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX] = { 0 };
5995 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
5996 double MaximumEffectiveCompressionLuma = 0;
5997 double MaximumEffectiveCompressionChroma = 0;
5998 unsigned int k;
5999
6000 for (k = 0; k < NumberOfActivePlanes; ++k) {
6001 LinesInDETY[k] = DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k];
6002 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
6003 FullDETBufferingTimeY[k] = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
6004 }
6005
6006 StutterPeriod = FullDETBufferingTimeY[0];
6007 FrameTimeForMinFullDETBufferingTime = VTotal[0] * HTotal[0] / PixelClock[0];
6008 BytePerPixelYCriticalPlane = BytePerPixelY[0];
6009 SwathWidthYCriticalPlane = SwathWidthY[0];
6010 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[0]
6011 - (LinesInDETY[0] - LinesInDETYRoundedDownToSwath[0]);
6012
6013 for (k = 0; k < NumberOfActivePlanes; ++k) {
6014 if (FullDETBufferingTimeY[k] < StutterPeriod) {
6015 StutterPeriod = FullDETBufferingTimeY[k];
6016 FrameTimeForMinFullDETBufferingTime = VTotal[k] * HTotal[k] / PixelClock[k];
6017 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6018 SwathWidthYCriticalPlane = SwathWidthY[k];
6019 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k]
6020 - (LinesInDETY[k] - LinesInDETYRoundedDownToSwath[k]);
6021 }
6022 }
6023
6024 AverageReadBandwidth = 0;
6025 TotalRowReadBandwidth = 0;
6026 for (k = 0; k < NumberOfActivePlanes; ++k) {
6027 if (DCCEnable[k] == true) {
6028 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k])
6029 || (SourceScan[k] != dm_vert
6030 && BlockHeight256BytesY[k] > SwathHeightY[k])
6031 || DCCYMaxUncompressedBlock[k] < 256) {
6032 MaximumEffectiveCompressionLuma = 2;
6033 } else {
6034 MaximumEffectiveCompressionLuma = 4;
6035 }
6036 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(DCCRateLuma[k], MaximumEffectiveCompressionLuma);
6037
6038 if (ReadBandwidthPlaneChroma[k] > 0) {
6039 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6040 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k])
6041 || DCCCMaxUncompressedBlock[k] < 256) {
6042 MaximumEffectiveCompressionChroma = 2;
6043 } else {
6044 MaximumEffectiveCompressionChroma = 4;
6045 }
6046 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(DCCRateChroma[k], MaximumEffectiveCompressionChroma);
6047 }
6048 } else {
6049 AverageReadBandwidth = AverageReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6050 }
6051 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6052 }
6053
6054 AverageDCCCompressionRate = TotalDataReadBandwidth / AverageReadBandwidth;
6055 PartOfBurstThatFitsInROB = dml_min(StutterPeriod * TotalDataReadBandwidth, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6056 StutterBurstTime = PartOfBurstThatFitsInROB / AverageDCCCompressionRate / ReturnBW + (StutterPeriod * TotalDataReadBandwidth
6057 - PartOfBurstThatFitsInROB) / (DCFCLK * 64) + StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6058 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6059
6060 TotalActiveWriteback = 0;
6061 for (k = 0; k < NumberOfActivePlanes; ++k) {
6062 if (WritebackEnable[k] == true) {
6063 TotalActiveWriteback = TotalActiveWriteback + 1;
6064 }
6065 }
6066
6067 if (TotalActiveWriteback == 0) {
6068 *StutterEfficiencyNotIncludingVBlank = (1
6069 - (SRExitTime + StutterBurstTime) / StutterPeriod) * 100;
6070 } else {
6071 *StutterEfficiencyNotIncludingVBlank = 0;
6072 }
6073
6074 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6075 SmallestVBlank = (VTotal[0] - VActive[0]) * HTotal[0] / PixelClock[0];
6076 } else {
6077 SmallestVBlank = 0;
6078 }
6079 for (k = 0; k < NumberOfActivePlanes; ++k) {
6080 if (SynchronizedVBlank == true || NumberOfActivePlanes == 1) {
6081 VBlankTime = (VTotal[k] - VActive[k]) * HTotal[k] / PixelClock[k];
6082 } else {
6083 VBlankTime = 0;
6084 }
6085 SmallestVBlank = dml_min(SmallestVBlank, VBlankTime);
6086 }
6087
6088 *StutterEfficiency = (*StutterEfficiencyNotIncludingVBlank / 100.0 * (FrameTimeForMinFullDETBufferingTime - SmallestVBlank) + SmallestVBlank) / FrameTimeForMinFullDETBufferingTime * 100;
6089
6090 if (StutterPeriodOut)
6091 *StutterPeriodOut = StutterPeriod;
6092 }
6093
CalculateSwathAndDETConfiguration(bool ForceSingleDPP,int NumberOfActivePlanes,unsigned int DETBufferSizeInKByte,double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum scan_direction_class SourceScan[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],int ViewportWidth[],int ViewportHeight[],int SurfaceWidthY[],int SurfaceWidthC[],int SurfaceHeightY[],int SurfaceHeightC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],enum odm_combine_mode ODMCombineEnabled[],int BlendingAndTiming[],int BytePerPixY[],int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],int HActive[],double HRatio[],double HRatioChroma[],int DPPPerPlane[],int swath_width_luma_ub[],int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],int SwathHeightY[],int SwathHeightC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool ViewportSizeSupportPerPlane[],bool * ViewportSizeSupport)6094 static void CalculateSwathAndDETConfiguration(
6095 bool ForceSingleDPP,
6096 int NumberOfActivePlanes,
6097 unsigned int DETBufferSizeInKByte,
6098 double MaximumSwathWidthLuma[],
6099 double MaximumSwathWidthChroma[],
6100 enum scan_direction_class SourceScan[],
6101 enum source_format_class SourcePixelFormat[],
6102 enum dm_swizzle_mode SurfaceTiling[],
6103 int ViewportWidth[],
6104 int ViewportHeight[],
6105 int SurfaceWidthY[],
6106 int SurfaceWidthC[],
6107 int SurfaceHeightY[],
6108 int SurfaceHeightC[],
6109 int Read256BytesBlockHeightY[],
6110 int Read256BytesBlockHeightC[],
6111 int Read256BytesBlockWidthY[],
6112 int Read256BytesBlockWidthC[],
6113 enum odm_combine_mode ODMCombineEnabled[],
6114 int BlendingAndTiming[],
6115 int BytePerPixY[],
6116 int BytePerPixC[],
6117 double BytePerPixDETY[],
6118 double BytePerPixDETC[],
6119 int HActive[],
6120 double HRatio[],
6121 double HRatioChroma[],
6122 int DPPPerPlane[],
6123 int swath_width_luma_ub[],
6124 int swath_width_chroma_ub[],
6125 double SwathWidth[],
6126 double SwathWidthChroma[],
6127 int SwathHeightY[],
6128 int SwathHeightC[],
6129 unsigned int DETBufferSizeY[],
6130 unsigned int DETBufferSizeC[],
6131 bool ViewportSizeSupportPerPlane[],
6132 bool *ViewportSizeSupport)
6133 {
6134 int MaximumSwathHeightY[DC__NUM_DPP__MAX] = { 0 };
6135 int MaximumSwathHeightC[DC__NUM_DPP__MAX] = { 0 };
6136 int MinimumSwathHeightY = 0;
6137 int MinimumSwathHeightC = 0;
6138 long RoundedUpMaxSwathSizeBytesY = 0;
6139 long RoundedUpMaxSwathSizeBytesC = 0;
6140 long RoundedUpMinSwathSizeBytesY = 0;
6141 long RoundedUpMinSwathSizeBytesC = 0;
6142 long RoundedUpSwathSizeBytesY = 0;
6143 long RoundedUpSwathSizeBytesC = 0;
6144 double SwathWidthSingleDPP[DC__NUM_DPP__MAX] = { 0 };
6145 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX] = { 0 };
6146 int k;
6147
6148 CalculateSwathWidth(
6149 ForceSingleDPP,
6150 NumberOfActivePlanes,
6151 SourcePixelFormat,
6152 SourceScan,
6153 ViewportWidth,
6154 ViewportHeight,
6155 SurfaceWidthY,
6156 SurfaceWidthC,
6157 SurfaceHeightY,
6158 SurfaceHeightC,
6159 ODMCombineEnabled,
6160 BytePerPixY,
6161 BytePerPixC,
6162 Read256BytesBlockHeightY,
6163 Read256BytesBlockHeightC,
6164 Read256BytesBlockWidthY,
6165 Read256BytesBlockWidthC,
6166 BlendingAndTiming,
6167 HActive,
6168 HRatio,
6169 DPPPerPlane,
6170 SwathWidthSingleDPP,
6171 SwathWidthSingleDPPChroma,
6172 SwathWidth,
6173 SwathWidthChroma,
6174 MaximumSwathHeightY,
6175 MaximumSwathHeightC,
6176 swath_width_luma_ub,
6177 swath_width_chroma_ub);
6178
6179 *ViewportSizeSupport = true;
6180 for (k = 0; k < NumberOfActivePlanes; ++k) {
6181 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32
6182 || SourcePixelFormat[k] == dm_444_16
6183 || SourcePixelFormat[k] == dm_mono_16
6184 || SourcePixelFormat[k] == dm_mono_8
6185 || SourcePixelFormat[k] == dm_rgbe)) {
6186 if (SurfaceTiling[k] == dm_sw_linear
6187 || (SourcePixelFormat[k] == dm_444_64
6188 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6189 && SourceScan[k] != dm_vert)) {
6190 MinimumSwathHeightY = MaximumSwathHeightY[k];
6191 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6192 MinimumSwathHeightY = MaximumSwathHeightY[k];
6193 } else {
6194 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6195 }
6196 MinimumSwathHeightC = MaximumSwathHeightC[k];
6197 } else {
6198 if (SurfaceTiling[k] == dm_sw_linear) {
6199 MinimumSwathHeightY = MaximumSwathHeightY[k];
6200 MinimumSwathHeightC = MaximumSwathHeightC[k];
6201 } else if (SourcePixelFormat[k] == dm_rgbe_alpha
6202 && SourceScan[k] == dm_vert) {
6203 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6204 MinimumSwathHeightC = MaximumSwathHeightC[k];
6205 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6206 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6207 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6208 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6209 MinimumSwathHeightY = MaximumSwathHeightY[k];
6210 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6211 } else {
6212 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6213 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6214 }
6215 }
6216
6217 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6218 * MaximumSwathHeightY[k];
6219 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k]
6220 * MinimumSwathHeightY;
6221 if (SourcePixelFormat[k] == dm_420_10) {
6222 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6223 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6224 }
6225 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6226 * MaximumSwathHeightC[k];
6227 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k]
6228 * MinimumSwathHeightC;
6229 if (SourcePixelFormat[k] == dm_420_10) {
6230 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6231 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6232 }
6233
6234 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6235 <= DETBufferSizeInKByte * 1024 / 2) {
6236 SwathHeightY[k] = MaximumSwathHeightY[k];
6237 SwathHeightC[k] = MaximumSwathHeightC[k];
6238 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6239 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6240 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6241 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC
6242 <= DETBufferSizeInKByte * 1024 / 2) {
6243 SwathHeightY[k] = MinimumSwathHeightY;
6244 SwathHeightC[k] = MaximumSwathHeightC[k];
6245 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6246 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6247 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6248 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6249 <= DETBufferSizeInKByte * 1024 / 2) {
6250 SwathHeightY[k] = MaximumSwathHeightY[k];
6251 SwathHeightC[k] = MinimumSwathHeightC;
6252 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6253 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6254 } else {
6255 SwathHeightY[k] = MinimumSwathHeightY;
6256 SwathHeightC[k] = MinimumSwathHeightC;
6257 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6258 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6259 }
6260
6261 if (SwathHeightC[k] == 0) {
6262 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024;
6263 DETBufferSizeC[k] = 0;
6264 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6265 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 / 2;
6266 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 2;
6267 } else {
6268 DETBufferSizeY[k] = DETBufferSizeInKByte * 1024 * 2 / 3;
6269 DETBufferSizeC[k] = DETBufferSizeInKByte * 1024 / 3;
6270 }
6271
6272 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC
6273 > DETBufferSizeInKByte * 1024 / 2
6274 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6275 || (SwathHeightC[k] > 0
6276 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6277 *ViewportSizeSupport = false;
6278 ViewportSizeSupportPerPlane[k] = false;
6279 } else {
6280 ViewportSizeSupportPerPlane[k] = true;
6281 }
6282 }
6283 }
6284
CalculateSwathWidth(bool ForceSingleDPP,int NumberOfActivePlanes,enum source_format_class SourcePixelFormat[],enum scan_direction_class SourceScan[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMCombineEnabled[],int BytePerPixY[],int BytePerPixC[],int Read256BytesBlockHeightY[],int Read256BytesBlockHeightC[],int Read256BytesBlockWidthY[],int Read256BytesBlockWidthC[],int BlendingAndTiming[],unsigned int HActive[],double HRatio[],int DPPPerPlane[],double SwathWidthSingleDPPY[],double SwathWidthSingleDPPC[],double SwathWidthY[],double SwathWidthC[],int MaximumSwathHeightY[],int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])6285 static void CalculateSwathWidth(
6286 bool ForceSingleDPP,
6287 int NumberOfActivePlanes,
6288 enum source_format_class SourcePixelFormat[],
6289 enum scan_direction_class SourceScan[],
6290 unsigned int ViewportWidth[],
6291 unsigned int ViewportHeight[],
6292 unsigned int SurfaceWidthY[],
6293 unsigned int SurfaceWidthC[],
6294 unsigned int SurfaceHeightY[],
6295 unsigned int SurfaceHeightC[],
6296 enum odm_combine_mode ODMCombineEnabled[],
6297 int BytePerPixY[],
6298 int BytePerPixC[],
6299 int Read256BytesBlockHeightY[],
6300 int Read256BytesBlockHeightC[],
6301 int Read256BytesBlockWidthY[],
6302 int Read256BytesBlockWidthC[],
6303 int BlendingAndTiming[],
6304 unsigned int HActive[],
6305 double HRatio[],
6306 int DPPPerPlane[],
6307 double SwathWidthSingleDPPY[],
6308 double SwathWidthSingleDPPC[],
6309 double SwathWidthY[],
6310 double SwathWidthC[],
6311 int MaximumSwathHeightY[],
6312 int MaximumSwathHeightC[],
6313 unsigned int swath_width_luma_ub[],
6314 unsigned int swath_width_chroma_ub[])
6315 {
6316 unsigned int k, j;
6317 long surface_width_ub_l;
6318 long surface_height_ub_l;
6319 long surface_width_ub_c;
6320 long surface_height_ub_c;
6321
6322 for (k = 0; k < NumberOfActivePlanes; ++k) {
6323 enum odm_combine_mode MainPlaneODMCombine = 0;
6324
6325 if (SourceScan[k] != dm_vert) {
6326 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6327 } else {
6328 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6329 }
6330
6331 MainPlaneODMCombine = ODMCombineEnabled[k];
6332 for (j = 0; j < NumberOfActivePlanes; ++j) {
6333 if (BlendingAndTiming[k] == j) {
6334 MainPlaneODMCombine = ODMCombineEnabled[j];
6335 }
6336 }
6337
6338 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6339 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6340 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6341 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6342 } else if (DPPPerPlane[k] == 2) {
6343 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6344 } else {
6345 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6346 }
6347
6348 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6349 SwathWidthC[k] = SwathWidthY[k] / 2;
6350 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6351 } else {
6352 SwathWidthC[k] = SwathWidthY[k];
6353 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6354 }
6355
6356 if (ForceSingleDPP == true) {
6357 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6358 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6359 }
6360
6361 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6362 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6363
6364 if (SourceScan[k] != dm_vert) {
6365 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6366 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6367 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6368 Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6369 if (BytePerPixC[k] > 0) {
6370 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6371 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6372 Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6373 } else {
6374 swath_width_chroma_ub[k] = 0;
6375 }
6376 } else {
6377 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6378 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6379 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
6380 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6381 if (BytePerPixC[k] > 0) {
6382 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6383 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
6384 Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6385 } else {
6386 swath_width_chroma_ub[k] = 0;
6387 }
6388 }
6389 }
6390 }
6391
/*
 * CalculateExtraLatency - extra latency as a time value.
 *
 * Adds two terms:
 *   (RoundTripPingLatencyCycles + 32) / DCFCLK
 *   CalculateExtraLatencyBytes(...) / ReturnBW
 *
 * Every argument other than RoundTripPingLatencyCycles, DCFCLK and ReturnBW
 * is passed straight through to CalculateExtraLatencyBytes().
 */
static double CalculateExtraLatency(
		long RoundTripPingLatencyCycles,
		long ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	const double extra_bytes = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
			PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);
	/* Round-trip cycles plus a fixed 32-cycle allowance, in DCFCLK periods. */
	const double cycle_term = (RoundTripPingLatencyCycles + 32) / DCFCLK;
	/* Time to return the extra bytes at the given return bandwidth. */
	const double byte_term = extra_bytes / ReturnBW;

	return cycle_term + byte_term;
}
6430
/*
 * CalculateExtraLatencyBytes - byte count behind the extra-latency term.
 *
 * The base amount is
 *   ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *                      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.
 *
 * When GPUVMEnable is set, each plane below NumberOfActivePlanes adds
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) * factor
 * where, with both GPUVMEnable and HostVMEnable set,
 *   factor = PixelMixedWithVMData percentage / VMDataOnly percentage and
 *   levels = HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 *            (floored at 0 for the reduced cases) depending on whether
 *            HostVMMinPageSize is below 2048, below 1048576, or larger;
 * otherwise factor = 1 and levels = 0.
 *
 * Returns the total as a double.
 */
static double CalculateExtraLatencyBytes(
		long ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
		double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	/* Defaults apply whenever host VM translation is not in play. */
	double vm_inefficiency = 1;
	int dynamic_levels = 0;
	int requests_per_group;
	double total_bytes;
	unsigned int plane;

	if (GPUVMEnable && HostVMEnable) {
		vm_inefficiency = PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
				/ PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly;

		if (HostVMMinPageSize < 2048) {
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize < 1048576) {
			dynamic_levels = dml_max(0, HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			dynamic_levels = dml_max(0, HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	total_bytes = ReorderingBytes
			+ (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (!GPUVMEnable) {
		return total_bytes;
	}

	requests_per_group = 1 + 8 * dynamic_levels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane) {
		total_bytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * requests_per_group * vm_inefficiency;
	}

	return total_bytes;
}
6475
6476
/*
 * CalculateUrgentLatency - pick the urgent latency to use.
 *
 * Takes the dml_max3() of the three supplied latencies. When
 * DoUrgentLatencyAdjustment is set, adds
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment) {
		return worst_latency;
	}

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent
			* (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6494
UseMinimumDCFCLK(struct display_mode_lib * mode_lib,struct vba_vars_st * v,int MaxPrefetchMode,int ReorderingBytes)6495 static noinline_for_stack void UseMinimumDCFCLK(
6496 struct display_mode_lib *mode_lib,
6497 struct vba_vars_st *v,
6498 int MaxPrefetchMode,
6499 int ReorderingBytes)
6500 {
6501 double NormalEfficiency = 0;
6502 double PTEEfficiency = 0;
6503 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2] = { { 0 } };
6504 unsigned int i, j, k;
6505
6506 NormalEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData
6507 : v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly) / 100.0;
6508 PTEEfficiency = (v->HostVMEnable == true ? v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly
6509 / v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData : 1.0);
6510 for (i = 0; i < mode_lib->soc.num_states; ++i) {
6511 for (j = 0; j <= 1; ++j) {
6512 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX] = { 0 };
6513 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX] = { 0 };
6514 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX] = { 0 };
6515 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX] = { 0 };
6516 double MinimumTWait = 0;
6517 double NonDPTEBandwidth = 0;
6518 double DPTEBandwidth = 0;
6519 double DCFCLKRequiredForAverageBandwidth = 0;
6520 double ExtraLatencyBytes = 0;
6521 double ExtraLatencyCycles = 0;
6522 double DCFCLKRequiredForPeakBandwidth = 0;
6523 int NoOfDPPState[DC__NUM_DPP__MAX] = { 0 };
6524 double MinimumTvmPlus2Tr0 = 0;
6525
6526 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6527 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6528 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6529 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6530 }
6531
6532 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6533 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6534 }
6535
6536 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6537 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
6538 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6539 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6540 DCFCLKRequiredForAverageBandwidth = dml_max3(v->ProjectedDCFCLKDeepSleep[i][j],
6541 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth / (v->MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6542 (NonDPTEBandwidth + DPTEBandwidth / PTEEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6543
6544 ExtraLatencyBytes = CalculateExtraLatencyBytes(ReorderingBytes, v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j],
6545 v->MetaChunkSize, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, NoOfDPPState, v->dpte_group_bytes,
6546 v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, v->PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
6547 v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels);
6548 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + 32 + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
6549 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6550 double DCFCLKCyclesRequiredInPrefetch = { 0 };
6551 double ExpectedPrefetchBWAcceleration = { 0 };
6552 double PrefetchTime = { 0 };
6553
6554 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
6555 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
6556 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / PTEEfficiency
6557 / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) + 2 * v->DPTEBytesPerRow[i][j][k] / PTEEfficiency
6558 / NormalEfficiency / v->ReturnBusWidth + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6559 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
6560 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
6561 DynamicMetadataVMExtraLatency[k] = (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
6562 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6563 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - v->UrgLatency[i] * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels
6564 : v->GPUVMMaxPageTableLevels - 2) * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - DynamicMetadataVMExtraLatency[k];
6565
6566 if (PrefetchTime > 0) {
6567 double ExpectedVRatioPrefetch = { 0 };
6568 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6569 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6570 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6571 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
6572 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6573 + NoOfDPPState[k] * DPTEBandwidth / PTEEfficiency / NormalEfficiency / v->ReturnBusWidth;
6574 }
6575 } else {
6576 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6577 }
6578 if (v->DynamicMetadataEnable[k] == true) {
6579 double TsetupPipe = { 0 };
6580 double TdmbfPipe = { 0 };
6581 double TdmsksPipe = { 0 };
6582 double TdmecPipe = { 0 };
6583 double AllowedTimeForUrgentExtraLatency = { 0 };
6584
6585 CalculateDynamicMetadataParameters(
6586 v->MaxInterDCNTileRepeaters,
6587 v->RequiredDPPCLK[i][j][k],
6588 v->RequiredDISPCLK[i][j],
6589 v->ProjectedDCFCLKDeepSleep[i][j],
6590 v->PixelClock[k],
6591 v->HTotal[k],
6592 v->VTotal[k] - v->VActive[k],
6593 v->DynamicMetadataTransmittedBytes[k],
6594 v->DynamicMetadataLinesBeforeActiveRequired[k],
6595 v->Interlace[k],
6596 v->ProgressiveToInterlaceUnitInOPP,
6597 &TsetupPipe,
6598 &TdmbfPipe,
6599 &TdmecPipe,
6600 &TdmsksPipe);
6601 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TsetupPipe
6602 - TdmbfPipe - TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6603 if (AllowedTimeForUrgentExtraLatency > 0) {
6604 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(DCFCLKRequiredForPeakBandwidthPerPlane[k],
6605 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
6606 } else {
6607 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6608 }
6609 }
6610 }
6611 DCFCLKRequiredForPeakBandwidth = 0;
6612 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6613 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
6614 }
6615 MinimumTvmPlus2Tr0 = v->UrgLatency[i] * (v->GPUVMEnable == true ? (v->HostVMEnable == true ?
6616 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 0);
6617 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6618 double MaximumTvmPlus2Tr0PlusTsw = { 0 };
6619 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
6620 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
6621 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
6622 } else {
6623 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 2 * ExtraLatencyCycles
6624 / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
6625 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
6626 }
6627 }
6628 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * (1 + mode_lib->vba.PercentMarginOverMinimumRequiredDCFCLK / 100)
6629 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
6630 }
6631 }
6632 }
6633
6634