1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "dc_link.h"
28 #include "../display_mode_lib.h"
29 #include "../dcn30/display_mode_vba_30.h"
30 #include "display_mode_vba_31.h"
31 #include "../dml_inline_defs.h"
32
33 /*
34 * NOTE:
35 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 *
37 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
38 * ways. Unless there is something clearly wrong with it the code should
39 * remain as-is as it provides us with a guarantee from HW that it is correct.
40 */
41
42 #define BPP_INVALID 0
43 #define BPP_BLENDED_PIPE 0xffffffff
44 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
45 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
46 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128
47 #define DCN3_15_MAX_DET_SIZE 384
48
// For DML-C changes that haven't been propagated to VBA yet
50 //#define __DML_VBA_ALLOW_DELTA__
51
// Move these to IP parameters/constants
53
// The vstartup value at which DML starts checking whether the mode can be supported
55 #define __DML_VBA_MIN_VSTARTUP__ 9
56
57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
59
// fudge factor for min dcfclk calculation
61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
62
63 typedef struct {
64 double DPPCLK;
65 double DISPCLK;
66 double PixelClock;
67 double DCFCLKDeepSleep;
68 unsigned int DPPPerPlane;
69 bool ScalerEnabled;
70 double VRatio;
71 double VRatioChroma;
72 enum scan_direction_class SourceScan;
73 unsigned int BlockWidth256BytesY;
74 unsigned int BlockHeight256BytesY;
75 unsigned int BlockWidth256BytesC;
76 unsigned int BlockHeight256BytesC;
77 unsigned int InterlaceEnable;
78 unsigned int NumberOfCursors;
79 unsigned int VBlank;
80 unsigned int HTotal;
81 unsigned int DCCEnable;
82 bool ODMCombineIsEnabled;
83 enum source_format_class SourcePixelFormat;
84 int BytePerPixelY;
85 int BytePerPixelC;
86 bool ProgressiveToInterlaceUnitInOPP;
87 } Pipe;
88
89 #define BPP_INVALID 0
90 #define BPP_BLENDED_PIPE 0xffffffff
91
92 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
93 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
94 static unsigned int dscceComputeDelay(
95 unsigned int bpc,
96 double BPP,
97 unsigned int sliceWidth,
98 unsigned int numSlices,
99 enum output_format_class pixelFormat,
100 enum output_encoder_class Output);
101 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
102 static bool CalculatePrefetchSchedule(
103 struct display_mode_lib *mode_lib,
104 double HostVMInefficiencyFactor,
105 Pipe *myPipe,
106 unsigned int DSCDelay,
107 double DPPCLKDelaySubtotalPlusCNVCFormater,
108 double DPPCLKDelaySCL,
109 double DPPCLKDelaySCLLBOnly,
110 double DPPCLKDelayCNVCCursor,
111 double DISPCLKDelaySubtotal,
112 unsigned int DPP_RECOUT_WIDTH,
113 enum output_format_class OutputFormat,
114 unsigned int MaxInterDCNTileRepeaters,
115 unsigned int VStartup,
116 unsigned int MaxVStartup,
117 unsigned int GPUVMPageTableLevels,
118 bool GPUVMEnable,
119 bool HostVMEnable,
120 unsigned int HostVMMaxNonCachedPageTableLevels,
121 double HostVMMinPageSize,
122 bool DynamicMetadataEnable,
123 bool DynamicMetadataVMEnabled,
124 int DynamicMetadataLinesBeforeActiveRequired,
125 unsigned int DynamicMetadataTransmittedBytes,
126 double UrgentLatency,
127 double UrgentExtraLatency,
128 double TCalc,
129 unsigned int PDEAndMetaPTEBytesFrame,
130 unsigned int MetaRowByte,
131 unsigned int PixelPTEBytesPerRow,
132 double PrefetchSourceLinesY,
133 unsigned int SwathWidthY,
134 double VInitPreFillY,
135 unsigned int MaxNumSwathY,
136 double PrefetchSourceLinesC,
137 unsigned int SwathWidthC,
138 double VInitPreFillC,
139 unsigned int MaxNumSwathC,
140 int swath_width_luma_ub,
141 int swath_width_chroma_ub,
142 unsigned int SwathHeightY,
143 unsigned int SwathHeightC,
144 double TWait,
145 double *DSTXAfterScaler,
146 double *DSTYAfterScaler,
147 double *DestinationLinesForPrefetch,
148 double *PrefetchBandwidth,
149 double *DestinationLinesToRequestVMInVBlank,
150 double *DestinationLinesToRequestRowInVBlank,
151 double *VRatioPrefetchY,
152 double *VRatioPrefetchC,
153 double *RequiredPrefetchPixDataBWLuma,
154 double *RequiredPrefetchPixDataBWChroma,
155 bool *NotEnoughTimeForDynamicMetadata,
156 double *Tno_bw,
157 double *prefetch_vmrow_bw,
158 double *Tdmdl_vm,
159 double *Tdmdl,
160 double *TSetup,
161 int *VUpdateOffsetPix,
162 double *VUpdateWidthPix,
163 double *VReadyOffsetPix);
164 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
165 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
166 static void CalculateDCCConfiguration(
167 bool DCCEnabled,
168 bool DCCProgrammingAssumesScanDirectionUnknown,
169 enum source_format_class SourcePixelFormat,
170 unsigned int SurfaceWidthLuma,
171 unsigned int SurfaceWidthChroma,
172 unsigned int SurfaceHeightLuma,
173 unsigned int SurfaceHeightChroma,
174 double DETBufferSize,
175 unsigned int RequestHeight256ByteLuma,
176 unsigned int RequestHeight256ByteChroma,
177 enum dm_swizzle_mode TilingFormat,
178 unsigned int BytePerPixelY,
179 unsigned int BytePerPixelC,
180 double BytePerPixelDETY,
181 double BytePerPixelDETC,
182 enum scan_direction_class ScanOrientation,
183 unsigned int *MaxUncompressedBlockLuma,
184 unsigned int *MaxUncompressedBlockChroma,
185 unsigned int *MaxCompressedBlockLuma,
186 unsigned int *MaxCompressedBlockChroma,
187 unsigned int *IndependentBlockLuma,
188 unsigned int *IndependentBlockChroma);
189 static double CalculatePrefetchSourceLines(
190 struct display_mode_lib *mode_lib,
191 double VRatio,
192 double vtaps,
193 bool Interlace,
194 bool ProgressiveToInterlaceUnitInOPP,
195 unsigned int SwathHeight,
196 unsigned int ViewportYStart,
197 double *VInitPreFill,
198 unsigned int *MaxNumSwath);
199 static unsigned int CalculateVMAndRowBytes(
200 struct display_mode_lib *mode_lib,
201 bool DCCEnable,
202 unsigned int BlockHeight256Bytes,
203 unsigned int BlockWidth256Bytes,
204 enum source_format_class SourcePixelFormat,
205 unsigned int SurfaceTiling,
206 unsigned int BytePerPixel,
207 enum scan_direction_class ScanDirection,
208 unsigned int SwathWidth,
209 unsigned int ViewportHeight,
210 bool GPUVMEnable,
211 bool HostVMEnable,
212 unsigned int HostVMMaxNonCachedPageTableLevels,
213 unsigned int GPUVMMinPageSize,
214 unsigned int HostVMMinPageSize,
215 unsigned int PTEBufferSizeInRequests,
216 unsigned int Pitch,
217 unsigned int DCCMetaPitch,
218 unsigned int *MacroTileWidth,
219 unsigned int *MetaRowByte,
220 unsigned int *PixelPTEBytesPerRow,
221 bool *PTEBufferSizeNotExceeded,
222 int *dpte_row_width_ub,
223 unsigned int *dpte_row_height,
224 unsigned int *MetaRequestWidth,
225 unsigned int *MetaRequestHeight,
226 unsigned int *meta_row_width,
227 unsigned int *meta_row_height,
228 int *vm_group_bytes,
229 unsigned int *dpte_group_bytes,
230 unsigned int *PixelPTEReqWidth,
231 unsigned int *PixelPTEReqHeight,
232 unsigned int *PTERequestSize,
233 int *DPDE0BytesFrame,
234 int *MetaPTEBytesFrame);
235 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
236 static void CalculateRowBandwidth(
237 bool GPUVMEnable,
238 enum source_format_class SourcePixelFormat,
239 double VRatio,
240 double VRatioChroma,
241 bool DCCEnable,
242 double LineTime,
243 unsigned int MetaRowByteLuma,
244 unsigned int MetaRowByteChroma,
245 unsigned int meta_row_height_luma,
246 unsigned int meta_row_height_chroma,
247 unsigned int PixelPTEBytesPerRowLuma,
248 unsigned int PixelPTEBytesPerRowChroma,
249 unsigned int dpte_row_height_luma,
250 unsigned int dpte_row_height_chroma,
251 double *meta_row_bw,
252 double *dpte_row_bw);
253
254 static void CalculateFlipSchedule(
255 struct display_mode_lib *mode_lib,
256 unsigned int k,
257 double HostVMInefficiencyFactor,
258 double UrgentExtraLatency,
259 double UrgentLatency,
260 double PDEAndMetaPTEBytesPerFrame,
261 double MetaRowBytes,
262 double DPTEBytesPerRow);
263 static double CalculateWriteBackDelay(
264 enum source_format_class WritebackPixelFormat,
265 double WritebackHRatio,
266 double WritebackVRatio,
267 unsigned int WritebackVTaps,
268 int WritebackDestinationWidth,
269 int WritebackDestinationHeight,
270 int WritebackSourceHeight,
271 unsigned int HTotal);
272
273 static void CalculateVupdateAndDynamicMetadataParameters(
274 int MaxInterDCNTileRepeaters,
275 double DPPCLK,
276 double DISPCLK,
277 double DCFClkDeepSleep,
278 double PixelClock,
279 int HTotal,
280 int VBlank,
281 int DynamicMetadataTransmittedBytes,
282 int DynamicMetadataLinesBeforeActiveRequired,
283 int InterlaceEnable,
284 bool ProgressiveToInterlaceUnitInOPP,
285 double *TSetup,
286 double *Tdmbf,
287 double *Tdmec,
288 double *Tdmsks,
289 int *VUpdateOffsetPix,
290 double *VUpdateWidthPix,
291 double *VReadyOffsetPix);
292
293 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
294 struct display_mode_lib *mode_lib,
295 unsigned int PrefetchMode,
296 double DCFCLK,
297 double ReturnBW,
298 double UrgentLatency,
299 double ExtraLatency,
300 double SOCCLK,
301 double DCFCLKDeepSleep,
302 unsigned int DETBufferSizeY[],
303 unsigned int DETBufferSizeC[],
304 unsigned int SwathHeightY[],
305 unsigned int SwathHeightC[],
306 double SwathWidthY[],
307 double SwathWidthC[],
308 unsigned int DPPPerPlane[],
309 double BytePerPixelDETY[],
310 double BytePerPixelDETC[],
311 bool UnboundedRequestEnabled,
312 int unsigned CompressedBufferSizeInkByte,
313 enum clock_change_support *DRAMClockChangeSupport,
314 double *StutterExitWatermark,
315 double *StutterEnterPlusExitWatermark,
316 double *Z8StutterExitWatermark,
317 double *Z8StutterEnterPlusExitWatermark);
318
319 static void CalculateDCFCLKDeepSleep(
320 struct display_mode_lib *mode_lib,
321 unsigned int NumberOfActivePlanes,
322 int BytePerPixelY[],
323 int BytePerPixelC[],
324 double VRatio[],
325 double VRatioChroma[],
326 double SwathWidthY[],
327 double SwathWidthC[],
328 unsigned int DPPPerPlane[],
329 double HRatio[],
330 double HRatioChroma[],
331 double PixelClock[],
332 double PSCL_THROUGHPUT[],
333 double PSCL_THROUGHPUT_CHROMA[],
334 double DPPCLK[],
335 double ReadBandwidthLuma[],
336 double ReadBandwidthChroma[],
337 int ReturnBusWidth,
338 double *DCFCLKDeepSleep);
339
340 static void CalculateUrgentBurstFactor(
341 int swath_width_luma_ub,
342 int swath_width_chroma_ub,
343 unsigned int SwathHeightY,
344 unsigned int SwathHeightC,
345 double LineTime,
346 double UrgentLatency,
347 double CursorBufferSize,
348 unsigned int CursorWidth,
349 unsigned int CursorBPP,
350 double VRatio,
351 double VRatioC,
352 double BytePerPixelInDETY,
353 double BytePerPixelInDETC,
354 double DETBufferSizeY,
355 double DETBufferSizeC,
356 double *UrgentBurstFactorCursor,
357 double *UrgentBurstFactorLuma,
358 double *UrgentBurstFactorChroma,
359 bool *NotEnoughUrgentLatencyHiding);
360
361 static void UseMinimumDCFCLK(
362 struct display_mode_lib *mode_lib,
363 int MaxPrefetchMode,
364 int ReorderingBytes);
365
366 static void CalculatePixelDeliveryTimes(
367 unsigned int NumberOfActivePlanes,
368 double VRatio[],
369 double VRatioChroma[],
370 double VRatioPrefetchY[],
371 double VRatioPrefetchC[],
372 unsigned int swath_width_luma_ub[],
373 unsigned int swath_width_chroma_ub[],
374 unsigned int DPPPerPlane[],
375 double HRatio[],
376 double HRatioChroma[],
377 double PixelClock[],
378 double PSCL_THROUGHPUT[],
379 double PSCL_THROUGHPUT_CHROMA[],
380 double DPPCLK[],
381 int BytePerPixelC[],
382 enum scan_direction_class SourceScan[],
383 unsigned int NumberOfCursors[],
384 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
385 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
386 unsigned int BlockWidth256BytesY[],
387 unsigned int BlockHeight256BytesY[],
388 unsigned int BlockWidth256BytesC[],
389 unsigned int BlockHeight256BytesC[],
390 double DisplayPipeLineDeliveryTimeLuma[],
391 double DisplayPipeLineDeliveryTimeChroma[],
392 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
393 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
394 double DisplayPipeRequestDeliveryTimeLuma[],
395 double DisplayPipeRequestDeliveryTimeChroma[],
396 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
397 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
398 double CursorRequestDeliveryTime[],
399 double CursorRequestDeliveryTimePrefetch[]);
400
401 static void CalculateMetaAndPTETimes(
402 int NumberOfActivePlanes,
403 bool GPUVMEnable,
404 int MetaChunkSize,
405 int MinMetaChunkSizeBytes,
406 int HTotal[],
407 double VRatio[],
408 double VRatioChroma[],
409 double DestinationLinesToRequestRowInVBlank[],
410 double DestinationLinesToRequestRowInImmediateFlip[],
411 bool DCCEnable[],
412 double PixelClock[],
413 int BytePerPixelY[],
414 int BytePerPixelC[],
415 enum scan_direction_class SourceScan[],
416 int dpte_row_height[],
417 int dpte_row_height_chroma[],
418 int meta_row_width[],
419 int meta_row_width_chroma[],
420 int meta_row_height[],
421 int meta_row_height_chroma[],
422 int meta_req_width[],
423 int meta_req_width_chroma[],
424 int meta_req_height[],
425 int meta_req_height_chroma[],
426 int dpte_group_bytes[],
427 int PTERequestSizeY[],
428 int PTERequestSizeC[],
429 int PixelPTEReqWidthY[],
430 int PixelPTEReqHeightY[],
431 int PixelPTEReqWidthC[],
432 int PixelPTEReqHeightC[],
433 int dpte_row_width_luma_ub[],
434 int dpte_row_width_chroma_ub[],
435 double DST_Y_PER_PTE_ROW_NOM_L[],
436 double DST_Y_PER_PTE_ROW_NOM_C[],
437 double DST_Y_PER_META_ROW_NOM_L[],
438 double DST_Y_PER_META_ROW_NOM_C[],
439 double TimePerMetaChunkNominal[],
440 double TimePerChromaMetaChunkNominal[],
441 double TimePerMetaChunkVBlank[],
442 double TimePerChromaMetaChunkVBlank[],
443 double TimePerMetaChunkFlip[],
444 double TimePerChromaMetaChunkFlip[],
445 double time_per_pte_group_nom_luma[],
446 double time_per_pte_group_vblank_luma[],
447 double time_per_pte_group_flip_luma[],
448 double time_per_pte_group_nom_chroma[],
449 double time_per_pte_group_vblank_chroma[],
450 double time_per_pte_group_flip_chroma[]);
451
452 static void CalculateVMGroupAndRequestTimes(
453 unsigned int NumberOfActivePlanes,
454 bool GPUVMEnable,
455 unsigned int GPUVMMaxPageTableLevels,
456 unsigned int HTotal[],
457 int BytePerPixelC[],
458 double DestinationLinesToRequestVMInVBlank[],
459 double DestinationLinesToRequestVMInImmediateFlip[],
460 bool DCCEnable[],
461 double PixelClock[],
462 int dpte_row_width_luma_ub[],
463 int dpte_row_width_chroma_ub[],
464 int vm_group_bytes[],
465 unsigned int dpde0_bytes_per_frame_ub_l[],
466 unsigned int dpde0_bytes_per_frame_ub_c[],
467 int meta_pte_bytes_per_frame_ub_l[],
468 int meta_pte_bytes_per_frame_ub_c[],
469 double TimePerVMGroupVBlank[],
470 double TimePerVMGroupFlip[],
471 double TimePerVMRequestVBlank[],
472 double TimePerVMRequestFlip[]);
473
474 static void CalculateStutterEfficiency(
475 struct display_mode_lib *mode_lib,
476 int CompressedBufferSizeInkByte,
477 bool UnboundedRequestEnabled,
478 int ConfigReturnBufferSizeInKByte,
479 int MetaFIFOSizeInKEntries,
480 int ZeroSizeBufferEntries,
481 int NumberOfActivePlanes,
482 int ROBBufferSizeInKByte,
483 double TotalDataReadBandwidth,
484 double DCFCLK,
485 double ReturnBW,
486 double COMPBUF_RESERVED_SPACE_64B,
487 double COMPBUF_RESERVED_SPACE_ZS,
488 double SRExitTime,
489 double SRExitZ8Time,
490 bool SynchronizedVBlank,
491 double Z8StutterEnterPlusExitWatermark,
492 double StutterEnterPlusExitWatermark,
493 bool ProgressiveToInterlaceUnitInOPP,
494 bool Interlace[],
495 double MinTTUVBlank[],
496 int DPPPerPlane[],
497 unsigned int DETBufferSizeY[],
498 int BytePerPixelY[],
499 double BytePerPixelDETY[],
500 double SwathWidthY[],
501 int SwathHeightY[],
502 int SwathHeightC[],
503 double NetDCCRateLuma[],
504 double NetDCCRateChroma[],
505 double DCCFractionOfZeroSizeRequestsLuma[],
506 double DCCFractionOfZeroSizeRequestsChroma[],
507 int HTotal[],
508 int VTotal[],
509 double PixelClock[],
510 double VRatio[],
511 enum scan_direction_class SourceScan[],
512 int BlockHeight256BytesY[],
513 int BlockWidth256BytesY[],
514 int BlockHeight256BytesC[],
515 int BlockWidth256BytesC[],
516 int DCCYMaxUncompressedBlock[],
517 int DCCCMaxUncompressedBlock[],
518 int VActive[],
519 bool DCCEnable[],
520 bool WritebackEnable[],
521 double ReadBandwidthPlaneLuma[],
522 double ReadBandwidthPlaneChroma[],
523 double meta_row_bw[],
524 double dpte_row_bw[],
525 double *StutterEfficiencyNotIncludingVBlank,
526 double *StutterEfficiency,
527 int *NumberOfStutterBurstsPerFrame,
528 double *Z8StutterEfficiencyNotIncludingVBlank,
529 double *Z8StutterEfficiency,
530 int *Z8NumberOfStutterBurstsPerFrame,
531 double *StutterPeriod);
532
533 static void CalculateSwathAndDETConfiguration(
534 bool ForceSingleDPP,
535 int NumberOfActivePlanes,
536 unsigned int DETBufferSizeInKByte,
537 double MaximumSwathWidthLuma[],
538 double MaximumSwathWidthChroma[],
539 enum scan_direction_class SourceScan[],
540 enum source_format_class SourcePixelFormat[],
541 enum dm_swizzle_mode SurfaceTiling[],
542 int ViewportWidth[],
543 int ViewportHeight[],
544 int SurfaceWidthY[],
545 int SurfaceWidthC[],
546 int SurfaceHeightY[],
547 int SurfaceHeightC[],
548 int Read256BytesBlockHeightY[],
549 int Read256BytesBlockHeightC[],
550 int Read256BytesBlockWidthY[],
551 int Read256BytesBlockWidthC[],
552 enum odm_combine_mode ODMCombineEnabled[],
553 int BlendingAndTiming[],
554 int BytePerPixY[],
555 int BytePerPixC[],
556 double BytePerPixDETY[],
557 double BytePerPixDETC[],
558 int HActive[],
559 double HRatio[],
560 double HRatioChroma[],
561 int DPPPerPlane[],
562 int swath_width_luma_ub[],
563 int swath_width_chroma_ub[],
564 double SwathWidth[],
565 double SwathWidthChroma[],
566 int SwathHeightY[],
567 int SwathHeightC[],
568 unsigned int DETBufferSizeY[],
569 unsigned int DETBufferSizeC[],
570 bool ViewportSizeSupportPerPlane[],
571 bool *ViewportSizeSupport);
572 static void CalculateSwathWidth(
573 bool ForceSingleDPP,
574 int NumberOfActivePlanes,
575 enum source_format_class SourcePixelFormat[],
576 enum scan_direction_class SourceScan[],
577 int ViewportWidth[],
578 int ViewportHeight[],
579 int SurfaceWidthY[],
580 int SurfaceWidthC[],
581 int SurfaceHeightY[],
582 int SurfaceHeightC[],
583 enum odm_combine_mode ODMCombineEnabled[],
584 int BytePerPixY[],
585 int BytePerPixC[],
586 int Read256BytesBlockHeightY[],
587 int Read256BytesBlockHeightC[],
588 int Read256BytesBlockWidthY[],
589 int Read256BytesBlockWidthC[],
590 int BlendingAndTiming[],
591 int HActive[],
592 double HRatio[],
593 int DPPPerPlane[],
594 double SwathWidthSingleDPPY[],
595 double SwathWidthSingleDPPC[],
596 double SwathWidthY[],
597 double SwathWidthC[],
598 int MaximumSwathHeightY[],
599 int MaximumSwathHeightC[],
600 int swath_width_luma_ub[],
601 int swath_width_chroma_ub[]);
602
603 static double CalculateExtraLatency(
604 int RoundTripPingLatencyCycles,
605 int ReorderingBytes,
606 double DCFCLK,
607 int TotalNumberOfActiveDPP,
608 int PixelChunkSizeInKByte,
609 int TotalNumberOfDCCActiveDPP,
610 int MetaChunkSize,
611 double ReturnBW,
612 bool GPUVMEnable,
613 bool HostVMEnable,
614 int NumberOfActivePlanes,
615 int NumberOfDPP[],
616 int dpte_group_bytes[],
617 double HostVMInefficiencyFactor,
618 double HostVMMinPageSize,
619 int HostVMMaxNonCachedPageTableLevels);
620
621 static double CalculateExtraLatencyBytes(
622 int ReorderingBytes,
623 int TotalNumberOfActiveDPP,
624 int PixelChunkSizeInKByte,
625 int TotalNumberOfDCCActiveDPP,
626 int MetaChunkSize,
627 bool GPUVMEnable,
628 bool HostVMEnable,
629 int NumberOfActivePlanes,
630 int NumberOfDPP[],
631 int dpte_group_bytes[],
632 double HostVMInefficiencyFactor,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635
636 static double CalculateUrgentLatency(
637 double UrgentLatencyPixelDataOnly,
638 double UrgentLatencyPixelMixedWithVMData,
639 double UrgentLatencyVMDataOnly,
640 bool DoUrgentLatencyAdjustment,
641 double UrgentLatencyAdjustmentFabricClockComponent,
642 double UrgentLatencyAdjustmentFabricClockReference,
643 double FabricClockSingle);
644
645 static void CalculateUnboundedRequestAndCompressedBufferSize(
646 unsigned int DETBufferSizeInKByte,
647 int ConfigReturnBufferSizeInKByte,
648 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
649 int TotalActiveDPP,
650 bool NoChromaPlanes,
651 int MaxNumDPP,
652 int CompressedBufferSegmentSizeInkByteFinal,
653 enum output_encoder_class *Output,
654 bool *UnboundedRequestEnabled,
655 int *CompressedBufferSizeInkByte);
656
657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
658
/*
 * dml31_recalculate - run the complete dml31 calculation sequence.
 *
 * Every step receives the same mode_lib pointer and the steps run strictly
 * in this order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined, a trace line is printed just before
 * step 4.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
669
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)670 static unsigned int dscceComputeDelay(
671 unsigned int bpc,
672 double BPP,
673 unsigned int sliceWidth,
674 unsigned int numSlices,
675 enum output_format_class pixelFormat,
676 enum output_encoder_class Output)
677 {
678 // valid bpc = source bits per component in the set of {8, 10, 12}
679 // valid bpp = increments of 1/16 of a bit
680 // min = 6/7/8 in N420/N422/444, respectively
681 // max = such that compression is 1:1
682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
685
686 // fixed value
687 unsigned int rcModelSize = 8192;
688
689 // N422/N420 operate at 2 pixels per clock
690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
691
692 if (pixelFormat == dm_420)
693 pixelsPerClock = 2;
694 else if (pixelFormat == dm_444)
695 pixelsPerClock = 1;
696 else if (pixelFormat == dm_n422)
697 pixelsPerClock = 2;
698 // #all other modes operate at 1 pixel per clock
699 else
700 pixelsPerClock = 1;
701
702 //initial transmit delay as per PPS
703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
704
705 //compute ssm delay
706 if (bpc == 8)
707 D = 81;
708 else if (bpc == 10)
709 D = 89;
710 else
711 D = 113;
712
713 //divide by pixel per cycle to compute slice width as seen by DSC
714 w = sliceWidth / pixelsPerClock;
715
716 //422 mode has an additional cycle of delay
717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
718 s = 0;
719 else
720 s = 1;
721
722 //main calculation for the dscce
723 ix = initalXmitDelay + 45;
724 wx = (w + 2) / 3;
725 P = 3 * wx - w;
726 l0 = ix / w;
727 a = ix + P * l0;
728 ax = (a + 2) / 3 + D + 6 + 1;
729 L = (ax + wx - 1) / wx;
730 if ((ix % w) == 0 && P != 0)
731 lstall = 1;
732 else
733 lstall = 0;
734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735
736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
737 pixels = Delay * 3 * pixelsPerClock;
738 return pixels;
739 }
740
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 {
743 unsigned int Delay = 0;
744
745 if (pixelFormat == dm_420) {
746 // sfr
747 Delay = Delay + 2;
748 // dsccif
749 Delay = Delay + 0;
750 // dscc - input deserializer
751 Delay = Delay + 3;
752 // dscc gets pixels every other cycle
753 Delay = Delay + 2;
754 // dscc - input cdc fifo
755 Delay = Delay + 12;
756 // dscc gets pixels every other cycle
757 Delay = Delay + 13;
758 // dscc - cdc uncertainty
759 Delay = Delay + 2;
760 // dscc - output cdc fifo
761 Delay = Delay + 7;
762 // dscc gets pixels every other cycle
763 Delay = Delay + 3;
764 // dscc - cdc uncertainty
765 Delay = Delay + 2;
766 // dscc - output serializer
767 Delay = Delay + 1;
768 // sft
769 Delay = Delay + 1;
770 } else if (pixelFormat == dm_n422) {
771 // sfr
772 Delay = Delay + 2;
773 // dsccif
774 Delay = Delay + 1;
775 // dscc - input deserializer
776 Delay = Delay + 5;
777 // dscc - input cdc fifo
778 Delay = Delay + 25;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 10;
783 // dscc - cdc uncertainty
784 Delay = Delay + 2;
785 // dscc - output serializer
786 Delay = Delay + 1;
787 // sft
788 Delay = Delay + 1;
789 } else {
790 // sfr
791 Delay = Delay + 2;
792 // dsccif
793 Delay = Delay + 0;
794 // dscc - input deserializer
795 Delay = Delay + 3;
796 // dscc - input cdc fifo
797 Delay = Delay + 12;
798 // dscc - cdc uncertainty
799 Delay = Delay + 2;
800 // dscc - output cdc fifo
801 Delay = Delay + 7;
802 // dscc - output serializer
803 Delay = Delay + 1;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // sft
807 Delay = Delay + 1;
808 }
809
810 return Delay;
811 }
812
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)813 static bool CalculatePrefetchSchedule(
814 struct display_mode_lib *mode_lib,
815 double HostVMInefficiencyFactor,
816 Pipe *myPipe,
817 unsigned int DSCDelay,
818 double DPPCLKDelaySubtotalPlusCNVCFormater,
819 double DPPCLKDelaySCL,
820 double DPPCLKDelaySCLLBOnly,
821 double DPPCLKDelayCNVCCursor,
822 double DISPCLKDelaySubtotal,
823 unsigned int DPP_RECOUT_WIDTH,
824 enum output_format_class OutputFormat,
825 unsigned int MaxInterDCNTileRepeaters,
826 unsigned int VStartup,
827 unsigned int MaxVStartup,
828 unsigned int GPUVMPageTableLevels,
829 bool GPUVMEnable,
830 bool HostVMEnable,
831 unsigned int HostVMMaxNonCachedPageTableLevels,
832 double HostVMMinPageSize,
833 bool DynamicMetadataEnable,
834 bool DynamicMetadataVMEnabled,
835 int DynamicMetadataLinesBeforeActiveRequired,
836 unsigned int DynamicMetadataTransmittedBytes,
837 double UrgentLatency,
838 double UrgentExtraLatency,
839 double TCalc,
840 unsigned int PDEAndMetaPTEBytesFrame,
841 unsigned int MetaRowByte,
842 unsigned int PixelPTEBytesPerRow,
843 double PrefetchSourceLinesY,
844 unsigned int SwathWidthY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 double VInitPreFillC,
850 unsigned int MaxNumSwathC,
851 int swath_width_luma_ub,
852 int swath_width_chroma_ub,
853 unsigned int SwathHeightY,
854 unsigned int SwathHeightC,
855 double TWait,
856 double *DSTXAfterScaler,
857 double *DSTYAfterScaler,
858 double *DestinationLinesForPrefetch,
859 double *PrefetchBandwidth,
860 double *DestinationLinesToRequestVMInVBlank,
861 double *DestinationLinesToRequestRowInVBlank,
862 double *VRatioPrefetchY,
863 double *VRatioPrefetchC,
864 double *RequiredPrefetchPixDataBWLuma,
865 double *RequiredPrefetchPixDataBWChroma,
866 bool *NotEnoughTimeForDynamicMetadata,
867 double *Tno_bw,
868 double *prefetch_vmrow_bw,
869 double *Tdmdl_vm,
870 double *Tdmdl,
871 double *TSetup,
872 int *VUpdateOffsetPix,
873 double *VUpdateWidthPix,
874 double *VReadyOffsetPix)
875 {
876 bool MyError = false;
877 unsigned int DPPCycles, DISPCLKCycles;
878 double DSTTotalPixelsAfterScaler;
879 double LineTime;
880 double dst_y_prefetch_equ;
881 double Tsw_oto;
882 double prefetch_bw_oto;
883 double prefetch_bw_pr;
884 double Tvm_oto;
885 double Tr0_oto;
886 double Tvm_oto_lines;
887 double Tr0_oto_lines;
888 double dst_y_prefetch_oto;
889 double TimeForFetchingMetaPTE = 0;
890 double TimeForFetchingRowInVBlank = 0;
891 double LinesToRequestPrefetchPixelData = 0;
892 unsigned int HostVMDynamicLevelsTrips;
893 double trip_to_mem;
894 double Tvm_trips;
895 double Tr0_trips;
896 double Tvm_trips_rounded;
897 double Tr0_trips_rounded;
898 double Lsw_oto;
899 double Tpre_rounded;
900 double prefetch_bw_equ;
901 double Tvm_equ;
902 double Tr0_equ;
903 double Tdmbf;
904 double Tdmec;
905 double Tdmsks;
906 double prefetch_sw_bytes;
907 double bytes_pp;
908 double dep_bytes;
909 int max_vratio_pre = 4;
910 double min_Lsw;
911 double Tsw_est1 = 0;
912 double Tsw_est3 = 0;
913 double max_Tsw = 0;
914
915 if (GPUVMEnable == true && HostVMEnable == true) {
916 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
917 } else {
918 HostVMDynamicLevelsTrips = 0;
919 }
920 #ifdef __DML_VBA_DEBUG__
921 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
922 #endif
923 CalculateVupdateAndDynamicMetadataParameters(
924 MaxInterDCNTileRepeaters,
925 myPipe->DPPCLK,
926 myPipe->DISPCLK,
927 myPipe->DCFCLKDeepSleep,
928 myPipe->PixelClock,
929 myPipe->HTotal,
930 myPipe->VBlank,
931 DynamicMetadataTransmittedBytes,
932 DynamicMetadataLinesBeforeActiveRequired,
933 myPipe->InterlaceEnable,
934 myPipe->ProgressiveToInterlaceUnitInOPP,
935 TSetup,
936 &Tdmbf,
937 &Tdmec,
938 &Tdmsks,
939 VUpdateOffsetPix,
940 VUpdateWidthPix,
941 VReadyOffsetPix);
942
943 LineTime = myPipe->HTotal / myPipe->PixelClock;
944 trip_to_mem = UrgentLatency;
945 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
946
947 #ifdef __DML_VBA_ALLOW_DELTA__
948 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
949 #else
950 if (DynamicMetadataVMEnabled == true) {
951 #endif
952 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
953 } else {
954 *Tdmdl = TWait + UrgentExtraLatency;
955 }
956
957 #ifdef __DML_VBA_ALLOW_DELTA__
958 if (DynamicMetadataEnable == false) {
959 *Tdmdl = 0.0;
960 }
961 #endif
962
963 if (DynamicMetadataEnable == true) {
964 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
965 *NotEnoughTimeForDynamicMetadata = true;
966 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
967 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
968 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
969 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
970 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
971 } else {
972 *NotEnoughTimeForDynamicMetadata = false;
973 }
974 } else {
975 *NotEnoughTimeForDynamicMetadata = false;
976 }
977
978 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
979
980 if (myPipe->ScalerEnabled)
981 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
982 else
983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
984
985 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
986
987 DISPCLKCycles = DISPCLKDelaySubtotal;
988
989 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
990 return true;
991
992 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
993
994 #ifdef __DML_VBA_DEBUG__
995 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
996 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
997 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
998 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
999 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1000 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1001 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1002 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1003 #endif
1004
1005 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1006
1007 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1008 *DSTYAfterScaler = 1;
1009 else
1010 *DSTYAfterScaler = 0;
1011
1012 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1013 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1014 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1015
1016 #ifdef __DML_VBA_DEBUG__
1017 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1018 #endif
1019
1020 MyError = false;
1021
1022 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1023 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1024 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1025
1026 #ifdef __DML_VBA_ALLOW_DELTA__
1027 if (!myPipe->DCCEnable) {
1028 Tr0_trips = 0.0;
1029 Tr0_trips_rounded = 0.0;
1030 }
1031 #endif
1032
1033 if (!GPUVMEnable) {
1034 Tvm_trips = 0.0;
1035 Tvm_trips_rounded = 0.0;
1036 }
1037
1038 if (GPUVMEnable) {
1039 if (GPUVMPageTableLevels >= 3) {
1040 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1041 } else {
1042 *Tno_bw = 0;
1043 }
1044 } else if (!myPipe->DCCEnable) {
1045 *Tno_bw = LineTime;
1046 } else {
1047 *Tno_bw = LineTime / 4;
1048 }
1049
1050 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1051 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1052 else
1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1054 /*rev 99*/
1055 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1056 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1057 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1058 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1059 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1060
1061 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1062 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1063 Tsw_oto = Lsw_oto * LineTime;
1064
1065 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1066
1067 #ifdef __DML_VBA_DEBUG__
1068 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1069 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1070 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1071 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1072 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1073 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1074 #endif
1075
1076 if (GPUVMEnable == true)
1077 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1078 else
1079 Tvm_oto = LineTime / 4.0;
1080
1081 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1082 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1083 LineTime - Tvm_oto,
1084 LineTime / 4);
1085 } else {
1086 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1087 }
1088
1089 #ifdef __DML_VBA_DEBUG__
1090 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1091 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1092 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1093 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1094 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1095 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1096 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1097 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1098 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1099 #endif
1100
1101 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1102 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1103 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1104 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1105 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1106 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1107
1108 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1109
1110 if (prefetch_sw_bytes < dep_bytes)
1111 prefetch_sw_bytes = 2 * dep_bytes;
1112
1113 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1114 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1115 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1116 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1117 dml_print("DML: LineTime: %f\n", LineTime);
1118 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1119
1120 dml_print("DML: LineTime: %f\n", LineTime);
1121 dml_print("DML: VStartup: %d\n", VStartup);
1122 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1123 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1124 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1125 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1126 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1127 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1128 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1129 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1130 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1131 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1132 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1133
1134 *PrefetchBandwidth = 0;
1135 *DestinationLinesToRequestVMInVBlank = 0;
1136 *DestinationLinesToRequestRowInVBlank = 0;
1137 *VRatioPrefetchY = 0;
1138 *VRatioPrefetchC = 0;
1139 *RequiredPrefetchPixDataBWLuma = 0;
1140 if (dst_y_prefetch_equ > 1) {
1141 double PrefetchBandwidth1;
1142 double PrefetchBandwidth2;
1143 double PrefetchBandwidth3;
1144 double PrefetchBandwidth4;
1145
1146 if (Tpre_rounded - *Tno_bw > 0) {
1147 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1148 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1149 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1150 } else {
1151 PrefetchBandwidth1 = 0;
1152 }
1153
1154 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1155 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1156 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1157 }
1158
1159 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1160 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1161 else
1162 PrefetchBandwidth2 = 0;
1163
1164 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1165 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1166 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1167 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1168 } else {
1169 PrefetchBandwidth3 = 0;
1170 }
1171
1172 #ifdef __DML_VBA_DEBUG__
1173 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1174 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1175 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1176 #endif
1177 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1178 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1179 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1180 }
1181
1182 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1183 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1184 else
1185 PrefetchBandwidth4 = 0;
1186
1187 {
1188 bool Case1OK;
1189 bool Case2OK;
1190 bool Case3OK;
1191
1192 if (PrefetchBandwidth1 > 0) {
1193 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1194 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1195 Case1OK = true;
1196 } else {
1197 Case1OK = false;
1198 }
1199 } else {
1200 Case1OK = false;
1201 }
1202
1203 if (PrefetchBandwidth2 > 0) {
1204 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1205 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1206 Case2OK = true;
1207 } else {
1208 Case2OK = false;
1209 }
1210 } else {
1211 Case2OK = false;
1212 }
1213
1214 if (PrefetchBandwidth3 > 0) {
1215 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1216 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1217 Case3OK = true;
1218 } else {
1219 Case3OK = false;
1220 }
1221 } else {
1222 Case3OK = false;
1223 }
1224
1225 if (Case1OK) {
1226 prefetch_bw_equ = PrefetchBandwidth1;
1227 } else if (Case2OK) {
1228 prefetch_bw_equ = PrefetchBandwidth2;
1229 } else if (Case3OK) {
1230 prefetch_bw_equ = PrefetchBandwidth3;
1231 } else {
1232 prefetch_bw_equ = PrefetchBandwidth4;
1233 }
1234
1235 #ifdef __DML_VBA_DEBUG__
1236 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1237 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1238 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1239 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1240 #endif
1241
1242 if (prefetch_bw_equ > 0) {
1243 if (GPUVMEnable == true) {
1244 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1245 } else {
1246 Tvm_equ = LineTime / 4;
1247 }
1248
1249 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1250 Tr0_equ = dml_max4(
1251 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1252 Tr0_trips,
1253 (LineTime - Tvm_equ) / 2,
1254 LineTime / 4);
1255 } else {
1256 Tr0_equ = (LineTime - Tvm_equ) / 2;
1257 }
1258 } else {
1259 Tvm_equ = 0;
1260 Tr0_equ = 0;
1261 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1262 }
1263 }
1264
1265 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1266 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1267 TimeForFetchingMetaPTE = Tvm_oto;
1268 TimeForFetchingRowInVBlank = Tr0_oto;
1269 *PrefetchBandwidth = prefetch_bw_oto;
1270 } else {
1271 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1272 TimeForFetchingMetaPTE = Tvm_equ;
1273 TimeForFetchingRowInVBlank = Tr0_equ;
1274 *PrefetchBandwidth = prefetch_bw_equ;
1275 }
1276
1277 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1278
1279 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1280
1281 #ifdef __DML_VBA_ALLOW_DELTA__
1282 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1283 // See note above dated 5/30/2018
1284 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1285 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1286 #else
1287 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1288 #endif
1289
1290 #ifdef __DML_VBA_DEBUG__
1291 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1292 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1293 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1294 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1295 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1296 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1297 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1298 #endif
1299
1300 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1301
1302 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1303 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1304 #ifdef __DML_VBA_DEBUG__
1305 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1306 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1307 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1308 #endif
1309 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1310 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1311 *VRatioPrefetchY = dml_max(
1312 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1313 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1314 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1315 } else {
1316 MyError = true;
1317 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1318 *VRatioPrefetchY = 0;
1319 }
1320 #ifdef __DML_VBA_DEBUG__
1321 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1322 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1323 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1324 #endif
1325 }
1326
1327 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1328 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1329
1330 #ifdef __DML_VBA_DEBUG__
1331 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1332 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1333 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1334 #endif
1335 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1336 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1337 *VRatioPrefetchC = dml_max(
1338 *VRatioPrefetchC,
1339 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1340 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1341 } else {
1342 MyError = true;
1343 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1344 *VRatioPrefetchC = 0;
1345 }
1346 #ifdef __DML_VBA_DEBUG__
1347 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1348 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1349 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1350 #endif
1351 }
1352
1353 #ifdef __DML_VBA_DEBUG__
1354 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1355 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1356 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1357 #endif
1358
1359 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1360
1361 #ifdef __DML_VBA_DEBUG__
1362 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1363 #endif
1364
1365 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1366 / LineTime;
1367 } else {
1368 MyError = true;
1369 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1370 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1371 *VRatioPrefetchY = 0;
1372 *VRatioPrefetchC = 0;
1373 *RequiredPrefetchPixDataBWLuma = 0;
1374 *RequiredPrefetchPixDataBWChroma = 0;
1375 }
1376
1377 dml_print(
1378 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1379 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1380 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1381 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1382 dml_print(
1383 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1384 (double) LinesToRequestPrefetchPixelData * LineTime);
1385 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1386 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1387 (double) myPipe->HTotal)) * LineTime);
1388 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1389 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1390 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1391 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1392 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1393
1394 } else {
1395 MyError = true;
1396 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1397 }
1398
1399 {
1400 double prefetch_vm_bw;
1401 double prefetch_row_bw;
1402
1403 if (PDEAndMetaPTEBytesFrame == 0) {
1404 prefetch_vm_bw = 0;
1405 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1406 #ifdef __DML_VBA_DEBUG__
1407 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1408 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1409 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1410 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1411 #endif
1412 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1413 #ifdef __DML_VBA_DEBUG__
1414 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1415 #endif
1416 } else {
1417 prefetch_vm_bw = 0;
1418 MyError = true;
1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1420 }
1421
1422 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1423 prefetch_row_bw = 0;
1424 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1425 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1426
1427 #ifdef __DML_VBA_DEBUG__
1428 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1429 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1430 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1431 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1432 #endif
1433 } else {
1434 prefetch_row_bw = 0;
1435 MyError = true;
1436 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1437 }
1438
1439 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1440 }
1441
1442 if (MyError) {
1443 *PrefetchBandwidth = 0;
1444 TimeForFetchingMetaPTE = 0;
1445 TimeForFetchingRowInVBlank = 0;
1446 *DestinationLinesToRequestVMInVBlank = 0;
1447 *DestinationLinesToRequestRowInVBlank = 0;
1448 *DestinationLinesForPrefetch = 0;
1449 LinesToRequestPrefetchPixelData = 0;
1450 *VRatioPrefetchY = 0;
1451 *VRatioPrefetchC = 0;
1452 *RequiredPrefetchPixDataBWLuma = 0;
1453 *RequiredPrefetchPixDataBWChroma = 0;
1454 }
1455
1456 return MyError;
1457 }
1458
/*
 * Return (4 * VCOSpeed) divided by a divider derived from Clock.
 *
 * The divider is (4 * VCOSpeed) / Clock passed through dml_floor(..., 1).
 * NOTE(review): dml_floor is defined outside this file; presumably it rounds
 * down to a multiple of its second argument, which would make the returned
 * clock >= Clock for positive inputs ("round up") - confirm against the DML
 * inline helpers.
 *
 * No guard is applied here for Clock == 0 or for a divider of 0.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double vco_x4 = VCOSpeed * 4;
	double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1463
/*
 * Return (4 * VCOSpeed) divided by a divider derived from Clock.
 *
 * The divider is (4 * VCOSpeed) / Clock passed through dml_ceil(..., 1).
 * NOTE(review): dml_ceil is defined outside this file; presumably it rounds
 * up to a multiple of its second argument, which would make the returned
 * clock <= Clock for positive inputs ("round down") - confirm against the DML
 * inline helpers.
 *
 * No guard is applied here for Clock == 0.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double vco_x4 = VCOSpeed * 4;
	double divider = dml_ceil(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1468
1469 static void CalculateDCCConfiguration(
1470 bool DCCEnabled,
1471 bool DCCProgrammingAssumesScanDirectionUnknown,
1472 enum source_format_class SourcePixelFormat,
1473 unsigned int SurfaceWidthLuma,
1474 unsigned int SurfaceWidthChroma,
1475 unsigned int SurfaceHeightLuma,
1476 unsigned int SurfaceHeightChroma,
1477 double DETBufferSize,
1478 unsigned int RequestHeight256ByteLuma,
1479 unsigned int RequestHeight256ByteChroma,
1480 enum dm_swizzle_mode TilingFormat,
1481 unsigned int BytePerPixelY,
1482 unsigned int BytePerPixelC,
1483 double BytePerPixelDETY,
1484 double BytePerPixelDETC,
1485 enum scan_direction_class ScanOrientation,
1486 unsigned int *MaxUncompressedBlockLuma,
1487 unsigned int *MaxUncompressedBlockChroma,
1488 unsigned int *MaxCompressedBlockLuma,
1489 unsigned int *MaxCompressedBlockChroma,
1490 unsigned int *IndependentBlockLuma,
1491 unsigned int *IndependentBlockChroma)
1492 {
1493 int yuv420;
1494 int horz_div_l;
1495 int horz_div_c;
1496 int vert_div_l;
1497 int vert_div_c;
1498
1499 int swath_buf_size;
1500 double detile_buf_vp_horz_limit;
1501 double detile_buf_vp_vert_limit;
1502
1503 int MAS_vp_horz_limit;
1504 int MAS_vp_vert_limit;
1505 int max_vp_horz_width;
1506 int max_vp_vert_height;
1507 int eff_surf_width_l;
1508 int eff_surf_width_c;
1509 int eff_surf_height_l;
1510 int eff_surf_height_c;
1511
1512 int full_swath_bytes_horz_wc_l;
1513 int full_swath_bytes_horz_wc_c;
1514 int full_swath_bytes_vert_wc_l;
1515 int full_swath_bytes_vert_wc_c;
1516 int req128_horz_wc_l;
1517 int req128_horz_wc_c;
1518 int req128_vert_wc_l;
1519 int req128_vert_wc_c;
1520 int segment_order_horz_contiguous_luma;
1521 int segment_order_horz_contiguous_chroma;
1522 int segment_order_vert_contiguous_luma;
1523 int segment_order_vert_contiguous_chroma;
1524
1525 typedef enum {
1526 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1527 } RequestType;
1528 RequestType RequestLuma;
1529 RequestType RequestChroma;
1530
1531 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1532 horz_div_l = 1;
1533 horz_div_c = 1;
1534 vert_div_l = 1;
1535 vert_div_c = 1;
1536
1537 if (BytePerPixelY == 1)
1538 vert_div_l = 0;
1539 if (BytePerPixelC == 1)
1540 vert_div_c = 0;
1541 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1542 horz_div_l = 0;
1543 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1544 horz_div_c = 0;
1545
1546 if (BytePerPixelC == 0) {
1547 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1548 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1549 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1550 } else {
1551 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1552 detile_buf_vp_horz_limit = (double) swath_buf_size
1553 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1554 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1555 detile_buf_vp_vert_limit = (double) swath_buf_size
1556 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1557 }
1558
1559 if (SourcePixelFormat == dm_420_10) {
1560 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1561 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1562 }
1563
1564 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1565 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1566
1567 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1568 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1569 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1570 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1571 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1572 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1573 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1574 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1575
1576 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1577 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1578 if (BytePerPixelC > 0) {
1579 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1580 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1581 } else {
1582 full_swath_bytes_horz_wc_c = 0;
1583 full_swath_bytes_vert_wc_c = 0;
1584 }
1585
1586 if (SourcePixelFormat == dm_420_10) {
1587 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1588 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1589 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1590 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1591 }
1592
1593 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1594 req128_horz_wc_l = 0;
1595 req128_horz_wc_c = 0;
1596 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1597 req128_horz_wc_l = 0;
1598 req128_horz_wc_c = 1;
1599 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1600 req128_horz_wc_l = 1;
1601 req128_horz_wc_c = 0;
1602 } else {
1603 req128_horz_wc_l = 1;
1604 req128_horz_wc_c = 1;
1605 }
1606
1607 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1608 req128_vert_wc_l = 0;
1609 req128_vert_wc_c = 0;
1610 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1611 req128_vert_wc_l = 0;
1612 req128_vert_wc_c = 1;
1613 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1614 req128_vert_wc_l = 1;
1615 req128_vert_wc_c = 0;
1616 } else {
1617 req128_vert_wc_l = 1;
1618 req128_vert_wc_c = 1;
1619 }
1620
1621 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1622 segment_order_horz_contiguous_luma = 0;
1623 } else {
1624 segment_order_horz_contiguous_luma = 1;
1625 }
1626 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1627 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1628 segment_order_vert_contiguous_luma = 0;
1629 } else {
1630 segment_order_vert_contiguous_luma = 1;
1631 }
1632 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1633 segment_order_horz_contiguous_chroma = 0;
1634 } else {
1635 segment_order_horz_contiguous_chroma = 1;
1636 }
1637 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1638 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1639 segment_order_vert_contiguous_chroma = 0;
1640 } else {
1641 segment_order_vert_contiguous_chroma = 1;
1642 }
1643
1644 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1645 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1646 RequestLuma = REQ_256Bytes;
1647 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1648 RequestLuma = REQ_128BytesNonContiguous;
1649 } else {
1650 RequestLuma = REQ_128BytesContiguous;
1651 }
1652 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1653 RequestChroma = REQ_256Bytes;
1654 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1655 RequestChroma = REQ_128BytesNonContiguous;
1656 } else {
1657 RequestChroma = REQ_128BytesContiguous;
1658 }
1659 } else if (ScanOrientation != dm_vert) {
1660 if (req128_horz_wc_l == 0) {
1661 RequestLuma = REQ_256Bytes;
1662 } else if (segment_order_horz_contiguous_luma == 0) {
1663 RequestLuma = REQ_128BytesNonContiguous;
1664 } else {
1665 RequestLuma = REQ_128BytesContiguous;
1666 }
1667 if (req128_horz_wc_c == 0) {
1668 RequestChroma = REQ_256Bytes;
1669 } else if (segment_order_horz_contiguous_chroma == 0) {
1670 RequestChroma = REQ_128BytesNonContiguous;
1671 } else {
1672 RequestChroma = REQ_128BytesContiguous;
1673 }
1674 } else {
1675 if (req128_vert_wc_l == 0) {
1676 RequestLuma = REQ_256Bytes;
1677 } else if (segment_order_vert_contiguous_luma == 0) {
1678 RequestLuma = REQ_128BytesNonContiguous;
1679 } else {
1680 RequestLuma = REQ_128BytesContiguous;
1681 }
1682 if (req128_vert_wc_c == 0) {
1683 RequestChroma = REQ_256Bytes;
1684 } else if (segment_order_vert_contiguous_chroma == 0) {
1685 RequestChroma = REQ_128BytesNonContiguous;
1686 } else {
1687 RequestChroma = REQ_128BytesContiguous;
1688 }
1689 }
1690
1691 if (RequestLuma == REQ_256Bytes) {
1692 *MaxUncompressedBlockLuma = 256;
1693 *MaxCompressedBlockLuma = 256;
1694 *IndependentBlockLuma = 0;
1695 } else if (RequestLuma == REQ_128BytesContiguous) {
1696 *MaxUncompressedBlockLuma = 256;
1697 *MaxCompressedBlockLuma = 128;
1698 *IndependentBlockLuma = 128;
1699 } else {
1700 *MaxUncompressedBlockLuma = 256;
1701 *MaxCompressedBlockLuma = 64;
1702 *IndependentBlockLuma = 64;
1703 }
1704
1705 if (RequestChroma == REQ_256Bytes) {
1706 *MaxUncompressedBlockChroma = 256;
1707 *MaxCompressedBlockChroma = 256;
1708 *IndependentBlockChroma = 0;
1709 } else if (RequestChroma == REQ_128BytesContiguous) {
1710 *MaxUncompressedBlockChroma = 256;
1711 *MaxCompressedBlockChroma = 128;
1712 *IndependentBlockChroma = 128;
1713 } else {
1714 *MaxUncompressedBlockChroma = 256;
1715 *MaxCompressedBlockChroma = 64;
1716 *IndependentBlockChroma = 64;
1717 }
1718
1719 if (DCCEnabled != true || BytePerPixelC == 0) {
1720 *MaxUncompressedBlockChroma = 0;
1721 *MaxCompressedBlockChroma = 0;
1722 *IndependentBlockChroma = 0;
1723 }
1724
1725 if (DCCEnabled != true) {
1726 *MaxUncompressedBlockLuma = 0;
1727 *MaxCompressedBlockLuma = 0;
1728 *IndependentBlockLuma = 0;
1729 }
1730 }
1731
1732 static double CalculatePrefetchSourceLines(
1733 struct display_mode_lib *mode_lib,
1734 double VRatio,
1735 double vtaps,
1736 bool Interlace,
1737 bool ProgressiveToInterlaceUnitInOPP,
1738 unsigned int SwathHeight,
1739 unsigned int ViewportYStart,
1740 double *VInitPreFill,
1741 unsigned int *MaxNumSwath)
1742 {
1743 struct vba_vars_st *v = &mode_lib->vba;
1744 unsigned int MaxPartialSwath;
1745
1746 if (ProgressiveToInterlaceUnitInOPP)
1747 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1748 else
1749 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1750
1751 if (!v->IgnoreViewportPositioning) {
1752
1753 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1754
1755 if (*VInitPreFill > 1.0)
1756 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1757 else
1758 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1759 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1760
1761 } else {
1762
1763 if (ViewportYStart != 0)
1764 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1765
1766 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1767
1768 if (*VInitPreFill > 1.0)
1769 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1770 else
1771 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1772 }
1773
1774 #ifdef __DML_VBA_DEBUG__
1775 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1776 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1777 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1778 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1779 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1780 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1781 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1782 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1783 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1784 #endif
1785 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1786 }
1787
1788 static unsigned int CalculateVMAndRowBytes(
1789 struct display_mode_lib *mode_lib,
1790 bool DCCEnable,
1791 unsigned int BlockHeight256Bytes,
1792 unsigned int BlockWidth256Bytes,
1793 enum source_format_class SourcePixelFormat,
1794 unsigned int SurfaceTiling,
1795 unsigned int BytePerPixel,
1796 enum scan_direction_class ScanDirection,
1797 unsigned int SwathWidth,
1798 unsigned int ViewportHeight,
1799 bool GPUVMEnable,
1800 bool HostVMEnable,
1801 unsigned int HostVMMaxNonCachedPageTableLevels,
1802 unsigned int GPUVMMinPageSize,
1803 unsigned int HostVMMinPageSize,
1804 unsigned int PTEBufferSizeInRequests,
1805 unsigned int Pitch,
1806 unsigned int DCCMetaPitch,
1807 unsigned int *MacroTileWidth,
1808 unsigned int *MetaRowByte,
1809 unsigned int *PixelPTEBytesPerRow,
1810 bool *PTEBufferSizeNotExceeded,
1811 int *dpte_row_width_ub,
1812 unsigned int *dpte_row_height,
1813 unsigned int *MetaRequestWidth,
1814 unsigned int *MetaRequestHeight,
1815 unsigned int *meta_row_width,
1816 unsigned int *meta_row_height,
1817 int *vm_group_bytes,
1818 unsigned int *dpte_group_bytes,
1819 unsigned int *PixelPTEReqWidth,
1820 unsigned int *PixelPTEReqHeight,
1821 unsigned int *PTERequestSize,
1822 int *DPDE0BytesFrame,
1823 int *MetaPTEBytesFrame)
1824 {
1825 struct vba_vars_st *v = &mode_lib->vba;
1826 unsigned int MPDEBytesFrame;
1827 unsigned int DCCMetaSurfaceBytes;
1828 unsigned int MacroTileSizeBytes;
1829 unsigned int MacroTileHeight;
1830 unsigned int ExtraDPDEBytesFrame;
1831 unsigned int PDEAndMetaPTEBytesFrame;
1832 unsigned int PixelPTEReqHeightPTEs = 0;
1833 unsigned int HostVMDynamicLevels = 0;
1834 double FractionOfPTEReturnDrop;
1835
1836 if (GPUVMEnable == true && HostVMEnable == true) {
1837 if (HostVMMinPageSize < 2048) {
1838 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1839 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1840 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1841 } else {
1842 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1843 }
1844 }
1845
1846 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1847 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1848 if (ScanDirection != dm_vert) {
1849 *meta_row_height = *MetaRequestHeight;
1850 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1851 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1852 } else {
1853 *meta_row_height = *MetaRequestWidth;
1854 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1855 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1856 }
1857 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1858 if (GPUVMEnable == true) {
1859 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1860 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1861 } else {
1862 *MetaPTEBytesFrame = 0;
1863 MPDEBytesFrame = 0;
1864 }
1865
1866 if (DCCEnable != true) {
1867 *MetaPTEBytesFrame = 0;
1868 MPDEBytesFrame = 0;
1869 *MetaRowByte = 0;
1870 }
1871
1872 if (SurfaceTiling == dm_sw_linear) {
1873 MacroTileSizeBytes = 256;
1874 MacroTileHeight = BlockHeight256Bytes;
1875 } else {
1876 MacroTileSizeBytes = 65536;
1877 MacroTileHeight = 16 * BlockHeight256Bytes;
1878 }
1879 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1880
1881 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1882 if (ScanDirection != dm_vert) {
1883 *DPDE0BytesFrame = 64
1884 * (dml_ceil(
1885 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1886 / (8 * 2097152),
1887 1) + 1);
1888 } else {
1889 *DPDE0BytesFrame = 64
1890 * (dml_ceil(
1891 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1892 / (8 * 2097152),
1893 1) + 1);
1894 }
1895 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1896 } else {
1897 *DPDE0BytesFrame = 0;
1898 ExtraDPDEBytesFrame = 0;
1899 }
1900
1901 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1902
1903 #ifdef __DML_VBA_DEBUG__
1904 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1905 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1906 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1907 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1908 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1909 #endif
1910
1911 if (HostVMEnable == true) {
1912 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1913 }
1914 #ifdef __DML_VBA_DEBUG__
1915 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1916 #endif
1917
1918 if (SurfaceTiling == dm_sw_linear) {
1919 PixelPTEReqHeightPTEs = 1;
1920 *PixelPTEReqHeight = 1;
1921 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1922 *PTERequestSize = 64;
1923 FractionOfPTEReturnDrop = 0;
1924 } else if (MacroTileSizeBytes == 4096) {
1925 PixelPTEReqHeightPTEs = 1;
1926 *PixelPTEReqHeight = MacroTileHeight;
1927 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1928 *PTERequestSize = 64;
1929 if (ScanDirection != dm_vert)
1930 FractionOfPTEReturnDrop = 0;
1931 else
1932 FractionOfPTEReturnDrop = 7 / 8;
1933 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1934 PixelPTEReqHeightPTEs = 16;
1935 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1936 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1937 *PTERequestSize = 128;
1938 FractionOfPTEReturnDrop = 0;
1939 } else {
1940 PixelPTEReqHeightPTEs = 1;
1941 *PixelPTEReqHeight = MacroTileHeight;
1942 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1943 *PTERequestSize = 64;
1944 FractionOfPTEReturnDrop = 0;
1945 }
1946
1947 if (SurfaceTiling == dm_sw_linear) {
1948 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1949 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1950 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1951 } else if (ScanDirection != dm_vert) {
1952 *dpte_row_height = *PixelPTEReqHeight;
1953 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1954 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1955 } else {
1956 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1957 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1958 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1959 }
1960
1961 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1962 *PTEBufferSizeNotExceeded = true;
1963 } else {
1964 *PTEBufferSizeNotExceeded = false;
1965 }
1966
1967 if (GPUVMEnable != true) {
1968 *PixelPTEBytesPerRow = 0;
1969 *PTEBufferSizeNotExceeded = true;
1970 }
1971
1972 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1973
1974 if (HostVMEnable == true) {
1975 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1976 }
1977
1978 if (HostVMEnable == true) {
1979 *vm_group_bytes = 512;
1980 *dpte_group_bytes = 512;
1981 } else if (GPUVMEnable == true) {
1982 *vm_group_bytes = 2048;
1983 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1984 *dpte_group_bytes = 512;
1985 } else {
1986 *dpte_group_bytes = 2048;
1987 }
1988 } else {
1989 *vm_group_bytes = 0;
1990 *dpte_group_bytes = 0;
1991 }
1992 return PDEAndMetaPTEBytesFrame;
1993 }
1994
1995 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
1996 {
1997 struct vba_vars_st *v = &mode_lib->vba;
1998 unsigned int j, k;
1999 double HostVMInefficiencyFactor = 1.0;
2000 bool NoChromaPlanes = true;
2001 int ReorderBytes;
2002 double VMDataOnlyReturnBW;
2003 double MaxTotalRDBandwidth = 0;
2004 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2005
2006 v->WritebackDISPCLK = 0.0;
2007 v->DISPCLKWithRamping = 0;
2008 v->DISPCLKWithoutRamping = 0;
2009 v->GlobalDPPCLK = 0.0;
	/* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2011 {
2012 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2013 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2014 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2015 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2016 if (v->HostVMEnable != true) {
2017 v->ReturnBW = dml_min(
2018 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2019 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2020 } else {
2021 v->ReturnBW = dml_min(
2022 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2023 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2024 }
2025 }
2026 /* End DAL custom code */
2027
2028 // DISPCLK and DPPCLK Calculation
2029 //
2030 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2031 if (v->WritebackEnable[k]) {
2032 v->WritebackDISPCLK = dml_max(
2033 v->WritebackDISPCLK,
2034 dml31_CalculateWriteBackDISPCLK(
2035 v->WritebackPixelFormat[k],
2036 v->PixelClock[k],
2037 v->WritebackHRatio[k],
2038 v->WritebackVRatio[k],
2039 v->WritebackHTaps[k],
2040 v->WritebackVTaps[k],
2041 v->WritebackSourceWidth[k],
2042 v->WritebackDestinationWidth[k],
2043 v->HTotal[k],
2044 v->WritebackLineBufferSize));
2045 }
2046 }
2047
2048 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2049 if (v->HRatio[k] > 1) {
2050 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2051 v->MaxDCHUBToPSCLThroughput,
2052 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2053 } else {
2054 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2055 }
2056
2057 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2058 * dml_max(
2059 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2060 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2061
2062 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2063 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2064 }
2065
2066 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2067 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2068 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2069 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2070 } else {
2071 if (v->HRatioChroma[k] > 1) {
2072 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2073 v->MaxDCHUBToPSCLThroughput,
2074 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2075 } else {
2076 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2077 }
2078 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2079 * dml_max3(
2080 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2081 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2082 1.0);
2083
2084 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2085 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2086 }
2087
2088 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2089 }
2090 }
2091
2092 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2093 if (v->BlendingAndTiming[k] != k)
2094 continue;
2095 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2096 v->DISPCLKWithRamping = dml_max(
2097 v->DISPCLKWithRamping,
2098 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2099 * (1 + v->DISPCLKRampingMargin / 100));
2100 v->DISPCLKWithoutRamping = dml_max(
2101 v->DISPCLKWithoutRamping,
2102 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2103 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2104 v->DISPCLKWithRamping = dml_max(
2105 v->DISPCLKWithRamping,
2106 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2107 * (1 + v->DISPCLKRampingMargin / 100));
2108 v->DISPCLKWithoutRamping = dml_max(
2109 v->DISPCLKWithoutRamping,
2110 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2111 } else {
2112 v->DISPCLKWithRamping = dml_max(
2113 v->DISPCLKWithRamping,
2114 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2115 v->DISPCLKWithoutRamping = dml_max(
2116 v->DISPCLKWithoutRamping,
2117 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2118 }
2119 }
2120
2121 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2122 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2123
2124 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2125 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2126 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2127 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2128 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2129 v->DISPCLKDPPCLKVCOSpeed);
2130 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2131 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2132 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2133 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2134 } else {
2135 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2136 }
2137 v->DISPCLK = v->DISPCLK_calculated;
2138 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2139
2140 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2141 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2142 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2143 }
2144 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2145 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2146 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2147 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2148 }
2149
2150 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2151 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2152 }
2153
2154 // Urgent and B P-State/DRAM Clock Change Watermark
2155 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2156 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2157
2158 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2159 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2160 v->SourcePixelFormat[k],
2161 v->SurfaceTiling[k],
2162 &v->BytePerPixelY[k],
2163 &v->BytePerPixelC[k],
2164 &v->BytePerPixelDETY[k],
2165 &v->BytePerPixelDETC[k],
2166 &v->BlockHeight256BytesY[k],
2167 &v->BlockHeight256BytesC[k],
2168 &v->BlockWidth256BytesY[k],
2169 &v->BlockWidth256BytesC[k]);
2170 }
2171
2172 CalculateSwathWidth(
2173 false,
2174 v->NumberOfActivePlanes,
2175 v->SourcePixelFormat,
2176 v->SourceScan,
2177 v->ViewportWidth,
2178 v->ViewportHeight,
2179 v->SurfaceWidthY,
2180 v->SurfaceWidthC,
2181 v->SurfaceHeightY,
2182 v->SurfaceHeightC,
2183 v->ODMCombineEnabled,
2184 v->BytePerPixelY,
2185 v->BytePerPixelC,
2186 v->BlockHeight256BytesY,
2187 v->BlockHeight256BytesC,
2188 v->BlockWidth256BytesY,
2189 v->BlockWidth256BytesC,
2190 v->BlendingAndTiming,
2191 v->HActive,
2192 v->HRatio,
2193 v->DPPPerPlane,
2194 v->SwathWidthSingleDPPY,
2195 v->SwathWidthSingleDPPC,
2196 v->SwathWidthY,
2197 v->SwathWidthC,
2198 v->dummyinteger3,
2199 v->dummyinteger4,
2200 v->swath_width_luma_ub,
2201 v->swath_width_chroma_ub);
2202
2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2204 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2205 * v->VRatio[k];
2206 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2207 * v->VRatioChroma[k];
2208 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2209 }
2210
2211 // DCFCLK Deep Sleep
2212 CalculateDCFCLKDeepSleep(
2213 mode_lib,
2214 v->NumberOfActivePlanes,
2215 v->BytePerPixelY,
2216 v->BytePerPixelC,
2217 v->VRatio,
2218 v->VRatioChroma,
2219 v->SwathWidthY,
2220 v->SwathWidthC,
2221 v->DPPPerPlane,
2222 v->HRatio,
2223 v->HRatioChroma,
2224 v->PixelClock,
2225 v->PSCL_THROUGHPUT_LUMA,
2226 v->PSCL_THROUGHPUT_CHROMA,
2227 v->DPPCLK,
2228 v->ReadBandwidthPlaneLuma,
2229 v->ReadBandwidthPlaneChroma,
2230 v->ReturnBusWidth,
2231 &v->DCFCLKDeepSleep);
2232
2233 // DSCCLK
2234 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2235 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2236 v->DSCCLK_calculated[k] = 0.0;
2237 } else {
2238 if (v->OutputFormat[k] == dm_420)
2239 v->DSCFormatFactor = 2;
2240 else if (v->OutputFormat[k] == dm_444)
2241 v->DSCFormatFactor = 1;
2242 else if (v->OutputFormat[k] == dm_n422)
2243 v->DSCFormatFactor = 2;
2244 else
2245 v->DSCFormatFactor = 1;
2246 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2247 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2248 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2249 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2250 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2251 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2252 else
2253 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2254 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2255 }
2256 }
2257
2258 // DSC Delay
2259 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2260 double BPP = v->OutputBpp[k];
2261
2262 if (v->DSCEnabled[k] && BPP != 0) {
2263 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2264 v->DSCDelay[k] = dscceComputeDelay(
2265 v->DSCInputBitPerComponent[k],
2266 BPP,
2267 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2268 v->NumberOfDSCSlices[k],
2269 v->OutputFormat[k],
2270 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2271 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2272 v->DSCDelay[k] = 2
2273 * (dscceComputeDelay(
2274 v->DSCInputBitPerComponent[k],
2275 BPP,
2276 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2277 v->NumberOfDSCSlices[k] / 2.0,
2278 v->OutputFormat[k],
2279 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2280 } else {
2281 v->DSCDelay[k] = 4
2282 * (dscceComputeDelay(
2283 v->DSCInputBitPerComponent[k],
2284 BPP,
2285 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2286 v->NumberOfDSCSlices[k] / 4.0,
2287 v->OutputFormat[k],
2288 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2289 }
2290 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2291 } else {
2292 v->DSCDelay[k] = 0;
2293 }
2294 }
2295
2296 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2297 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2298 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2299 v->DSCDelay[k] = v->DSCDelay[j];
2300
2301 // Prefetch
2302 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2303 unsigned int PDEAndMetaPTEBytesFrameY;
2304 unsigned int PixelPTEBytesPerRowY;
2305 unsigned int MetaRowByteY;
2306 unsigned int MetaRowByteC;
2307 unsigned int PDEAndMetaPTEBytesFrameC;
2308 unsigned int PixelPTEBytesPerRowC;
2309 bool PTEBufferSizeNotExceededY;
2310 bool PTEBufferSizeNotExceededC;
2311
2312 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2313 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2314 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2315 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2316 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2317 } else {
2318 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2319 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2320 }
2321
2322 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2323 mode_lib,
2324 v->DCCEnable[k],
2325 v->BlockHeight256BytesC[k],
2326 v->BlockWidth256BytesC[k],
2327 v->SourcePixelFormat[k],
2328 v->SurfaceTiling[k],
2329 v->BytePerPixelC[k],
2330 v->SourceScan[k],
2331 v->SwathWidthC[k],
2332 v->ViewportHeightChroma[k],
2333 v->GPUVMEnable,
2334 v->HostVMEnable,
2335 v->HostVMMaxNonCachedPageTableLevels,
2336 v->GPUVMMinPageSize,
2337 v->HostVMMinPageSize,
2338 v->PTEBufferSizeInRequestsForChroma,
2339 v->PitchC[k],
2340 v->DCCMetaPitchC[k],
2341 &v->MacroTileWidthC[k],
2342 &MetaRowByteC,
2343 &PixelPTEBytesPerRowC,
2344 &PTEBufferSizeNotExceededC,
2345 &v->dpte_row_width_chroma_ub[k],
2346 &v->dpte_row_height_chroma[k],
2347 &v->meta_req_width_chroma[k],
2348 &v->meta_req_height_chroma[k],
2349 &v->meta_row_width_chroma[k],
2350 &v->meta_row_height_chroma[k],
2351 &v->dummyinteger1,
2352 &v->dummyinteger2,
2353 &v->PixelPTEReqWidthC[k],
2354 &v->PixelPTEReqHeightC[k],
2355 &v->PTERequestSizeC[k],
2356 &v->dpde0_bytes_per_frame_ub_c[k],
2357 &v->meta_pte_bytes_per_frame_ub_c[k]);
2358
2359 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2360 mode_lib,
2361 v->VRatioChroma[k],
2362 v->VTAPsChroma[k],
2363 v->Interlace[k],
2364 v->ProgressiveToInterlaceUnitInOPP,
2365 v->SwathHeightC[k],
2366 v->ViewportYStartC[k],
2367 &v->VInitPreFillC[k],
2368 &v->MaxNumSwathC[k]);
2369 } else {
2370 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2371 v->PTEBufferSizeInRequestsForChroma = 0;
2372 PixelPTEBytesPerRowC = 0;
2373 PDEAndMetaPTEBytesFrameC = 0;
2374 MetaRowByteC = 0;
2375 v->MaxNumSwathC[k] = 0;
2376 v->PrefetchSourceLinesC[k] = 0;
2377 }
2378
2379 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2380 mode_lib,
2381 v->DCCEnable[k],
2382 v->BlockHeight256BytesY[k],
2383 v->BlockWidth256BytesY[k],
2384 v->SourcePixelFormat[k],
2385 v->SurfaceTiling[k],
2386 v->BytePerPixelY[k],
2387 v->SourceScan[k],
2388 v->SwathWidthY[k],
2389 v->ViewportHeight[k],
2390 v->GPUVMEnable,
2391 v->HostVMEnable,
2392 v->HostVMMaxNonCachedPageTableLevels,
2393 v->GPUVMMinPageSize,
2394 v->HostVMMinPageSize,
2395 v->PTEBufferSizeInRequestsForLuma,
2396 v->PitchY[k],
2397 v->DCCMetaPitchY[k],
2398 &v->MacroTileWidthY[k],
2399 &MetaRowByteY,
2400 &PixelPTEBytesPerRowY,
2401 &PTEBufferSizeNotExceededY,
2402 &v->dpte_row_width_luma_ub[k],
2403 &v->dpte_row_height[k],
2404 &v->meta_req_width[k],
2405 &v->meta_req_height[k],
2406 &v->meta_row_width[k],
2407 &v->meta_row_height[k],
2408 &v->vm_group_bytes[k],
2409 &v->dpte_group_bytes[k],
2410 &v->PixelPTEReqWidthY[k],
2411 &v->PixelPTEReqHeightY[k],
2412 &v->PTERequestSizeY[k],
2413 &v->dpde0_bytes_per_frame_ub_l[k],
2414 &v->meta_pte_bytes_per_frame_ub_l[k]);
2415
2416 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2417 mode_lib,
2418 v->VRatio[k],
2419 v->vtaps[k],
2420 v->Interlace[k],
2421 v->ProgressiveToInterlaceUnitInOPP,
2422 v->SwathHeightY[k],
2423 v->ViewportYStartY[k],
2424 &v->VInitPreFillY[k],
2425 &v->MaxNumSwathY[k]);
2426 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2427 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2428 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2429
2430 CalculateRowBandwidth(
2431 v->GPUVMEnable,
2432 v->SourcePixelFormat[k],
2433 v->VRatio[k],
2434 v->VRatioChroma[k],
2435 v->DCCEnable[k],
2436 v->HTotal[k] / v->PixelClock[k],
2437 MetaRowByteY,
2438 MetaRowByteC,
2439 v->meta_row_height[k],
2440 v->meta_row_height_chroma[k],
2441 PixelPTEBytesPerRowY,
2442 PixelPTEBytesPerRowC,
2443 v->dpte_row_height[k],
2444 v->dpte_row_height_chroma[k],
2445 &v->meta_row_bw[k],
2446 &v->dpte_row_bw[k]);
2447 }
2448
2449 v->TotalDCCActiveDPP = 0;
2450 v->TotalActiveDPP = 0;
2451 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2452 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2453 if (v->DCCEnable[k])
2454 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2455 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2456 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2457 NoChromaPlanes = false;
2458 }
2459
2460 ReorderBytes = v->NumberOfChannels
2461 * dml_max3(
2462 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2463 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2464 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2465
2466 VMDataOnlyReturnBW = dml_min(
2467 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2468 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2469 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2470 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2471
2472 #ifdef __DML_VBA_DEBUG__
2473 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2474 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2475 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2476 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2477 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2478 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2479 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2480 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2481 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2482 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2483 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2484 #endif
2485
2486 if (v->GPUVMEnable && v->HostVMEnable)
2487 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2488
2489 v->UrgentExtraLatency = CalculateExtraLatency(
2490 v->RoundTripPingLatencyCycles,
2491 ReorderBytes,
2492 v->DCFCLK,
2493 v->TotalActiveDPP,
2494 v->PixelChunkSizeInKByte,
2495 v->TotalDCCActiveDPP,
2496 v->MetaChunkSize,
2497 v->ReturnBW,
2498 v->GPUVMEnable,
2499 v->HostVMEnable,
2500 v->NumberOfActivePlanes,
2501 v->DPPPerPlane,
2502 v->dpte_group_bytes,
2503 HostVMInefficiencyFactor,
2504 v->HostVMMinPageSize,
2505 v->HostVMMaxNonCachedPageTableLevels);
2506
2507 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2508
2509 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2510 if (v->BlendingAndTiming[k] == k) {
2511 if (v->WritebackEnable[k] == true) {
2512 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2513 + CalculateWriteBackDelay(
2514 v->WritebackPixelFormat[k],
2515 v->WritebackHRatio[k],
2516 v->WritebackVRatio[k],
2517 v->WritebackVTaps[k],
2518 v->WritebackDestinationWidth[k],
2519 v->WritebackDestinationHeight[k],
2520 v->WritebackSourceHeight[k],
2521 v->HTotal[k]) / v->DISPCLK;
2522 } else
2523 v->WritebackDelay[v->VoltageLevel][k] = 0;
2524 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2525 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2526 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2527 v->WritebackDelay[v->VoltageLevel][k],
2528 v->WritebackLatency
2529 + CalculateWriteBackDelay(
2530 v->WritebackPixelFormat[j],
2531 v->WritebackHRatio[j],
2532 v->WritebackVRatio[j],
2533 v->WritebackVTaps[j],
2534 v->WritebackDestinationWidth[j],
2535 v->WritebackDestinationHeight[j],
2536 v->WritebackSourceHeight[j],
2537 v->HTotal[k]) / v->DISPCLK);
2538 }
2539 }
2540 }
2541 }
2542
2543 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2544 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2545 if (v->BlendingAndTiming[k] == j)
2546 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2547
2548 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2549 v->MaxVStartupLines[k] =
2550 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2551 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2552 v->VTotal[k] - v->VActive[k]
2553 - dml_max(
2554 1.0,
2555 dml_ceil(
2556 (double) v->WritebackDelay[v->VoltageLevel][k]
2557 / (v->HTotal[k] / v->PixelClock[k]),
2558 1));
2559 if (v->MaxVStartupLines[k] > 1023)
2560 v->MaxVStartupLines[k] = 1023;
2561
2562 #ifdef __DML_VBA_DEBUG__
2563 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2564 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2565 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2566 #endif
2567 }
2568
2569 v->MaximumMaxVStartupLines = 0;
2570 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2571 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2572
2573 // VBA_DELTA
2574 // We don't really care to iterate between the various prefetch modes
2575 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2576
2577 v->UrgentLatency = CalculateUrgentLatency(
2578 v->UrgentLatencyPixelDataOnly,
2579 v->UrgentLatencyPixelMixedWithVMData,
2580 v->UrgentLatencyVMDataOnly,
2581 v->DoUrgentLatencyAdjustment,
2582 v->UrgentLatencyAdjustmentFabricClockComponent,
2583 v->UrgentLatencyAdjustmentFabricClockReference,
2584 v->FabricClock);
2585
2586 v->FractionOfUrgentBandwidth = 0.0;
2587 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2588
2589 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2590
2591 do {
2592 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2593 bool DestinationLineTimesForPrefetchLessThan2 = false;
2594 bool VRatioPrefetchMoreThan4 = false;
2595 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2596 MaxTotalRDBandwidth = 0;
2597
2598 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2599
2600 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2601 Pipe myPipe;
2602
2603 myPipe.DPPCLK = v->DPPCLK[k];
2604 myPipe.DISPCLK = v->DISPCLK;
2605 myPipe.PixelClock = v->PixelClock[k];
2606 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2607 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2608 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2609 myPipe.VRatio = v->VRatio[k];
2610 myPipe.VRatioChroma = v->VRatioChroma[k];
2611 myPipe.SourceScan = v->SourceScan[k];
2612 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2613 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2614 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2615 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2616 myPipe.InterlaceEnable = v->Interlace[k];
2617 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2618 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2619 myPipe.HTotal = v->HTotal[k];
2620 myPipe.DCCEnable = v->DCCEnable[k];
2621 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2622 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2623 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2624 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2625 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2626 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2627 v->ErrorResult[k] = CalculatePrefetchSchedule(
2628 mode_lib,
2629 HostVMInefficiencyFactor,
2630 &myPipe,
2631 v->DSCDelay[k],
2632 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2633 v->DPPCLKDelaySCL,
2634 v->DPPCLKDelaySCLLBOnly,
2635 v->DPPCLKDelayCNVCCursor,
2636 v->DISPCLKDelaySubtotal,
2637 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2638 v->OutputFormat[k],
2639 v->MaxInterDCNTileRepeaters,
2640 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2641 v->MaxVStartupLines[k],
2642 v->GPUVMMaxPageTableLevels,
2643 v->GPUVMEnable,
2644 v->HostVMEnable,
2645 v->HostVMMaxNonCachedPageTableLevels,
2646 v->HostVMMinPageSize,
2647 v->DynamicMetadataEnable[k],
2648 v->DynamicMetadataVMEnabled,
2649 v->DynamicMetadataLinesBeforeActiveRequired[k],
2650 v->DynamicMetadataTransmittedBytes[k],
2651 v->UrgentLatency,
2652 v->UrgentExtraLatency,
2653 v->TCalc,
2654 v->PDEAndMetaPTEBytesFrame[k],
2655 v->MetaRowByte[k],
2656 v->PixelPTEBytesPerRow[k],
2657 v->PrefetchSourceLinesY[k],
2658 v->SwathWidthY[k],
2659 v->VInitPreFillY[k],
2660 v->MaxNumSwathY[k],
2661 v->PrefetchSourceLinesC[k],
2662 v->SwathWidthC[k],
2663 v->VInitPreFillC[k],
2664 v->MaxNumSwathC[k],
2665 v->swath_width_luma_ub[k],
2666 v->swath_width_chroma_ub[k],
2667 v->SwathHeightY[k],
2668 v->SwathHeightC[k],
2669 TWait,
2670 &v->DSTXAfterScaler[k],
2671 &v->DSTYAfterScaler[k],
2672 &v->DestinationLinesForPrefetch[k],
2673 &v->PrefetchBandwidth[k],
2674 &v->DestinationLinesToRequestVMInVBlank[k],
2675 &v->DestinationLinesToRequestRowInVBlank[k],
2676 &v->VRatioPrefetchY[k],
2677 &v->VRatioPrefetchC[k],
2678 &v->RequiredPrefetchPixDataBWLuma[k],
2679 &v->RequiredPrefetchPixDataBWChroma[k],
2680 &v->NotEnoughTimeForDynamicMetadata[k],
2681 &v->Tno_bw[k],
2682 &v->prefetch_vmrow_bw[k],
2683 &v->Tdmdl_vm[k],
2684 &v->Tdmdl[k],
2685 &v->TSetup[k],
2686 &v->VUpdateOffsetPix[k],
2687 &v->VUpdateWidthPix[k],
2688 &v->VReadyOffsetPix[k]);
2689
2690 #ifdef __DML_VBA_DEBUG__
2691 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2692 #endif
2693 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2694 }
2695
2696 v->NoEnoughUrgentLatencyHiding = false;
2697 v->NoEnoughUrgentLatencyHidingPre = false;
2698
2699 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2700 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2701 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2702 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2703 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2704
2705 CalculateUrgentBurstFactor(
2706 v->swath_width_luma_ub[k],
2707 v->swath_width_chroma_ub[k],
2708 v->SwathHeightY[k],
2709 v->SwathHeightC[k],
2710 v->HTotal[k] / v->PixelClock[k],
2711 v->UrgentLatency,
2712 v->CursorBufferSize,
2713 v->CursorWidth[k][0],
2714 v->CursorBPP[k][0],
2715 v->VRatio[k],
2716 v->VRatioChroma[k],
2717 v->BytePerPixelDETY[k],
2718 v->BytePerPixelDETC[k],
2719 v->DETBufferSizeY[k],
2720 v->DETBufferSizeC[k],
2721 &v->UrgBurstFactorCursor[k],
2722 &v->UrgBurstFactorLuma[k],
2723 &v->UrgBurstFactorChroma[k],
2724 &v->NoUrgentLatencyHiding[k]);
2725
2726 CalculateUrgentBurstFactor(
2727 v->swath_width_luma_ub[k],
2728 v->swath_width_chroma_ub[k],
2729 v->SwathHeightY[k],
2730 v->SwathHeightC[k],
2731 v->HTotal[k] / v->PixelClock[k],
2732 v->UrgentLatency,
2733 v->CursorBufferSize,
2734 v->CursorWidth[k][0],
2735 v->CursorBPP[k][0],
2736 v->VRatioPrefetchY[k],
2737 v->VRatioPrefetchC[k],
2738 v->BytePerPixelDETY[k],
2739 v->BytePerPixelDETC[k],
2740 v->DETBufferSizeY[k],
2741 v->DETBufferSizeC[k],
2742 &v->UrgBurstFactorCursorPre[k],
2743 &v->UrgBurstFactorLumaPre[k],
2744 &v->UrgBurstFactorChromaPre[k],
2745 &v->NoUrgentLatencyHidingPre[k]);
2746
2747 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2748 + dml_max3(
2749 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2750 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2751 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2752 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2753 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2754 v->DPPPerPlane[k]
2755 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2756 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2757 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2758
2759 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2760 + dml_max3(
2761 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2762 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2763 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2764 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2765 + v->cursor_bw_pre[k]);
2766
2767 #ifdef __DML_VBA_DEBUG__
2768 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2769 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2770 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2771 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2772 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2773
2774 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2775 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2776
2777 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2778 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2779 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2780 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2781 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2782 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2783 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2784 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2785 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2786 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2787 #endif
2788
2789 if (v->DestinationLinesForPrefetch[k] < 2)
2790 DestinationLineTimesForPrefetchLessThan2 = true;
2791
2792 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2793 VRatioPrefetchMoreThan4 = true;
2794
2795 if (v->NoUrgentLatencyHiding[k] == true)
2796 v->NoEnoughUrgentLatencyHiding = true;
2797
2798 if (v->NoUrgentLatencyHidingPre[k] == true)
2799 v->NoEnoughUrgentLatencyHidingPre = true;
2800 }
2801
2802 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2803
2804 #ifdef __DML_VBA_DEBUG__
2805 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2806 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2807 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2808 #endif
2809
2810 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2811 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2812 v->PrefetchModeSupported = true;
2813 else {
2814 v->PrefetchModeSupported = false;
2815 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2816 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2817 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2818 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2819 }
2820
2821 // PREVIOUS_ERROR
2822 // This error result check was previously done after the PrefetchModeSupported check, so we
2823 // would still try to calculate the flip schedule even when the prefetch mode was not supported.
2824 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2825 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2826 v->PrefetchModeSupported = false;
2827 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2828 }
2829 }
2830
2831 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2832 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2833 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2834 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2835 - dml_max(
2836 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2837 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2838 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2839 v->DPPPerPlane[k]
2840 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2841 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2842 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2843 }
2844
2845 v->TotImmediateFlipBytes = 0;
2846 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2847 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2848 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2849 }
2850 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2851 CalculateFlipSchedule(
2852 mode_lib,
2853 k,
2854 HostVMInefficiencyFactor,
2855 v->UrgentExtraLatency,
2856 v->UrgentLatency,
2857 v->PDEAndMetaPTEBytesFrame[k],
2858 v->MetaRowByte[k],
2859 v->PixelPTEBytesPerRow[k]);
2860 }
2861
2862 v->total_dcn_read_bw_with_flip = 0.0;
2863 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2864 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2865 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2866 + dml_max3(
2867 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2868 v->DPPPerPlane[k] * v->final_flip_bw[k]
2869 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2870 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2871 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2872 v->DPPPerPlane[k]
2873 * (v->final_flip_bw[k]
2874 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2875 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2876 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2877 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2878 + dml_max3(
2879 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2880 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2881 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2882 v->DPPPerPlane[k]
2883 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2884 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2885 }
2886 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2887
2888 v->ImmediateFlipSupported = true;
2889 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2890 #ifdef __DML_VBA_DEBUG__
2891 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2892 #endif
2893 v->ImmediateFlipSupported = false;
2894 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2895 }
2896 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2897 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2898 #ifdef __DML_VBA_DEBUG__
2899 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2900 __func__, k);
2901 #endif
2902 v->ImmediateFlipSupported = false;
2903 }
2904 }
2905 } else {
2906 v->ImmediateFlipSupported = false;
2907 }
2908
2909 v->PrefetchAndImmediateFlipSupported =
2910 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2911 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2912 v->ImmediateFlipSupported)) ? true : false;
2913 #ifdef __DML_VBA_DEBUG__
2914 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2915 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2916 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2917 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2918 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2919 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2920 #endif
2921 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2922
2923 v->VStartupLines = v->VStartupLines + 1;
2924 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2925 ASSERT(v->PrefetchAndImmediateFlipSupported);
2926
2927 // Unbounded Request Enabled
2928 CalculateUnboundedRequestAndCompressedBufferSize(
2929 v->DETBufferSizeInKByte[0],
2930 v->ConfigReturnBufferSizeInKByte,
2931 v->UseUnboundedRequesting,
2932 v->TotalActiveDPP,
2933 NoChromaPlanes,
2934 v->MaxNumDPP,
2935 v->CompressedBufferSegmentSizeInkByte,
2936 v->Output,
2937 &v->UnboundedRequestEnabled,
2938 &v->CompressedBufferSizeInkByte);
2939
2940 //Watermarks and NB P-State/DRAM Clock Change Support
2941 {
2942 enum clock_change_support DRAMClockChangeSupport; // dummy
2943 CalculateWatermarksAndDRAMSpeedChangeSupport(
2944 mode_lib,
2945 PrefetchMode,
2946 v->DCFCLK,
2947 v->ReturnBW,
2948 v->UrgentLatency,
2949 v->UrgentExtraLatency,
2950 v->SOCCLK,
2951 v->DCFCLKDeepSleep,
2952 v->DETBufferSizeY,
2953 v->DETBufferSizeC,
2954 v->SwathHeightY,
2955 v->SwathHeightC,
2956 v->SwathWidthY,
2957 v->SwathWidthC,
2958 v->DPPPerPlane,
2959 v->BytePerPixelDETY,
2960 v->BytePerPixelDETC,
2961 v->UnboundedRequestEnabled,
2962 v->CompressedBufferSizeInkByte,
2963 &DRAMClockChangeSupport,
2964 &v->StutterExitWatermark,
2965 &v->StutterEnterPlusExitWatermark,
2966 &v->Z8StutterExitWatermark,
2967 &v->Z8StutterEnterPlusExitWatermark);
2968
2969 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2970 if (v->WritebackEnable[k] == true) {
2971 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2972 0,
2973 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2974 } else {
2975 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2976 }
2977 }
2978 }
2979
2980 //Display Pipeline Delivery Time in Prefetch, Groups
2981 CalculatePixelDeliveryTimes(
2982 v->NumberOfActivePlanes,
2983 v->VRatio,
2984 v->VRatioChroma,
2985 v->VRatioPrefetchY,
2986 v->VRatioPrefetchC,
2987 v->swath_width_luma_ub,
2988 v->swath_width_chroma_ub,
2989 v->DPPPerPlane,
2990 v->HRatio,
2991 v->HRatioChroma,
2992 v->PixelClock,
2993 v->PSCL_THROUGHPUT_LUMA,
2994 v->PSCL_THROUGHPUT_CHROMA,
2995 v->DPPCLK,
2996 v->BytePerPixelC,
2997 v->SourceScan,
2998 v->NumberOfCursors,
2999 v->CursorWidth,
3000 v->CursorBPP,
3001 v->BlockWidth256BytesY,
3002 v->BlockHeight256BytesY,
3003 v->BlockWidth256BytesC,
3004 v->BlockHeight256BytesC,
3005 v->DisplayPipeLineDeliveryTimeLuma,
3006 v->DisplayPipeLineDeliveryTimeChroma,
3007 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3008 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3009 v->DisplayPipeRequestDeliveryTimeLuma,
3010 v->DisplayPipeRequestDeliveryTimeChroma,
3011 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3012 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3013 v->CursorRequestDeliveryTime,
3014 v->CursorRequestDeliveryTimePrefetch);
3015
3016 CalculateMetaAndPTETimes(
3017 v->NumberOfActivePlanes,
3018 v->GPUVMEnable,
3019 v->MetaChunkSize,
3020 v->MinMetaChunkSizeBytes,
3021 v->HTotal,
3022 v->VRatio,
3023 v->VRatioChroma,
3024 v->DestinationLinesToRequestRowInVBlank,
3025 v->DestinationLinesToRequestRowInImmediateFlip,
3026 v->DCCEnable,
3027 v->PixelClock,
3028 v->BytePerPixelY,
3029 v->BytePerPixelC,
3030 v->SourceScan,
3031 v->dpte_row_height,
3032 v->dpte_row_height_chroma,
3033 v->meta_row_width,
3034 v->meta_row_width_chroma,
3035 v->meta_row_height,
3036 v->meta_row_height_chroma,
3037 v->meta_req_width,
3038 v->meta_req_width_chroma,
3039 v->meta_req_height,
3040 v->meta_req_height_chroma,
3041 v->dpte_group_bytes,
3042 v->PTERequestSizeY,
3043 v->PTERequestSizeC,
3044 v->PixelPTEReqWidthY,
3045 v->PixelPTEReqHeightY,
3046 v->PixelPTEReqWidthC,
3047 v->PixelPTEReqHeightC,
3048 v->dpte_row_width_luma_ub,
3049 v->dpte_row_width_chroma_ub,
3050 v->DST_Y_PER_PTE_ROW_NOM_L,
3051 v->DST_Y_PER_PTE_ROW_NOM_C,
3052 v->DST_Y_PER_META_ROW_NOM_L,
3053 v->DST_Y_PER_META_ROW_NOM_C,
3054 v->TimePerMetaChunkNominal,
3055 v->TimePerChromaMetaChunkNominal,
3056 v->TimePerMetaChunkVBlank,
3057 v->TimePerChromaMetaChunkVBlank,
3058 v->TimePerMetaChunkFlip,
3059 v->TimePerChromaMetaChunkFlip,
3060 v->time_per_pte_group_nom_luma,
3061 v->time_per_pte_group_vblank_luma,
3062 v->time_per_pte_group_flip_luma,
3063 v->time_per_pte_group_nom_chroma,
3064 v->time_per_pte_group_vblank_chroma,
3065 v->time_per_pte_group_flip_chroma);
3066
3067 CalculateVMGroupAndRequestTimes(
3068 v->NumberOfActivePlanes,
3069 v->GPUVMEnable,
3070 v->GPUVMMaxPageTableLevels,
3071 v->HTotal,
3072 v->BytePerPixelC,
3073 v->DestinationLinesToRequestVMInVBlank,
3074 v->DestinationLinesToRequestVMInImmediateFlip,
3075 v->DCCEnable,
3076 v->PixelClock,
3077 v->dpte_row_width_luma_ub,
3078 v->dpte_row_width_chroma_ub,
3079 v->vm_group_bytes,
3080 v->dpde0_bytes_per_frame_ub_l,
3081 v->dpde0_bytes_per_frame_ub_c,
3082 v->meta_pte_bytes_per_frame_ub_l,
3083 v->meta_pte_bytes_per_frame_ub_c,
3084 v->TimePerVMGroupVBlank,
3085 v->TimePerVMGroupFlip,
3086 v->TimePerVMRequestVBlank,
3087 v->TimePerVMRequestFlip);
3088
3089 // Min TTUVBlank
3090 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3091 if (PrefetchMode == 0) {
3092 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3093 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3094 v->MinTTUVBlank[k] = dml_max(
3095 v->DRAMClockChangeWatermark,
3096 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3097 } else if (PrefetchMode == 1) {
3098 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3099 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3100 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3101 } else {
3102 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3103 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3104 v->MinTTUVBlank[k] = v->UrgentWatermark;
3105 }
3106 if (!v->DynamicMetadataEnable[k])
3107 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3108 }
3109
3110 // DCC Configuration
3111 v->ActiveDPPs = 0;
3112 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3113 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3114 v->SourcePixelFormat[k],
3115 v->SurfaceWidthY[k],
3116 v->SurfaceWidthC[k],
3117 v->SurfaceHeightY[k],
3118 v->SurfaceHeightC[k],
3119 v->DETBufferSizeInKByte[0] * 1024,
3120 v->BlockHeight256BytesY[k],
3121 v->BlockHeight256BytesC[k],
3122 v->SurfaceTiling[k],
3123 v->BytePerPixelY[k],
3124 v->BytePerPixelC[k],
3125 v->BytePerPixelDETY[k],
3126 v->BytePerPixelDETC[k],
3127 v->SourceScan[k],
3128 &v->DCCYMaxUncompressedBlock[k],
3129 &v->DCCCMaxUncompressedBlock[k],
3130 &v->DCCYMaxCompressedBlock[k],
3131 &v->DCCCMaxCompressedBlock[k],
3132 &v->DCCYIndependentBlock[k],
3133 &v->DCCCIndependentBlock[k]);
3134 }
3135
3136 // VStartup Adjustment
3137 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3138 bool isInterlaceTiming;
3139 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3140 #ifdef __DML_VBA_DEBUG__
3141 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3142 #endif
3143
3144 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3145
3146 #ifdef __DML_VBA_DEBUG__
3147 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3148 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3149 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3150 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3151 #endif
3152
3153 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3154 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3155 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3156 }
3157
3158 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3159
3160 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3161 - v->VFrontPorch[k])
3162 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3163 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3164
3165 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3166
3167 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3168 <= (isInterlaceTiming ?
3169 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3170 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3171 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3172 } else {
3173 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3174 }
3175 #ifdef __DML_VBA_DEBUG__
3176 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3177 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3178 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3179 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3180 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3181 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3182 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3183 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3184 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3185 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3186 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3187 #endif
3188 }
3189
3190 {
3191 //Maximum Bandwidth Used
3192 double TotalWRBandwidth = 0;
3193 double MaxPerPlaneVActiveWRBandwidth = 0;
3194 double WRBandwidth = 0;
3195 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3196 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3197 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3198 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3199 } else if (v->WritebackEnable[k] == true) {
3200 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3201 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3202 }
3203 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3204 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3205 }
3206
3207 v->TotalDataReadBandwidth = 0;
3208 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3209 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3210 }
3211 }
3212 // Stutter Efficiency
3213 CalculateStutterEfficiency(
3214 mode_lib,
3215 v->CompressedBufferSizeInkByte,
3216 v->UnboundedRequestEnabled,
3217 v->ConfigReturnBufferSizeInKByte,
3218 v->MetaFIFOSizeInKEntries,
3219 v->ZeroSizeBufferEntries,
3220 v->NumberOfActivePlanes,
3221 v->ROBBufferSizeInKByte,
3222 v->TotalDataReadBandwidth,
3223 v->DCFCLK,
3224 v->ReturnBW,
3225 v->COMPBUF_RESERVED_SPACE_64B,
3226 v->COMPBUF_RESERVED_SPACE_ZS,
3227 v->SRExitTime,
3228 v->SRExitZ8Time,
3229 v->SynchronizedVBlank,
3230 v->StutterEnterPlusExitWatermark,
3231 v->Z8StutterEnterPlusExitWatermark,
3232 v->ProgressiveToInterlaceUnitInOPP,
3233 v->Interlace,
3234 v->MinTTUVBlank,
3235 v->DPPPerPlane,
3236 v->DETBufferSizeY,
3237 v->BytePerPixelY,
3238 v->BytePerPixelDETY,
3239 v->SwathWidthY,
3240 v->SwathHeightY,
3241 v->SwathHeightC,
3242 v->DCCRateLuma,
3243 v->DCCRateChroma,
3244 v->DCCFractionOfZeroSizeRequestsLuma,
3245 v->DCCFractionOfZeroSizeRequestsChroma,
3246 v->HTotal,
3247 v->VTotal,
3248 v->PixelClock,
3249 v->VRatio,
3250 v->SourceScan,
3251 v->BlockHeight256BytesY,
3252 v->BlockWidth256BytesY,
3253 v->BlockHeight256BytesC,
3254 v->BlockWidth256BytesC,
3255 v->DCCYMaxUncompressedBlock,
3256 v->DCCCMaxUncompressedBlock,
3257 v->VActive,
3258 v->DCCEnable,
3259 v->WritebackEnable,
3260 v->ReadBandwidthPlaneLuma,
3261 v->ReadBandwidthPlaneChroma,
3262 v->meta_row_bw,
3263 v->dpte_row_bw,
3264 &v->StutterEfficiencyNotIncludingVBlank,
3265 &v->StutterEfficiency,
3266 &v->NumberOfStutterBurstsPerFrame,
3267 &v->Z8StutterEfficiencyNotIncludingVBlank,
3268 &v->Z8StutterEfficiency,
3269 &v->Z8NumberOfStutterBurstsPerFrame,
3270 &v->StutterPeriod);
3271 }
3272
3273 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3274 {
3275 struct vba_vars_st *v = &mode_lib->vba;
3276 // Display Pipe Configuration
3277 double BytePerPixDETY[DC__NUM_DPP__MAX];
3278 double BytePerPixDETC[DC__NUM_DPP__MAX];
3279 int BytePerPixY[DC__NUM_DPP__MAX];
3280 int BytePerPixC[DC__NUM_DPP__MAX];
3281 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3282 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3283 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3284 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3285 double dummy1[DC__NUM_DPP__MAX];
3286 double dummy2[DC__NUM_DPP__MAX];
3287 double dummy3[DC__NUM_DPP__MAX];
3288 double dummy4[DC__NUM_DPP__MAX];
3289 int dummy5[DC__NUM_DPP__MAX];
3290 int dummy6[DC__NUM_DPP__MAX];
3291 bool dummy7[DC__NUM_DPP__MAX];
3292 bool dummysinglestring;
3293
3294 unsigned int k;
3295
3296 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3297
3298 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3299 v->SourcePixelFormat[k],
3300 v->SurfaceTiling[k],
3301 &BytePerPixY[k],
3302 &BytePerPixC[k],
3303 &BytePerPixDETY[k],
3304 &BytePerPixDETC[k],
3305 &Read256BytesBlockHeightY[k],
3306 &Read256BytesBlockHeightC[k],
3307 &Read256BytesBlockWidthY[k],
3308 &Read256BytesBlockWidthC[k]);
3309 }
3310
3311 CalculateSwathAndDETConfiguration(
3312 false,
3313 v->NumberOfActivePlanes,
3314 v->DETBufferSizeInKByte[0],
3315 dummy1,
3316 dummy2,
3317 v->SourceScan,
3318 v->SourcePixelFormat,
3319 v->SurfaceTiling,
3320 v->ViewportWidth,
3321 v->ViewportHeight,
3322 v->SurfaceWidthY,
3323 v->SurfaceWidthC,
3324 v->SurfaceHeightY,
3325 v->SurfaceHeightC,
3326 Read256BytesBlockHeightY,
3327 Read256BytesBlockHeightC,
3328 Read256BytesBlockWidthY,
3329 Read256BytesBlockWidthC,
3330 v->ODMCombineEnabled,
3331 v->BlendingAndTiming,
3332 BytePerPixY,
3333 BytePerPixC,
3334 BytePerPixDETY,
3335 BytePerPixDETC,
3336 v->HActive,
3337 v->HRatio,
3338 v->HRatioChroma,
3339 v->DPPPerPlane,
3340 dummy5,
3341 dummy6,
3342 dummy3,
3343 dummy4,
3344 v->SwathHeightY,
3345 v->SwathHeightC,
3346 v->DETBufferSizeY,
3347 v->DETBufferSizeC,
3348 dummy7,
3349 &dummysinglestring);
3350 }
3351
/*
 * CalculateTWait - pick the wait time for the given prefetch mode.
 *
 * Mode 0 returns the largest of (DRAM clock change latency + urgent latency),
 * the self-refresh enter-plus-exit time and the urgent latency.
 * Mode 1 returns the larger of the self-refresh enter-plus-exit time and the
 * urgent latency.  Any other mode returns the urgent latency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3362
3363 double dml31_CalculateWriteBackDISPCLK(
3364 enum source_format_class WritebackPixelFormat,
3365 double PixelClock,
3366 double WritebackHRatio,
3367 double WritebackVRatio,
3368 unsigned int WritebackHTaps,
3369 unsigned int WritebackVTaps,
3370 long WritebackSourceWidth,
3371 long WritebackDestinationWidth,
3372 unsigned int HTotal,
3373 unsigned int WritebackLineBufferSize)
3374 {
3375 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3376
3377 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3378 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3379 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3380 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3381 }
3382
3383 static double CalculateWriteBackDelay(
3384 enum source_format_class WritebackPixelFormat,
3385 double WritebackHRatio,
3386 double WritebackVRatio,
3387 unsigned int WritebackVTaps,
3388 int WritebackDestinationWidth,
3389 int WritebackDestinationHeight,
3390 int WritebackSourceHeight,
3391 unsigned int HTotal)
3392 {
3393 double CalculateWriteBackDelay;
3394 double Line_length;
3395 double Output_lines_last_notclamped;
3396 double WritebackVInit;
3397
3398 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3399 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3400 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3401 if (Output_lines_last_notclamped < 0) {
3402 CalculateWriteBackDelay = 0;
3403 } else {
3404 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3405 }
3406 return CalculateWriteBackDelay;
3407 }
3408
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive VUPDATE/VREADY pixel
 * offsets and the dynamic metadata timing terms for one pipe.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14/DCFClkDeepSleep + 12/DPPCLK + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150/DPPCLK, repeater delay + 20/DCFClkDeepSleep + 10/DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock (one line time)
 *   *Tdmsks           - half of the vertical blank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                       that many line times; halved again when InterlaceEnable
 *                       is 1 and ProgressiveToInterlaceUnitInOPP is false
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f; only VUpdateOffsetPix is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3452
3453 static void CalculateRowBandwidth(
3454 bool GPUVMEnable,
3455 enum source_format_class SourcePixelFormat,
3456 double VRatio,
3457 double VRatioChroma,
3458 bool DCCEnable,
3459 double LineTime,
3460 unsigned int MetaRowByteLuma,
3461 unsigned int MetaRowByteChroma,
3462 unsigned int meta_row_height_luma,
3463 unsigned int meta_row_height_chroma,
3464 unsigned int PixelPTEBytesPerRowLuma,
3465 unsigned int PixelPTEBytesPerRowChroma,
3466 unsigned int dpte_row_height_luma,
3467 unsigned int dpte_row_height_chroma,
3468 double *meta_row_bw,
3469 double *dpte_row_bw)
3470 {
3471 if (DCCEnable != true) {
3472 *meta_row_bw = 0;
3473 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3474 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3475 } else {
3476 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3477 }
3478
3479 if (GPUVMEnable != true) {
3480 *dpte_row_bw = 0;
3481 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3482 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3483 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3484 } else {
3485 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3486 }
3487 }
3488
3489 static void CalculateFlipSchedule(
3490 struct display_mode_lib *mode_lib,
3491 unsigned int k,
3492 double HostVMInefficiencyFactor,
3493 double UrgentExtraLatency,
3494 double UrgentLatency,
3495 double PDEAndMetaPTEBytesPerFrame,
3496 double MetaRowBytes,
3497 double DPTEBytesPerRow)
3498 {
3499 struct vba_vars_st *v = &mode_lib->vba;
3500 double min_row_time = 0.0;
3501 unsigned int HostVMDynamicLevelsTrips;
3502 double TimeForFetchingMetaPTEImmediateFlip;
3503 double TimeForFetchingRowInVBlankImmediateFlip;
3504 double ImmediateFlipBW;
3505 double LineTime = v->HTotal[k] / v->PixelClock[k];
3506
3507 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3508 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3509 } else {
3510 HostVMDynamicLevelsTrips = 0;
3511 }
3512
3513 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3514 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3515 }
3516
3517 if (v->GPUVMEnable == true) {
3518 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3519 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3520 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3521 LineTime / 4.0);
3522 } else {
3523 TimeForFetchingMetaPTEImmediateFlip = 0;
3524 }
3525
3526 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3527 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3528 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3529 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3530 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3531 LineTime / 4);
3532 } else {
3533 TimeForFetchingRowInVBlankImmediateFlip = 0;
3534 }
3535
3536 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3537
3538 if (v->GPUVMEnable == true) {
3539 v->final_flip_bw[k] = dml_max(
3540 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3541 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3542 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3543 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3544 } else {
3545 v->final_flip_bw[k] = 0;
3546 }
3547
3548 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3549 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3550 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3551 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3552 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3553 } else {
3554 min_row_time = dml_min4(
3555 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3556 v->meta_row_height[k] * LineTime / v->VRatio[k],
3557 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3558 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3559 }
3560 } else {
3561 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3562 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3563 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3564 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3565 } else {
3566 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3567 }
3568 }
3569
3570 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3571 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3572 v->ImmediateFlipSupportedForPipe[k] = false;
3573 } else {
3574 v->ImmediateFlipSupportedForPipe[k] = true;
3575 }
3576
3577 #ifdef __DML_VBA_DEBUG__
3578 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3579 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3580 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3581 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3582 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3583 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3584 #endif
3585
3586 }
3587
3588 static double TruncToValidBPP(
3589 double LinkBitRate,
3590 int Lanes,
3591 int HTotal,
3592 int HActive,
3593 double PixelClock,
3594 double DesiredBPP,
3595 bool DSCEnable,
3596 enum output_encoder_class Output,
3597 enum output_format_class Format,
3598 unsigned int DSCInputBitPerComponent,
3599 int DSCSlices,
3600 int AudioRate,
3601 int AudioLayout,
3602 enum odm_combine_mode ODMCombine)
3603 {
3604 double MaxLinkBPP;
3605 int MinDSCBPP;
3606 double MaxDSCBPP;
3607 int NonDSCBPP0;
3608 int NonDSCBPP1;
3609 int NonDSCBPP2;
3610
3611 if (Format == dm_420) {
3612 NonDSCBPP0 = 12;
3613 NonDSCBPP1 = 15;
3614 NonDSCBPP2 = 18;
3615 MinDSCBPP = 6;
3616 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3617 } else if (Format == dm_444) {
3618 NonDSCBPP0 = 24;
3619 NonDSCBPP1 = 30;
3620 NonDSCBPP2 = 36;
3621 MinDSCBPP = 8;
3622 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3623 } else {
3624
3625 NonDSCBPP0 = 16;
3626 NonDSCBPP1 = 20;
3627 NonDSCBPP2 = 24;
3628
3629 if (Format == dm_n422) {
3630 MinDSCBPP = 7;
3631 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3632 } else {
3633 MinDSCBPP = 8;
3634 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3635 }
3636 }
3637
3638 if (DSCEnable && Output == dm_dp) {
3639 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3640 } else {
3641 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3642 }
3643
3644 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3645 MaxLinkBPP = 16;
3646 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3647 MaxLinkBPP = 32;
3648 }
3649
3650 if (DesiredBPP == 0) {
3651 if (DSCEnable) {
3652 if (MaxLinkBPP < MinDSCBPP) {
3653 return BPP_INVALID;
3654 } else if (MaxLinkBPP >= MaxDSCBPP) {
3655 return MaxDSCBPP;
3656 } else {
3657 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3658 }
3659 } else {
3660 if (MaxLinkBPP >= NonDSCBPP2) {
3661 return NonDSCBPP2;
3662 } else if (MaxLinkBPP >= NonDSCBPP1) {
3663 return NonDSCBPP1;
3664 } else if (MaxLinkBPP >= NonDSCBPP0) {
3665 return 16.0;
3666 } else {
3667 return BPP_INVALID;
3668 }
3669 }
3670 } else {
3671 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3672 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3673 return BPP_INVALID;
3674 } else {
3675 return DesiredBPP;
3676 }
3677 }
3678 return BPP_INVALID;
3679 }
3680
3681 static noinline void CalculatePrefetchSchedulePerPlane(
3682 struct display_mode_lib *mode_lib,
3683 double HostVMInefficiencyFactor,
3684 int i,
3685 unsigned j,
3686 unsigned k)
3687 {
3688 struct vba_vars_st *v = &mode_lib->vba;
3689 Pipe myPipe;
3690
3691 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3692 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3693 myPipe.PixelClock = v->PixelClock[k];
3694 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3695 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3696 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3697 myPipe.VRatio = mode_lib->vba.VRatio[k];
3698 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3699
3700 myPipe.SourceScan = v->SourceScan[k];
3701 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3702 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3703 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3704 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3705 myPipe.InterlaceEnable = v->Interlace[k];
3706 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3707 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3708 myPipe.HTotal = v->HTotal[k];
3709 myPipe.DCCEnable = v->DCCEnable[k];
3710 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3711 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3712 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3713 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3714 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3715 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3716 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3717 mode_lib,
3718 HostVMInefficiencyFactor,
3719 &myPipe,
3720 v->DSCDelayPerState[i][k],
3721 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3722 v->DPPCLKDelaySCL,
3723 v->DPPCLKDelaySCLLBOnly,
3724 v->DPPCLKDelayCNVCCursor,
3725 v->DISPCLKDelaySubtotal,
3726 v->SwathWidthYThisState[k] / v->HRatio[k],
3727 v->OutputFormat[k],
3728 v->MaxInterDCNTileRepeaters,
3729 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3730 v->MaximumVStartup[i][j][k],
3731 v->GPUVMMaxPageTableLevels,
3732 v->GPUVMEnable,
3733 v->HostVMEnable,
3734 v->HostVMMaxNonCachedPageTableLevels,
3735 v->HostVMMinPageSize,
3736 v->DynamicMetadataEnable[k],
3737 v->DynamicMetadataVMEnabled,
3738 v->DynamicMetadataLinesBeforeActiveRequired[k],
3739 v->DynamicMetadataTransmittedBytes[k],
3740 v->UrgLatency[i],
3741 v->ExtraLatency,
3742 v->TimeCalc,
3743 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3744 v->MetaRowBytes[i][j][k],
3745 v->DPTEBytesPerRow[i][j][k],
3746 v->PrefetchLinesY[i][j][k],
3747 v->SwathWidthYThisState[k],
3748 v->PrefillY[k],
3749 v->MaxNumSwY[k],
3750 v->PrefetchLinesC[i][j][k],
3751 v->SwathWidthCThisState[k],
3752 v->PrefillC[k],
3753 v->MaxNumSwC[k],
3754 v->swath_width_luma_ub_this_state[k],
3755 v->swath_width_chroma_ub_this_state[k],
3756 v->SwathHeightYThisState[k],
3757 v->SwathHeightCThisState[k],
3758 v->TWait,
3759 &v->DSTXAfterScaler[k],
3760 &v->DSTYAfterScaler[k],
3761 &v->LineTimesForPrefetch[k],
3762 &v->PrefetchBW[k],
3763 &v->LinesForMetaPTE[k],
3764 &v->LinesForMetaAndDPTERow[k],
3765 &v->VRatioPreY[i][j][k],
3766 &v->VRatioPreC[i][j][k],
3767 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3768 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3769 &v->NoTimeForDynamicMetadata[i][j][k],
3770 &v->Tno_bw[k],
3771 &v->prefetch_vmrow_bw[k],
3772 &v->dummy7[k],
3773 &v->dummy8[k],
3774 &v->dummy13[k],
3775 &v->VUpdateOffsetPix[k],
3776 &v->VUpdateWidthPix[k],
3777 &v->VReadyOffsetPix[k]);
3778 }
3779
3780 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte)
3781 {
3782 int i, total_pipes = 0;
3783 for (i = 0; i < NumberOfActivePlanes; i++)
3784 total_pipes += NoOfDPPThisState[i];
3785 *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
3786 if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE)
3787 *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE;
3788 }
3789
3790
3791 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3792 {
3793 struct vba_vars_st *v = &mode_lib->vba;
3794
3795 int i, j;
3796 unsigned int k, m;
3797 int ReorderingBytes;
3798 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3799 bool NoChroma = true;
3800 bool EnoughWritebackUnits = true;
3801 bool P2IWith420 = false;
3802 bool DSCOnlyIfNecessaryWithBPP = false;
3803 bool DSC422NativeNotSupported = false;
3804 double MaxTotalVActiveRDBandwidth;
3805 bool ViewportExceedsSurface = false;
3806 bool FMTBufferExceeded = false;
3807
3808 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3809
3810 CalculateMinAndMaxPrefetchMode(
3811 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3812 &MinPrefetchMode, &MaxPrefetchMode);
3813
3814 /*Scale Ratio, taps Support Check*/
3815
3816 v->ScaleRatioAndTapsSupport = true;
3817 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3818 if (v->ScalerEnabled[k] == false
3819 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3820 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3821 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3822 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3823 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3824 v->ScaleRatioAndTapsSupport = false;
3825 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3826 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3827 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3828 || v->VRatio[k] > v->vtaps[k]
3829 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3830 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3831 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3832 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3833 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3834 || v->HRatioChroma[k] > v->MaxHSCLRatio
3835 || v->VRatioChroma[k] > v->MaxVSCLRatio
3836 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3837 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3838 v->ScaleRatioAndTapsSupport = false;
3839 }
3840 }
3841 /*Source Format, Pixel Format and Scan Support Check*/
3842
3843 v->SourceFormatPixelAndScanSupport = true;
3844 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3845 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3846 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3847 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3848 v->SourceFormatPixelAndScanSupport = false;
3849 }
3850 }
3851 /*Bandwidth Support Check*/
3852
3853 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3854 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3855 v->SourcePixelFormat[k],
3856 v->SurfaceTiling[k],
3857 &v->BytePerPixelY[k],
3858 &v->BytePerPixelC[k],
3859 &v->BytePerPixelInDETY[k],
3860 &v->BytePerPixelInDETC[k],
3861 &v->Read256BlockHeightY[k],
3862 &v->Read256BlockHeightC[k],
3863 &v->Read256BlockWidthY[k],
3864 &v->Read256BlockWidthC[k]);
3865 }
3866 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3867 if (v->SourceScan[k] != dm_vert) {
3868 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3869 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3870 } else {
3871 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3872 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3873 }
3874 }
3875 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3876 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3877 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3878 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3879 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3880 }
3881 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3882 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3883 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3884 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3885 } else if (v->WritebackEnable[k] == true) {
3886 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3887 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3888 } else {
3889 v->WriteBandwidth[k] = 0.0;
3890 }
3891 }
3892
3893 /*Writeback Latency support check*/
3894
3895 v->WritebackLatencySupport = true;
3896 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3897 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3898 v->WritebackLatencySupport = false;
3899 }
3900 }
3901
3902 /*Writeback Mode Support Check*/
3903
3904 v->TotalNumberOfActiveWriteback = 0;
3905 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3906 if (v->WritebackEnable[k] == true) {
3907 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3908 }
3909 }
3910
3911 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3912 EnoughWritebackUnits = false;
3913 }
3914
3915 /*Writeback Scale Ratio and Taps Support Check*/
3916
3917 v->WritebackScaleRatioAndTapsSupport = true;
3918 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3919 if (v->WritebackEnable[k] == true) {
3920 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3921 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3922 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3923 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3924 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3925 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3926 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3927 v->WritebackScaleRatioAndTapsSupport = false;
3928 }
3929 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3930 v->WritebackScaleRatioAndTapsSupport = false;
3931 }
3932 }
3933 }
3934 /*Maximum DISPCLK/DPPCLK Support check*/
3935
3936 v->WritebackRequiredDISPCLK = 0.0;
3937 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3938 if (v->WritebackEnable[k] == true) {
3939 v->WritebackRequiredDISPCLK = dml_max(
3940 v->WritebackRequiredDISPCLK,
3941 dml31_CalculateWriteBackDISPCLK(
3942 v->WritebackPixelFormat[k],
3943 v->PixelClock[k],
3944 v->WritebackHRatio[k],
3945 v->WritebackVRatio[k],
3946 v->WritebackHTaps[k],
3947 v->WritebackVTaps[k],
3948 v->WritebackSourceWidth[k],
3949 v->WritebackDestinationWidth[k],
3950 v->HTotal[k],
3951 v->WritebackLineBufferSize));
3952 }
3953 }
3954 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3955 if (v->HRatio[k] > 1.0) {
3956 v->PSCL_FACTOR[k] = dml_min(
3957 v->MaxDCHUBToPSCLThroughput,
3958 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3959 } else {
3960 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3961 }
3962 if (v->BytePerPixelC[k] == 0.0) {
3963 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3964 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3965 * dml_max3(
3966 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3967 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3968 1.0);
3969 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3970 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3971 }
3972 } else {
3973 if (v->HRatioChroma[k] > 1.0) {
3974 v->PSCL_FACTOR_CHROMA[k] = dml_min(
3975 v->MaxDCHUBToPSCLThroughput,
3976 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3977 } else {
3978 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3979 }
3980 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3981 * dml_max5(
3982 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3983 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3984 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3985 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3986 1.0);
3987 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3988 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3989 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3990 }
3991 }
3992 }
3993 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3994 int MaximumSwathWidthSupportLuma;
3995 int MaximumSwathWidthSupportChroma;
3996
3997 if (v->SurfaceTiling[k] == dm_sw_linear) {
3998 MaximumSwathWidthSupportLuma = 8192.0;
3999 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4000 MaximumSwathWidthSupportLuma = 2880.0;
4001 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4002 MaximumSwathWidthSupportLuma = 3840.0;
4003 } else {
4004 MaximumSwathWidthSupportLuma = 5760.0;
4005 }
4006
4007 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4008 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4009 } else {
4010 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4011 }
4012 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4013 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4014 if (v->BytePerPixelC[k] == 0.0) {
4015 v->MaximumSwathWidthInLineBufferChroma = 0;
4016 } else {
4017 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4018 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4019 }
4020 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4021 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4022 }
4023
4024 CalculateSwathAndDETConfiguration(
4025 true,
4026 v->NumberOfActivePlanes,
4027 v->DETBufferSizeInKByte[0],
4028 v->MaximumSwathWidthLuma,
4029 v->MaximumSwathWidthChroma,
4030 v->SourceScan,
4031 v->SourcePixelFormat,
4032 v->SurfaceTiling,
4033 v->ViewportWidth,
4034 v->ViewportHeight,
4035 v->SurfaceWidthY,
4036 v->SurfaceWidthC,
4037 v->SurfaceHeightY,
4038 v->SurfaceHeightC,
4039 v->Read256BlockHeightY,
4040 v->Read256BlockHeightC,
4041 v->Read256BlockWidthY,
4042 v->Read256BlockWidthC,
4043 v->odm_combine_dummy,
4044 v->BlendingAndTiming,
4045 v->BytePerPixelY,
4046 v->BytePerPixelC,
4047 v->BytePerPixelInDETY,
4048 v->BytePerPixelInDETC,
4049 v->HActive,
4050 v->HRatio,
4051 v->HRatioChroma,
4052 v->NoOfDPPThisState,
4053 v->swath_width_luma_ub_this_state,
4054 v->swath_width_chroma_ub_this_state,
4055 v->SwathWidthYThisState,
4056 v->SwathWidthCThisState,
4057 v->SwathHeightYThisState,
4058 v->SwathHeightCThisState,
4059 v->DETBufferSizeYThisState,
4060 v->DETBufferSizeCThisState,
4061 v->SingleDPPViewportSizeSupportPerPlane,
4062 &v->ViewportSizeSupport[0][0]);
4063
4064 for (i = 0; i < v->soc.num_states; i++) {
4065 for (j = 0; j < 2; j++) {
4066 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4067 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4068 v->RequiredDISPCLK[i][j] = 0.0;
4069 v->DISPCLK_DPPCLK_Support[i][j] = true;
4070 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4071 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4072 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4073 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4074 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4075 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4076 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4077 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4078 }
4079 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4080 * (1 + v->DISPCLKRampingMargin / 100.0);
4081 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4082 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4083 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4084 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4085 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4086 }
4087 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4088 * (1 + v->DISPCLKRampingMargin / 100.0);
4089 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4090 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4091 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4092 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4093 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4094 }
4095
4096 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4097 || !(v->Output[k] == dm_dp ||
4098 v->Output[k] == dm_dp2p0 ||
4099 v->Output[k] == dm_edp)) {
4100 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4101 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4102
4103 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4104 FMTBufferExceeded = true;
4105 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4106 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4107 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4108 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4109 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4110 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4111 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4112 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4113 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4114 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4115 } else {
4116 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4117 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4118 }
4119 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4120 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4121 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4122 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4123 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4124 } else {
4125 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4126 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4127 }
4128 }
4129 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4130 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4131 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4132 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4133 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4134
4135 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4136 FMTBufferExceeded = true;
4137 } else {
4138 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4139 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4140 }
4141 }
4142 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4143 v->MPCCombine[i][j][k] = false;
4144 v->NoOfDPP[i][j][k] = 4;
4145 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4146 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4147 v->MPCCombine[i][j][k] = false;
4148 v->NoOfDPP[i][j][k] = 2;
4149 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4150 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4151 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4152 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4153 v->MPCCombine[i][j][k] = false;
4154 v->NoOfDPP[i][j][k] = 1;
4155 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4156 } else {
4157 v->MPCCombine[i][j][k] = true;
4158 v->NoOfDPP[i][j][k] = 2;
4159 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4160 }
4161 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4162 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4163 > v->MaxDppclkRoundedDownToDFSGranularity)
4164 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4165 v->DISPCLK_DPPCLK_Support[i][j] = false;
4166 }
4167 }
4168 v->TotalNumberOfActiveDPP[i][j] = 0;
4169 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4170 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4171 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4172 if (v->NoOfDPP[i][j][k] == 1)
4173 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4174 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4175 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4176 NoChroma = false;
4177 }
4178
4179 // UPTO
4180 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4181 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4182 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4183 double BWOfNonSplitPlaneOfMaximumBandwidth;
4184 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4185 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4186 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4187 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4188 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4189 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4190 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4191 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4192 }
4193 }
4194 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4195 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4196 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4197 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4198 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4199 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4200 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4201 }
4202 }
4203 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4204 v->RequiredDISPCLK[i][j] = 0.0;
4205 v->DISPCLK_DPPCLK_Support[i][j] = true;
4206 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4207 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4208 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4209 v->MPCCombine[i][j][k] = true;
4210 v->NoOfDPP[i][j][k] = 2;
4211 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4212 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4213 } else {
4214 v->MPCCombine[i][j][k] = false;
4215 v->NoOfDPP[i][j][k] = 1;
4216 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4217 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4218 }
4219 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4220 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4221 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4222 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4223 } else {
4224 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4225 }
4226 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4227 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4228 > v->MaxDppclkRoundedDownToDFSGranularity)
4229 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4230 v->DISPCLK_DPPCLK_Support[i][j] = false;
4231 }
4232 }
4233 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4234 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4235 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4236 }
4237 }
4238 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4239 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4240 v->DISPCLK_DPPCLK_Support[i][j] = false;
4241 }
4242 }
4243 }
4244
4245 /*Total Available Pipes Support Check*/
4246
4247 for (i = 0; i < v->soc.num_states; i++) {
4248 for (j = 0; j < 2; j++) {
4249 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4250 v->TotalAvailablePipesSupport[i][j] = true;
4251 } else {
4252 v->TotalAvailablePipesSupport[i][j] = false;
4253 }
4254 }
4255 }
4256 /*Display IO and DSC Support Check*/
4257
4258 v->NonsupportedDSCInputBPC = false;
4259 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4260 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4261 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4262 v->NonsupportedDSCInputBPC = true;
4263 }
4264 }
4265
4266 /*Number Of DSC Slices*/
4267 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4268 if (v->BlendingAndTiming[k] == k) {
4269 if (v->PixelClockBackEnd[k] > 3200) {
4270 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4271 } else if (v->PixelClockBackEnd[k] > 1360) {
4272 v->NumberOfDSCSlices[k] = 8;
4273 } else if (v->PixelClockBackEnd[k] > 680) {
4274 v->NumberOfDSCSlices[k] = 4;
4275 } else if (v->PixelClockBackEnd[k] > 340) {
4276 v->NumberOfDSCSlices[k] = 2;
4277 } else {
4278 v->NumberOfDSCSlices[k] = 1;
4279 }
4280 } else {
4281 v->NumberOfDSCSlices[k] = 0;
4282 }
4283 }
4284
4285 for (i = 0; i < v->soc.num_states; i++) {
4286 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4287 v->RequiresDSC[i][k] = false;
4288 v->RequiresFEC[i][k] = false;
4289 if (v->BlendingAndTiming[k] == k) {
4290 if (v->Output[k] == dm_hdmi) {
4291 v->RequiresDSC[i][k] = false;
4292 v->RequiresFEC[i][k] = false;
4293 v->OutputBppPerState[i][k] = TruncToValidBPP(
4294 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4295 3,
4296 v->HTotal[k],
4297 v->HActive[k],
4298 v->PixelClockBackEnd[k],
4299 v->ForcedOutputLinkBPP[k],
4300 false,
4301 v->Output[k],
4302 v->OutputFormat[k],
4303 v->DSCInputBitPerComponent[k],
4304 v->NumberOfDSCSlices[k],
4305 v->AudioSampleRate[k],
4306 v->AudioSampleLayout[k],
4307 v->ODMCombineEnablePerState[i][k]);
4308 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4309 if (v->DSCEnable[k] == true) {
4310 v->RequiresDSC[i][k] = true;
4311 v->LinkDSCEnable = true;
4312 if (v->Output[k] == dm_dp) {
4313 v->RequiresFEC[i][k] = true;
4314 } else {
4315 v->RequiresFEC[i][k] = false;
4316 }
4317 } else {
4318 v->RequiresDSC[i][k] = false;
4319 v->LinkDSCEnable = false;
4320 v->RequiresFEC[i][k] = false;
4321 }
4322
4323 v->Outbpp = BPP_INVALID;
4324 if (v->PHYCLKPerState[i] >= 270.0) {
4325 v->Outbpp = TruncToValidBPP(
4326 (1.0 - v->Downspreading / 100.0) * 2700,
4327 v->OutputLinkDPLanes[k],
4328 v->HTotal[k],
4329 v->HActive[k],
4330 v->PixelClockBackEnd[k],
4331 v->ForcedOutputLinkBPP[k],
4332 v->LinkDSCEnable,
4333 v->Output[k],
4334 v->OutputFormat[k],
4335 v->DSCInputBitPerComponent[k],
4336 v->NumberOfDSCSlices[k],
4337 v->AudioSampleRate[k],
4338 v->AudioSampleLayout[k],
4339 v->ODMCombineEnablePerState[i][k]);
4340 v->OutputBppPerState[i][k] = v->Outbpp;
4341 // TODO: Need some other way to handle this nonsense
4342 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4343 }
4344 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4345 v->Outbpp = TruncToValidBPP(
4346 (1.0 - v->Downspreading / 100.0) * 5400,
4347 v->OutputLinkDPLanes[k],
4348 v->HTotal[k],
4349 v->HActive[k],
4350 v->PixelClockBackEnd[k],
4351 v->ForcedOutputLinkBPP[k],
4352 v->LinkDSCEnable,
4353 v->Output[k],
4354 v->OutputFormat[k],
4355 v->DSCInputBitPerComponent[k],
4356 v->NumberOfDSCSlices[k],
4357 v->AudioSampleRate[k],
4358 v->AudioSampleLayout[k],
4359 v->ODMCombineEnablePerState[i][k]);
4360 v->OutputBppPerState[i][k] = v->Outbpp;
4361 // TODO: Need some other way to handle this nonsense
4362 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4363 }
4364 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4365 v->Outbpp = TruncToValidBPP(
4366 (1.0 - v->Downspreading / 100.0) * 8100,
4367 v->OutputLinkDPLanes[k],
4368 v->HTotal[k],
4369 v->HActive[k],
4370 v->PixelClockBackEnd[k],
4371 v->ForcedOutputLinkBPP[k],
4372 v->LinkDSCEnable,
4373 v->Output[k],
4374 v->OutputFormat[k],
4375 v->DSCInputBitPerComponent[k],
4376 v->NumberOfDSCSlices[k],
4377 v->AudioSampleRate[k],
4378 v->AudioSampleLayout[k],
4379 v->ODMCombineEnablePerState[i][k]);
4380 v->OutputBppPerState[i][k] = v->Outbpp;
4381 // TODO: Need some other way to handle this nonsense
4382 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4383 }
4384 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4385 v->Outbpp = TruncToValidBPP(
4386 (1.0 - v->Downspreading / 100.0) * 10000,
4387 4,
4388 v->HTotal[k],
4389 v->HActive[k],
4390 v->PixelClockBackEnd[k],
4391 v->ForcedOutputLinkBPP[k],
4392 v->LinkDSCEnable,
4393 v->Output[k],
4394 v->OutputFormat[k],
4395 v->DSCInputBitPerComponent[k],
4396 v->NumberOfDSCSlices[k],
4397 v->AudioSampleRate[k],
4398 v->AudioSampleLayout[k],
4399 v->ODMCombineEnablePerState[i][k]);
4400 v->OutputBppPerState[i][k] = v->Outbpp;
4401 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4402 }
4403 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4404 v->Outbpp = TruncToValidBPP(
4405 12000,
4406 4,
4407 v->HTotal[k],
4408 v->HActive[k],
4409 v->PixelClockBackEnd[k],
4410 v->ForcedOutputLinkBPP[k],
4411 v->LinkDSCEnable,
4412 v->Output[k],
4413 v->OutputFormat[k],
4414 v->DSCInputBitPerComponent[k],
4415 v->NumberOfDSCSlices[k],
4416 v->AudioSampleRate[k],
4417 v->AudioSampleLayout[k],
4418 v->ODMCombineEnablePerState[i][k]);
4419 v->OutputBppPerState[i][k] = v->Outbpp;
4420 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4421 }
4422 }
4423 } else {
4424 v->OutputBppPerState[i][k] = 0;
4425 }
4426 }
4427 }
4428
4429 for (i = 0; i < v->soc.num_states; i++) {
4430 v->LinkCapacitySupport[i] = true;
4431 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4432 if (v->BlendingAndTiming[k] == k
4433 && (v->Output[k] == dm_dp ||
4434 v->Output[k] == dm_edp ||
4435 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4436 v->LinkCapacitySupport[i] = false;
4437 }
4438 }
4439 }
4440
4441 // UPTO 2172
4442 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4443 if (v->BlendingAndTiming[k] == k
4444 && (v->Output[k] == dm_dp ||
4445 v->Output[k] == dm_edp ||
4446 v->Output[k] == dm_hdmi)) {
4447 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4448 P2IWith420 = true;
4449 }
4450 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4451 && !v->DSC422NativeSupport) {
4452 DSC422NativeNotSupported = true;
4453 }
4454 }
4455 }
4456
4457 for (i = 0; i < v->soc.num_states; ++i) {
4458 v->ODMCombine4To1SupportCheckOK[i] = true;
4459 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4460 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4461 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4462 || v->Output[k] == dm_hdmi)) {
4463 v->ODMCombine4To1SupportCheckOK[i] = false;
4464 }
4465 }
4466 }
4467
4468 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4469
4470 for (i = 0; i < v->soc.num_states; i++) {
4471 v->NotEnoughDSCUnits[i] = false;
4472 v->TotalDSCUnitsRequired = 0.0;
4473 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4474 if (v->RequiresDSC[i][k] == true) {
4475 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4476 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4477 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4478 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4479 } else {
4480 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4481 }
4482 }
4483 }
4484 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4485 v->NotEnoughDSCUnits[i] = true;
4486 }
4487 }
4488 /*DSC Delay per state*/
4489
4490 for (i = 0; i < v->soc.num_states; i++) {
4491 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4492 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4493 v->BPP = 0.0;
4494 } else {
4495 v->BPP = v->OutputBppPerState[i][k];
4496 }
4497 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4498 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4499 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4500 v->DSCInputBitPerComponent[k],
4501 v->BPP,
4502 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4503 v->NumberOfDSCSlices[k],
4504 v->OutputFormat[k],
4505 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4506 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4507 v->DSCDelayPerState[i][k] = 2.0
4508 * (dscceComputeDelay(
4509 v->DSCInputBitPerComponent[k],
4510 v->BPP,
4511 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4512 v->NumberOfDSCSlices[k] / 2,
4513 v->OutputFormat[k],
4514 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4515 } else {
4516 v->DSCDelayPerState[i][k] = 4.0
4517 * (dscceComputeDelay(
4518 v->DSCInputBitPerComponent[k],
4519 v->BPP,
4520 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4521 v->NumberOfDSCSlices[k] / 4,
4522 v->OutputFormat[k],
4523 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4524 }
4525 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4526 } else {
4527 v->DSCDelayPerState[i][k] = 0.0;
4528 }
4529 }
4530 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4531 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4532 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4533 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4534 }
4535 }
4536 }
4537 }
4538
4539 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4540 //
4541 for (i = 0; i < v->soc.num_states; ++i) {
4542 for (j = 0; j <= 1; ++j) {
4543 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4544 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4545 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4546 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4547 }
4548
4549 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315)
4550 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]);
4551 CalculateSwathAndDETConfiguration(
4552 false,
4553 v->NumberOfActivePlanes,
4554 v->DETBufferSizeInKByte[0],
4555 v->MaximumSwathWidthLuma,
4556 v->MaximumSwathWidthChroma,
4557 v->SourceScan,
4558 v->SourcePixelFormat,
4559 v->SurfaceTiling,
4560 v->ViewportWidth,
4561 v->ViewportHeight,
4562 v->SurfaceWidthY,
4563 v->SurfaceWidthC,
4564 v->SurfaceHeightY,
4565 v->SurfaceHeightC,
4566 v->Read256BlockHeightY,
4567 v->Read256BlockHeightC,
4568 v->Read256BlockWidthY,
4569 v->Read256BlockWidthC,
4570 v->ODMCombineEnableThisState,
4571 v->BlendingAndTiming,
4572 v->BytePerPixelY,
4573 v->BytePerPixelC,
4574 v->BytePerPixelInDETY,
4575 v->BytePerPixelInDETC,
4576 v->HActive,
4577 v->HRatio,
4578 v->HRatioChroma,
4579 v->NoOfDPPThisState,
4580 v->swath_width_luma_ub_this_state,
4581 v->swath_width_chroma_ub_this_state,
4582 v->SwathWidthYThisState,
4583 v->SwathWidthCThisState,
4584 v->SwathHeightYThisState,
4585 v->SwathHeightCThisState,
4586 v->DETBufferSizeYThisState,
4587 v->DETBufferSizeCThisState,
4588 v->dummystring,
4589 &v->ViewportSizeSupport[i][j]);
4590
4591 CalculateDCFCLKDeepSleep(
4592 mode_lib,
4593 v->NumberOfActivePlanes,
4594 v->BytePerPixelY,
4595 v->BytePerPixelC,
4596 v->VRatio,
4597 v->VRatioChroma,
4598 v->SwathWidthYThisState,
4599 v->SwathWidthCThisState,
4600 v->NoOfDPPThisState,
4601 v->HRatio,
4602 v->HRatioChroma,
4603 v->PixelClock,
4604 v->PSCL_FACTOR,
4605 v->PSCL_FACTOR_CHROMA,
4606 v->RequiredDPPCLKThisState,
4607 v->ReadBandwidthLuma,
4608 v->ReadBandwidthChroma,
4609 v->ReturnBusWidth,
4610 &v->ProjectedDCFCLKDeepSleep[i][j]);
4611
4612 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4613 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4614 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4615 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4616 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4617 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4618 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4619 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4620 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4621 }
4622 }
4623 }
4624
4625 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4626 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4627 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4628 }
4629
4630 for (i = 0; i < v->soc.num_states; i++) {
4631 for (j = 0; j < 2; j++) {
4632 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4633
4634 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4635 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4636 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4637 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4638 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4639 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4640 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4641 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4642 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4643 }
4644
4645 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4646 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4647 if (v->DCCEnable[k] == true) {
4648 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4649 }
4650 }
4651
4652 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4653 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4654 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4655
4656 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4657 && v->SourceScan[k] != dm_vert) {
4658 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4659 / 2;
4660 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4661 } else {
4662 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4663 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4664 }
4665
4666 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4667 mode_lib,
4668 v->DCCEnable[k],
4669 v->Read256BlockHeightC[k],
4670 v->Read256BlockWidthC[k],
4671 v->SourcePixelFormat[k],
4672 v->SurfaceTiling[k],
4673 v->BytePerPixelC[k],
4674 v->SourceScan[k],
4675 v->SwathWidthCThisState[k],
4676 v->ViewportHeightChroma[k],
4677 v->GPUVMEnable,
4678 v->HostVMEnable,
4679 v->HostVMMaxNonCachedPageTableLevels,
4680 v->GPUVMMinPageSize,
4681 v->HostVMMinPageSize,
4682 v->PTEBufferSizeInRequestsForChroma,
4683 v->PitchC[k],
4684 0.0,
4685 &v->MacroTileWidthC[k],
4686 &v->MetaRowBytesC,
4687 &v->DPTEBytesPerRowC,
4688 &v->PTEBufferSizeNotExceededC[i][j][k],
4689 &v->dummyinteger7,
4690 &v->dpte_row_height_chroma[k],
4691 &v->dummyinteger28,
4692 &v->dummyinteger26,
4693 &v->dummyinteger23,
4694 &v->meta_row_height_chroma[k],
4695 &v->dummyinteger8,
4696 &v->dummyinteger9,
4697 &v->dummyinteger19,
4698 &v->dummyinteger20,
4699 &v->dummyinteger17,
4700 &v->dummyinteger10,
4701 &v->dummyinteger11);
4702
4703 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4704 mode_lib,
4705 v->VRatioChroma[k],
4706 v->VTAPsChroma[k],
4707 v->Interlace[k],
4708 v->ProgressiveToInterlaceUnitInOPP,
4709 v->SwathHeightCThisState[k],
4710 v->ViewportYStartC[k],
4711 &v->PrefillC[k],
4712 &v->MaxNumSwC[k]);
4713 } else {
4714 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4715 v->PTEBufferSizeInRequestsForChroma = 0;
4716 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4717 v->MetaRowBytesC = 0.0;
4718 v->DPTEBytesPerRowC = 0.0;
4719 v->PrefetchLinesC[i][j][k] = 0.0;
4720 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4721 }
4722 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4723 mode_lib,
4724 v->DCCEnable[k],
4725 v->Read256BlockHeightY[k],
4726 v->Read256BlockWidthY[k],
4727 v->SourcePixelFormat[k],
4728 v->SurfaceTiling[k],
4729 v->BytePerPixelY[k],
4730 v->SourceScan[k],
4731 v->SwathWidthYThisState[k],
4732 v->ViewportHeight[k],
4733 v->GPUVMEnable,
4734 v->HostVMEnable,
4735 v->HostVMMaxNonCachedPageTableLevels,
4736 v->GPUVMMinPageSize,
4737 v->HostVMMinPageSize,
4738 v->PTEBufferSizeInRequestsForLuma,
4739 v->PitchY[k],
4740 v->DCCMetaPitchY[k],
4741 &v->MacroTileWidthY[k],
4742 &v->MetaRowBytesY,
4743 &v->DPTEBytesPerRowY,
4744 &v->PTEBufferSizeNotExceededY[i][j][k],
4745 &v->dummyinteger7,
4746 &v->dpte_row_height[k],
4747 &v->dummyinteger29,
4748 &v->dummyinteger27,
4749 &v->dummyinteger24,
4750 &v->meta_row_height[k],
4751 &v->dummyinteger25,
4752 &v->dpte_group_bytes[k],
4753 &v->dummyinteger21,
4754 &v->dummyinteger22,
4755 &v->dummyinteger18,
4756 &v->dummyinteger5,
4757 &v->dummyinteger6);
4758 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4759 mode_lib,
4760 v->VRatio[k],
4761 v->vtaps[k],
4762 v->Interlace[k],
4763 v->ProgressiveToInterlaceUnitInOPP,
4764 v->SwathHeightYThisState[k],
4765 v->ViewportYStartY[k],
4766 &v->PrefillY[k],
4767 &v->MaxNumSwY[k]);
4768 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4769 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4770 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4771
4772 CalculateRowBandwidth(
4773 v->GPUVMEnable,
4774 v->SourcePixelFormat[k],
4775 v->VRatio[k],
4776 v->VRatioChroma[k],
4777 v->DCCEnable[k],
4778 v->HTotal[k] / v->PixelClock[k],
4779 v->MetaRowBytesY,
4780 v->MetaRowBytesC,
4781 v->meta_row_height[k],
4782 v->meta_row_height_chroma[k],
4783 v->DPTEBytesPerRowY,
4784 v->DPTEBytesPerRowC,
4785 v->dpte_row_height[k],
4786 v->dpte_row_height_chroma[k],
4787 &v->meta_row_bandwidth[i][j][k],
4788 &v->dpte_row_bandwidth[i][j][k]);
4789 }
4790 /*DCCMetaBufferSizeSupport(i, j) = True
4791 For k = 0 To NumberOfActivePlanes - 1
4792 If MetaRowBytes(i, j, k) > 24064 Then
4793 DCCMetaBufferSizeSupport(i, j) = False
4794 End If
4795 Next k*/
4796 v->DCCMetaBufferSizeSupport[i][j] = true;
4797 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4798 if (v->MetaRowBytes[i][j][k] > 24064)
4799 v->DCCMetaBufferSizeSupport[i][j] = false;
4800 }
4801 v->UrgLatency[i] = CalculateUrgentLatency(
4802 v->UrgentLatencyPixelDataOnly,
4803 v->UrgentLatencyPixelMixedWithVMData,
4804 v->UrgentLatencyVMDataOnly,
4805 v->DoUrgentLatencyAdjustment,
4806 v->UrgentLatencyAdjustmentFabricClockComponent,
4807 v->UrgentLatencyAdjustmentFabricClockReference,
4808 v->FabricClockPerState[i]);
4809
4810 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4811 CalculateUrgentBurstFactor(
4812 v->swath_width_luma_ub_this_state[k],
4813 v->swath_width_chroma_ub_this_state[k],
4814 v->SwathHeightYThisState[k],
4815 v->SwathHeightCThisState[k],
4816 v->HTotal[k] / v->PixelClock[k],
4817 v->UrgLatency[i],
4818 v->CursorBufferSize,
4819 v->CursorWidth[k][0],
4820 v->CursorBPP[k][0],
4821 v->VRatio[k],
4822 v->VRatioChroma[k],
4823 v->BytePerPixelInDETY[k],
4824 v->BytePerPixelInDETC[k],
4825 v->DETBufferSizeYThisState[k],
4826 v->DETBufferSizeCThisState[k],
4827 &v->UrgentBurstFactorCursor[k],
4828 &v->UrgentBurstFactorLuma[k],
4829 &v->UrgentBurstFactorChroma[k],
4830 &NotUrgentLatencyHiding[k]);
4831 }
4832
4833 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4834 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4835 if (NotUrgentLatencyHiding[k]) {
4836 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4837 }
4838 }
4839
4840 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4841 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4842 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4843 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4844 }
4845
4846 v->TotalVActivePixelBandwidth[i][j] = 0;
4847 v->TotalVActiveCursorBandwidth[i][j] = 0;
4848 v->TotalMetaRowBandwidth[i][j] = 0;
4849 v->TotalDPTERowBandwidth[i][j] = 0;
4850 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4851 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4852 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4853 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4854 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4855 }
4856 }
4857 }
4858
4859 //Calculate Return BW
4860 for (i = 0; i < v->soc.num_states; ++i) {
4861 for (j = 0; j <= 1; ++j) {
4862 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4863 if (v->BlendingAndTiming[k] == k) {
4864 if (v->WritebackEnable[k] == true) {
4865 v->WritebackDelayTime[k] = v->WritebackLatency
4866 + CalculateWriteBackDelay(
4867 v->WritebackPixelFormat[k],
4868 v->WritebackHRatio[k],
4869 v->WritebackVRatio[k],
4870 v->WritebackVTaps[k],
4871 v->WritebackDestinationWidth[k],
4872 v->WritebackDestinationHeight[k],
4873 v->WritebackSourceHeight[k],
4874 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4875 } else {
4876 v->WritebackDelayTime[k] = 0.0;
4877 }
4878 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4879 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4880 v->WritebackDelayTime[k] = dml_max(
4881 v->WritebackDelayTime[k],
4882 v->WritebackLatency
4883 + CalculateWriteBackDelay(
4884 v->WritebackPixelFormat[m],
4885 v->WritebackHRatio[m],
4886 v->WritebackVRatio[m],
4887 v->WritebackVTaps[m],
4888 v->WritebackDestinationWidth[m],
4889 v->WritebackDestinationHeight[m],
4890 v->WritebackSourceHeight[m],
4891 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4892 }
4893 }
4894 }
4895 }
4896 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4897 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4898 if (v->BlendingAndTiming[k] == m) {
4899 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4900 }
4901 }
4902 }
4903 v->MaxMaxVStartup[i][j] = 0;
4904 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4905 v->MaximumVStartup[i][j][k] =
4906 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
4907 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
4908 v->VTotal[k] - v->VActive[k]
4909 - dml_max(
4910 1.0,
4911 dml_ceil(
4912 1.0 * v->WritebackDelayTime[k]
4913 / (v->HTotal[k]
4914 / v->PixelClock[k]),
4915 1.0));
4916 if (v->MaximumVStartup[i][j][k] > 1023)
4917 v->MaximumVStartup[i][j][k] = 1023;
4918 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
4919 }
4920 }
4921 }
4922
4923 ReorderingBytes = v->NumberOfChannels
4924 * dml_max3(
4925 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
4926 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
4927 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
4928
4929 for (i = 0; i < v->soc.num_states; ++i) {
4930 for (j = 0; j <= 1; ++j) {
4931 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
4932 }
4933 }
4934
4935 if (v->UseMinimumRequiredDCFCLK == true)
4936 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
4937
4938 for (i = 0; i < v->soc.num_states; ++i) {
4939 for (j = 0; j <= 1; ++j) {
4940 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
4941 v->ReturnBusWidth * v->DCFCLKState[i][j],
4942 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
4943 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
4944 double PixelDataOnlyReturnBWPerState = dml_min(
4945 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
4946 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
4947 double PixelMixedWithVMDataReturnBWPerState = dml_min(
4948 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
4949 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
4950
4951 if (v->HostVMEnable != true) {
4952 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
4953 } else {
4954 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
4955 }
4956 }
4957 }
4958
4959 //Re-ordering Buffer Support Check
4960 for (i = 0; i < v->soc.num_states; ++i) {
4961 for (j = 0; j <= 1; ++j) {
4962 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
4963 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
4964 v->ROBSupport[i][j] = true;
4965 } else {
4966 v->ROBSupport[i][j] = false;
4967 }
4968 }
4969 }
4970
4971 //Vertical Active BW support check
4972
4973 MaxTotalVActiveRDBandwidth = 0;
4974 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4975 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4976 }
4977
4978 for (i = 0; i < v->soc.num_states; ++i) {
4979 for (j = 0; j <= 1; ++j) {
4980 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
4981 dml_min(
4982 v->ReturnBusWidth * v->DCFCLKState[i][j],
4983 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
4984 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
4985 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
4986 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
4987
4988 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
4989 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
4990 } else {
4991 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
4992 }
4993 }
4994 }
4995
4996 v->UrgentLatency = CalculateUrgentLatency(
4997 v->UrgentLatencyPixelDataOnly,
4998 v->UrgentLatencyPixelMixedWithVMData,
4999 v->UrgentLatencyVMDataOnly,
5000 v->DoUrgentLatencyAdjustment,
5001 v->UrgentLatencyAdjustmentFabricClockComponent,
5002 v->UrgentLatencyAdjustmentFabricClockReference,
5003 v->FabricClock);
5004 //Prefetch Check
5005 for (i = 0; i < v->soc.num_states; ++i) {
5006 for (j = 0; j <= 1; ++j) {
5007 double VMDataOnlyReturnBWPerState;
5008 double HostVMInefficiencyFactor = 1;
5009 int NextPrefetchModeState = MinPrefetchMode;
5010 bool UnboundedRequestEnabledThisState = false;
5011 int CompressedBufferSizeInkByteThisState = 0;
5012 double dummy;
5013
5014 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5015
5016 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5017 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5018 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5019 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5020 }
5021
5022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5023 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5024 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5025 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5026 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5027 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5028 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5029 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5030 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5031 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5032 }
5033
5034 VMDataOnlyReturnBWPerState = dml_min(
5035 dml_min(
5036 v->ReturnBusWidth * v->DCFCLKState[i][j],
5037 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5038 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5039 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5040 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5041 if (v->GPUVMEnable && v->HostVMEnable)
5042 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5043
5044 v->ExtraLatency = CalculateExtraLatency(
5045 v->RoundTripPingLatencyCycles,
5046 ReorderingBytes,
5047 v->DCFCLKState[i][j],
5048 v->TotalNumberOfActiveDPP[i][j],
5049 v->PixelChunkSizeInKByte,
5050 v->TotalNumberOfDCCActiveDPP[i][j],
5051 v->MetaChunkSize,
5052 v->ReturnBWPerState[i][j],
5053 v->GPUVMEnable,
5054 v->HostVMEnable,
5055 v->NumberOfActivePlanes,
5056 v->NoOfDPPThisState,
5057 v->dpte_group_bytes,
5058 HostVMInefficiencyFactor,
5059 v->HostVMMinPageSize,
5060 v->HostVMMaxNonCachedPageTableLevels);
5061
5062 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5063 do {
5064 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5065 v->MaxVStartup = v->NextMaxVStartup;
5066
5067 v->TWait = CalculateTWait(
5068 v->PrefetchModePerState[i][j],
5069 v->DRAMClockChangeLatency,
5070 v->UrgLatency[i],
5071 v->SREnterPlusExitTime);
5072
5073 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5074 CalculatePrefetchSchedulePerPlane(mode_lib,
5075 HostVMInefficiencyFactor,
5076 i, j, k);
5077 }
5078
5079 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5080 CalculateUrgentBurstFactor(
5081 v->swath_width_luma_ub_this_state[k],
5082 v->swath_width_chroma_ub_this_state[k],
5083 v->SwathHeightYThisState[k],
5084 v->SwathHeightCThisState[k],
5085 v->HTotal[k] / v->PixelClock[k],
5086 v->UrgentLatency,
5087 v->CursorBufferSize,
5088 v->CursorWidth[k][0],
5089 v->CursorBPP[k][0],
5090 v->VRatioPreY[i][j][k],
5091 v->VRatioPreC[i][j][k],
5092 v->BytePerPixelInDETY[k],
5093 v->BytePerPixelInDETC[k],
5094 v->DETBufferSizeYThisState[k],
5095 v->DETBufferSizeCThisState[k],
5096 &v->UrgentBurstFactorCursorPre[k],
5097 &v->UrgentBurstFactorLumaPre[k],
5098 &v->UrgentBurstFactorChroma[k],
5099 &v->NotUrgentLatencyHidingPre[k]);
5100 }
5101
5102 v->MaximumReadBandwidthWithPrefetch = 0.0;
5103 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5104 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5105 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5106
5107 v->MaximumReadBandwidthWithPrefetch =
5108 v->MaximumReadBandwidthWithPrefetch
5109 + dml_max3(
5110 v->VActivePixelBandwidth[i][j][k]
5111 + v->VActiveCursorBandwidth[i][j][k]
5112 + v->NoOfDPP[i][j][k]
5113 * (v->meta_row_bandwidth[i][j][k]
5114 + v->dpte_row_bandwidth[i][j][k]),
5115 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5116 v->NoOfDPP[i][j][k]
5117 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5118 * v->UrgentBurstFactorLumaPre[k]
5119 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5120 * v->UrgentBurstFactorChromaPre[k])
5121 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5122 }
5123
5124 v->NotEnoughUrgentLatencyHidingPre = false;
5125 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5126 if (v->NotUrgentLatencyHidingPre[k] == true) {
5127 v->NotEnoughUrgentLatencyHidingPre = true;
5128 }
5129 }
5130
5131 v->PrefetchSupported[i][j] = true;
5132 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5133 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5134 v->PrefetchSupported[i][j] = false;
5135 }
5136 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5137 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5138 || v->NoTimeForPrefetch[i][j][k] == true) {
5139 v->PrefetchSupported[i][j] = false;
5140 }
5141 }
5142
5143 v->DynamicMetadataSupported[i][j] = true;
5144 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5145 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5146 v->DynamicMetadataSupported[i][j] = false;
5147 }
5148 }
5149
5150 v->VRatioInPrefetchSupported[i][j] = true;
5151 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5152 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5153 v->VRatioInPrefetchSupported[i][j] = false;
5154 }
5155 }
5156 v->AnyLinesForVMOrRowTooLarge = false;
5157 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5158 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5159 v->AnyLinesForVMOrRowTooLarge = true;
5160 }
5161 }
5162
5163 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5164
5165 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5166 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5167 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5168 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5169 - dml_max(
5170 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5171 v->NoOfDPP[i][j][k]
5172 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5173 * v->UrgentBurstFactorLumaPre[k]
5174 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5175 * v->UrgentBurstFactorChromaPre[k])
5176 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5177 }
5178 v->TotImmediateFlipBytes = 0.0;
5179 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5180 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5181 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5182 + v->DPTEBytesPerRow[i][j][k];
5183 }
5184
5185 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5186 CalculateFlipSchedule(
5187 mode_lib,
5188 k,
5189 HostVMInefficiencyFactor,
5190 v->ExtraLatency,
5191 v->UrgLatency[i],
5192 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5193 v->MetaRowBytes[i][j][k],
5194 v->DPTEBytesPerRow[i][j][k]);
5195 }
5196 v->total_dcn_read_bw_with_flip = 0.0;
5197 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5198 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5199 + dml_max3(
5200 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5201 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5202 + v->VActiveCursorBandwidth[i][j][k],
5203 v->NoOfDPP[i][j][k]
5204 * (v->final_flip_bw[k]
5205 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5206 * v->UrgentBurstFactorLumaPre[k]
5207 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5208 * v->UrgentBurstFactorChromaPre[k])
5209 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5210 }
5211 v->ImmediateFlipSupportedForState[i][j] = true;
5212 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5213 v->ImmediateFlipSupportedForState[i][j] = false;
5214 }
5215 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5216 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5217 v->ImmediateFlipSupportedForState[i][j] = false;
5218 }
5219 }
5220 } else {
5221 v->ImmediateFlipSupportedForState[i][j] = false;
5222 }
5223
5224 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5225 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5226 NextPrefetchModeState = NextPrefetchModeState + 1;
5227 } else {
5228 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5229 }
5230 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5231 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5232 && ((v->HostVMEnable == false &&
5233 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5234 || v->ImmediateFlipSupportedForState[i][j] == true))
5235 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5236
5237 CalculateUnboundedRequestAndCompressedBufferSize(
5238 v->DETBufferSizeInKByte[0],
5239 v->ConfigReturnBufferSizeInKByte,
5240 v->UseUnboundedRequesting,
5241 v->TotalNumberOfActiveDPP[i][j],
5242 NoChroma,
5243 v->MaxNumDPP,
5244 v->CompressedBufferSegmentSizeInkByte,
5245 v->Output,
5246 &UnboundedRequestEnabledThisState,
5247 &CompressedBufferSizeInkByteThisState);
5248
5249 CalculateWatermarksAndDRAMSpeedChangeSupport(
5250 mode_lib,
5251 v->PrefetchModePerState[i][j],
5252 v->DCFCLKState[i][j],
5253 v->ReturnBWPerState[i][j],
5254 v->UrgLatency[i],
5255 v->ExtraLatency,
5256 v->SOCCLKPerState[i],
5257 v->ProjectedDCFCLKDeepSleep[i][j],
5258 v->DETBufferSizeYThisState,
5259 v->DETBufferSizeCThisState,
5260 v->SwathHeightYThisState,
5261 v->SwathHeightCThisState,
5262 v->SwathWidthYThisState,
5263 v->SwathWidthCThisState,
5264 v->NoOfDPPThisState,
5265 v->BytePerPixelInDETY,
5266 v->BytePerPixelInDETC,
5267 UnboundedRequestEnabledThisState,
5268 CompressedBufferSizeInkByteThisState,
5269 &v->DRAMClockChangeSupport[i][j],
5270 &dummy,
5271 &dummy,
5272 &dummy,
5273 &dummy);
5274 }
5275 }
5276
5277 /*PTE Buffer Size Check*/
5278 for (i = 0; i < v->soc.num_states; i++) {
5279 for (j = 0; j < 2; j++) {
5280 v->PTEBufferSizeNotExceeded[i][j] = true;
5281 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5282 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5283 v->PTEBufferSizeNotExceeded[i][j] = false;
5284 }
5285 }
5286 }
5287 }
5288
5289 /*Cursor Support Check*/
5290 v->CursorSupport = true;
5291 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5292 if (v->CursorWidth[k][0] > 0.0) {
5293 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5294 v->CursorSupport = false;
5295 }
5296 }
5297 }
5298
5299 /*Valid Pitch Check*/
5300 v->PitchSupport = true;
5301 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5302 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5303 if (v->DCCEnable[k] == true) {
5304 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5305 } else {
5306 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5307 }
5308 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5309 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5310 && v->SourcePixelFormat[k] != dm_mono_8) {
5311 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5312 if (v->DCCEnable[k] == true) {
5313 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5314 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5315 64.0 * v->Read256BlockWidthC[k]);
5316 } else {
5317 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5318 }
5319 } else {
5320 v->AlignedCPitch[k] = v->PitchC[k];
5321 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5322 }
5323 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5324 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5325 v->PitchSupport = false;
5326 }
5327 }
5328
5329 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5330 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5331 ViewportExceedsSurface = true;
5332 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5333 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5334 && v->SourcePixelFormat[k] != dm_rgbe) {
5335 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5336 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5337 ViewportExceedsSurface = true;
5338 }
5339 }
5340 }
5341 }
5342
5343 /*Mode Support, Voltage State and SOC Configuration*/
5344 for (i = v->soc.num_states - 1; i >= 0; i--) {
5345 for (j = 0; j < 2; j++) {
5346 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5347 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5348 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5349 && v->DTBCLKRequiredMoreThanSupported[i] == false
5350 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5351 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5352 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5353 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5354 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5355 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5356 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5357 && ((v->HostVMEnable == false
5358 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5359 || v->ImmediateFlipSupportedForState[i][j] == true)
5360 && FMTBufferExceeded == false) {
5361 v->ModeSupport[i][j] = true;
5362 } else {
5363 v->ModeSupport[i][j] = false;
5364 }
5365 }
5366 }
5367
5368 {
5369 unsigned int MaximumMPCCombine = 0;
5370 for (i = v->soc.num_states; i >= 0; i--) {
5371 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5372 v->VoltageLevel = i;
5373 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5374 if (v->ModeSupport[i][0] == true) {
5375 MaximumMPCCombine = 0;
5376 } else {
5377 MaximumMPCCombine = 1;
5378 }
5379 }
5380 }
5381 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5382 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5383 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5384 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5385 }
5386 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5387 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5388 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5389 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5390 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5391 v->maxMpcComb = MaximumMPCCombine;
5392 }
5393 }
5394
5395 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5396 struct display_mode_lib *mode_lib,
5397 unsigned int PrefetchMode,
5398 double DCFCLK,
5399 double ReturnBW,
5400 double UrgentLatency,
5401 double ExtraLatency,
5402 double SOCCLK,
5403 double DCFCLKDeepSleep,
5404 unsigned int DETBufferSizeY[],
5405 unsigned int DETBufferSizeC[],
5406 unsigned int SwathHeightY[],
5407 unsigned int SwathHeightC[],
5408 double SwathWidthY[],
5409 double SwathWidthC[],
5410 unsigned int DPPPerPlane[],
5411 double BytePerPixelDETY[],
5412 double BytePerPixelDETC[],
5413 bool UnboundedRequestEnabled,
5414 int unsigned CompressedBufferSizeInkByte,
5415 enum clock_change_support *DRAMClockChangeSupport,
5416 double *StutterExitWatermark,
5417 double *StutterEnterPlusExitWatermark,
5418 double *Z8StutterExitWatermark,
5419 double *Z8StutterEnterPlusExitWatermark)
5420 {
5421 struct vba_vars_st *v = &mode_lib->vba;
5422 double EffectiveLBLatencyHidingY;
5423 double EffectiveLBLatencyHidingC;
5424 double LinesInDETY[DC__NUM_DPP__MAX];
5425 double LinesInDETC;
5426 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5427 unsigned int LinesInDETCRoundedDownToSwath;
5428 double FullDETBufferingTimeY;
5429 double FullDETBufferingTimeC;
5430 double ActiveDRAMClockChangeLatencyMarginY;
5431 double ActiveDRAMClockChangeLatencyMarginC;
5432 double WritebackDRAMClockChangeLatencyMargin;
5433 double PlaneWithMinActiveDRAMClockChangeMargin;
5434 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5435 double WritebackDRAMClockChangeLatencyHiding;
5436 double TotalPixelBW = 0.0;
5437 int k, j;
5438
5439 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5440
5441 #ifdef __DML_VBA_DEBUG__
5442 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5443 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5444 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5445 #endif
5446
5447 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5448
5449 #ifdef __DML_VBA_DEBUG__
5450 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5451 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5452 #endif
5453
5454 v->TotalActiveWriteback = 0;
5455 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5456 if (v->WritebackEnable[k] == true) {
5457 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5458 }
5459 }
5460
5461 if (v->TotalActiveWriteback <= 1) {
5462 v->WritebackUrgentWatermark = v->WritebackLatency;
5463 } else {
5464 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5465 }
5466
5467 if (v->TotalActiveWriteback <= 1) {
5468 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5469 } else {
5470 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5471 }
5472
5473 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5474 TotalPixelBW = TotalPixelBW
5475 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5476 / (v->HTotal[k] / v->PixelClock[k]);
5477 }
5478
5479 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5480 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5481
5482 v->LBLatencyHidingSourceLinesY = dml_min(
5483 (double) v->MaxLineBufferLines,
5484 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5485
5486 v->LBLatencyHidingSourceLinesC = dml_min(
5487 (double) v->MaxLineBufferLines,
5488 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5489
5490 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5491
5492 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5493
5494 if (UnboundedRequestEnabled) {
5495 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5496 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5497 }
5498
5499 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5500 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5501 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5502 if (BytePerPixelDETC[k] > 0) {
5503 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5504 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5505 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5506 } else {
5507 LinesInDETC = 0;
5508 FullDETBufferingTimeC = 999999;
5509 }
5510
5511 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5512 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5513
5514 if (v->NumberOfActivePlanes > 1) {
5515 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5516 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5517 }
5518
5519 if (BytePerPixelDETC[k] > 0) {
5520 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5521 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5522
5523 if (v->NumberOfActivePlanes > 1) {
5524 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5525 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5526 }
5527 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5528 } else {
5529 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5530 }
5531
5532 if (v->WritebackEnable[k] == true) {
5533 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5534 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5535 if (v->WritebackPixelFormat[k] == dm_444_64) {
5536 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5537 }
5538 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5539 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5540 }
5541 }
5542
5543 v->MinActiveDRAMClockChangeMargin = 999999;
5544 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5545 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5546 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5547 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5548 if (v->BlendingAndTiming[k] == k) {
5549 PlaneWithMinActiveDRAMClockChangeMargin = k;
5550 } else {
5551 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5552 if (v->BlendingAndTiming[k] == j) {
5553 PlaneWithMinActiveDRAMClockChangeMargin = j;
5554 }
5555 }
5556 }
5557 }
5558 }
5559
5560 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5561
5562 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5563 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5564 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5565 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5566 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5567 }
5568 }
5569
5570 v->TotalNumberOfActiveOTG = 0;
5571
5572 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5573 if (v->BlendingAndTiming[k] == k) {
5574 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5575 }
5576 }
5577
5578 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5579 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5580 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5581 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5582 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5583 } else {
5584 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5585 }
5586
5587 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5588 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5589 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5590 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5591
5592 #ifdef __DML_VBA_DEBUG__
5593 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5594 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5595 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5596 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5597 #endif
5598 }
5599
5600 static void CalculateDCFCLKDeepSleep(
5601 struct display_mode_lib *mode_lib,
5602 unsigned int NumberOfActivePlanes,
5603 int BytePerPixelY[],
5604 int BytePerPixelC[],
5605 double VRatio[],
5606 double VRatioChroma[],
5607 double SwathWidthY[],
5608 double SwathWidthC[],
5609 unsigned int DPPPerPlane[],
5610 double HRatio[],
5611 double HRatioChroma[],
5612 double PixelClock[],
5613 double PSCL_THROUGHPUT[],
5614 double PSCL_THROUGHPUT_CHROMA[],
5615 double DPPCLK[],
5616 double ReadBandwidthLuma[],
5617 double ReadBandwidthChroma[],
5618 int ReturnBusWidth,
5619 double *DCFCLKDeepSleep)
5620 {
5621 struct vba_vars_st *v = &mode_lib->vba;
5622 double DisplayPipeLineDeliveryTimeLuma;
5623 double DisplayPipeLineDeliveryTimeChroma;
5624 double ReadBandwidth = 0.0;
5625 int k;
5626
5627 for (k = 0; k < NumberOfActivePlanes; ++k) {
5628
5629 if (VRatio[k] <= 1) {
5630 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5631 } else {
5632 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5633 }
5634 if (BytePerPixelC[k] == 0) {
5635 DisplayPipeLineDeliveryTimeChroma = 0;
5636 } else {
5637 if (VRatioChroma[k] <= 1) {
5638 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5639 } else {
5640 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5641 }
5642 }
5643
5644 if (BytePerPixelC[k] > 0) {
5645 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5646 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5647 } else {
5648 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5649 }
5650 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5651
5652 }
5653
5654 for (k = 0; k < NumberOfActivePlanes; ++k) {
5655 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5656 }
5657
5658 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5659
5660 for (k = 0; k < NumberOfActivePlanes; ++k) {
5661 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5662 }
5663 }
5664
/*
 * Turn the amount of time a buffer can cover into an urgent burst factor.
 *
 * If BufferSizeInTime does not exceed UrgentLatency, the factor is forced to 0
 * and *NotEnoughUrgentLatencyHiding is raised. Otherwise the factor is
 * BufferSizeInTime / (BufferSizeInTime - UrgentLatency).
 */
static void UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		double *UrgentBurstFactor,
		bool *NotEnoughUrgentLatencyHiding)
{
	const double TimeLeftAfterLatency = BufferSizeInTime - UrgentLatency;

	if (TimeLeftAfterLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactor = 0;
		return;
	}

	*UrgentBurstFactor = BufferSizeInTime / TimeLeftAfterLatency;
}

/*
 * CalculateUrgentBurstFactor - urgent burst factors for the cursor buffer and
 * the luma/chroma DET buffers.
 *
 * For each buffer the number of lines it holds is converted to a time
 * (lines * LineTime / VRatio) and compared against UrgentLatency:
 *   - time <= UrgentLatency: factor = 0, *NotEnoughUrgentLatencyHiding = true
 *   - otherwise:             factor = time / (time - UrgentLatency)
 * When VRatio is not greater than 0 the factor is simply 1.
 *
 * Cursor lines are 1 << floor(log2(CursorBufferSize * 1024 / bytes per cursor
 * row)); luma/chroma lines are rounded down to a multiple of the swath height.
 *
 * *NotEnoughUrgentLatencyHiding is always cleared on entry.
 * *UrgentBurstFactorCursor is only written when CursorWidth > 0, and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; callers must not
 * rely on them otherwise.
 *
 * NOTE(review): VRatioC is accepted but never read; the chroma path scales by
 * VRatio. Kept as provided by HW - confirm whether that is intended.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	const bool HasVerticalRatio = VRatio > 0;

	*NotEnoughUrgentLatencyHiding = false;

	/* Cursor buffer */
	if (CursorWidth > 0) {
		unsigned int CursorLines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (HasVerticalRatio) {
			UrgentBurstFactorFromBufferTime(
					CursorLines * LineTime / VRatio,
					UrgentLatency,
					UrgentBurstFactorCursor,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma DET buffer */
	if (HasVerticalRatio) {
		double LumaLines = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;

		UrgentBurstFactorFromBufferTime(
				dml_floor(LumaLines, SwathHeightY) * LineTime / VRatio,
				UrgentLatency,
				UrgentBurstFactorLuma,
				NotEnoughUrgentLatencyHiding);
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	/* Chroma DET buffer, only for surfaces that carry a chroma plane */
	if (!(BytePerPixelInDETC > 0))
		return;

	if (HasVerticalRatio) {
		double ChromaLines = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;

		UrgentBurstFactorFromBufferTime(
				dml_floor(ChromaLines, SwathHeightC) * LineTime / VRatio,
				UrgentLatency,
				UrgentBurstFactorChroma,
				NotEnoughUrgentLatencyHiding);
	} else {
		*UrgentBurstFactorChroma = 1;
	}
}
5738
5739 static void CalculatePixelDeliveryTimes(
5740 unsigned int NumberOfActivePlanes,
5741 double VRatio[],
5742 double VRatioChroma[],
5743 double VRatioPrefetchY[],
5744 double VRatioPrefetchC[],
5745 unsigned int swath_width_luma_ub[],
5746 unsigned int swath_width_chroma_ub[],
5747 unsigned int DPPPerPlane[],
5748 double HRatio[],
5749 double HRatioChroma[],
5750 double PixelClock[],
5751 double PSCL_THROUGHPUT[],
5752 double PSCL_THROUGHPUT_CHROMA[],
5753 double DPPCLK[],
5754 int BytePerPixelC[],
5755 enum scan_direction_class SourceScan[],
5756 unsigned int NumberOfCursors[],
5757 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5758 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5759 unsigned int BlockWidth256BytesY[],
5760 unsigned int BlockHeight256BytesY[],
5761 unsigned int BlockWidth256BytesC[],
5762 unsigned int BlockHeight256BytesC[],
5763 double DisplayPipeLineDeliveryTimeLuma[],
5764 double DisplayPipeLineDeliveryTimeChroma[],
5765 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5766 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5767 double DisplayPipeRequestDeliveryTimeLuma[],
5768 double DisplayPipeRequestDeliveryTimeChroma[],
5769 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5770 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5771 double CursorRequestDeliveryTime[],
5772 double CursorRequestDeliveryTimePrefetch[])
5773 {
5774 double req_per_swath_ub;
5775 int k;
5776
5777 for (k = 0; k < NumberOfActivePlanes; ++k) {
5778 if (VRatio[k] <= 1) {
5779 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5780 } else {
5781 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5782 }
5783
5784 if (BytePerPixelC[k] == 0) {
5785 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5786 } else {
5787 if (VRatioChroma[k] <= 1) {
5788 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5789 } else {
5790 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5791 }
5792 }
5793
5794 if (VRatioPrefetchY[k] <= 1) {
5795 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5796 } else {
5797 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5798 }
5799
5800 if (BytePerPixelC[k] == 0) {
5801 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5802 } else {
5803 if (VRatioPrefetchC[k] <= 1) {
5804 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5805 } else {
5806 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5807 }
5808 }
5809 }
5810
5811 for (k = 0; k < NumberOfActivePlanes; ++k) {
5812 if (SourceScan[k] != dm_vert) {
5813 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5814 } else {
5815 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5816 }
5817 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5818 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5819 if (BytePerPixelC[k] == 0) {
5820 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5821 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5822 } else {
5823 if (SourceScan[k] != dm_vert) {
5824 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5825 } else {
5826 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5827 }
5828 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5829 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5830 }
5831 #ifdef __DML_VBA_DEBUG__
5832 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5833 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5834 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5835 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5836 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5837 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5838 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5839 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5840 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5841 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5842 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5843 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
5844 #endif
5845 }
5846
5847 for (k = 0; k < NumberOfActivePlanes; ++k) {
5848 int cursor_req_per_width;
5849 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
5850 if (NumberOfCursors[k] > 0) {
5851 if (VRatio[k] <= 1) {
5852 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5853 } else {
5854 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5855 }
5856 if (VRatioPrefetchY[k] <= 1) {
5857 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5858 } else {
5859 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5860 }
5861 } else {
5862 CursorRequestDeliveryTime[k] = 0;
5863 CursorRequestDeliveryTimePrefetch[k] = 0;
5864 }
5865 #ifdef __DML_VBA_DEBUG__
5866 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
5867 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
5868 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
5869 #endif
5870 }
5871 }
5872
5873 static void CalculateMetaAndPTETimes(
5874 int NumberOfActivePlanes,
5875 bool GPUVMEnable,
5876 int MetaChunkSize,
5877 int MinMetaChunkSizeBytes,
5878 int HTotal[],
5879 double VRatio[],
5880 double VRatioChroma[],
5881 double DestinationLinesToRequestRowInVBlank[],
5882 double DestinationLinesToRequestRowInImmediateFlip[],
5883 bool DCCEnable[],
5884 double PixelClock[],
5885 int BytePerPixelY[],
5886 int BytePerPixelC[],
5887 enum scan_direction_class SourceScan[],
5888 int dpte_row_height[],
5889 int dpte_row_height_chroma[],
5890 int meta_row_width[],
5891 int meta_row_width_chroma[],
5892 int meta_row_height[],
5893 int meta_row_height_chroma[],
5894 int meta_req_width[],
5895 int meta_req_width_chroma[],
5896 int meta_req_height[],
5897 int meta_req_height_chroma[],
5898 int dpte_group_bytes[],
5899 int PTERequestSizeY[],
5900 int PTERequestSizeC[],
5901 int PixelPTEReqWidthY[],
5902 int PixelPTEReqHeightY[],
5903 int PixelPTEReqWidthC[],
5904 int PixelPTEReqHeightC[],
5905 int dpte_row_width_luma_ub[],
5906 int dpte_row_width_chroma_ub[],
5907 double DST_Y_PER_PTE_ROW_NOM_L[],
5908 double DST_Y_PER_PTE_ROW_NOM_C[],
5909 double DST_Y_PER_META_ROW_NOM_L[],
5910 double DST_Y_PER_META_ROW_NOM_C[],
5911 double TimePerMetaChunkNominal[],
5912 double TimePerChromaMetaChunkNominal[],
5913 double TimePerMetaChunkVBlank[],
5914 double TimePerChromaMetaChunkVBlank[],
5915 double TimePerMetaChunkFlip[],
5916 double TimePerChromaMetaChunkFlip[],
5917 double time_per_pte_group_nom_luma[],
5918 double time_per_pte_group_vblank_luma[],
5919 double time_per_pte_group_flip_luma[],
5920 double time_per_pte_group_nom_chroma[],
5921 double time_per_pte_group_vblank_chroma[],
5922 double time_per_pte_group_flip_chroma[])
5923 {
5924 unsigned int meta_chunk_width;
5925 unsigned int min_meta_chunk_width;
5926 unsigned int meta_chunk_per_row_int;
5927 unsigned int meta_row_remainder;
5928 unsigned int meta_chunk_threshold;
5929 unsigned int meta_chunks_per_row_ub;
5930 unsigned int meta_chunk_width_chroma;
5931 unsigned int min_meta_chunk_width_chroma;
5932 unsigned int meta_chunk_per_row_int_chroma;
5933 unsigned int meta_row_remainder_chroma;
5934 unsigned int meta_chunk_threshold_chroma;
5935 unsigned int meta_chunks_per_row_ub_chroma;
5936 unsigned int dpte_group_width_luma;
5937 unsigned int dpte_groups_per_row_luma_ub;
5938 unsigned int dpte_group_width_chroma;
5939 unsigned int dpte_groups_per_row_chroma_ub;
5940 int k;
5941
5942 for (k = 0; k < NumberOfActivePlanes; ++k) {
5943 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
5944 if (BytePerPixelC[k] == 0) {
5945 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
5946 } else {
5947 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
5948 }
5949 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
5950 if (BytePerPixelC[k] == 0) {
5951 DST_Y_PER_META_ROW_NOM_C[k] = 0;
5952 } else {
5953 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
5954 }
5955 }
5956
5957 for (k = 0; k < NumberOfActivePlanes; ++k) {
5958 if (DCCEnable[k] == true) {
5959 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
5960 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
5961 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
5962 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
5963 if (SourceScan[k] != dm_vert) {
5964 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
5965 } else {
5966 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
5967 }
5968 if (meta_row_remainder <= meta_chunk_threshold) {
5969 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
5970 } else {
5971 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
5972 }
5973 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5974 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5975 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
5976 if (BytePerPixelC[k] == 0) {
5977 TimePerChromaMetaChunkNominal[k] = 0;
5978 TimePerChromaMetaChunkVBlank[k] = 0;
5979 TimePerChromaMetaChunkFlip[k] = 0;
5980 } else {
5981 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5982 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
5983 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
5984 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
5985 if (SourceScan[k] != dm_vert) {
5986 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
5987 } else {
5988 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
5989 }
5990 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
5991 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5992 } else {
5993 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5994 }
5995 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5996 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5997 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5998 }
5999 } else {
6000 TimePerMetaChunkNominal[k] = 0;
6001 TimePerMetaChunkVBlank[k] = 0;
6002 TimePerMetaChunkFlip[k] = 0;
6003 TimePerChromaMetaChunkNominal[k] = 0;
6004 TimePerChromaMetaChunkVBlank[k] = 0;
6005 TimePerChromaMetaChunkFlip[k] = 0;
6006 }
6007 }
6008
6009 for (k = 0; k < NumberOfActivePlanes; ++k) {
6010 if (GPUVMEnable == true) {
6011 if (SourceScan[k] != dm_vert) {
6012 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6013 } else {
6014 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6015 }
6016 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6017 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6018 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6019 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6020 if (BytePerPixelC[k] == 0) {
6021 time_per_pte_group_nom_chroma[k] = 0;
6022 time_per_pte_group_vblank_chroma[k] = 0;
6023 time_per_pte_group_flip_chroma[k] = 0;
6024 } else {
6025 if (SourceScan[k] != dm_vert) {
6026 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6027 } else {
6028 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6029 }
6030 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6031 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6032 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6033 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6034 }
6035 } else {
6036 time_per_pte_group_nom_luma[k] = 0;
6037 time_per_pte_group_vblank_luma[k] = 0;
6038 time_per_pte_group_flip_luma[k] = 0;
6039 time_per_pte_group_nom_chroma[k] = 0;
6040 time_per_pte_group_vblank_chroma[k] = 0;
6041 time_per_pte_group_flip_chroma[k] = 0;
6042 }
6043 }
6044 }
6045
/*
 * CalculateVMGroupAndRequestTimes - per-plane time budget for each VM group
 * and each VM request, in VBlank and in immediate flip.
 *
 * A plane takes part only when GPUVMEnable is set and either DCC is enabled
 * for it or GPUVMMaxPageTableLevels > 1; otherwise its four outputs are 0.
 *
 * For a participating plane, which byte counts are used depends on the mode:
 *   - DCC off:                     dpde0 bytes only
 *   - DCC on, one page table level: meta PTE bytes only
 *   - DCC on, otherwise:           dpde0 + meta PTE bytes, and the group
 *                                  count gets an extra 1 (2 with chroma)
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 *
 *   groups   = sum of ceil(bytes / vm_group_bytes[k])
 *   requests = sum of (bytes / 64), integer division
 *
 * Each output is DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] *
 * HTotal[k] / PixelClock[k] divided by the group or request count, and is
 * halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool HasChroma = BytePerPixelC[k] > 0;
		int GroupsPerStage;
		int RequestsPerStage;

		/* Plane does not use the lower VM stage: nothing to budget. */
		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			GroupsPerStage = HasChroma
					? dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
					: dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestsPerStage = HasChroma
					? dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
					: dpde0_bytes_per_frame_ub_l[k] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			GroupsPerStage = HasChroma
					? dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
					: dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestsPerStage = HasChroma
					? meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64
					: meta_pte_bytes_per_frame_ub_l[k] / 64;
		} else {
			/* Page directory entries and meta PTEs together. */
			GroupsPerStage = HasChroma
					? 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
					: 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			RequestsPerStage = HasChroma
					? dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64
					: dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / GroupsPerStage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / GroupsPerStage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / RequestsPerStage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / RequestsPerStage;

		/* More than two page table levels: every budget is halved. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6145
6146 static void CalculateStutterEfficiency(
6147 struct display_mode_lib *mode_lib,
6148 int CompressedBufferSizeInkByte,
6149 bool UnboundedRequestEnabled,
6150 int ConfigReturnBufferSizeInKByte,
6151 int MetaFIFOSizeInKEntries,
6152 int ZeroSizeBufferEntries,
6153 int NumberOfActivePlanes,
6154 int ROBBufferSizeInKByte,
6155 double TotalDataReadBandwidth,
6156 double DCFCLK,
6157 double ReturnBW,
6158 double COMPBUF_RESERVED_SPACE_64B,
6159 double COMPBUF_RESERVED_SPACE_ZS,
6160 double SRExitTime,
6161 double SRExitZ8Time,
6162 bool SynchronizedVBlank,
6163 double Z8StutterEnterPlusExitWatermark,
6164 double StutterEnterPlusExitWatermark,
6165 bool ProgressiveToInterlaceUnitInOPP,
6166 bool Interlace[],
6167 double MinTTUVBlank[],
6168 int DPPPerPlane[],
6169 unsigned int DETBufferSizeY[],
6170 int BytePerPixelY[],
6171 double BytePerPixelDETY[],
6172 double SwathWidthY[],
6173 int SwathHeightY[],
6174 int SwathHeightC[],
6175 double NetDCCRateLuma[],
6176 double NetDCCRateChroma[],
6177 double DCCFractionOfZeroSizeRequestsLuma[],
6178 double DCCFractionOfZeroSizeRequestsChroma[],
6179 int HTotal[],
6180 int VTotal[],
6181 double PixelClock[],
6182 double VRatio[],
6183 enum scan_direction_class SourceScan[],
6184 int BlockHeight256BytesY[],
6185 int BlockWidth256BytesY[],
6186 int BlockHeight256BytesC[],
6187 int BlockWidth256BytesC[],
6188 int DCCYMaxUncompressedBlock[],
6189 int DCCCMaxUncompressedBlock[],
6190 int VActive[],
6191 bool DCCEnable[],
6192 bool WritebackEnable[],
6193 double ReadBandwidthPlaneLuma[],
6194 double ReadBandwidthPlaneChroma[],
6195 double meta_row_bw[],
6196 double dpte_row_bw[],
6197 double *StutterEfficiencyNotIncludingVBlank,
6198 double *StutterEfficiency,
6199 int *NumberOfStutterBurstsPerFrame,
6200 double *Z8StutterEfficiencyNotIncludingVBlank,
6201 double *Z8StutterEfficiency,
6202 int *Z8NumberOfStutterBurstsPerFrame,
6203 double *StutterPeriod)
6204 {
6205 struct vba_vars_st *v = &mode_lib->vba;
6206
6207 double DETBufferingTimeY;
6208 double SwathWidthYCriticalPlane = 0;
6209 double VActiveTimeCriticalPlane = 0;
6210 double FrameTimeCriticalPlane = 0;
6211 int BytePerPixelYCriticalPlane = 0;
6212 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6213 double MinTTUVBlankCriticalPlane = 0;
6214 double TotalCompressedReadBandwidth;
6215 double TotalRowReadBandwidth;
6216 double AverageDCCCompressionRate;
6217 double EffectiveCompressedBufferSize;
6218 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6219 double StutterBurstTime;
6220 int TotalActiveWriteback;
6221 double LinesInDETY;
6222 double LinesInDETYRoundedDownToSwath;
6223 double MaximumEffectiveCompressionLuma;
6224 double MaximumEffectiveCompressionChroma;
6225 double TotalZeroSizeRequestReadBandwidth;
6226 double TotalZeroSizeCompressedReadBandwidth;
6227 double AverageDCCZeroSizeFraction;
6228 double AverageZeroSizeCompressionRate;
6229 int TotalNumberOfActiveOTG = 0;
6230 double LastStutterPeriod = 0.0;
6231 double LastZ8StutterPeriod = 0.0;
6232 int k;
6233
6234 TotalZeroSizeRequestReadBandwidth = 0;
6235 TotalZeroSizeCompressedReadBandwidth = 0;
6236 TotalRowReadBandwidth = 0;
6237 TotalCompressedReadBandwidth = 0;
6238
6239 for (k = 0; k < NumberOfActivePlanes; ++k) {
6240 if (DCCEnable[k] == true) {
6241 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6242 || DCCYMaxUncompressedBlock[k] < 256) {
6243 MaximumEffectiveCompressionLuma = 2;
6244 } else {
6245 MaximumEffectiveCompressionLuma = 4;
6246 }
6247 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6248 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6249 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6250 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6251 if (ReadBandwidthPlaneChroma[k] > 0) {
6252 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6253 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6254 MaximumEffectiveCompressionChroma = 2;
6255 } else {
6256 MaximumEffectiveCompressionChroma = 4;
6257 }
6258 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6259 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6260 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6261 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6262 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6263 }
6264 } else {
6265 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6266 }
6267 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6268 }
6269
6270 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6271 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6272
6273 #ifdef __DML_VBA_DEBUG__
6274 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6275 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6276 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6277 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6278 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6279 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6280 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6281 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6282 #endif
6283
6284 if (AverageDCCZeroSizeFraction == 1) {
6285 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6286 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6287 } else if (AverageDCCZeroSizeFraction > 0) {
6288 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6289 EffectiveCompressedBufferSize = dml_min(
6290 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6291 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6292 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6293 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6294 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6295 dml_print(
6296 "DML::%s: min 2 = %f\n",
6297 __func__,
6298 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6299 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6300 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6301 } else {
6302 EffectiveCompressedBufferSize = dml_min(
6303 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6304 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6305 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6306 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6307 }
6308
6309 #ifdef __DML_VBA_DEBUG__
6310 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6311 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6312 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6313 #endif
6314
6315 *StutterPeriod = 0;
6316 for (k = 0; k < NumberOfActivePlanes; ++k) {
6317 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6318 / BytePerPixelDETY[k] / SwathWidthY[k];
6319 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6320 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6321 #ifdef __DML_VBA_DEBUG__
6322 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6323 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6324 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6325 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6326 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6327 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6328 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6329 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6330 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6331 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6332 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6333 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6334 #endif
6335
6336 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6337 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6338
6339 *StutterPeriod = DETBufferingTimeY;
6340 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6341 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6342 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6343 SwathWidthYCriticalPlane = SwathWidthY[k];
6344 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6345 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6346
6347 #ifdef __DML_VBA_DEBUG__
6348 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6349 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6350 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6351 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6352 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6353 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6354 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6355 #endif
6356 }
6357 }
6358
6359 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6360 #ifdef __DML_VBA_DEBUG__
6361 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6362 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6363 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6364 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6365 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6366 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6367 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6368 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6369 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6370 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6371 #endif
6372
6373 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6374 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6375 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6376 #ifdef __DML_VBA_DEBUG__
6377 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6378 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6379 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6380 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6381 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6382 #endif
6383 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6384
6385 dml_print(
6386 "DML::%s: Time to finish residue swath=%f\n",
6387 __func__,
6388 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6389
6390 TotalActiveWriteback = 0;
6391 for (k = 0; k < NumberOfActivePlanes; ++k) {
6392 if (WritebackEnable[k]) {
6393 TotalActiveWriteback = TotalActiveWriteback + 1;
6394 }
6395 }
6396
6397 if (TotalActiveWriteback == 0) {
6398 #ifdef __DML_VBA_DEBUG__
6399 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6400 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6401 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6402 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6403 #endif
6404 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6405 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6406 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6407 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6408 } else {
6409 *StutterEfficiencyNotIncludingVBlank = 0.;
6410 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6411 *NumberOfStutterBurstsPerFrame = 0;
6412 *Z8NumberOfStutterBurstsPerFrame = 0;
6413 }
6414 #ifdef __DML_VBA_DEBUG__
6415 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6416 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6417 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6418 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6419 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6420 #endif
6421
6422 for (k = 0; k < NumberOfActivePlanes; ++k) {
6423 if (v->BlendingAndTiming[k] == k) {
6424 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6425 }
6426 }
6427
6428 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6429 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6430
6431 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6432 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6433 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6434 } else {
6435 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6436 }
6437 } else {
6438 *StutterEfficiency = 0;
6439 }
6440
6441 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6442 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6443 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6444 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6445 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6446 } else {
6447 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6448 }
6449 } else {
6450 *Z8StutterEfficiency = 0.;
6451 }
6452
6453 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6454 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6455 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6456 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6457 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6458 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6459 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6460 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6461 }
6462
6463 static void CalculateSwathAndDETConfiguration(
6464 bool ForceSingleDPP,
6465 int NumberOfActivePlanes,
6466 unsigned int DETBufferSizeInKByte,
6467 double MaximumSwathWidthLuma[],
6468 double MaximumSwathWidthChroma[],
6469 enum scan_direction_class SourceScan[],
6470 enum source_format_class SourcePixelFormat[],
6471 enum dm_swizzle_mode SurfaceTiling[],
6472 int ViewportWidth[],
6473 int ViewportHeight[],
6474 int SurfaceWidthY[],
6475 int SurfaceWidthC[],
6476 int SurfaceHeightY[],
6477 int SurfaceHeightC[],
6478 int Read256BytesBlockHeightY[],
6479 int Read256BytesBlockHeightC[],
6480 int Read256BytesBlockWidthY[],
6481 int Read256BytesBlockWidthC[],
6482 enum odm_combine_mode ODMCombineEnabled[],
6483 int BlendingAndTiming[],
6484 int BytePerPixY[],
6485 int BytePerPixC[],
6486 double BytePerPixDETY[],
6487 double BytePerPixDETC[],
6488 int HActive[],
6489 double HRatio[],
6490 double HRatioChroma[],
6491 int DPPPerPlane[],
6492 int swath_width_luma_ub[],
6493 int swath_width_chroma_ub[],
6494 double SwathWidth[],
6495 double SwathWidthChroma[],
6496 int SwathHeightY[],
6497 int SwathHeightC[],
6498 unsigned int DETBufferSizeY[],
6499 unsigned int DETBufferSizeC[],
6500 bool ViewportSizeSupportPerPlane[],
6501 bool *ViewportSizeSupport)
6502 {
6503 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6504 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6505 int MinimumSwathHeightY;
6506 int MinimumSwathHeightC;
6507 int RoundedUpMaxSwathSizeBytesY;
6508 int RoundedUpMaxSwathSizeBytesC;
6509 int RoundedUpMinSwathSizeBytesY;
6510 int RoundedUpMinSwathSizeBytesC;
6511 int RoundedUpSwathSizeBytesY;
6512 int RoundedUpSwathSizeBytesC;
6513 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6514 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6515 int k;
6516
6517 CalculateSwathWidth(
6518 ForceSingleDPP,
6519 NumberOfActivePlanes,
6520 SourcePixelFormat,
6521 SourceScan,
6522 ViewportWidth,
6523 ViewportHeight,
6524 SurfaceWidthY,
6525 SurfaceWidthC,
6526 SurfaceHeightY,
6527 SurfaceHeightC,
6528 ODMCombineEnabled,
6529 BytePerPixY,
6530 BytePerPixC,
6531 Read256BytesBlockHeightY,
6532 Read256BytesBlockHeightC,
6533 Read256BytesBlockWidthY,
6534 Read256BytesBlockWidthC,
6535 BlendingAndTiming,
6536 HActive,
6537 HRatio,
6538 DPPPerPlane,
6539 SwathWidthSingleDPP,
6540 SwathWidthSingleDPPChroma,
6541 SwathWidth,
6542 SwathWidthChroma,
6543 MaximumSwathHeightY,
6544 MaximumSwathHeightC,
6545 swath_width_luma_ub,
6546 swath_width_chroma_ub);
6547
6548 *ViewportSizeSupport = true;
6549 for (k = 0; k < NumberOfActivePlanes; ++k) {
6550 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6551 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6552 if (SurfaceTiling[k] == dm_sw_linear
6553 || (SourcePixelFormat[k] == dm_444_64
6554 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6555 && SourceScan[k] != dm_vert)) {
6556 MinimumSwathHeightY = MaximumSwathHeightY[k];
6557 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6558 MinimumSwathHeightY = MaximumSwathHeightY[k];
6559 } else {
6560 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6561 }
6562 MinimumSwathHeightC = MaximumSwathHeightC[k];
6563 } else {
6564 if (SurfaceTiling[k] == dm_sw_linear) {
6565 MinimumSwathHeightY = MaximumSwathHeightY[k];
6566 MinimumSwathHeightC = MaximumSwathHeightC[k];
6567 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6568 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6569 MinimumSwathHeightC = MaximumSwathHeightC[k];
6570 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6571 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6572 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6573 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6574 MinimumSwathHeightY = MaximumSwathHeightY[k];
6575 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6576 } else {
6577 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6578 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6579 }
6580 }
6581
6582 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6583 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6584 if (SourcePixelFormat[k] == dm_420_10) {
6585 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6586 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6587 }
6588 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6589 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6590 if (SourcePixelFormat[k] == dm_420_10) {
6591 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6592 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6593 }
6594
6595 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6596 SwathHeightY[k] = MaximumSwathHeightY[k];
6597 SwathHeightC[k] = MaximumSwathHeightC[k];
6598 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6599 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6600 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6601 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6602 SwathHeightY[k] = MinimumSwathHeightY;
6603 SwathHeightC[k] = MaximumSwathHeightC[k];
6604 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6605 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6606 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6607 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6608 SwathHeightY[k] = MaximumSwathHeightY[k];
6609 SwathHeightC[k] = MinimumSwathHeightC;
6610 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6611 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6612 } else {
6613 SwathHeightY[k] = MinimumSwathHeightY;
6614 SwathHeightC[k] = MinimumSwathHeightC;
6615 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6616 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6617 }
6618 {
6619 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6620 if (SwathHeightC[k] == 0) {
6621 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6622 DETBufferSizeC[k] = 0;
6623 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6624 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6625 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6626 } else {
6627 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6628 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6629 }
6630
6631 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6632 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6633 *ViewportSizeSupport = false;
6634 ViewportSizeSupportPerPlane[k] = false;
6635 } else {
6636 ViewportSizeSupportPerPlane[k] = true;
6637 }
6638 }
6639 }
6640 }
6641
6642 static void CalculateSwathWidth(
6643 bool ForceSingleDPP,
6644 int NumberOfActivePlanes,
6645 enum source_format_class SourcePixelFormat[],
6646 enum scan_direction_class SourceScan[],
6647 int ViewportWidth[],
6648 int ViewportHeight[],
6649 int SurfaceWidthY[],
6650 int SurfaceWidthC[],
6651 int SurfaceHeightY[],
6652 int SurfaceHeightC[],
6653 enum odm_combine_mode ODMCombineEnabled[],
6654 int BytePerPixY[],
6655 int BytePerPixC[],
6656 int Read256BytesBlockHeightY[],
6657 int Read256BytesBlockHeightC[],
6658 int Read256BytesBlockWidthY[],
6659 int Read256BytesBlockWidthC[],
6660 int BlendingAndTiming[],
6661 int HActive[],
6662 double HRatio[],
6663 int DPPPerPlane[],
6664 double SwathWidthSingleDPPY[],
6665 double SwathWidthSingleDPPC[],
6666 double SwathWidthY[],
6667 double SwathWidthC[],
6668 int MaximumSwathHeightY[],
6669 int MaximumSwathHeightC[],
6670 int swath_width_luma_ub[],
6671 int swath_width_chroma_ub[])
6672 {
6673 enum odm_combine_mode MainPlaneODMCombine;
6674 int j, k;
6675
6676 #ifdef __DML_VBA_DEBUG__
6677 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6678 #endif
6679
6680 for (k = 0; k < NumberOfActivePlanes; ++k) {
6681 if (SourceScan[k] != dm_vert) {
6682 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6683 } else {
6684 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6685 }
6686
6687 #ifdef __DML_VBA_DEBUG__
6688 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6689 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6690 #endif
6691
6692 MainPlaneODMCombine = ODMCombineEnabled[k];
6693 for (j = 0; j < NumberOfActivePlanes; ++j) {
6694 if (BlendingAndTiming[k] == j) {
6695 MainPlaneODMCombine = ODMCombineEnabled[j];
6696 }
6697 }
6698
6699 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6700 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6701 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6702 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6703 } else if (DPPPerPlane[k] == 2) {
6704 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6705 } else {
6706 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6707 }
6708
6709 #ifdef __DML_VBA_DEBUG__
6710 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6711 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6712 #endif
6713
6714 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6715 SwathWidthC[k] = SwathWidthY[k] / 2;
6716 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6717 } else {
6718 SwathWidthC[k] = SwathWidthY[k];
6719 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6720 }
6721
6722 if (ForceSingleDPP == true) {
6723 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6724 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6725 }
6726 {
6727 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6728 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6729
6730 #ifdef __DML_VBA_DEBUG__
6731 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6732 #endif
6733
6734 if (SourceScan[k] != dm_vert) {
6735 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6736 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6737 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6738 if (BytePerPixC[k] > 0) {
6739 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6740
6741 swath_width_chroma_ub[k] = dml_min(
6742 surface_width_ub_c,
6743 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6744 } else {
6745 swath_width_chroma_ub[k] = 0;
6746 }
6747 } else {
6748 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6749 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6750 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6751 if (BytePerPixC[k] > 0) {
6752 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6753
6754 swath_width_chroma_ub[k] = dml_min(
6755 surface_height_ub_c,
6756 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6757 } else {
6758 swath_width_chroma_ub[k] = 0;
6759 }
6760 }
6761 }
6762 }
6763 }
6764
6765 static double CalculateExtraLatency(
6766 int RoundTripPingLatencyCycles,
6767 int ReorderingBytes,
6768 double DCFCLK,
6769 int TotalNumberOfActiveDPP,
6770 int PixelChunkSizeInKByte,
6771 int TotalNumberOfDCCActiveDPP,
6772 int MetaChunkSize,
6773 double ReturnBW,
6774 bool GPUVMEnable,
6775 bool HostVMEnable,
6776 int NumberOfActivePlanes,
6777 int NumberOfDPP[],
6778 int dpte_group_bytes[],
6779 double HostVMInefficiencyFactor,
6780 double HostVMMinPageSize,
6781 int HostVMMaxNonCachedPageTableLevels)
6782 {
6783 double ExtraLatencyBytes;
6784 double ExtraLatency;
6785
6786 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6787 ReorderingBytes,
6788 TotalNumberOfActiveDPP,
6789 PixelChunkSizeInKByte,
6790 TotalNumberOfDCCActiveDPP,
6791 MetaChunkSize,
6792 GPUVMEnable,
6793 HostVMEnable,
6794 NumberOfActivePlanes,
6795 NumberOfDPP,
6796 dpte_group_bytes,
6797 HostVMInefficiencyFactor,
6798 HostVMMinPageSize,
6799 HostVMMaxNonCachedPageTableLevels);
6800
6801 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6802
6803 #ifdef __DML_VBA_DEBUG__
6804 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6805 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6806 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6807 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6808 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6809 #endif
6810
6811 return ExtraLatency;
6812 }
6813
/*
 * CalculateExtraLatencyBytes - number of bytes counted towards extra latency.
 *
 * Base amount:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *
 * When GPUVMEnable is set, each active plane k additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels)
 *   * HostVMInefficiencyFactor
 * where "levels" is 0 unless both GPUVM and HostVM are enabled, in which
 * case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (clamped at 0 for the reduced cases) depending on whether
 * HostVMMinPageSize is below 2048, below 1048576, or larger.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int host_vm_levels = 0;
	int walk_factor;
	double bytes;
	int k;

	bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic only exists with GPUVM enabled. */
	if (GPUVMEnable != true) {
		return bytes;
	}

	if (HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize < 1048576) {
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	walk_factor = 1 + 8 * host_vm_levels;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		bytes = bytes + NumberOfDPP[k] * dpte_group_bytes[k] * walk_factor * HostVMInefficiencyFactor;
	}

	return bytes;
}
6853
/*
 * CalculateUrgentLatency - worst-case urgent latency, optionally adjusted
 * for the fabric clock.
 *
 * Returns the largest of the three input latencies. When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_case = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true) {
		return worst_case;
	}

	return worst_case + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
6871
6872 static void UseMinimumDCFCLK(
6873 struct display_mode_lib *mode_lib,
6874 int MaxPrefetchMode,
6875 int ReorderingBytes)
6876 {
6877 struct vba_vars_st *v = &mode_lib->vba;
6878 int dummy1, i, j, k;
6879 double NormalEfficiency, dummy2, dummy3;
6880 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
6881
6882 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
6883 for (i = 0; i < v->soc.num_states; ++i) {
6884 for (j = 0; j <= 1; ++j) {
6885 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
6886 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
6887 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
6888 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
6889 double MinimumTWait;
6890 double NonDPTEBandwidth;
6891 double DPTEBandwidth;
6892 double DCFCLKRequiredForAverageBandwidth;
6893 double ExtraLatencyBytes;
6894 double ExtraLatencyCycles;
6895 double DCFCLKRequiredForPeakBandwidth;
6896 int NoOfDPPState[DC__NUM_DPP__MAX];
6897 double MinimumTvmPlus2Tr0;
6898
6899 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6900 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6901 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6902 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
6903 }
6904
6905 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6906 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6907 }
6908
6909 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6910 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
6911 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6912 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6913 DCFCLKRequiredForAverageBandwidth = dml_max3(
6914 v->ProjectedDCFCLKDeepSleep[i][j],
6915 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
6916 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6917 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6918
6919 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6920 ReorderingBytes,
6921 v->TotalNumberOfActiveDPP[i][j],
6922 v->PixelChunkSizeInKByte,
6923 v->TotalNumberOfDCCActiveDPP[i][j],
6924 v->MetaChunkSize,
6925 v->GPUVMEnable,
6926 v->HostVMEnable,
6927 v->NumberOfActivePlanes,
6928 NoOfDPPState,
6929 v->dpte_group_bytes,
6930 1,
6931 v->HostVMMinPageSize,
6932 v->HostVMMaxNonCachedPageTableLevels);
6933 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
6934 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6935 double DCFCLKCyclesRequiredInPrefetch;
6936 double ExpectedPrefetchBWAcceleration;
6937 double PrefetchTime;
6938
6939 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
6940 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
6941 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
6942 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
6943 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
6944 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
6945 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
6946 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
6947 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
6948 DynamicMetadataVMExtraLatency[k] =
6949 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
6950 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
6951 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
6952 - v->UrgLatency[i]
6953 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
6954 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
6955 - DynamicMetadataVMExtraLatency[k];
6956
6957 if (PrefetchTime > 0) {
6958 double ExpectedVRatioPrefetch;
6959 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
6960 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
6961 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
6962 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
6963 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
6964 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
6965 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
6966 }
6967 } else {
6968 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
6969 }
6970 if (v->DynamicMetadataEnable[k] == true) {
6971 double TSetupPipe;
6972 double TdmbfPipe;
6973 double TdmsksPipe;
6974 double TdmecPipe;
6975 double AllowedTimeForUrgentExtraLatency;
6976
6977 CalculateVupdateAndDynamicMetadataParameters(
6978 v->MaxInterDCNTileRepeaters,
6979 v->RequiredDPPCLK[i][j][k],
6980 v->RequiredDISPCLK[i][j],
6981 v->ProjectedDCFCLKDeepSleep[i][j],
6982 v->PixelClock[k],
6983 v->HTotal[k],
6984 v->VTotal[k] - v->VActive[k],
6985 v->DynamicMetadataTransmittedBytes[k],
6986 v->DynamicMetadataLinesBeforeActiveRequired[k],
6987 v->Interlace[k],
6988 v->ProgressiveToInterlaceUnitInOPP,
6989 &TSetupPipe,
6990 &TdmbfPipe,
6991 &TdmecPipe,
6992 &TdmsksPipe,
6993 &dummy1,
6994 &dummy2,
6995 &dummy3);
6996 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
6997 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
6998 if (AllowedTimeForUrgentExtraLatency > 0) {
6999 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7000 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7001 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7002 } else {
7003 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7004 }
7005 }
7006 }
7007 DCFCLKRequiredForPeakBandwidth = 0;
7008 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7009 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7010 }
7011 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7012 * (v->GPUVMEnable == true ?
7013 (v->HostVMEnable == true ?
7014 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7015 0);
7016 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7017 double MaximumTvmPlus2Tr0PlusTsw;
7018 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7019 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7020 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7021 } else {
7022 DCFCLKRequiredForPeakBandwidth = dml_max3(
7023 DCFCLKRequiredForPeakBandwidth,
7024 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7025 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7026 }
7027 }
7028 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7029 }
7030 }
7031 }
7032
7033 static void CalculateUnboundedRequestAndCompressedBufferSize(
7034 unsigned int DETBufferSizeInKByte,
7035 int ConfigReturnBufferSizeInKByte,
7036 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7037 int TotalActiveDPP,
7038 bool NoChromaPlanes,
7039 int MaxNumDPP,
7040 int CompressedBufferSegmentSizeInkByteFinal,
7041 enum output_encoder_class *Output,
7042 bool *UnboundedRequestEnabled,
7043 int *CompressedBufferSizeInkByte)
7044 {
7045 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7046
7047 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7048 *CompressedBufferSizeInkByte = (
7049 *UnboundedRequestEnabled == true ?
7050 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7051 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7052 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7053
7054 #ifdef __DML_VBA_DEBUG__
7055 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7056 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7057 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7058 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7059 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7060 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7061 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7062 #endif
7063 }
7064
7065 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7066 {
7067 bool ret_val = false;
7068
7069 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7070 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7071 ret_val = false;
7072 }
7073 return (ret_val);
7074 }
7075
7076