1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "../dcn30/display_mode_vba_30.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_15_MAX_DET_SIZE 384

// For DML-C changes that haven't been propagated to VBA yet.
// Uncommenting this selects the code paths guarded by
// "#ifdef __DML_VBA_ALLOW_DELTA__" further down in this file.
//#define __DML_VBA_ALLOW_DELTA__

// Move these to IP parameters/constants

// The vstartup value at which DML starts trying whether the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__ 9

// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15
61
62 typedef struct {
63 double DPPCLK;
64 double DISPCLK;
65 double PixelClock;
66 double DCFCLKDeepSleep;
67 unsigned int DPPPerPlane;
68 bool ScalerEnabled;
69 double VRatio;
70 double VRatioChroma;
71 enum scan_direction_class SourceScan;
72 unsigned int BlockWidth256BytesY;
73 unsigned int BlockHeight256BytesY;
74 unsigned int BlockWidth256BytesC;
75 unsigned int BlockHeight256BytesC;
76 unsigned int InterlaceEnable;
77 unsigned int NumberOfCursors;
78 unsigned int VBlank;
79 unsigned int HTotal;
80 unsigned int DCCEnable;
81 bool ODMCombineIsEnabled;
82 enum source_format_class SourcePixelFormat;
83 int BytePerPixelY;
84 int BytePerPixelC;
85 bool ProgressiveToInterlaceUnitInOPP;
86 } Pipe;
87
88 #define BPP_INVALID 0
89 #define BPP_BLENDED_PIPE 0xffffffff
90
91 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
92 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
93 static unsigned int dscceComputeDelay(
94 unsigned int bpc,
95 double BPP,
96 unsigned int sliceWidth,
97 unsigned int numSlices,
98 enum output_format_class pixelFormat,
99 enum output_encoder_class Output);
100 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
101 static bool CalculatePrefetchSchedule(
102 struct display_mode_lib *mode_lib,
103 double HostVMInefficiencyFactor,
104 Pipe *myPipe,
105 unsigned int DSCDelay,
106 double DPPCLKDelaySubtotalPlusCNVCFormater,
107 double DPPCLKDelaySCL,
108 double DPPCLKDelaySCLLBOnly,
109 double DPPCLKDelayCNVCCursor,
110 double DISPCLKDelaySubtotal,
111 unsigned int DPP_RECOUT_WIDTH,
112 enum output_format_class OutputFormat,
113 unsigned int MaxInterDCNTileRepeaters,
114 unsigned int VStartup,
115 unsigned int MaxVStartup,
116 unsigned int GPUVMPageTableLevels,
117 bool GPUVMEnable,
118 bool HostVMEnable,
119 unsigned int HostVMMaxNonCachedPageTableLevels,
120 double HostVMMinPageSize,
121 bool DynamicMetadataEnable,
122 bool DynamicMetadataVMEnabled,
123 int DynamicMetadataLinesBeforeActiveRequired,
124 unsigned int DynamicMetadataTransmittedBytes,
125 double UrgentLatency,
126 double UrgentExtraLatency,
127 double TCalc,
128 unsigned int PDEAndMetaPTEBytesFrame,
129 unsigned int MetaRowByte,
130 unsigned int PixelPTEBytesPerRow,
131 double PrefetchSourceLinesY,
132 unsigned int SwathWidthY,
133 double VInitPreFillY,
134 unsigned int MaxNumSwathY,
135 double PrefetchSourceLinesC,
136 unsigned int SwathWidthC,
137 double VInitPreFillC,
138 unsigned int MaxNumSwathC,
139 int swath_width_luma_ub,
140 int swath_width_chroma_ub,
141 unsigned int SwathHeightY,
142 unsigned int SwathHeightC,
143 double TWait,
144 double *DSTXAfterScaler,
145 double *DSTYAfterScaler,
146 double *DestinationLinesForPrefetch,
147 double *PrefetchBandwidth,
148 double *DestinationLinesToRequestVMInVBlank,
149 double *DestinationLinesToRequestRowInVBlank,
150 double *VRatioPrefetchY,
151 double *VRatioPrefetchC,
152 double *RequiredPrefetchPixDataBWLuma,
153 double *RequiredPrefetchPixDataBWChroma,
154 bool *NotEnoughTimeForDynamicMetadata,
155 double *Tno_bw,
156 double *prefetch_vmrow_bw,
157 double *Tdmdl_vm,
158 double *Tdmdl,
159 double *TSetup,
160 int *VUpdateOffsetPix,
161 double *VUpdateWidthPix,
162 double *VReadyOffsetPix);
163 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
164 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
165 static void CalculateDCCConfiguration(
166 bool DCCEnabled,
167 bool DCCProgrammingAssumesScanDirectionUnknown,
168 enum source_format_class SourcePixelFormat,
169 unsigned int SurfaceWidthLuma,
170 unsigned int SurfaceWidthChroma,
171 unsigned int SurfaceHeightLuma,
172 unsigned int SurfaceHeightChroma,
173 double DETBufferSize,
174 unsigned int RequestHeight256ByteLuma,
175 unsigned int RequestHeight256ByteChroma,
176 enum dm_swizzle_mode TilingFormat,
177 unsigned int BytePerPixelY,
178 unsigned int BytePerPixelC,
179 double BytePerPixelDETY,
180 double BytePerPixelDETC,
181 enum scan_direction_class ScanOrientation,
182 unsigned int *MaxUncompressedBlockLuma,
183 unsigned int *MaxUncompressedBlockChroma,
184 unsigned int *MaxCompressedBlockLuma,
185 unsigned int *MaxCompressedBlockChroma,
186 unsigned int *IndependentBlockLuma,
187 unsigned int *IndependentBlockChroma);
188 static double CalculatePrefetchSourceLines(
189 struct display_mode_lib *mode_lib,
190 double VRatio,
191 double vtaps,
192 bool Interlace,
193 bool ProgressiveToInterlaceUnitInOPP,
194 unsigned int SwathHeight,
195 unsigned int ViewportYStart,
196 double *VInitPreFill,
197 unsigned int *MaxNumSwath);
198 static unsigned int CalculateVMAndRowBytes(
199 struct display_mode_lib *mode_lib,
200 bool DCCEnable,
201 unsigned int BlockHeight256Bytes,
202 unsigned int BlockWidth256Bytes,
203 enum source_format_class SourcePixelFormat,
204 unsigned int SurfaceTiling,
205 unsigned int BytePerPixel,
206 enum scan_direction_class ScanDirection,
207 unsigned int SwathWidth,
208 unsigned int ViewportHeight,
209 bool GPUVMEnable,
210 bool HostVMEnable,
211 unsigned int HostVMMaxNonCachedPageTableLevels,
212 unsigned int GPUVMMinPageSize,
213 unsigned int HostVMMinPageSize,
214 unsigned int PTEBufferSizeInRequests,
215 unsigned int Pitch,
216 unsigned int DCCMetaPitch,
217 unsigned int *MacroTileWidth,
218 unsigned int *MetaRowByte,
219 unsigned int *PixelPTEBytesPerRow,
220 bool *PTEBufferSizeNotExceeded,
221 int *dpte_row_width_ub,
222 unsigned int *dpte_row_height,
223 unsigned int *MetaRequestWidth,
224 unsigned int *MetaRequestHeight,
225 unsigned int *meta_row_width,
226 unsigned int *meta_row_height,
227 int *vm_group_bytes,
228 unsigned int *dpte_group_bytes,
229 unsigned int *PixelPTEReqWidth,
230 unsigned int *PixelPTEReqHeight,
231 unsigned int *PTERequestSize,
232 int *DPDE0BytesFrame,
233 int *MetaPTEBytesFrame);
234 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
235 static void CalculateRowBandwidth(
236 bool GPUVMEnable,
237 enum source_format_class SourcePixelFormat,
238 double VRatio,
239 double VRatioChroma,
240 bool DCCEnable,
241 double LineTime,
242 unsigned int MetaRowByteLuma,
243 unsigned int MetaRowByteChroma,
244 unsigned int meta_row_height_luma,
245 unsigned int meta_row_height_chroma,
246 unsigned int PixelPTEBytesPerRowLuma,
247 unsigned int PixelPTEBytesPerRowChroma,
248 unsigned int dpte_row_height_luma,
249 unsigned int dpte_row_height_chroma,
250 double *meta_row_bw,
251 double *dpte_row_bw);
252
253 static void CalculateFlipSchedule(
254 struct display_mode_lib *mode_lib,
255 unsigned int k,
256 double HostVMInefficiencyFactor,
257 double UrgentExtraLatency,
258 double UrgentLatency,
259 double PDEAndMetaPTEBytesPerFrame,
260 double MetaRowBytes,
261 double DPTEBytesPerRow);
262 static double CalculateWriteBackDelay(
263 enum source_format_class WritebackPixelFormat,
264 double WritebackHRatio,
265 double WritebackVRatio,
266 unsigned int WritebackVTaps,
267 int WritebackDestinationWidth,
268 int WritebackDestinationHeight,
269 int WritebackSourceHeight,
270 unsigned int HTotal);
271
272 static void CalculateVupdateAndDynamicMetadataParameters(
273 int MaxInterDCNTileRepeaters,
274 double DPPCLK,
275 double DISPCLK,
276 double DCFClkDeepSleep,
277 double PixelClock,
278 int HTotal,
279 int VBlank,
280 int DynamicMetadataTransmittedBytes,
281 int DynamicMetadataLinesBeforeActiveRequired,
282 int InterlaceEnable,
283 bool ProgressiveToInterlaceUnitInOPP,
284 double *TSetup,
285 double *Tdmbf,
286 double *Tdmec,
287 double *Tdmsks,
288 int *VUpdateOffsetPix,
289 double *VUpdateWidthPix,
290 double *VReadyOffsetPix);
291
292 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
293 struct display_mode_lib *mode_lib,
294 unsigned int PrefetchMode,
295 double DCFCLK,
296 double ReturnBW,
297 double UrgentLatency,
298 double ExtraLatency,
299 double SOCCLK,
300 double DCFCLKDeepSleep,
301 unsigned int DETBufferSizeY[],
302 unsigned int DETBufferSizeC[],
303 unsigned int SwathHeightY[],
304 unsigned int SwathHeightC[],
305 double SwathWidthY[],
306 double SwathWidthC[],
307 unsigned int DPPPerPlane[],
308 double BytePerPixelDETY[],
309 double BytePerPixelDETC[],
310 bool UnboundedRequestEnabled,
311 int unsigned CompressedBufferSizeInkByte,
312 enum clock_change_support *DRAMClockChangeSupport,
313 double *StutterExitWatermark,
314 double *StutterEnterPlusExitWatermark,
315 double *Z8StutterExitWatermark,
316 double *Z8StutterEnterPlusExitWatermark);
317
318 static void CalculateDCFCLKDeepSleep(
319 struct display_mode_lib *mode_lib,
320 unsigned int NumberOfActivePlanes,
321 int BytePerPixelY[],
322 int BytePerPixelC[],
323 double VRatio[],
324 double VRatioChroma[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 unsigned int DPPPerPlane[],
328 double HRatio[],
329 double HRatioChroma[],
330 double PixelClock[],
331 double PSCL_THROUGHPUT[],
332 double PSCL_THROUGHPUT_CHROMA[],
333 double DPPCLK[],
334 double ReadBandwidthLuma[],
335 double ReadBandwidthChroma[],
336 int ReturnBusWidth,
337 double *DCFCLKDeepSleep);
338
339 static void CalculateUrgentBurstFactor(
340 int swath_width_luma_ub,
341 int swath_width_chroma_ub,
342 unsigned int SwathHeightY,
343 unsigned int SwathHeightC,
344 double LineTime,
345 double UrgentLatency,
346 double CursorBufferSize,
347 unsigned int CursorWidth,
348 unsigned int CursorBPP,
349 double VRatio,
350 double VRatioC,
351 double BytePerPixelInDETY,
352 double BytePerPixelInDETC,
353 double DETBufferSizeY,
354 double DETBufferSizeC,
355 double *UrgentBurstFactorCursor,
356 double *UrgentBurstFactorLuma,
357 double *UrgentBurstFactorChroma,
358 bool *NotEnoughUrgentLatencyHiding);
359
360 static void UseMinimumDCFCLK(
361 struct display_mode_lib *mode_lib,
362 int MaxPrefetchMode,
363 int ReorderingBytes);
364
365 static void CalculatePixelDeliveryTimes(
366 unsigned int NumberOfActivePlanes,
367 double VRatio[],
368 double VRatioChroma[],
369 double VRatioPrefetchY[],
370 double VRatioPrefetchC[],
371 unsigned int swath_width_luma_ub[],
372 unsigned int swath_width_chroma_ub[],
373 unsigned int DPPPerPlane[],
374 double HRatio[],
375 double HRatioChroma[],
376 double PixelClock[],
377 double PSCL_THROUGHPUT[],
378 double PSCL_THROUGHPUT_CHROMA[],
379 double DPPCLK[],
380 int BytePerPixelC[],
381 enum scan_direction_class SourceScan[],
382 unsigned int NumberOfCursors[],
383 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
384 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
385 unsigned int BlockWidth256BytesY[],
386 unsigned int BlockHeight256BytesY[],
387 unsigned int BlockWidth256BytesC[],
388 unsigned int BlockHeight256BytesC[],
389 double DisplayPipeLineDeliveryTimeLuma[],
390 double DisplayPipeLineDeliveryTimeChroma[],
391 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
392 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
393 double DisplayPipeRequestDeliveryTimeLuma[],
394 double DisplayPipeRequestDeliveryTimeChroma[],
395 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
396 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
397 double CursorRequestDeliveryTime[],
398 double CursorRequestDeliveryTimePrefetch[]);
399
400 static void CalculateMetaAndPTETimes(
401 int NumberOfActivePlanes,
402 bool GPUVMEnable,
403 int MetaChunkSize,
404 int MinMetaChunkSizeBytes,
405 int HTotal[],
406 double VRatio[],
407 double VRatioChroma[],
408 double DestinationLinesToRequestRowInVBlank[],
409 double DestinationLinesToRequestRowInImmediateFlip[],
410 bool DCCEnable[],
411 double PixelClock[],
412 int BytePerPixelY[],
413 int BytePerPixelC[],
414 enum scan_direction_class SourceScan[],
415 int dpte_row_height[],
416 int dpte_row_height_chroma[],
417 int meta_row_width[],
418 int meta_row_width_chroma[],
419 int meta_row_height[],
420 int meta_row_height_chroma[],
421 int meta_req_width[],
422 int meta_req_width_chroma[],
423 int meta_req_height[],
424 int meta_req_height_chroma[],
425 int dpte_group_bytes[],
426 int PTERequestSizeY[],
427 int PTERequestSizeC[],
428 int PixelPTEReqWidthY[],
429 int PixelPTEReqHeightY[],
430 int PixelPTEReqWidthC[],
431 int PixelPTEReqHeightC[],
432 int dpte_row_width_luma_ub[],
433 int dpte_row_width_chroma_ub[],
434 double DST_Y_PER_PTE_ROW_NOM_L[],
435 double DST_Y_PER_PTE_ROW_NOM_C[],
436 double DST_Y_PER_META_ROW_NOM_L[],
437 double DST_Y_PER_META_ROW_NOM_C[],
438 double TimePerMetaChunkNominal[],
439 double TimePerChromaMetaChunkNominal[],
440 double TimePerMetaChunkVBlank[],
441 double TimePerChromaMetaChunkVBlank[],
442 double TimePerMetaChunkFlip[],
443 double TimePerChromaMetaChunkFlip[],
444 double time_per_pte_group_nom_luma[],
445 double time_per_pte_group_vblank_luma[],
446 double time_per_pte_group_flip_luma[],
447 double time_per_pte_group_nom_chroma[],
448 double time_per_pte_group_vblank_chroma[],
449 double time_per_pte_group_flip_chroma[]);
450
451 static void CalculateVMGroupAndRequestTimes(
452 unsigned int NumberOfActivePlanes,
453 bool GPUVMEnable,
454 unsigned int GPUVMMaxPageTableLevels,
455 unsigned int HTotal[],
456 int BytePerPixelC[],
457 double DestinationLinesToRequestVMInVBlank[],
458 double DestinationLinesToRequestVMInImmediateFlip[],
459 bool DCCEnable[],
460 double PixelClock[],
461 int dpte_row_width_luma_ub[],
462 int dpte_row_width_chroma_ub[],
463 int vm_group_bytes[],
464 unsigned int dpde0_bytes_per_frame_ub_l[],
465 unsigned int dpde0_bytes_per_frame_ub_c[],
466 int meta_pte_bytes_per_frame_ub_l[],
467 int meta_pte_bytes_per_frame_ub_c[],
468 double TimePerVMGroupVBlank[],
469 double TimePerVMGroupFlip[],
470 double TimePerVMRequestVBlank[],
471 double TimePerVMRequestFlip[]);
472
473 static void CalculateStutterEfficiency(
474 struct display_mode_lib *mode_lib,
475 int CompressedBufferSizeInkByte,
476 bool UnboundedRequestEnabled,
477 int ConfigReturnBufferSizeInKByte,
478 int MetaFIFOSizeInKEntries,
479 int ZeroSizeBufferEntries,
480 int NumberOfActivePlanes,
481 int ROBBufferSizeInKByte,
482 double TotalDataReadBandwidth,
483 double DCFCLK,
484 double ReturnBW,
485 double COMPBUF_RESERVED_SPACE_64B,
486 double COMPBUF_RESERVED_SPACE_ZS,
487 double SRExitTime,
488 double SRExitZ8Time,
489 bool SynchronizedVBlank,
490 double Z8StutterEnterPlusExitWatermark,
491 double StutterEnterPlusExitWatermark,
492 bool ProgressiveToInterlaceUnitInOPP,
493 bool Interlace[],
494 double MinTTUVBlank[],
495 int DPPPerPlane[],
496 unsigned int DETBufferSizeY[],
497 int BytePerPixelY[],
498 double BytePerPixelDETY[],
499 double SwathWidthY[],
500 int SwathHeightY[],
501 int SwathHeightC[],
502 double NetDCCRateLuma[],
503 double NetDCCRateChroma[],
504 double DCCFractionOfZeroSizeRequestsLuma[],
505 double DCCFractionOfZeroSizeRequestsChroma[],
506 int HTotal[],
507 int VTotal[],
508 double PixelClock[],
509 double VRatio[],
510 enum scan_direction_class SourceScan[],
511 int BlockHeight256BytesY[],
512 int BlockWidth256BytesY[],
513 int BlockHeight256BytesC[],
514 int BlockWidth256BytesC[],
515 int DCCYMaxUncompressedBlock[],
516 int DCCCMaxUncompressedBlock[],
517 int VActive[],
518 bool DCCEnable[],
519 bool WritebackEnable[],
520 double ReadBandwidthPlaneLuma[],
521 double ReadBandwidthPlaneChroma[],
522 double meta_row_bw[],
523 double dpte_row_bw[],
524 double *StutterEfficiencyNotIncludingVBlank,
525 double *StutterEfficiency,
526 int *NumberOfStutterBurstsPerFrame,
527 double *Z8StutterEfficiencyNotIncludingVBlank,
528 double *Z8StutterEfficiency,
529 int *Z8NumberOfStutterBurstsPerFrame,
530 double *StutterPeriod);
531
532 static void CalculateSwathAndDETConfiguration(
533 bool ForceSingleDPP,
534 int NumberOfActivePlanes,
535 bool DETSharedByAllDPP,
536 unsigned int DETBufferSizeInKByte[],
537 double MaximumSwathWidthLuma[],
538 double MaximumSwathWidthChroma[],
539 enum scan_direction_class SourceScan[],
540 enum source_format_class SourcePixelFormat[],
541 enum dm_swizzle_mode SurfaceTiling[],
542 int ViewportWidth[],
543 int ViewportHeight[],
544 int SurfaceWidthY[],
545 int SurfaceWidthC[],
546 int SurfaceHeightY[],
547 int SurfaceHeightC[],
548 int Read256BytesBlockHeightY[],
549 int Read256BytesBlockHeightC[],
550 int Read256BytesBlockWidthY[],
551 int Read256BytesBlockWidthC[],
552 enum odm_combine_mode ODMCombineEnabled[],
553 int BlendingAndTiming[],
554 int BytePerPixY[],
555 int BytePerPixC[],
556 double BytePerPixDETY[],
557 double BytePerPixDETC[],
558 int HActive[],
559 double HRatio[],
560 double HRatioChroma[],
561 int DPPPerPlane[],
562 int swath_width_luma_ub[],
563 int swath_width_chroma_ub[],
564 double SwathWidth[],
565 double SwathWidthChroma[],
566 int SwathHeightY[],
567 int SwathHeightC[],
568 unsigned int DETBufferSizeY[],
569 unsigned int DETBufferSizeC[],
570 bool ViewportSizeSupportPerPlane[],
571 bool *ViewportSizeSupport);
572 static void CalculateSwathWidth(
573 bool ForceSingleDPP,
574 int NumberOfActivePlanes,
575 enum source_format_class SourcePixelFormat[],
576 enum scan_direction_class SourceScan[],
577 int ViewportWidth[],
578 int ViewportHeight[],
579 int SurfaceWidthY[],
580 int SurfaceWidthC[],
581 int SurfaceHeightY[],
582 int SurfaceHeightC[],
583 enum odm_combine_mode ODMCombineEnabled[],
584 int BytePerPixY[],
585 int BytePerPixC[],
586 int Read256BytesBlockHeightY[],
587 int Read256BytesBlockHeightC[],
588 int Read256BytesBlockWidthY[],
589 int Read256BytesBlockWidthC[],
590 int BlendingAndTiming[],
591 int HActive[],
592 double HRatio[],
593 int DPPPerPlane[],
594 double SwathWidthSingleDPPY[],
595 double SwathWidthSingleDPPC[],
596 double SwathWidthY[],
597 double SwathWidthC[],
598 int MaximumSwathHeightY[],
599 int MaximumSwathHeightC[],
600 int swath_width_luma_ub[],
601 int swath_width_chroma_ub[]);
602
603 static double CalculateExtraLatency(
604 int RoundTripPingLatencyCycles,
605 int ReorderingBytes,
606 double DCFCLK,
607 int TotalNumberOfActiveDPP,
608 int PixelChunkSizeInKByte,
609 int TotalNumberOfDCCActiveDPP,
610 int MetaChunkSize,
611 double ReturnBW,
612 bool GPUVMEnable,
613 bool HostVMEnable,
614 int NumberOfActivePlanes,
615 int NumberOfDPP[],
616 int dpte_group_bytes[],
617 double HostVMInefficiencyFactor,
618 double HostVMMinPageSize,
619 int HostVMMaxNonCachedPageTableLevels);
620
621 static double CalculateExtraLatencyBytes(
622 int ReorderingBytes,
623 int TotalNumberOfActiveDPP,
624 int PixelChunkSizeInKByte,
625 int TotalNumberOfDCCActiveDPP,
626 int MetaChunkSize,
627 bool GPUVMEnable,
628 bool HostVMEnable,
629 int NumberOfActivePlanes,
630 int NumberOfDPP[],
631 int dpte_group_bytes[],
632 double HostVMInefficiencyFactor,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635
636 static double CalculateUrgentLatency(
637 double UrgentLatencyPixelDataOnly,
638 double UrgentLatencyPixelMixedWithVMData,
639 double UrgentLatencyVMDataOnly,
640 bool DoUrgentLatencyAdjustment,
641 double UrgentLatencyAdjustmentFabricClockComponent,
642 double UrgentLatencyAdjustmentFabricClockReference,
643 double FabricClockSingle);
644
645 static void CalculateUnboundedRequestAndCompressedBufferSize(
646 unsigned int DETBufferSizeInKByte,
647 int ConfigReturnBufferSizeInKByte,
648 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
649 int TotalActiveDPP,
650 bool NoChromaPlanes,
651 int MaxNumDPP,
652 int CompressedBufferSegmentSizeInkByteFinal,
653 enum output_encoder_class *Output,
654 bool *UnboundedRequestEnabled,
655 int *CompressedBufferSizeInkByte);
656
657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
658
/*
 * dml31_recalculate - run the DCN3.1 display mode recalculation sequence.
 * @mode_lib: display mode library context, passed unchanged to every stage.
 *
 * Calls, in this exact order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * A trace line is printed before the last stage when __DML_VBA_DEBUG__ is
 * defined. Nothing is returned; the stages only receive mode_lib.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
669
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)670 static unsigned int dscceComputeDelay(
671 unsigned int bpc,
672 double BPP,
673 unsigned int sliceWidth,
674 unsigned int numSlices,
675 enum output_format_class pixelFormat,
676 enum output_encoder_class Output)
677 {
678 // valid bpc = source bits per component in the set of {8, 10, 12}
679 // valid bpp = increments of 1/16 of a bit
680 // min = 6/7/8 in N420/N422/444, respectively
681 // max = such that compression is 1:1
682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
685
686 // fixed value
687 unsigned int rcModelSize = 8192;
688
689 // N422/N420 operate at 2 pixels per clock
690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
691
692 if (pixelFormat == dm_420)
693 pixelsPerClock = 2;
694 else if (pixelFormat == dm_444)
695 pixelsPerClock = 1;
696 else if (pixelFormat == dm_n422)
697 pixelsPerClock = 2;
698 // #all other modes operate at 1 pixel per clock
699 else
700 pixelsPerClock = 1;
701
702 //initial transmit delay as per PPS
703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
704
705 //compute ssm delay
706 if (bpc == 8)
707 D = 81;
708 else if (bpc == 10)
709 D = 89;
710 else
711 D = 113;
712
713 //divide by pixel per cycle to compute slice width as seen by DSC
714 w = sliceWidth / pixelsPerClock;
715
716 //422 mode has an additional cycle of delay
717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
718 s = 0;
719 else
720 s = 1;
721
722 //main calculation for the dscce
723 ix = initalXmitDelay + 45;
724 wx = (w + 2) / 3;
725 P = 3 * wx - w;
726 l0 = ix / w;
727 a = ix + P * l0;
728 ax = (a + 2) / 3 + D + 6 + 1;
729 L = (ax + wx - 1) / wx;
730 if ((ix % w) == 0 && P != 0)
731 lstall = 1;
732 else
733 lstall = 0;
734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735
736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
737 pixels = Delay * 3 * pixelsPerClock;
738 return pixels;
739 }
740
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 {
743 unsigned int Delay = 0;
744
745 if (pixelFormat == dm_420) {
746 // sfr
747 Delay = Delay + 2;
748 // dsccif
749 Delay = Delay + 0;
750 // dscc - input deserializer
751 Delay = Delay + 3;
752 // dscc gets pixels every other cycle
753 Delay = Delay + 2;
754 // dscc - input cdc fifo
755 Delay = Delay + 12;
756 // dscc gets pixels every other cycle
757 Delay = Delay + 13;
758 // dscc - cdc uncertainty
759 Delay = Delay + 2;
760 // dscc - output cdc fifo
761 Delay = Delay + 7;
762 // dscc gets pixels every other cycle
763 Delay = Delay + 3;
764 // dscc - cdc uncertainty
765 Delay = Delay + 2;
766 // dscc - output serializer
767 Delay = Delay + 1;
768 // sft
769 Delay = Delay + 1;
770 } else if (pixelFormat == dm_n422) {
771 // sfr
772 Delay = Delay + 2;
773 // dsccif
774 Delay = Delay + 1;
775 // dscc - input deserializer
776 Delay = Delay + 5;
777 // dscc - input cdc fifo
778 Delay = Delay + 25;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 10;
783 // dscc - cdc uncertainty
784 Delay = Delay + 2;
785 // dscc - output serializer
786 Delay = Delay + 1;
787 // sft
788 Delay = Delay + 1;
789 } else {
790 // sfr
791 Delay = Delay + 2;
792 // dsccif
793 Delay = Delay + 0;
794 // dscc - input deserializer
795 Delay = Delay + 3;
796 // dscc - input cdc fifo
797 Delay = Delay + 12;
798 // dscc - cdc uncertainty
799 Delay = Delay + 2;
800 // dscc - output cdc fifo
801 Delay = Delay + 7;
802 // dscc - output serializer
803 Delay = Delay + 1;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // sft
807 Delay = Delay + 1;
808 }
809
810 return Delay;
811 }
812
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)813 static bool CalculatePrefetchSchedule(
814 struct display_mode_lib *mode_lib,
815 double HostVMInefficiencyFactor,
816 Pipe *myPipe,
817 unsigned int DSCDelay,
818 double DPPCLKDelaySubtotalPlusCNVCFormater,
819 double DPPCLKDelaySCL,
820 double DPPCLKDelaySCLLBOnly,
821 double DPPCLKDelayCNVCCursor,
822 double DISPCLKDelaySubtotal,
823 unsigned int DPP_RECOUT_WIDTH,
824 enum output_format_class OutputFormat,
825 unsigned int MaxInterDCNTileRepeaters,
826 unsigned int VStartup,
827 unsigned int MaxVStartup,
828 unsigned int GPUVMPageTableLevels,
829 bool GPUVMEnable,
830 bool HostVMEnable,
831 unsigned int HostVMMaxNonCachedPageTableLevels,
832 double HostVMMinPageSize,
833 bool DynamicMetadataEnable,
834 bool DynamicMetadataVMEnabled,
835 int DynamicMetadataLinesBeforeActiveRequired,
836 unsigned int DynamicMetadataTransmittedBytes,
837 double UrgentLatency,
838 double UrgentExtraLatency,
839 double TCalc,
840 unsigned int PDEAndMetaPTEBytesFrame,
841 unsigned int MetaRowByte,
842 unsigned int PixelPTEBytesPerRow,
843 double PrefetchSourceLinesY,
844 unsigned int SwathWidthY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 double VInitPreFillC,
850 unsigned int MaxNumSwathC,
851 int swath_width_luma_ub,
852 int swath_width_chroma_ub,
853 unsigned int SwathHeightY,
854 unsigned int SwathHeightC,
855 double TWait,
856 double *DSTXAfterScaler,
857 double *DSTYAfterScaler,
858 double *DestinationLinesForPrefetch,
859 double *PrefetchBandwidth,
860 double *DestinationLinesToRequestVMInVBlank,
861 double *DestinationLinesToRequestRowInVBlank,
862 double *VRatioPrefetchY,
863 double *VRatioPrefetchC,
864 double *RequiredPrefetchPixDataBWLuma,
865 double *RequiredPrefetchPixDataBWChroma,
866 bool *NotEnoughTimeForDynamicMetadata,
867 double *Tno_bw,
868 double *prefetch_vmrow_bw,
869 double *Tdmdl_vm,
870 double *Tdmdl,
871 double *TSetup,
872 int *VUpdateOffsetPix,
873 double *VUpdateWidthPix,
874 double *VReadyOffsetPix)
875 {
876 bool MyError = false;
877 unsigned int DPPCycles, DISPCLKCycles;
878 double DSTTotalPixelsAfterScaler;
879 double LineTime;
880 double dst_y_prefetch_equ;
881 #ifdef __DML_VBA_DEBUG__
882 double Tsw_oto;
883 #endif
884 double prefetch_bw_oto;
885 double prefetch_bw_pr;
886 double Tvm_oto;
887 double Tr0_oto;
888 double Tvm_oto_lines;
889 double Tr0_oto_lines;
890 double dst_y_prefetch_oto;
891 double TimeForFetchingMetaPTE = 0;
892 double TimeForFetchingRowInVBlank = 0;
893 double LinesToRequestPrefetchPixelData = 0;
894 unsigned int HostVMDynamicLevelsTrips;
895 double trip_to_mem;
896 double Tvm_trips;
897 double Tr0_trips;
898 double Tvm_trips_rounded;
899 double Tr0_trips_rounded;
900 double Lsw_oto;
901 double Tpre_rounded;
902 double prefetch_bw_equ;
903 double Tvm_equ;
904 double Tr0_equ;
905 double Tdmbf;
906 double Tdmec;
907 double Tdmsks;
908 double prefetch_sw_bytes;
909 double bytes_pp;
910 double dep_bytes;
911 int max_vratio_pre = 4;
912 double min_Lsw;
913 double Tsw_est1 = 0;
914 double Tsw_est3 = 0;
915 double max_Tsw = 0;
916
917 if (GPUVMEnable == true && HostVMEnable == true) {
918 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
919 } else {
920 HostVMDynamicLevelsTrips = 0;
921 }
922 #ifdef __DML_VBA_DEBUG__
923 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
924 #endif
925 CalculateVupdateAndDynamicMetadataParameters(
926 MaxInterDCNTileRepeaters,
927 myPipe->DPPCLK,
928 myPipe->DISPCLK,
929 myPipe->DCFCLKDeepSleep,
930 myPipe->PixelClock,
931 myPipe->HTotal,
932 myPipe->VBlank,
933 DynamicMetadataTransmittedBytes,
934 DynamicMetadataLinesBeforeActiveRequired,
935 myPipe->InterlaceEnable,
936 myPipe->ProgressiveToInterlaceUnitInOPP,
937 TSetup,
938 &Tdmbf,
939 &Tdmec,
940 &Tdmsks,
941 VUpdateOffsetPix,
942 VUpdateWidthPix,
943 VReadyOffsetPix);
944
945 LineTime = myPipe->HTotal / myPipe->PixelClock;
946 trip_to_mem = UrgentLatency;
947 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
948
949 #ifdef __DML_VBA_ALLOW_DELTA__
950 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
951 #else
952 if (DynamicMetadataVMEnabled == true) {
953 #endif
954 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
955 } else {
956 *Tdmdl = TWait + UrgentExtraLatency;
957 }
958
959 #ifdef __DML_VBA_ALLOW_DELTA__
960 if (DynamicMetadataEnable == false) {
961 *Tdmdl = 0.0;
962 }
963 #endif
964
965 if (DynamicMetadataEnable == true) {
966 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
967 *NotEnoughTimeForDynamicMetadata = true;
968 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
969 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
970 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
971 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
972 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
973 } else {
974 *NotEnoughTimeForDynamicMetadata = false;
975 }
976 } else {
977 *NotEnoughTimeForDynamicMetadata = false;
978 }
979
980 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
981
982 if (myPipe->ScalerEnabled)
983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
984 else
985 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
986
987 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
988
989 DISPCLKCycles = DISPCLKDelaySubtotal;
990
991 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
992 return true;
993
994 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
995
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
998 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
999 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1000 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1001 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1002 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1003 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1004 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1005 #endif
1006
1007 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1008
1009 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1010 *DSTYAfterScaler = 1;
1011 else
1012 *DSTYAfterScaler = 0;
1013
1014 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1015 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1016 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1017
1018 #ifdef __DML_VBA_DEBUG__
1019 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1020 #endif
1021
1022 MyError = false;
1023
1024 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1025 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1026 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1027
1028 #ifdef __DML_VBA_ALLOW_DELTA__
1029 if (!myPipe->DCCEnable) {
1030 Tr0_trips = 0.0;
1031 Tr0_trips_rounded = 0.0;
1032 }
1033 #endif
1034
1035 if (!GPUVMEnable) {
1036 Tvm_trips = 0.0;
1037 Tvm_trips_rounded = 0.0;
1038 }
1039
1040 if (GPUVMEnable) {
1041 if (GPUVMPageTableLevels >= 3) {
1042 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1043 } else {
1044 *Tno_bw = 0;
1045 }
1046 } else if (!myPipe->DCCEnable) {
1047 *Tno_bw = LineTime;
1048 } else {
1049 *Tno_bw = LineTime / 4;
1050 }
1051
1052 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1054 else
1055 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1056 /*rev 99*/
1057 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1058 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1059 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1060 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1061 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1062
1063 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1064 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1065 #ifdef __DML_VBA_DEBUG__
1066 Tsw_oto = Lsw_oto * LineTime;
1067 #endif
1068
1069
1070 #ifdef __DML_VBA_DEBUG__
1071 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1072 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1073 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1074 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1075 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1076 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1077 #endif
1078
1079 if (GPUVMEnable == true)
1080 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1081 else
1082 Tvm_oto = LineTime / 4.0;
1083
1084 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1085 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1086 LineTime - Tvm_oto,
1087 LineTime / 4);
1088 } else {
1089 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1090 }
1091
1092 #ifdef __DML_VBA_DEBUG__
1093 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1094 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1095 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1096 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1097 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1098 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1099 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1100 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1101 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1102 #endif
1103
1104 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1105 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1106 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1107 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1108 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1109 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1110
1111 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1112
1113 if (prefetch_sw_bytes < dep_bytes)
1114 prefetch_sw_bytes = 2 * dep_bytes;
1115
1116 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1117 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1118 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1119 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1120 dml_print("DML: LineTime: %f\n", LineTime);
1121 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1122
1123 dml_print("DML: LineTime: %f\n", LineTime);
1124 dml_print("DML: VStartup: %d\n", VStartup);
1125 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1126 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1127 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1128 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1129 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1130 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1131 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1132 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1133 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1134 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1135 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1136
1137 *PrefetchBandwidth = 0;
1138 *DestinationLinesToRequestVMInVBlank = 0;
1139 *DestinationLinesToRequestRowInVBlank = 0;
1140 *VRatioPrefetchY = 0;
1141 *VRatioPrefetchC = 0;
1142 *RequiredPrefetchPixDataBWLuma = 0;
1143 if (dst_y_prefetch_equ > 1) {
1144 double PrefetchBandwidth1;
1145 double PrefetchBandwidth2;
1146 double PrefetchBandwidth3;
1147 double PrefetchBandwidth4;
1148
1149 if (Tpre_rounded - *Tno_bw > 0) {
1150 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1151 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1152 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1153 } else {
1154 PrefetchBandwidth1 = 0;
1155 }
1156
1157 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1158 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1159 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1160 }
1161
1162 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1163 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1164 else
1165 PrefetchBandwidth2 = 0;
1166
1167 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1168 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1169 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1170 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1171 } else {
1172 PrefetchBandwidth3 = 0;
1173 }
1174
1175 #ifdef __DML_VBA_DEBUG__
1176 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1177 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1178 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1179 #endif
1180 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1181 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1182 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1183 }
1184
1185 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1186 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1187 else
1188 PrefetchBandwidth4 = 0;
1189
1190 {
1191 bool Case1OK;
1192 bool Case2OK;
1193 bool Case3OK;
1194
1195 if (PrefetchBandwidth1 > 0) {
1196 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1197 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1198 Case1OK = true;
1199 } else {
1200 Case1OK = false;
1201 }
1202 } else {
1203 Case1OK = false;
1204 }
1205
1206 if (PrefetchBandwidth2 > 0) {
1207 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1208 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1209 Case2OK = true;
1210 } else {
1211 Case2OK = false;
1212 }
1213 } else {
1214 Case2OK = false;
1215 }
1216
1217 if (PrefetchBandwidth3 > 0) {
1218 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1219 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1220 Case3OK = true;
1221 } else {
1222 Case3OK = false;
1223 }
1224 } else {
1225 Case3OK = false;
1226 }
1227
1228 if (Case1OK) {
1229 prefetch_bw_equ = PrefetchBandwidth1;
1230 } else if (Case2OK) {
1231 prefetch_bw_equ = PrefetchBandwidth2;
1232 } else if (Case3OK) {
1233 prefetch_bw_equ = PrefetchBandwidth3;
1234 } else {
1235 prefetch_bw_equ = PrefetchBandwidth4;
1236 }
1237
1238 #ifdef __DML_VBA_DEBUG__
1239 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1240 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1241 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1242 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1243 #endif
1244
1245 if (prefetch_bw_equ > 0) {
1246 if (GPUVMEnable == true) {
1247 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1248 } else {
1249 Tvm_equ = LineTime / 4;
1250 }
1251
1252 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1253 Tr0_equ = dml_max4(
1254 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1255 Tr0_trips,
1256 (LineTime - Tvm_equ) / 2,
1257 LineTime / 4);
1258 } else {
1259 Tr0_equ = (LineTime - Tvm_equ) / 2;
1260 }
1261 } else {
1262 Tvm_equ = 0;
1263 Tr0_equ = 0;
1264 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1265 }
1266 }
1267
1268 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1269 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1270 TimeForFetchingMetaPTE = Tvm_oto;
1271 TimeForFetchingRowInVBlank = Tr0_oto;
1272 *PrefetchBandwidth = prefetch_bw_oto;
1273 } else {
1274 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1275 TimeForFetchingMetaPTE = Tvm_equ;
1276 TimeForFetchingRowInVBlank = Tr0_equ;
1277 *PrefetchBandwidth = prefetch_bw_equ;
1278 }
1279
1280 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1281
1282 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1283
1284 #ifdef __DML_VBA_ALLOW_DELTA__
1285 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1286 // See note above dated 5/30/2018
1287 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1288 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1289 #else
1290 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1291 #endif
1292
1293 #ifdef __DML_VBA_DEBUG__
1294 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1295 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1296 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1297 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1298 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1299 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1300 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1301 #endif
1302
1303 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1304
1305 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1306 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1307 #ifdef __DML_VBA_DEBUG__
1308 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1309 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1310 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1311 #endif
1312 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1313 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1314 *VRatioPrefetchY = dml_max(
1315 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1316 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1317 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1318 } else {
1319 MyError = true;
1320 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1321 *VRatioPrefetchY = 0;
1322 }
1323 #ifdef __DML_VBA_DEBUG__
1324 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1325 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1326 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1327 #endif
1328 }
1329
1330 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1331 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1332
1333 #ifdef __DML_VBA_DEBUG__
1334 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1335 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1336 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1337 #endif
1338 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1339 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1340 *VRatioPrefetchC = dml_max(
1341 *VRatioPrefetchC,
1342 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1343 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1344 } else {
1345 MyError = true;
1346 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1347 *VRatioPrefetchC = 0;
1348 }
1349 #ifdef __DML_VBA_DEBUG__
1350 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1351 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1352 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1353 #endif
1354 }
1355
1356 #ifdef __DML_VBA_DEBUG__
1357 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1358 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1359 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1360 #endif
1361
1362 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1363
1364 #ifdef __DML_VBA_DEBUG__
1365 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1366 #endif
1367
1368 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1369 / LineTime;
1370 } else {
1371 MyError = true;
1372 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1373 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1374 *VRatioPrefetchY = 0;
1375 *VRatioPrefetchC = 0;
1376 *RequiredPrefetchPixDataBWLuma = 0;
1377 *RequiredPrefetchPixDataBWChroma = 0;
1378 }
1379
1380 dml_print(
1381 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1382 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1383 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1384 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1385 dml_print(
1386 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1387 (double) LinesToRequestPrefetchPixelData * LineTime);
1388 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1389 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1390 (double) myPipe->HTotal)) * LineTime);
1391 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1392 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1393 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1394 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1395 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1396
1397 } else {
1398 MyError = true;
1399 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1400 }
1401
1402 {
1403 double prefetch_vm_bw;
1404 double prefetch_row_bw;
1405
1406 if (PDEAndMetaPTEBytesFrame == 0) {
1407 prefetch_vm_bw = 0;
1408 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1409 #ifdef __DML_VBA_DEBUG__
1410 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1411 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1412 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1413 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1414 #endif
1415 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1416 #ifdef __DML_VBA_DEBUG__
1417 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1418 #endif
1419 } else {
1420 prefetch_vm_bw = 0;
1421 MyError = true;
1422 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1423 }
1424
1425 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1426 prefetch_row_bw = 0;
1427 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1428 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1429
1430 #ifdef __DML_VBA_DEBUG__
1431 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1432 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1433 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1434 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1435 #endif
1436 } else {
1437 prefetch_row_bw = 0;
1438 MyError = true;
1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1440 }
1441
1442 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1443 }
1444
1445 if (MyError) {
1446 *PrefetchBandwidth = 0;
1447 TimeForFetchingMetaPTE = 0;
1448 TimeForFetchingRowInVBlank = 0;
1449 *DestinationLinesToRequestVMInVBlank = 0;
1450 *DestinationLinesToRequestRowInVBlank = 0;
1451 *DestinationLinesForPrefetch = 0;
1452 LinesToRequestPrefetchPixelData = 0;
1453 *VRatioPrefetchY = 0;
1454 *VRatioPrefetchC = 0;
1455 *RequiredPrefetchPixDataBWLuma = 0;
1456 *RequiredPrefetchPixDataBWChroma = 0;
1457 }
1458
1459 return MyError;
1460 }
1461
/*
 * Returns (4 * VCOSpeed) divided by dml_floor((4 * VCOSpeed) / Clock, 1).
 *
 * NOTE(review): judging by the name, this snaps Clock upwards onto the DFS
 * divider grid derived from the VCO; the exact rounding depends on
 * dml_floor(), which is defined outside this file - confirm there.
 * No guard is applied here: the caller is expected to pass a Clock for which
 * the dml_floor() result is non-zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	double vco_times_four = VCOSpeed * 4;
	double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1466
/*
 * Returns (4 * VCOSpeed) divided by dml_ceil((4 * VCOSpeed) / Clock, 1).
 *
 * NOTE(review): judging by the name, this snaps Clock downwards onto the DFS
 * divider grid derived from the VCO; the exact rounding depends on
 * dml_ceil(), which is defined outside this file - confirm there.
 * No guard is applied here: the caller is expected to pass a non-zero Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	double vco_times_four = VCOSpeed * 4;
	double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return vco_times_four / divider;
}
1471
1472 static void CalculateDCCConfiguration(
1473 bool DCCEnabled,
1474 bool DCCProgrammingAssumesScanDirectionUnknown,
1475 enum source_format_class SourcePixelFormat,
1476 unsigned int SurfaceWidthLuma,
1477 unsigned int SurfaceWidthChroma,
1478 unsigned int SurfaceHeightLuma,
1479 unsigned int SurfaceHeightChroma,
1480 double DETBufferSize,
1481 unsigned int RequestHeight256ByteLuma,
1482 unsigned int RequestHeight256ByteChroma,
1483 enum dm_swizzle_mode TilingFormat,
1484 unsigned int BytePerPixelY,
1485 unsigned int BytePerPixelC,
1486 double BytePerPixelDETY,
1487 double BytePerPixelDETC,
1488 enum scan_direction_class ScanOrientation,
1489 unsigned int *MaxUncompressedBlockLuma,
1490 unsigned int *MaxUncompressedBlockChroma,
1491 unsigned int *MaxCompressedBlockLuma,
1492 unsigned int *MaxCompressedBlockChroma,
1493 unsigned int *IndependentBlockLuma,
1494 unsigned int *IndependentBlockChroma)
1495 {
1496 int yuv420;
1497 int horz_div_l;
1498 int horz_div_c;
1499 int vert_div_l;
1500 int vert_div_c;
1501
1502 int swath_buf_size;
1503 double detile_buf_vp_horz_limit;
1504 double detile_buf_vp_vert_limit;
1505
1506 int MAS_vp_horz_limit;
1507 int MAS_vp_vert_limit;
1508 int max_vp_horz_width;
1509 int max_vp_vert_height;
1510 int eff_surf_width_l;
1511 int eff_surf_width_c;
1512 int eff_surf_height_l;
1513 int eff_surf_height_c;
1514
1515 int full_swath_bytes_horz_wc_l;
1516 int full_swath_bytes_horz_wc_c;
1517 int full_swath_bytes_vert_wc_l;
1518 int full_swath_bytes_vert_wc_c;
1519 int req128_horz_wc_l;
1520 int req128_horz_wc_c;
1521 int req128_vert_wc_l;
1522 int req128_vert_wc_c;
1523 int segment_order_horz_contiguous_luma;
1524 int segment_order_horz_contiguous_chroma;
1525 int segment_order_vert_contiguous_luma;
1526 int segment_order_vert_contiguous_chroma;
1527
1528 typedef enum {
1529 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1530 } RequestType;
1531 RequestType RequestLuma;
1532 RequestType RequestChroma;
1533
1534 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1535 horz_div_l = 1;
1536 horz_div_c = 1;
1537 vert_div_l = 1;
1538 vert_div_c = 1;
1539
1540 if (BytePerPixelY == 1)
1541 vert_div_l = 0;
1542 if (BytePerPixelC == 1)
1543 vert_div_c = 0;
1544 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1545 horz_div_l = 0;
1546 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1547 horz_div_c = 0;
1548
1549 if (BytePerPixelC == 0) {
1550 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1551 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1552 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1553 } else {
1554 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1555 detile_buf_vp_horz_limit = (double) swath_buf_size
1556 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1557 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1558 detile_buf_vp_vert_limit = (double) swath_buf_size
1559 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1560 }
1561
1562 if (SourcePixelFormat == dm_420_10) {
1563 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1564 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1565 }
1566
1567 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1568 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1569
1570 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1571 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1572 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1573 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1574 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1575 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1576 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1577 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1578
1579 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1580 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1581 if (BytePerPixelC > 0) {
1582 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1583 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1584 } else {
1585 full_swath_bytes_horz_wc_c = 0;
1586 full_swath_bytes_vert_wc_c = 0;
1587 }
1588
1589 if (SourcePixelFormat == dm_420_10) {
1590 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1591 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1592 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1593 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1594 }
1595
1596 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1597 req128_horz_wc_l = 0;
1598 req128_horz_wc_c = 0;
1599 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1600 req128_horz_wc_l = 0;
1601 req128_horz_wc_c = 1;
1602 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1603 req128_horz_wc_l = 1;
1604 req128_horz_wc_c = 0;
1605 } else {
1606 req128_horz_wc_l = 1;
1607 req128_horz_wc_c = 1;
1608 }
1609
1610 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1611 req128_vert_wc_l = 0;
1612 req128_vert_wc_c = 0;
1613 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1614 req128_vert_wc_l = 0;
1615 req128_vert_wc_c = 1;
1616 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1617 req128_vert_wc_l = 1;
1618 req128_vert_wc_c = 0;
1619 } else {
1620 req128_vert_wc_l = 1;
1621 req128_vert_wc_c = 1;
1622 }
1623
1624 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1625 segment_order_horz_contiguous_luma = 0;
1626 } else {
1627 segment_order_horz_contiguous_luma = 1;
1628 }
1629 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1630 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1631 segment_order_vert_contiguous_luma = 0;
1632 } else {
1633 segment_order_vert_contiguous_luma = 1;
1634 }
1635 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1636 segment_order_horz_contiguous_chroma = 0;
1637 } else {
1638 segment_order_horz_contiguous_chroma = 1;
1639 }
1640 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1641 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1642 segment_order_vert_contiguous_chroma = 0;
1643 } else {
1644 segment_order_vert_contiguous_chroma = 1;
1645 }
1646
1647 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1648 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1649 RequestLuma = REQ_256Bytes;
1650 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1651 RequestLuma = REQ_128BytesNonContiguous;
1652 } else {
1653 RequestLuma = REQ_128BytesContiguous;
1654 }
1655 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1656 RequestChroma = REQ_256Bytes;
1657 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1658 RequestChroma = REQ_128BytesNonContiguous;
1659 } else {
1660 RequestChroma = REQ_128BytesContiguous;
1661 }
1662 } else if (ScanOrientation != dm_vert) {
1663 if (req128_horz_wc_l == 0) {
1664 RequestLuma = REQ_256Bytes;
1665 } else if (segment_order_horz_contiguous_luma == 0) {
1666 RequestLuma = REQ_128BytesNonContiguous;
1667 } else {
1668 RequestLuma = REQ_128BytesContiguous;
1669 }
1670 if (req128_horz_wc_c == 0) {
1671 RequestChroma = REQ_256Bytes;
1672 } else if (segment_order_horz_contiguous_chroma == 0) {
1673 RequestChroma = REQ_128BytesNonContiguous;
1674 } else {
1675 RequestChroma = REQ_128BytesContiguous;
1676 }
1677 } else {
1678 if (req128_vert_wc_l == 0) {
1679 RequestLuma = REQ_256Bytes;
1680 } else if (segment_order_vert_contiguous_luma == 0) {
1681 RequestLuma = REQ_128BytesNonContiguous;
1682 } else {
1683 RequestLuma = REQ_128BytesContiguous;
1684 }
1685 if (req128_vert_wc_c == 0) {
1686 RequestChroma = REQ_256Bytes;
1687 } else if (segment_order_vert_contiguous_chroma == 0) {
1688 RequestChroma = REQ_128BytesNonContiguous;
1689 } else {
1690 RequestChroma = REQ_128BytesContiguous;
1691 }
1692 }
1693
1694 if (RequestLuma == REQ_256Bytes) {
1695 *MaxUncompressedBlockLuma = 256;
1696 *MaxCompressedBlockLuma = 256;
1697 *IndependentBlockLuma = 0;
1698 } else if (RequestLuma == REQ_128BytesContiguous) {
1699 *MaxUncompressedBlockLuma = 256;
1700 *MaxCompressedBlockLuma = 128;
1701 *IndependentBlockLuma = 128;
1702 } else {
1703 *MaxUncompressedBlockLuma = 256;
1704 *MaxCompressedBlockLuma = 64;
1705 *IndependentBlockLuma = 64;
1706 }
1707
1708 if (RequestChroma == REQ_256Bytes) {
1709 *MaxUncompressedBlockChroma = 256;
1710 *MaxCompressedBlockChroma = 256;
1711 *IndependentBlockChroma = 0;
1712 } else if (RequestChroma == REQ_128BytesContiguous) {
1713 *MaxUncompressedBlockChroma = 256;
1714 *MaxCompressedBlockChroma = 128;
1715 *IndependentBlockChroma = 128;
1716 } else {
1717 *MaxUncompressedBlockChroma = 256;
1718 *MaxCompressedBlockChroma = 64;
1719 *IndependentBlockChroma = 64;
1720 }
1721
1722 if (DCCEnabled != true || BytePerPixelC == 0) {
1723 *MaxUncompressedBlockChroma = 0;
1724 *MaxCompressedBlockChroma = 0;
1725 *IndependentBlockChroma = 0;
1726 }
1727
1728 if (DCCEnabled != true) {
1729 *MaxUncompressedBlockLuma = 0;
1730 *MaxCompressedBlockLuma = 0;
1731 *IndependentBlockLuma = 0;
1732 }
1733 }
1734
1735 static double CalculatePrefetchSourceLines(
1736 struct display_mode_lib *mode_lib,
1737 double VRatio,
1738 double vtaps,
1739 bool Interlace,
1740 bool ProgressiveToInterlaceUnitInOPP,
1741 unsigned int SwathHeight,
1742 unsigned int ViewportYStart,
1743 double *VInitPreFill,
1744 unsigned int *MaxNumSwath)
1745 {
1746 struct vba_vars_st *v = &mode_lib->vba;
1747 unsigned int MaxPartialSwath;
1748
1749 if (ProgressiveToInterlaceUnitInOPP)
1750 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1751 else
1752 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1753
1754 if (!v->IgnoreViewportPositioning) {
1755
1756 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1757
1758 if (*VInitPreFill > 1.0)
1759 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1760 else
1761 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1762 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1763
1764 } else {
1765
1766 if (ViewportYStart != 0)
1767 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1768
1769 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1770
1771 if (*VInitPreFill > 1.0)
1772 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1773 else
1774 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1775 }
1776
1777 #ifdef __DML_VBA_DEBUG__
1778 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1779 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1780 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1781 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1782 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1783 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1784 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1785 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1786 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1787 #endif
1788 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1789 }
1790
1791 static unsigned int CalculateVMAndRowBytes(
1792 struct display_mode_lib *mode_lib,
1793 bool DCCEnable,
1794 unsigned int BlockHeight256Bytes,
1795 unsigned int BlockWidth256Bytes,
1796 enum source_format_class SourcePixelFormat,
1797 unsigned int SurfaceTiling,
1798 unsigned int BytePerPixel,
1799 enum scan_direction_class ScanDirection,
1800 unsigned int SwathWidth,
1801 unsigned int ViewportHeight,
1802 bool GPUVMEnable,
1803 bool HostVMEnable,
1804 unsigned int HostVMMaxNonCachedPageTableLevels,
1805 unsigned int GPUVMMinPageSize,
1806 unsigned int HostVMMinPageSize,
1807 unsigned int PTEBufferSizeInRequests,
1808 unsigned int Pitch,
1809 unsigned int DCCMetaPitch,
1810 unsigned int *MacroTileWidth,
1811 unsigned int *MetaRowByte,
1812 unsigned int *PixelPTEBytesPerRow,
1813 bool *PTEBufferSizeNotExceeded,
1814 int *dpte_row_width_ub,
1815 unsigned int *dpte_row_height,
1816 unsigned int *MetaRequestWidth,
1817 unsigned int *MetaRequestHeight,
1818 unsigned int *meta_row_width,
1819 unsigned int *meta_row_height,
1820 int *vm_group_bytes,
1821 unsigned int *dpte_group_bytes,
1822 unsigned int *PixelPTEReqWidth,
1823 unsigned int *PixelPTEReqHeight,
1824 unsigned int *PTERequestSize,
1825 int *DPDE0BytesFrame,
1826 int *MetaPTEBytesFrame)
1827 {
1828 struct vba_vars_st *v = &mode_lib->vba;
1829 unsigned int MPDEBytesFrame;
1830 unsigned int DCCMetaSurfaceBytes;
1831 unsigned int MacroTileSizeBytes;
1832 unsigned int MacroTileHeight;
1833 unsigned int ExtraDPDEBytesFrame;
1834 unsigned int PDEAndMetaPTEBytesFrame;
1835 unsigned int PixelPTEReqHeightPTEs = 0;
1836 unsigned int HostVMDynamicLevels = 0;
1837 double FractionOfPTEReturnDrop;
1838
1839 if (GPUVMEnable == true && HostVMEnable == true) {
1840 if (HostVMMinPageSize < 2048) {
1841 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1842 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1843 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1844 } else {
1845 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1846 }
1847 }
1848
1849 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1850 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1851 if (ScanDirection != dm_vert) {
1852 *meta_row_height = *MetaRequestHeight;
1853 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1854 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1855 } else {
1856 *meta_row_height = *MetaRequestWidth;
1857 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1858 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1859 }
1860 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1861 if (GPUVMEnable == true) {
1862 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1863 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1864 } else {
1865 *MetaPTEBytesFrame = 0;
1866 MPDEBytesFrame = 0;
1867 }
1868
1869 if (DCCEnable != true) {
1870 *MetaPTEBytesFrame = 0;
1871 MPDEBytesFrame = 0;
1872 *MetaRowByte = 0;
1873 }
1874
1875 if (SurfaceTiling == dm_sw_linear) {
1876 MacroTileSizeBytes = 256;
1877 MacroTileHeight = BlockHeight256Bytes;
1878 } else {
1879 MacroTileSizeBytes = 65536;
1880 MacroTileHeight = 16 * BlockHeight256Bytes;
1881 }
1882 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1883
1884 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1885 if (ScanDirection != dm_vert) {
1886 *DPDE0BytesFrame = 64
1887 * (dml_ceil(
1888 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1889 / (8 * 2097152),
1890 1) + 1);
1891 } else {
1892 *DPDE0BytesFrame = 64
1893 * (dml_ceil(
1894 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1895 / (8 * 2097152),
1896 1) + 1);
1897 }
1898 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1899 } else {
1900 *DPDE0BytesFrame = 0;
1901 ExtraDPDEBytesFrame = 0;
1902 }
1903
1904 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1905
1906 #ifdef __DML_VBA_DEBUG__
1907 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1908 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1909 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1910 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1911 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1912 #endif
1913
1914 if (HostVMEnable == true) {
1915 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1916 }
1917 #ifdef __DML_VBA_DEBUG__
1918 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1919 #endif
1920
1921 if (SurfaceTiling == dm_sw_linear) {
1922 PixelPTEReqHeightPTEs = 1;
1923 *PixelPTEReqHeight = 1;
1924 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1925 *PTERequestSize = 64;
1926 FractionOfPTEReturnDrop = 0;
1927 } else if (MacroTileSizeBytes == 4096) {
1928 PixelPTEReqHeightPTEs = 1;
1929 *PixelPTEReqHeight = MacroTileHeight;
1930 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1931 *PTERequestSize = 64;
1932 if (ScanDirection != dm_vert)
1933 FractionOfPTEReturnDrop = 0;
1934 else
1935 FractionOfPTEReturnDrop = 7 / 8;
1936 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1937 PixelPTEReqHeightPTEs = 16;
1938 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1939 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1940 *PTERequestSize = 128;
1941 FractionOfPTEReturnDrop = 0;
1942 } else {
1943 PixelPTEReqHeightPTEs = 1;
1944 *PixelPTEReqHeight = MacroTileHeight;
1945 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1946 *PTERequestSize = 64;
1947 FractionOfPTEReturnDrop = 0;
1948 }
1949
1950 if (SurfaceTiling == dm_sw_linear) {
1951 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1952 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1953 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1954 } else if (ScanDirection != dm_vert) {
1955 *dpte_row_height = *PixelPTEReqHeight;
1956 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1957 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1958 } else {
1959 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1960 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1961 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1962 }
1963
1964 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1965 *PTEBufferSizeNotExceeded = true;
1966 } else {
1967 *PTEBufferSizeNotExceeded = false;
1968 }
1969
1970 if (GPUVMEnable != true) {
1971 *PixelPTEBytesPerRow = 0;
1972 *PTEBufferSizeNotExceeded = true;
1973 }
1974
1975 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1976
1977 if (HostVMEnable == true) {
1978 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1979 }
1980
1981 if (HostVMEnable == true) {
1982 *vm_group_bytes = 512;
1983 *dpte_group_bytes = 512;
1984 } else if (GPUVMEnable == true) {
1985 *vm_group_bytes = 2048;
1986 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1987 *dpte_group_bytes = 512;
1988 } else {
1989 *dpte_group_bytes = 2048;
1990 }
1991 } else {
1992 *vm_group_bytes = 0;
1993 *dpte_group_bytes = 0;
1994 }
1995 return PDEAndMetaPTEBytesFrame;
1996 }
1997
1998 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
1999 {
2000 struct vba_vars_st *v = &mode_lib->vba;
2001 unsigned int j, k;
2002 double HostVMInefficiencyFactor = 1.0;
2003 bool NoChromaPlanes = true;
2004 int ReorderBytes;
2005 double VMDataOnlyReturnBW;
2006 double MaxTotalRDBandwidth = 0;
2007 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2008
2009 v->WritebackDISPCLK = 0.0;
2010 v->DISPCLKWithRamping = 0;
2011 v->DISPCLKWithoutRamping = 0;
2012 v->GlobalDPPCLK = 0.0;
2013 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
2014 {
2015 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2016 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2017 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2018 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2019 if (v->HostVMEnable != true) {
2020 v->ReturnBW = dml_min(
2021 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2022 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2023 } else {
2024 v->ReturnBW = dml_min(
2025 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2026 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2027 }
2028 }
2029 /* End DAL custom code */
2030
2031 // DISPCLK and DPPCLK Calculation
2032 //
2033 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2034 if (v->WritebackEnable[k]) {
2035 v->WritebackDISPCLK = dml_max(
2036 v->WritebackDISPCLK,
2037 dml31_CalculateWriteBackDISPCLK(
2038 v->WritebackPixelFormat[k],
2039 v->PixelClock[k],
2040 v->WritebackHRatio[k],
2041 v->WritebackVRatio[k],
2042 v->WritebackHTaps[k],
2043 v->WritebackVTaps[k],
2044 v->WritebackSourceWidth[k],
2045 v->WritebackDestinationWidth[k],
2046 v->HTotal[k],
2047 v->WritebackLineBufferSize));
2048 }
2049 }
2050
2051 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2052 if (v->HRatio[k] > 1) {
2053 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2054 v->MaxDCHUBToPSCLThroughput,
2055 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2056 } else {
2057 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2058 }
2059
2060 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2061 * dml_max(
2062 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2063 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2064
2065 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2066 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2067 }
2068
2069 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2070 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2071 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2072 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2073 } else {
2074 if (v->HRatioChroma[k] > 1) {
2075 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2076 v->MaxDCHUBToPSCLThroughput,
2077 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2078 } else {
2079 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2080 }
2081 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2082 * dml_max3(
2083 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2084 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2085 1.0);
2086
2087 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2088 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2089 }
2090
2091 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2092 }
2093 }
2094
2095 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2096 if (v->BlendingAndTiming[k] != k)
2097 continue;
2098 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2099 v->DISPCLKWithRamping = dml_max(
2100 v->DISPCLKWithRamping,
2101 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2102 * (1 + v->DISPCLKRampingMargin / 100));
2103 v->DISPCLKWithoutRamping = dml_max(
2104 v->DISPCLKWithoutRamping,
2105 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2106 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2107 v->DISPCLKWithRamping = dml_max(
2108 v->DISPCLKWithRamping,
2109 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2110 * (1 + v->DISPCLKRampingMargin / 100));
2111 v->DISPCLKWithoutRamping = dml_max(
2112 v->DISPCLKWithoutRamping,
2113 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2114 } else {
2115 v->DISPCLKWithRamping = dml_max(
2116 v->DISPCLKWithRamping,
2117 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2118 v->DISPCLKWithoutRamping = dml_max(
2119 v->DISPCLKWithoutRamping,
2120 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2121 }
2122 }
2123
2124 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2125 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2126
2127 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2128 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2129 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2130 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2131 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2132 v->DISPCLKDPPCLKVCOSpeed);
2133 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2134 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2135 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2136 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2137 } else {
2138 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2139 }
2140 v->DISPCLK = v->DISPCLK_calculated;
2141 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2142
2143 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2144 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2145 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2146 }
2147 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2148 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2149 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2150 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2151 }
2152
2153 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2154 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2155 }
2156
2157 // Urgent and B P-State/DRAM Clock Change Watermark
2158 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2159 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2160
2161 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2162 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2163 v->SourcePixelFormat[k],
2164 v->SurfaceTiling[k],
2165 &v->BytePerPixelY[k],
2166 &v->BytePerPixelC[k],
2167 &v->BytePerPixelDETY[k],
2168 &v->BytePerPixelDETC[k],
2169 &v->BlockHeight256BytesY[k],
2170 &v->BlockHeight256BytesC[k],
2171 &v->BlockWidth256BytesY[k],
2172 &v->BlockWidth256BytesC[k]);
2173 }
2174
2175 CalculateSwathWidth(
2176 false,
2177 v->NumberOfActivePlanes,
2178 v->SourcePixelFormat,
2179 v->SourceScan,
2180 v->ViewportWidth,
2181 v->ViewportHeight,
2182 v->SurfaceWidthY,
2183 v->SurfaceWidthC,
2184 v->SurfaceHeightY,
2185 v->SurfaceHeightC,
2186 v->ODMCombineEnabled,
2187 v->BytePerPixelY,
2188 v->BytePerPixelC,
2189 v->BlockHeight256BytesY,
2190 v->BlockHeight256BytesC,
2191 v->BlockWidth256BytesY,
2192 v->BlockWidth256BytesC,
2193 v->BlendingAndTiming,
2194 v->HActive,
2195 v->HRatio,
2196 v->DPPPerPlane,
2197 v->SwathWidthSingleDPPY,
2198 v->SwathWidthSingleDPPC,
2199 v->SwathWidthY,
2200 v->SwathWidthC,
2201 v->dummyinteger3,
2202 v->dummyinteger4,
2203 v->swath_width_luma_ub,
2204 v->swath_width_chroma_ub);
2205
2206 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2207 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2208 * v->VRatio[k];
2209 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2210 * v->VRatioChroma[k];
2211 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2212 }
2213
2214 // DCFCLK Deep Sleep
2215 CalculateDCFCLKDeepSleep(
2216 mode_lib,
2217 v->NumberOfActivePlanes,
2218 v->BytePerPixelY,
2219 v->BytePerPixelC,
2220 v->VRatio,
2221 v->VRatioChroma,
2222 v->SwathWidthY,
2223 v->SwathWidthC,
2224 v->DPPPerPlane,
2225 v->HRatio,
2226 v->HRatioChroma,
2227 v->PixelClock,
2228 v->PSCL_THROUGHPUT_LUMA,
2229 v->PSCL_THROUGHPUT_CHROMA,
2230 v->DPPCLK,
2231 v->ReadBandwidthPlaneLuma,
2232 v->ReadBandwidthPlaneChroma,
2233 v->ReturnBusWidth,
2234 &v->DCFCLKDeepSleep);
2235
2236 // DSCCLK
2237 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2238 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2239 v->DSCCLK_calculated[k] = 0.0;
2240 } else {
2241 if (v->OutputFormat[k] == dm_420)
2242 v->DSCFormatFactor = 2;
2243 else if (v->OutputFormat[k] == dm_444)
2244 v->DSCFormatFactor = 1;
2245 else if (v->OutputFormat[k] == dm_n422)
2246 v->DSCFormatFactor = 2;
2247 else
2248 v->DSCFormatFactor = 1;
2249 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2250 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2251 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2252 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2253 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2254 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2255 else
2256 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2257 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2258 }
2259 }
2260
2261 // DSC Delay
2262 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2263 double BPP = v->OutputBpp[k];
2264
2265 if (v->DSCEnabled[k] && BPP != 0) {
2266 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2267 v->DSCDelay[k] = dscceComputeDelay(
2268 v->DSCInputBitPerComponent[k],
2269 BPP,
2270 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2271 v->NumberOfDSCSlices[k],
2272 v->OutputFormat[k],
2273 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2274 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2275 v->DSCDelay[k] = 2
2276 * (dscceComputeDelay(
2277 v->DSCInputBitPerComponent[k],
2278 BPP,
2279 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2280 v->NumberOfDSCSlices[k] / 2.0,
2281 v->OutputFormat[k],
2282 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2283 } else {
2284 v->DSCDelay[k] = 4
2285 * (dscceComputeDelay(
2286 v->DSCInputBitPerComponent[k],
2287 BPP,
2288 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2289 v->NumberOfDSCSlices[k] / 4.0,
2290 v->OutputFormat[k],
2291 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2292 }
2293 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2294 } else {
2295 v->DSCDelay[k] = 0;
2296 }
2297 }
2298
2299 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2300 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2301 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2302 v->DSCDelay[k] = v->DSCDelay[j];
2303
2304 // Prefetch
2305 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2306 unsigned int PDEAndMetaPTEBytesFrameY;
2307 unsigned int PixelPTEBytesPerRowY;
2308 unsigned int MetaRowByteY;
2309 unsigned int MetaRowByteC;
2310 unsigned int PDEAndMetaPTEBytesFrameC;
2311 unsigned int PixelPTEBytesPerRowC;
2312 bool PTEBufferSizeNotExceededY;
2313 bool PTEBufferSizeNotExceededC;
2314
2315 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2316 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2317 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2318 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2319 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2320 } else {
2321 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2322 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2323 }
2324
2325 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2326 mode_lib,
2327 v->DCCEnable[k],
2328 v->BlockHeight256BytesC[k],
2329 v->BlockWidth256BytesC[k],
2330 v->SourcePixelFormat[k],
2331 v->SurfaceTiling[k],
2332 v->BytePerPixelC[k],
2333 v->SourceScan[k],
2334 v->SwathWidthC[k],
2335 v->ViewportHeightChroma[k],
2336 v->GPUVMEnable,
2337 v->HostVMEnable,
2338 v->HostVMMaxNonCachedPageTableLevels,
2339 v->GPUVMMinPageSize,
2340 v->HostVMMinPageSize,
2341 v->PTEBufferSizeInRequestsForChroma,
2342 v->PitchC[k],
2343 v->DCCMetaPitchC[k],
2344 &v->MacroTileWidthC[k],
2345 &MetaRowByteC,
2346 &PixelPTEBytesPerRowC,
2347 &PTEBufferSizeNotExceededC,
2348 &v->dpte_row_width_chroma_ub[k],
2349 &v->dpte_row_height_chroma[k],
2350 &v->meta_req_width_chroma[k],
2351 &v->meta_req_height_chroma[k],
2352 &v->meta_row_width_chroma[k],
2353 &v->meta_row_height_chroma[k],
2354 &v->dummyinteger1,
2355 &v->dummyinteger2,
2356 &v->PixelPTEReqWidthC[k],
2357 &v->PixelPTEReqHeightC[k],
2358 &v->PTERequestSizeC[k],
2359 &v->dpde0_bytes_per_frame_ub_c[k],
2360 &v->meta_pte_bytes_per_frame_ub_c[k]);
2361
2362 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2363 mode_lib,
2364 v->VRatioChroma[k],
2365 v->VTAPsChroma[k],
2366 v->Interlace[k],
2367 v->ProgressiveToInterlaceUnitInOPP,
2368 v->SwathHeightC[k],
2369 v->ViewportYStartC[k],
2370 &v->VInitPreFillC[k],
2371 &v->MaxNumSwathC[k]);
2372 } else {
2373 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2374 v->PTEBufferSizeInRequestsForChroma = 0;
2375 PixelPTEBytesPerRowC = 0;
2376 PDEAndMetaPTEBytesFrameC = 0;
2377 MetaRowByteC = 0;
2378 v->MaxNumSwathC[k] = 0;
2379 v->PrefetchSourceLinesC[k] = 0;
2380 }
2381
2382 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2383 mode_lib,
2384 v->DCCEnable[k],
2385 v->BlockHeight256BytesY[k],
2386 v->BlockWidth256BytesY[k],
2387 v->SourcePixelFormat[k],
2388 v->SurfaceTiling[k],
2389 v->BytePerPixelY[k],
2390 v->SourceScan[k],
2391 v->SwathWidthY[k],
2392 v->ViewportHeight[k],
2393 v->GPUVMEnable,
2394 v->HostVMEnable,
2395 v->HostVMMaxNonCachedPageTableLevels,
2396 v->GPUVMMinPageSize,
2397 v->HostVMMinPageSize,
2398 v->PTEBufferSizeInRequestsForLuma,
2399 v->PitchY[k],
2400 v->DCCMetaPitchY[k],
2401 &v->MacroTileWidthY[k],
2402 &MetaRowByteY,
2403 &PixelPTEBytesPerRowY,
2404 &PTEBufferSizeNotExceededY,
2405 &v->dpte_row_width_luma_ub[k],
2406 &v->dpte_row_height[k],
2407 &v->meta_req_width[k],
2408 &v->meta_req_height[k],
2409 &v->meta_row_width[k],
2410 &v->meta_row_height[k],
2411 &v->vm_group_bytes[k],
2412 &v->dpte_group_bytes[k],
2413 &v->PixelPTEReqWidthY[k],
2414 &v->PixelPTEReqHeightY[k],
2415 &v->PTERequestSizeY[k],
2416 &v->dpde0_bytes_per_frame_ub_l[k],
2417 &v->meta_pte_bytes_per_frame_ub_l[k]);
2418
2419 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2420 mode_lib,
2421 v->VRatio[k],
2422 v->vtaps[k],
2423 v->Interlace[k],
2424 v->ProgressiveToInterlaceUnitInOPP,
2425 v->SwathHeightY[k],
2426 v->ViewportYStartY[k],
2427 &v->VInitPreFillY[k],
2428 &v->MaxNumSwathY[k]);
2429 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2430 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2431 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2432
2433 CalculateRowBandwidth(
2434 v->GPUVMEnable,
2435 v->SourcePixelFormat[k],
2436 v->VRatio[k],
2437 v->VRatioChroma[k],
2438 v->DCCEnable[k],
2439 v->HTotal[k] / v->PixelClock[k],
2440 MetaRowByteY,
2441 MetaRowByteC,
2442 v->meta_row_height[k],
2443 v->meta_row_height_chroma[k],
2444 PixelPTEBytesPerRowY,
2445 PixelPTEBytesPerRowC,
2446 v->dpte_row_height[k],
2447 v->dpte_row_height_chroma[k],
2448 &v->meta_row_bw[k],
2449 &v->dpte_row_bw[k]);
2450 }
2451
2452 v->TotalDCCActiveDPP = 0;
2453 v->TotalActiveDPP = 0;
2454 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2455 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2456 if (v->DCCEnable[k])
2457 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2458 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2459 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2460 NoChromaPlanes = false;
2461 }
2462
2463 ReorderBytes = v->NumberOfChannels
2464 * dml_max3(
2465 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2466 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2467 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2468
2469 VMDataOnlyReturnBW = dml_min(
2470 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2471 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2472 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2473 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2474
2475 #ifdef __DML_VBA_DEBUG__
2476 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2477 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2478 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2479 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2480 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2481 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2482 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2483 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2484 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2485 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2486 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2487 #endif
2488
2489 if (v->GPUVMEnable && v->HostVMEnable)
2490 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2491
2492 v->UrgentExtraLatency = CalculateExtraLatency(
2493 v->RoundTripPingLatencyCycles,
2494 ReorderBytes,
2495 v->DCFCLK,
2496 v->TotalActiveDPP,
2497 v->PixelChunkSizeInKByte,
2498 v->TotalDCCActiveDPP,
2499 v->MetaChunkSize,
2500 v->ReturnBW,
2501 v->GPUVMEnable,
2502 v->HostVMEnable,
2503 v->NumberOfActivePlanes,
2504 v->DPPPerPlane,
2505 v->dpte_group_bytes,
2506 HostVMInefficiencyFactor,
2507 v->HostVMMinPageSize,
2508 v->HostVMMaxNonCachedPageTableLevels);
2509
2510 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2511
2512 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2513 if (v->BlendingAndTiming[k] == k) {
2514 if (v->WritebackEnable[k] == true) {
2515 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2516 + CalculateWriteBackDelay(
2517 v->WritebackPixelFormat[k],
2518 v->WritebackHRatio[k],
2519 v->WritebackVRatio[k],
2520 v->WritebackVTaps[k],
2521 v->WritebackDestinationWidth[k],
2522 v->WritebackDestinationHeight[k],
2523 v->WritebackSourceHeight[k],
2524 v->HTotal[k]) / v->DISPCLK;
2525 } else
2526 v->WritebackDelay[v->VoltageLevel][k] = 0;
2527 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2528 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2529 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2530 v->WritebackDelay[v->VoltageLevel][k],
2531 v->WritebackLatency
2532 + CalculateWriteBackDelay(
2533 v->WritebackPixelFormat[j],
2534 v->WritebackHRatio[j],
2535 v->WritebackVRatio[j],
2536 v->WritebackVTaps[j],
2537 v->WritebackDestinationWidth[j],
2538 v->WritebackDestinationHeight[j],
2539 v->WritebackSourceHeight[j],
2540 v->HTotal[k]) / v->DISPCLK);
2541 }
2542 }
2543 }
2544 }
2545
2546 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2547 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2548 if (v->BlendingAndTiming[k] == j)
2549 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2550
2551 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2552 v->MaxVStartupLines[k] =
2553 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2554 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2555 v->VTotal[k] - v->VActive[k]
2556 - dml_max(
2557 1.0,
2558 dml_ceil(
2559 (double) v->WritebackDelay[v->VoltageLevel][k]
2560 / (v->HTotal[k] / v->PixelClock[k]),
2561 1));
2562 if (v->MaxVStartupLines[k] > 1023)
2563 v->MaxVStartupLines[k] = 1023;
2564
2565 #ifdef __DML_VBA_DEBUG__
2566 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2567 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2568 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2569 #endif
2570 }
2571
2572 v->MaximumMaxVStartupLines = 0;
2573 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2574 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2575
2576 // VBA_DELTA
2577 // We don't really care to iterate between the various prefetch modes
2578 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2579
2580 v->UrgentLatency = CalculateUrgentLatency(
2581 v->UrgentLatencyPixelDataOnly,
2582 v->UrgentLatencyPixelMixedWithVMData,
2583 v->UrgentLatencyVMDataOnly,
2584 v->DoUrgentLatencyAdjustment,
2585 v->UrgentLatencyAdjustmentFabricClockComponent,
2586 v->UrgentLatencyAdjustmentFabricClockReference,
2587 v->FabricClock);
2588
2589 v->FractionOfUrgentBandwidth = 0.0;
2590 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2591
2592 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2593
2594 do {
2595 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2596 bool DestinationLineTimesForPrefetchLessThan2 = false;
2597 bool VRatioPrefetchMoreThan4 = false;
2598 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2599 MaxTotalRDBandwidth = 0;
2600
2601 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2602
2603 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2604 Pipe myPipe;
2605
2606 myPipe.DPPCLK = v->DPPCLK[k];
2607 myPipe.DISPCLK = v->DISPCLK;
2608 myPipe.PixelClock = v->PixelClock[k];
2609 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2610 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2611 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2612 myPipe.VRatio = v->VRatio[k];
2613 myPipe.VRatioChroma = v->VRatioChroma[k];
2614 myPipe.SourceScan = v->SourceScan[k];
2615 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2616 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2617 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2618 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2619 myPipe.InterlaceEnable = v->Interlace[k];
2620 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2621 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2622 myPipe.HTotal = v->HTotal[k];
2623 myPipe.DCCEnable = v->DCCEnable[k];
2624 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2625 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2626 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2627 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2628 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2629 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2630 v->ErrorResult[k] = CalculatePrefetchSchedule(
2631 mode_lib,
2632 HostVMInefficiencyFactor,
2633 &myPipe,
2634 v->DSCDelay[k],
2635 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2636 v->DPPCLKDelaySCL,
2637 v->DPPCLKDelaySCLLBOnly,
2638 v->DPPCLKDelayCNVCCursor,
2639 v->DISPCLKDelaySubtotal,
2640 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2641 v->OutputFormat[k],
2642 v->MaxInterDCNTileRepeaters,
2643 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2644 v->MaxVStartupLines[k],
2645 v->GPUVMMaxPageTableLevels,
2646 v->GPUVMEnable,
2647 v->HostVMEnable,
2648 v->HostVMMaxNonCachedPageTableLevels,
2649 v->HostVMMinPageSize,
2650 v->DynamicMetadataEnable[k],
2651 v->DynamicMetadataVMEnabled,
2652 v->DynamicMetadataLinesBeforeActiveRequired[k],
2653 v->DynamicMetadataTransmittedBytes[k],
2654 v->UrgentLatency,
2655 v->UrgentExtraLatency,
2656 v->TCalc,
2657 v->PDEAndMetaPTEBytesFrame[k],
2658 v->MetaRowByte[k],
2659 v->PixelPTEBytesPerRow[k],
2660 v->PrefetchSourceLinesY[k],
2661 v->SwathWidthY[k],
2662 v->VInitPreFillY[k],
2663 v->MaxNumSwathY[k],
2664 v->PrefetchSourceLinesC[k],
2665 v->SwathWidthC[k],
2666 v->VInitPreFillC[k],
2667 v->MaxNumSwathC[k],
2668 v->swath_width_luma_ub[k],
2669 v->swath_width_chroma_ub[k],
2670 v->SwathHeightY[k],
2671 v->SwathHeightC[k],
2672 TWait,
2673 &v->DSTXAfterScaler[k],
2674 &v->DSTYAfterScaler[k],
2675 &v->DestinationLinesForPrefetch[k],
2676 &v->PrefetchBandwidth[k],
2677 &v->DestinationLinesToRequestVMInVBlank[k],
2678 &v->DestinationLinesToRequestRowInVBlank[k],
2679 &v->VRatioPrefetchY[k],
2680 &v->VRatioPrefetchC[k],
2681 &v->RequiredPrefetchPixDataBWLuma[k],
2682 &v->RequiredPrefetchPixDataBWChroma[k],
2683 &v->NotEnoughTimeForDynamicMetadata[k],
2684 &v->Tno_bw[k],
2685 &v->prefetch_vmrow_bw[k],
2686 &v->Tdmdl_vm[k],
2687 &v->Tdmdl[k],
2688 &v->TSetup[k],
2689 &v->VUpdateOffsetPix[k],
2690 &v->VUpdateWidthPix[k],
2691 &v->VReadyOffsetPix[k]);
2692
2693 #ifdef __DML_VBA_DEBUG__
2694 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2695 #endif
2696 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2697 }
2698
2699 v->NoEnoughUrgentLatencyHiding = false;
2700 v->NoEnoughUrgentLatencyHidingPre = false;
2701
2702 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2703 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2704 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2705 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2706 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2707
2708 CalculateUrgentBurstFactor(
2709 v->swath_width_luma_ub[k],
2710 v->swath_width_chroma_ub[k],
2711 v->SwathHeightY[k],
2712 v->SwathHeightC[k],
2713 v->HTotal[k] / v->PixelClock[k],
2714 v->UrgentLatency,
2715 v->CursorBufferSize,
2716 v->CursorWidth[k][0],
2717 v->CursorBPP[k][0],
2718 v->VRatio[k],
2719 v->VRatioChroma[k],
2720 v->BytePerPixelDETY[k],
2721 v->BytePerPixelDETC[k],
2722 v->DETBufferSizeY[k],
2723 v->DETBufferSizeC[k],
2724 &v->UrgBurstFactorCursor[k],
2725 &v->UrgBurstFactorLuma[k],
2726 &v->UrgBurstFactorChroma[k],
2727 &v->NoUrgentLatencyHiding[k]);
2728
2729 CalculateUrgentBurstFactor(
2730 v->swath_width_luma_ub[k],
2731 v->swath_width_chroma_ub[k],
2732 v->SwathHeightY[k],
2733 v->SwathHeightC[k],
2734 v->HTotal[k] / v->PixelClock[k],
2735 v->UrgentLatency,
2736 v->CursorBufferSize,
2737 v->CursorWidth[k][0],
2738 v->CursorBPP[k][0],
2739 v->VRatioPrefetchY[k],
2740 v->VRatioPrefetchC[k],
2741 v->BytePerPixelDETY[k],
2742 v->BytePerPixelDETC[k],
2743 v->DETBufferSizeY[k],
2744 v->DETBufferSizeC[k],
2745 &v->UrgBurstFactorCursorPre[k],
2746 &v->UrgBurstFactorLumaPre[k],
2747 &v->UrgBurstFactorChromaPre[k],
2748 &v->NoUrgentLatencyHidingPre[k]);
2749
2750 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2751 + dml_max3(
2752 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2753 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2754 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2755 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2756 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2757 v->DPPPerPlane[k]
2758 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2759 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2760 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2761
2762 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2763 + dml_max3(
2764 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2765 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2766 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2767 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2768 + v->cursor_bw_pre[k]);
2769
2770 #ifdef __DML_VBA_DEBUG__
2771 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2772 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2773 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2774 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2775 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2776
2777 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2778 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2779
2780 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2781 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2782 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2783 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2784 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2785 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2786 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2787 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2788 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2789 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2790 #endif
2791
2792 if (v->DestinationLinesForPrefetch[k] < 2)
2793 DestinationLineTimesForPrefetchLessThan2 = true;
2794
2795 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2796 VRatioPrefetchMoreThan4 = true;
2797
2798 if (v->NoUrgentLatencyHiding[k] == true)
2799 v->NoEnoughUrgentLatencyHiding = true;
2800
2801 if (v->NoUrgentLatencyHidingPre[k] == true)
2802 v->NoEnoughUrgentLatencyHidingPre = true;
2803 }
2804
2805 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2806
2807 #ifdef __DML_VBA_DEBUG__
2808 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2809 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2810 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2811 #endif
2812
2813 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2814 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2815 v->PrefetchModeSupported = true;
2816 else {
2817 v->PrefetchModeSupported = false;
2818 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2819 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2820 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2821 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2822 }
2823
2824 // PREVIOUS_ERROR
2825 // This error result check was done after the PrefetchModeSupported. So we will
2826 // still try to calculate flip schedule even prefetch mode not supported
2827 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2828 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2829 v->PrefetchModeSupported = false;
2830 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2831 }
2832 }
2833
2834 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2835 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2836 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2837 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2838 - dml_max(
2839 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2840 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2841 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2842 v->DPPPerPlane[k]
2843 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2844 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2845 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2846 }
2847
2848 v->TotImmediateFlipBytes = 0;
2849 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2850 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2851 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2852 }
2853 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2854 CalculateFlipSchedule(
2855 mode_lib,
2856 k,
2857 HostVMInefficiencyFactor,
2858 v->UrgentExtraLatency,
2859 v->UrgentLatency,
2860 v->PDEAndMetaPTEBytesFrame[k],
2861 v->MetaRowByte[k],
2862 v->PixelPTEBytesPerRow[k]);
2863 }
2864
2865 v->total_dcn_read_bw_with_flip = 0.0;
2866 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2867 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2868 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2869 + dml_max3(
2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2871 v->DPPPerPlane[k] * v->final_flip_bw[k]
2872 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2873 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2874 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2875 v->DPPPerPlane[k]
2876 * (v->final_flip_bw[k]
2877 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2878 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2879 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2880 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2881 + dml_max3(
2882 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2883 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2884 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2885 v->DPPPerPlane[k]
2886 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2887 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2888 }
2889 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2890
2891 v->ImmediateFlipSupported = true;
2892 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2893 #ifdef __DML_VBA_DEBUG__
2894 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2895 #endif
2896 v->ImmediateFlipSupported = false;
2897 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2898 }
2899 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2900 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2901 #ifdef __DML_VBA_DEBUG__
2902 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2903 __func__, k);
2904 #endif
2905 v->ImmediateFlipSupported = false;
2906 }
2907 }
2908 } else {
2909 v->ImmediateFlipSupported = false;
2910 }
2911
2912 v->PrefetchAndImmediateFlipSupported =
2913 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2914 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2915 v->ImmediateFlipSupported)) ? true : false;
2916 #ifdef __DML_VBA_DEBUG__
2917 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2918 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2919 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2920 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2921 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2922 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2923 #endif
2924 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2925
2926 v->VStartupLines = v->VStartupLines + 1;
2927 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2928 ASSERT(v->PrefetchAndImmediateFlipSupported);
2929
2930 // Unbounded Request Enabled
2931 CalculateUnboundedRequestAndCompressedBufferSize(
2932 v->DETBufferSizeInKByte[0],
2933 v->ConfigReturnBufferSizeInKByte,
2934 v->UseUnboundedRequesting,
2935 v->TotalActiveDPP,
2936 NoChromaPlanes,
2937 v->MaxNumDPP,
2938 v->CompressedBufferSegmentSizeInkByte,
2939 v->Output,
2940 &v->UnboundedRequestEnabled,
2941 &v->CompressedBufferSizeInkByte);
2942
2943 //Watermarks and NB P-State/DRAM Clock Change Support
2944 {
2945 enum clock_change_support DRAMClockChangeSupport; // dummy
2946 CalculateWatermarksAndDRAMSpeedChangeSupport(
2947 mode_lib,
2948 PrefetchMode,
2949 v->DCFCLK,
2950 v->ReturnBW,
2951 v->UrgentLatency,
2952 v->UrgentExtraLatency,
2953 v->SOCCLK,
2954 v->DCFCLKDeepSleep,
2955 v->DETBufferSizeY,
2956 v->DETBufferSizeC,
2957 v->SwathHeightY,
2958 v->SwathHeightC,
2959 v->SwathWidthY,
2960 v->SwathWidthC,
2961 v->DPPPerPlane,
2962 v->BytePerPixelDETY,
2963 v->BytePerPixelDETC,
2964 v->UnboundedRequestEnabled,
2965 v->CompressedBufferSizeInkByte,
2966 &DRAMClockChangeSupport,
2967 &v->StutterExitWatermark,
2968 &v->StutterEnterPlusExitWatermark,
2969 &v->Z8StutterExitWatermark,
2970 &v->Z8StutterEnterPlusExitWatermark);
2971
2972 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2973 if (v->WritebackEnable[k] == true) {
2974 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2975 0,
2976 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2977 } else {
2978 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2979 }
2980 }
2981 }
2982
2983 //Display Pipeline Delivery Time in Prefetch, Groups
2984 CalculatePixelDeliveryTimes(
2985 v->NumberOfActivePlanes,
2986 v->VRatio,
2987 v->VRatioChroma,
2988 v->VRatioPrefetchY,
2989 v->VRatioPrefetchC,
2990 v->swath_width_luma_ub,
2991 v->swath_width_chroma_ub,
2992 v->DPPPerPlane,
2993 v->HRatio,
2994 v->HRatioChroma,
2995 v->PixelClock,
2996 v->PSCL_THROUGHPUT_LUMA,
2997 v->PSCL_THROUGHPUT_CHROMA,
2998 v->DPPCLK,
2999 v->BytePerPixelC,
3000 v->SourceScan,
3001 v->NumberOfCursors,
3002 v->CursorWidth,
3003 v->CursorBPP,
3004 v->BlockWidth256BytesY,
3005 v->BlockHeight256BytesY,
3006 v->BlockWidth256BytesC,
3007 v->BlockHeight256BytesC,
3008 v->DisplayPipeLineDeliveryTimeLuma,
3009 v->DisplayPipeLineDeliveryTimeChroma,
3010 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3011 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3012 v->DisplayPipeRequestDeliveryTimeLuma,
3013 v->DisplayPipeRequestDeliveryTimeChroma,
3014 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3015 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3016 v->CursorRequestDeliveryTime,
3017 v->CursorRequestDeliveryTimePrefetch);
3018
3019 CalculateMetaAndPTETimes(
3020 v->NumberOfActivePlanes,
3021 v->GPUVMEnable,
3022 v->MetaChunkSize,
3023 v->MinMetaChunkSizeBytes,
3024 v->HTotal,
3025 v->VRatio,
3026 v->VRatioChroma,
3027 v->DestinationLinesToRequestRowInVBlank,
3028 v->DestinationLinesToRequestRowInImmediateFlip,
3029 v->DCCEnable,
3030 v->PixelClock,
3031 v->BytePerPixelY,
3032 v->BytePerPixelC,
3033 v->SourceScan,
3034 v->dpte_row_height,
3035 v->dpte_row_height_chroma,
3036 v->meta_row_width,
3037 v->meta_row_width_chroma,
3038 v->meta_row_height,
3039 v->meta_row_height_chroma,
3040 v->meta_req_width,
3041 v->meta_req_width_chroma,
3042 v->meta_req_height,
3043 v->meta_req_height_chroma,
3044 v->dpte_group_bytes,
3045 v->PTERequestSizeY,
3046 v->PTERequestSizeC,
3047 v->PixelPTEReqWidthY,
3048 v->PixelPTEReqHeightY,
3049 v->PixelPTEReqWidthC,
3050 v->PixelPTEReqHeightC,
3051 v->dpte_row_width_luma_ub,
3052 v->dpte_row_width_chroma_ub,
3053 v->DST_Y_PER_PTE_ROW_NOM_L,
3054 v->DST_Y_PER_PTE_ROW_NOM_C,
3055 v->DST_Y_PER_META_ROW_NOM_L,
3056 v->DST_Y_PER_META_ROW_NOM_C,
3057 v->TimePerMetaChunkNominal,
3058 v->TimePerChromaMetaChunkNominal,
3059 v->TimePerMetaChunkVBlank,
3060 v->TimePerChromaMetaChunkVBlank,
3061 v->TimePerMetaChunkFlip,
3062 v->TimePerChromaMetaChunkFlip,
3063 v->time_per_pte_group_nom_luma,
3064 v->time_per_pte_group_vblank_luma,
3065 v->time_per_pte_group_flip_luma,
3066 v->time_per_pte_group_nom_chroma,
3067 v->time_per_pte_group_vblank_chroma,
3068 v->time_per_pte_group_flip_chroma);
3069
3070 CalculateVMGroupAndRequestTimes(
3071 v->NumberOfActivePlanes,
3072 v->GPUVMEnable,
3073 v->GPUVMMaxPageTableLevels,
3074 v->HTotal,
3075 v->BytePerPixelC,
3076 v->DestinationLinesToRequestVMInVBlank,
3077 v->DestinationLinesToRequestVMInImmediateFlip,
3078 v->DCCEnable,
3079 v->PixelClock,
3080 v->dpte_row_width_luma_ub,
3081 v->dpte_row_width_chroma_ub,
3082 v->vm_group_bytes,
3083 v->dpde0_bytes_per_frame_ub_l,
3084 v->dpde0_bytes_per_frame_ub_c,
3085 v->meta_pte_bytes_per_frame_ub_l,
3086 v->meta_pte_bytes_per_frame_ub_c,
3087 v->TimePerVMGroupVBlank,
3088 v->TimePerVMGroupFlip,
3089 v->TimePerVMRequestVBlank,
3090 v->TimePerVMRequestFlip);
3091
3092 // Min TTUVBlank
3093 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3094 if (PrefetchMode == 0) {
3095 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3096 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3097 v->MinTTUVBlank[k] = dml_max(
3098 v->DRAMClockChangeWatermark,
3099 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3100 } else if (PrefetchMode == 1) {
3101 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3102 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3103 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3104 } else {
3105 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3106 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3107 v->MinTTUVBlank[k] = v->UrgentWatermark;
3108 }
3109 if (!v->DynamicMetadataEnable[k])
3110 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3111 }
3112
3113 // DCC Configuration
3114 v->ActiveDPPs = 0;
3115 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3116 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3117 v->SourcePixelFormat[k],
3118 v->SurfaceWidthY[k],
3119 v->SurfaceWidthC[k],
3120 v->SurfaceHeightY[k],
3121 v->SurfaceHeightC[k],
3122 v->DETBufferSizeInKByte[k] * 1024,
3123 v->BlockHeight256BytesY[k],
3124 v->BlockHeight256BytesC[k],
3125 v->SurfaceTiling[k],
3126 v->BytePerPixelY[k],
3127 v->BytePerPixelC[k],
3128 v->BytePerPixelDETY[k],
3129 v->BytePerPixelDETC[k],
3130 v->SourceScan[k],
3131 &v->DCCYMaxUncompressedBlock[k],
3132 &v->DCCCMaxUncompressedBlock[k],
3133 &v->DCCYMaxCompressedBlock[k],
3134 &v->DCCCMaxCompressedBlock[k],
3135 &v->DCCYIndependentBlock[k],
3136 &v->DCCCIndependentBlock[k]);
3137 }
3138
3139 // VStartup Adjustment
3140 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3141 bool isInterlaceTiming;
3142 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3143 #ifdef __DML_VBA_DEBUG__
3144 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3145 #endif
3146
3147 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3148
3149 #ifdef __DML_VBA_DEBUG__
3150 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3151 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3152 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3153 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3154 #endif
3155
3156 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3157 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3158 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3159 }
3160
3161 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3162
3163 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3164 - v->VFrontPorch[k])
3165 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3166 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3167
3168 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3169
3170 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3171 <= (isInterlaceTiming ?
3172 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3173 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3174 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3175 } else {
3176 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3177 }
3178 #ifdef __DML_VBA_DEBUG__
3179 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3180 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3181 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3182 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3183 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3184 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3185 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3186 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3187 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3188 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3189 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3190 #endif
3191 }
3192
3193 {
3194 //Maximum Bandwidth Used
3195 double TotalWRBandwidth = 0;
3196 double MaxPerPlaneVActiveWRBandwidth = 0;
3197 double WRBandwidth = 0;
3198 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3199 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3200 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3201 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3202 } else if (v->WritebackEnable[k] == true) {
3203 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3204 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3205 }
3206 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3207 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3208 }
3209
3210 v->TotalDataReadBandwidth = 0;
3211 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3212 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3213 }
3214 }
3215 // Stutter Efficiency
3216 CalculateStutterEfficiency(
3217 mode_lib,
3218 v->CompressedBufferSizeInkByte,
3219 v->UnboundedRequestEnabled,
3220 v->ConfigReturnBufferSizeInKByte,
3221 v->MetaFIFOSizeInKEntries,
3222 v->ZeroSizeBufferEntries,
3223 v->NumberOfActivePlanes,
3224 v->ROBBufferSizeInKByte,
3225 v->TotalDataReadBandwidth,
3226 v->DCFCLK,
3227 v->ReturnBW,
3228 v->COMPBUF_RESERVED_SPACE_64B,
3229 v->COMPBUF_RESERVED_SPACE_ZS,
3230 v->SRExitTime,
3231 v->SRExitZ8Time,
3232 v->SynchronizedVBlank,
3233 v->StutterEnterPlusExitWatermark,
3234 v->Z8StutterEnterPlusExitWatermark,
3235 v->ProgressiveToInterlaceUnitInOPP,
3236 v->Interlace,
3237 v->MinTTUVBlank,
3238 v->DPPPerPlane,
3239 v->DETBufferSizeY,
3240 v->BytePerPixelY,
3241 v->BytePerPixelDETY,
3242 v->SwathWidthY,
3243 v->SwathHeightY,
3244 v->SwathHeightC,
3245 v->DCCRateLuma,
3246 v->DCCRateChroma,
3247 v->DCCFractionOfZeroSizeRequestsLuma,
3248 v->DCCFractionOfZeroSizeRequestsChroma,
3249 v->HTotal,
3250 v->VTotal,
3251 v->PixelClock,
3252 v->VRatio,
3253 v->SourceScan,
3254 v->BlockHeight256BytesY,
3255 v->BlockWidth256BytesY,
3256 v->BlockHeight256BytesC,
3257 v->BlockWidth256BytesC,
3258 v->DCCYMaxUncompressedBlock,
3259 v->DCCCMaxUncompressedBlock,
3260 v->VActive,
3261 v->DCCEnable,
3262 v->WritebackEnable,
3263 v->ReadBandwidthPlaneLuma,
3264 v->ReadBandwidthPlaneChroma,
3265 v->meta_row_bw,
3266 v->dpte_row_bw,
3267 &v->StutterEfficiencyNotIncludingVBlank,
3268 &v->StutterEfficiency,
3269 &v->NumberOfStutterBurstsPerFrame,
3270 &v->Z8StutterEfficiencyNotIncludingVBlank,
3271 &v->Z8StutterEfficiency,
3272 &v->Z8NumberOfStutterBurstsPerFrame,
3273 &v->StutterPeriod);
3274 }
3275
3276 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3277 {
3278 struct vba_vars_st *v = &mode_lib->vba;
3279 // Display Pipe Configuration
3280 double BytePerPixDETY[DC__NUM_DPP__MAX];
3281 double BytePerPixDETC[DC__NUM_DPP__MAX];
3282 int BytePerPixY[DC__NUM_DPP__MAX];
3283 int BytePerPixC[DC__NUM_DPP__MAX];
3284 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3285 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3286 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3287 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3288 double dummy1[DC__NUM_DPP__MAX];
3289 double dummy2[DC__NUM_DPP__MAX];
3290 double dummy3[DC__NUM_DPP__MAX];
3291 double dummy4[DC__NUM_DPP__MAX];
3292 int dummy5[DC__NUM_DPP__MAX];
3293 int dummy6[DC__NUM_DPP__MAX];
3294 bool dummy7[DC__NUM_DPP__MAX];
3295 bool dummysinglestring;
3296
3297 unsigned int k;
3298
3299 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3300
3301 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3302 v->SourcePixelFormat[k],
3303 v->SurfaceTiling[k],
3304 &BytePerPixY[k],
3305 &BytePerPixC[k],
3306 &BytePerPixDETY[k],
3307 &BytePerPixDETC[k],
3308 &Read256BytesBlockHeightY[k],
3309 &Read256BytesBlockHeightC[k],
3310 &Read256BytesBlockWidthY[k],
3311 &Read256BytesBlockWidthC[k]);
3312 }
3313
3314 CalculateSwathAndDETConfiguration(
3315 false,
3316 v->NumberOfActivePlanes,
3317 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
3318 v->DETBufferSizeInKByte,
3319 dummy1,
3320 dummy2,
3321 v->SourceScan,
3322 v->SourcePixelFormat,
3323 v->SurfaceTiling,
3324 v->ViewportWidth,
3325 v->ViewportHeight,
3326 v->SurfaceWidthY,
3327 v->SurfaceWidthC,
3328 v->SurfaceHeightY,
3329 v->SurfaceHeightC,
3330 Read256BytesBlockHeightY,
3331 Read256BytesBlockHeightC,
3332 Read256BytesBlockWidthY,
3333 Read256BytesBlockWidthC,
3334 v->ODMCombineEnabled,
3335 v->BlendingAndTiming,
3336 BytePerPixY,
3337 BytePerPixC,
3338 BytePerPixDETY,
3339 BytePerPixDETC,
3340 v->HActive,
3341 v->HRatio,
3342 v->HRatioChroma,
3343 v->DPPPerPlane,
3344 dummy5,
3345 dummy6,
3346 dummy3,
3347 dummy4,
3348 v->SwathHeightY,
3349 v->SwathHeightC,
3350 v->DETBufferSizeY,
3351 v->DETBufferSizeC,
3352 dummy7,
3353 &dummysinglestring);
3354 }
3355
/*
 * Select the wait time that applies to the given prefetch mode:
 *   mode 0     - the larger of (DRAM clock change latency + urgent latency)
 *                and max(SR enter+exit time, urgent latency)
 *   mode 1     - max(SR enter+exit time, urgent latency)
 *   otherwise  - the urgent latency alone
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3366
3367 double dml31_CalculateWriteBackDISPCLK(
3368 enum source_format_class WritebackPixelFormat,
3369 double PixelClock,
3370 double WritebackHRatio,
3371 double WritebackVRatio,
3372 unsigned int WritebackHTaps,
3373 unsigned int WritebackVTaps,
3374 long WritebackSourceWidth,
3375 long WritebackDestinationWidth,
3376 unsigned int HTotal,
3377 unsigned int WritebackLineBufferSize)
3378 {
3379 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3380
3381 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3382 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3383 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3384 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3385 }
3386
3387 static double CalculateWriteBackDelay(
3388 enum source_format_class WritebackPixelFormat,
3389 double WritebackHRatio,
3390 double WritebackVRatio,
3391 unsigned int WritebackVTaps,
3392 int WritebackDestinationWidth,
3393 int WritebackDestinationHeight,
3394 int WritebackSourceHeight,
3395 unsigned int HTotal)
3396 {
3397 double CalculateWriteBackDelay;
3398 double Line_length;
3399 double Output_lines_last_notclamped;
3400 double WritebackVInit;
3401
3402 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3403 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3404 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3405 if (Output_lines_last_notclamped < 0) {
3406 CalculateWriteBackDelay = 0;
3407 } else {
3408 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3409 }
3410 return CalculateWriteBackDelay;
3411 }
3412
/*
 * Derive the VUPDATE / VREADY pixel parameters and the dynamic metadata
 * timing terms for one pipe.
 *
 * Outputs:
 *   VUpdateWidthPix  - (14 / DCFClkDeepSleep + 12 / DPPCLK + repeater delay)
 *                      expressed in pixel clocks, rounded up
 *   VReadyOffsetPix  - max(150 / DPPCLK, repeater delay + 20 / DCFClkDeepSleep
 *                      + 10 / DPPCLK) expressed in pixel clocks, rounded up
 *   VUpdateOffsetPix - HTotal / 4, rounded up
 *   TSetup           - sum of the three pixel counts above divided by PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - half of VBlank * HTotal / PixelClock when no
 *                      "lines before active" requirement is given, otherwise
 *                      that many lines; halved again for interlaced timing
 *                      when ProgressiveToInterlaceUnitInOPP is false
 *
 * where "repeater delay" is MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles, so they need %f; only the update offset is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3456
3457 static void CalculateRowBandwidth(
3458 bool GPUVMEnable,
3459 enum source_format_class SourcePixelFormat,
3460 double VRatio,
3461 double VRatioChroma,
3462 bool DCCEnable,
3463 double LineTime,
3464 unsigned int MetaRowByteLuma,
3465 unsigned int MetaRowByteChroma,
3466 unsigned int meta_row_height_luma,
3467 unsigned int meta_row_height_chroma,
3468 unsigned int PixelPTEBytesPerRowLuma,
3469 unsigned int PixelPTEBytesPerRowChroma,
3470 unsigned int dpte_row_height_luma,
3471 unsigned int dpte_row_height_chroma,
3472 double *meta_row_bw,
3473 double *dpte_row_bw)
3474 {
3475 if (DCCEnable != true) {
3476 *meta_row_bw = 0;
3477 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3478 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3479 } else {
3480 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3481 }
3482
3483 if (GPUVMEnable != true) {
3484 *dpte_row_bw = 0;
3485 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3486 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3487 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3488 } else {
3489 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3490 }
3491 }
3492
3493 static void CalculateFlipSchedule(
3494 struct display_mode_lib *mode_lib,
3495 unsigned int k,
3496 double HostVMInefficiencyFactor,
3497 double UrgentExtraLatency,
3498 double UrgentLatency,
3499 double PDEAndMetaPTEBytesPerFrame,
3500 double MetaRowBytes,
3501 double DPTEBytesPerRow)
3502 {
3503 struct vba_vars_st *v = &mode_lib->vba;
3504 double min_row_time = 0.0;
3505 unsigned int HostVMDynamicLevelsTrips;
3506 double TimeForFetchingMetaPTEImmediateFlip;
3507 double TimeForFetchingRowInVBlankImmediateFlip;
3508 double ImmediateFlipBW = 1.0;
3509 double LineTime = v->HTotal[k] / v->PixelClock[k];
3510
3511 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3512 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3513 } else {
3514 HostVMDynamicLevelsTrips = 0;
3515 }
3516
3517 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3518 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3519 }
3520
3521 if (v->GPUVMEnable == true) {
3522 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3523 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3524 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3525 LineTime / 4.0);
3526 } else {
3527 TimeForFetchingMetaPTEImmediateFlip = 0;
3528 }
3529
3530 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3531 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3532 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3533 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3534 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3535 LineTime / 4);
3536 } else {
3537 TimeForFetchingRowInVBlankImmediateFlip = 0;
3538 }
3539
3540 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3541
3542 if (v->GPUVMEnable == true) {
3543 v->final_flip_bw[k] = dml_max(
3544 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3545 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3546 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3547 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3548 } else {
3549 v->final_flip_bw[k] = 0;
3550 }
3551
3552 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3553 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3554 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3555 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3556 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3557 } else {
3558 min_row_time = dml_min4(
3559 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3560 v->meta_row_height[k] * LineTime / v->VRatio[k],
3561 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3562 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3563 }
3564 } else {
3565 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3566 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3567 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3568 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3569 } else {
3570 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3571 }
3572 }
3573
3574 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3575 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3576 v->ImmediateFlipSupportedForPipe[k] = false;
3577 } else {
3578 v->ImmediateFlipSupportedForPipe[k] = true;
3579 }
3580
3581 #ifdef __DML_VBA_DEBUG__
3582 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3583 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3584 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3585 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3586 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3587 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3588 #endif
3589
3590 }
3591
3592 static double TruncToValidBPP(
3593 double LinkBitRate,
3594 int Lanes,
3595 int HTotal,
3596 int HActive,
3597 double PixelClock,
3598 double DesiredBPP,
3599 bool DSCEnable,
3600 enum output_encoder_class Output,
3601 enum output_format_class Format,
3602 unsigned int DSCInputBitPerComponent,
3603 int DSCSlices,
3604 int AudioRate,
3605 int AudioLayout,
3606 enum odm_combine_mode ODMCombine)
3607 {
3608 double MaxLinkBPP;
3609 int MinDSCBPP;
3610 double MaxDSCBPP;
3611 int NonDSCBPP0;
3612 int NonDSCBPP1;
3613 int NonDSCBPP2;
3614
3615 if (Format == dm_420) {
3616 NonDSCBPP0 = 12;
3617 NonDSCBPP1 = 15;
3618 NonDSCBPP2 = 18;
3619 MinDSCBPP = 6;
3620 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3621 } else if (Format == dm_444) {
3622 NonDSCBPP0 = 24;
3623 NonDSCBPP1 = 30;
3624 NonDSCBPP2 = 36;
3625 MinDSCBPP = 8;
3626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3627 } else {
3628
3629 NonDSCBPP0 = 16;
3630 NonDSCBPP1 = 20;
3631 NonDSCBPP2 = 24;
3632
3633 if (Format == dm_n422) {
3634 MinDSCBPP = 7;
3635 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3636 } else {
3637 MinDSCBPP = 8;
3638 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3639 }
3640 }
3641
3642 if (DSCEnable && Output == dm_dp) {
3643 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3644 } else {
3645 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3646 }
3647
3648 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3649 MaxLinkBPP = 16;
3650 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3651 MaxLinkBPP = 32;
3652 }
3653
3654 if (DesiredBPP == 0) {
3655 if (DSCEnable) {
3656 if (MaxLinkBPP < MinDSCBPP) {
3657 return BPP_INVALID;
3658 } else if (MaxLinkBPP >= MaxDSCBPP) {
3659 return MaxDSCBPP;
3660 } else {
3661 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3662 }
3663 } else {
3664 if (MaxLinkBPP >= NonDSCBPP2) {
3665 return NonDSCBPP2;
3666 } else if (MaxLinkBPP >= NonDSCBPP1) {
3667 return NonDSCBPP1;
3668 } else if (MaxLinkBPP >= NonDSCBPP0) {
3669 return 16.0;
3670 } else {
3671 return BPP_INVALID;
3672 }
3673 }
3674 } else {
3675 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3676 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3677 return BPP_INVALID;
3678 } else {
3679 return DesiredBPP;
3680 }
3681 }
3682 return BPP_INVALID;
3683 }
3684
3685 static noinline void CalculatePrefetchSchedulePerPlane(
3686 struct display_mode_lib *mode_lib,
3687 double HostVMInefficiencyFactor,
3688 int i,
3689 unsigned j,
3690 unsigned k)
3691 {
3692 struct vba_vars_st *v = &mode_lib->vba;
3693 Pipe myPipe;
3694
3695 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3696 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3697 myPipe.PixelClock = v->PixelClock[k];
3698 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3699 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3700 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3701 myPipe.VRatio = mode_lib->vba.VRatio[k];
3702 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3703
3704 myPipe.SourceScan = v->SourceScan[k];
3705 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3706 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3707 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3708 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3709 myPipe.InterlaceEnable = v->Interlace[k];
3710 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3711 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3712 myPipe.HTotal = v->HTotal[k];
3713 myPipe.DCCEnable = v->DCCEnable[k];
3714 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3715 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3716 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3717 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3718 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3719 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3720 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3721 mode_lib,
3722 HostVMInefficiencyFactor,
3723 &myPipe,
3724 v->DSCDelayPerState[i][k],
3725 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3726 v->DPPCLKDelaySCL,
3727 v->DPPCLKDelaySCLLBOnly,
3728 v->DPPCLKDelayCNVCCursor,
3729 v->DISPCLKDelaySubtotal,
3730 v->SwathWidthYThisState[k] / v->HRatio[k],
3731 v->OutputFormat[k],
3732 v->MaxInterDCNTileRepeaters,
3733 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3734 v->MaximumVStartup[i][j][k],
3735 v->GPUVMMaxPageTableLevels,
3736 v->GPUVMEnable,
3737 v->HostVMEnable,
3738 v->HostVMMaxNonCachedPageTableLevels,
3739 v->HostVMMinPageSize,
3740 v->DynamicMetadataEnable[k],
3741 v->DynamicMetadataVMEnabled,
3742 v->DynamicMetadataLinesBeforeActiveRequired[k],
3743 v->DynamicMetadataTransmittedBytes[k],
3744 v->UrgLatency[i],
3745 v->ExtraLatency,
3746 v->TimeCalc,
3747 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3748 v->MetaRowBytes[i][j][k],
3749 v->DPTEBytesPerRow[i][j][k],
3750 v->PrefetchLinesY[i][j][k],
3751 v->SwathWidthYThisState[k],
3752 v->PrefillY[k],
3753 v->MaxNumSwY[k],
3754 v->PrefetchLinesC[i][j][k],
3755 v->SwathWidthCThisState[k],
3756 v->PrefillC[k],
3757 v->MaxNumSwC[k],
3758 v->swath_width_luma_ub_this_state[k],
3759 v->swath_width_chroma_ub_this_state[k],
3760 v->SwathHeightYThisState[k],
3761 v->SwathHeightCThisState[k],
3762 v->TWait,
3763 &v->DSTXAfterScaler[k],
3764 &v->DSTYAfterScaler[k],
3765 &v->LineTimesForPrefetch[k],
3766 &v->PrefetchBW[k],
3767 &v->LinesForMetaPTE[k],
3768 &v->LinesForMetaAndDPTERow[k],
3769 &v->VRatioPreY[i][j][k],
3770 &v->VRatioPreC[i][j][k],
3771 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3772 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3773 &v->NoTimeForDynamicMetadata[i][j][k],
3774 &v->Tno_bw[k],
3775 &v->prefetch_vmrow_bw[k],
3776 &v->dummy7[k],
3777 &v->dummy8[k],
3778 &v->dummy13[k],
3779 &v->VUpdateOffsetPix[k],
3780 &v->VUpdateWidthPix[k],
3781 &v->VReadyOffsetPix[k]);
3782 }
3783
3784 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
3785 {
3786 int i, total_pipes = 0;
3787 for (i = 0; i < NumberOfActivePlanes; i++)
3788 total_pipes += NoOfDPPThisState[i];
3789 DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
3790 if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
3791 DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
3792 for (i = 1; i < NumberOfActivePlanes; i++)
3793 DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
3794 }
3795
3796
3797 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3798 {
3799 struct vba_vars_st *v = &mode_lib->vba;
3800
3801 int i, j;
3802 unsigned int k, m;
3803 int ReorderingBytes;
3804 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3805 bool NoChroma = true;
3806 bool EnoughWritebackUnits = true;
3807 bool P2IWith420 = false;
3808 bool DSCOnlyIfNecessaryWithBPP = false;
3809 bool DSC422NativeNotSupported = false;
3810 double MaxTotalVActiveRDBandwidth;
3811 bool ViewportExceedsSurface = false;
3812 bool FMTBufferExceeded = false;
3813
3814 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3815
3816 CalculateMinAndMaxPrefetchMode(
3817 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3818 &MinPrefetchMode, &MaxPrefetchMode);
3819
3820 /*Scale Ratio, taps Support Check*/
3821
3822 v->ScaleRatioAndTapsSupport = true;
3823 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3824 if (v->ScalerEnabled[k] == false
3825 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3826 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3827 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3828 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3829 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3830 v->ScaleRatioAndTapsSupport = false;
3831 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3832 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3833 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3834 || v->VRatio[k] > v->vtaps[k]
3835 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3836 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3837 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3838 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3839 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3840 || v->HRatioChroma[k] > v->MaxHSCLRatio
3841 || v->VRatioChroma[k] > v->MaxVSCLRatio
3842 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3843 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3844 v->ScaleRatioAndTapsSupport = false;
3845 }
3846 }
3847 /*Source Format, Pixel Format and Scan Support Check*/
3848
3849 v->SourceFormatPixelAndScanSupport = true;
3850 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3851 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3852 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3853 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3854 v->SourceFormatPixelAndScanSupport = false;
3855 }
3856 }
3857 /*Bandwidth Support Check*/
3858
3859 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3860 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3861 v->SourcePixelFormat[k],
3862 v->SurfaceTiling[k],
3863 &v->BytePerPixelY[k],
3864 &v->BytePerPixelC[k],
3865 &v->BytePerPixelInDETY[k],
3866 &v->BytePerPixelInDETC[k],
3867 &v->Read256BlockHeightY[k],
3868 &v->Read256BlockHeightC[k],
3869 &v->Read256BlockWidthY[k],
3870 &v->Read256BlockWidthC[k]);
3871 }
3872 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3873 if (v->SourceScan[k] != dm_vert) {
3874 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3875 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3876 } else {
3877 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3878 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3879 }
3880 }
3881 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3882 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3883 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3884 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3885 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3886 }
3887 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3888 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3889 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3890 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3891 } else if (v->WritebackEnable[k] == true) {
3892 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3893 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3894 } else {
3895 v->WriteBandwidth[k] = 0.0;
3896 }
3897 }
3898
3899 /*Writeback Latency support check*/
3900
3901 v->WritebackLatencySupport = true;
3902 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3903 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3904 v->WritebackLatencySupport = false;
3905 }
3906 }
3907
3908 /*Writeback Mode Support Check*/
3909
3910 v->TotalNumberOfActiveWriteback = 0;
3911 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3912 if (v->WritebackEnable[k] == true) {
3913 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3914 }
3915 }
3916
3917 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3918 EnoughWritebackUnits = false;
3919 }
3920
3921 /*Writeback Scale Ratio and Taps Support Check*/
3922
3923 v->WritebackScaleRatioAndTapsSupport = true;
3924 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3925 if (v->WritebackEnable[k] == true) {
3926 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3927 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3928 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3929 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3930 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3931 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3932 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3933 v->WritebackScaleRatioAndTapsSupport = false;
3934 }
3935 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3936 v->WritebackScaleRatioAndTapsSupport = false;
3937 }
3938 }
3939 }
3940 /*Maximum DISPCLK/DPPCLK Support check*/
3941
3942 v->WritebackRequiredDISPCLK = 0.0;
3943 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3944 if (v->WritebackEnable[k] == true) {
3945 v->WritebackRequiredDISPCLK = dml_max(
3946 v->WritebackRequiredDISPCLK,
3947 dml31_CalculateWriteBackDISPCLK(
3948 v->WritebackPixelFormat[k],
3949 v->PixelClock[k],
3950 v->WritebackHRatio[k],
3951 v->WritebackVRatio[k],
3952 v->WritebackHTaps[k],
3953 v->WritebackVTaps[k],
3954 v->WritebackSourceWidth[k],
3955 v->WritebackDestinationWidth[k],
3956 v->HTotal[k],
3957 v->WritebackLineBufferSize));
3958 }
3959 }
3960 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3961 if (v->HRatio[k] > 1.0) {
3962 v->PSCL_FACTOR[k] = dml_min(
3963 v->MaxDCHUBToPSCLThroughput,
3964 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3965 } else {
3966 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3967 }
3968 if (v->BytePerPixelC[k] == 0.0) {
3969 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3970 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3971 * dml_max3(
3972 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3973 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3974 1.0);
3975 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3976 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3977 }
3978 } else {
3979 if (v->HRatioChroma[k] > 1.0) {
3980 v->PSCL_FACTOR_CHROMA[k] = dml_min(
3981 v->MaxDCHUBToPSCLThroughput,
3982 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3983 } else {
3984 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3985 }
3986 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3987 * dml_max5(
3988 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3989 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3990 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3991 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3992 1.0);
3993 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3994 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3995 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3996 }
3997 }
3998 }
3999 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4000 int MaximumSwathWidthSupportLuma;
4001 int MaximumSwathWidthSupportChroma;
4002
4003 if (v->SurfaceTiling[k] == dm_sw_linear) {
4004 MaximumSwathWidthSupportLuma = 8192.0;
4005 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4006 MaximumSwathWidthSupportLuma = 2880.0;
4007 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4008 MaximumSwathWidthSupportLuma = 3840.0;
4009 } else {
4010 MaximumSwathWidthSupportLuma = 5760.0;
4011 }
4012
4013 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4014 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4015 } else {
4016 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4017 }
4018 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4019 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4020 if (v->BytePerPixelC[k] == 0.0) {
4021 v->MaximumSwathWidthInLineBufferChroma = 0;
4022 } else {
4023 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4024 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4025 }
4026 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4027 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4028 }
4029
4030 CalculateSwathAndDETConfiguration(
4031 true,
4032 v->NumberOfActivePlanes,
4033 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4034 v->DETBufferSizeInKByte,
4035 v->MaximumSwathWidthLuma,
4036 v->MaximumSwathWidthChroma,
4037 v->SourceScan,
4038 v->SourcePixelFormat,
4039 v->SurfaceTiling,
4040 v->ViewportWidth,
4041 v->ViewportHeight,
4042 v->SurfaceWidthY,
4043 v->SurfaceWidthC,
4044 v->SurfaceHeightY,
4045 v->SurfaceHeightC,
4046 v->Read256BlockHeightY,
4047 v->Read256BlockHeightC,
4048 v->Read256BlockWidthY,
4049 v->Read256BlockWidthC,
4050 v->odm_combine_dummy,
4051 v->BlendingAndTiming,
4052 v->BytePerPixelY,
4053 v->BytePerPixelC,
4054 v->BytePerPixelInDETY,
4055 v->BytePerPixelInDETC,
4056 v->HActive,
4057 v->HRatio,
4058 v->HRatioChroma,
4059 v->NoOfDPPThisState,
4060 v->swath_width_luma_ub_this_state,
4061 v->swath_width_chroma_ub_this_state,
4062 v->SwathWidthYThisState,
4063 v->SwathWidthCThisState,
4064 v->SwathHeightYThisState,
4065 v->SwathHeightCThisState,
4066 v->DETBufferSizeYThisState,
4067 v->DETBufferSizeCThisState,
4068 v->SingleDPPViewportSizeSupportPerPlane,
4069 &v->ViewportSizeSupport[0][0]);
4070
4071 for (i = 0; i < v->soc.num_states; i++) {
4072 for (j = 0; j < 2; j++) {
4073 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4074 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4075 v->RequiredDISPCLK[i][j] = 0.0;
4076 v->DISPCLK_DPPCLK_Support[i][j] = true;
4077 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4078 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4079 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4080 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4081 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4082 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4083 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4084 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4085 }
4086 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4087 * (1 + v->DISPCLKRampingMargin / 100.0);
4088 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4089 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4090 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4091 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4092 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4093 }
4094 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4095 * (1 + v->DISPCLKRampingMargin / 100.0);
4096 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4097 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4098 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4099 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4100 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4101 }
4102
4103 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4104 || !(v->Output[k] == dm_dp ||
4105 v->Output[k] == dm_dp2p0 ||
4106 v->Output[k] == dm_edp)) {
4107 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4108 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4109
4110 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4111 FMTBufferExceeded = true;
4112 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4113 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4114 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4115 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4116 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4117 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4118 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4119 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4120 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4121 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4122 } else {
4123 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4124 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4125 }
4126 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4127 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4128 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4129 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4130 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4131 } else {
4132 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4133 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4134 }
4135 }
4136 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4137 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4138 if (v->Output[k] == dm_hdmi) {
4139 FMTBufferExceeded = true;
4140 } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4141 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4142 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4143
4144 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4145 FMTBufferExceeded = true;
4146 } else {
4147 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4148 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4149 }
4150 }
4151 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4152 v->MPCCombine[i][j][k] = false;
4153 v->NoOfDPP[i][j][k] = 4;
4154 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4155 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4156 v->MPCCombine[i][j][k] = false;
4157 v->NoOfDPP[i][j][k] = 2;
4158 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4159 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4160 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4161 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4162 v->MPCCombine[i][j][k] = false;
4163 v->NoOfDPP[i][j][k] = 1;
4164 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4165 } else {
4166 v->MPCCombine[i][j][k] = true;
4167 v->NoOfDPP[i][j][k] = 2;
4168 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4169 }
4170 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4171 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4172 > v->MaxDppclkRoundedDownToDFSGranularity)
4173 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4174 v->DISPCLK_DPPCLK_Support[i][j] = false;
4175 }
4176 if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
4177 v->MPCCombine[i][j][k] = true;
4178 v->NoOfDPP[i][j][k] = 2;
4179 }
4180 }
4181 v->TotalNumberOfActiveDPP[i][j] = 0;
4182 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4183 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4184 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4185 if (v->NoOfDPP[i][j][k] == 1)
4186 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4187 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4188 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4189 NoChroma = false;
4190 }
4191
4192 // UPTO
4193 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4194 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4195 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4196 double BWOfNonSplitPlaneOfMaximumBandwidth;
4197 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4198 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4199 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4200 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4201 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4202 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4203 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4204 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4205 }
4206 }
4207 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4208 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4209 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4210 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4211 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4212 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4213 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4214 }
4215 }
4216 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4217 v->RequiredDISPCLK[i][j] = 0.0;
4218 v->DISPCLK_DPPCLK_Support[i][j] = true;
4219 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4220 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4221 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4222 v->MPCCombine[i][j][k] = true;
4223 v->NoOfDPP[i][j][k] = 2;
4224 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4225 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4226 } else {
4227 v->MPCCombine[i][j][k] = false;
4228 v->NoOfDPP[i][j][k] = 1;
4229 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4230 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4231 }
4232 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4233 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4234 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4235 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4236 } else {
4237 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4238 }
4239 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4240 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4241 > v->MaxDppclkRoundedDownToDFSGranularity)
4242 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4243 v->DISPCLK_DPPCLK_Support[i][j] = false;
4244 }
4245 }
4246 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4247 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4248 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4249 }
4250 }
4251 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4252 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4253 v->DISPCLK_DPPCLK_Support[i][j] = false;
4254 }
4255 }
4256 }
4257
4258 /*Total Available Pipes Support Check*/
4259
4260 for (i = 0; i < v->soc.num_states; i++) {
4261 for (j = 0; j < 2; j++) {
4262 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4263 v->TotalAvailablePipesSupport[i][j] = true;
4264 } else {
4265 v->TotalAvailablePipesSupport[i][j] = false;
4266 }
4267 }
4268 }
4269 /*Display IO and DSC Support Check*/
4270
4271 v->NonsupportedDSCInputBPC = false;
4272 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4273 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4274 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4275 v->NonsupportedDSCInputBPC = true;
4276 }
4277 }
4278
4279 /*Number Of DSC Slices*/
4280 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4281 if (v->BlendingAndTiming[k] == k) {
4282 if (v->PixelClockBackEnd[k] > 3200) {
4283 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4284 } else if (v->PixelClockBackEnd[k] > 1360) {
4285 v->NumberOfDSCSlices[k] = 8;
4286 } else if (v->PixelClockBackEnd[k] > 680) {
4287 v->NumberOfDSCSlices[k] = 4;
4288 } else if (v->PixelClockBackEnd[k] > 340) {
4289 v->NumberOfDSCSlices[k] = 2;
4290 } else {
4291 v->NumberOfDSCSlices[k] = 1;
4292 }
4293 } else {
4294 v->NumberOfDSCSlices[k] = 0;
4295 }
4296 }
4297
4298 for (i = 0; i < v->soc.num_states; i++) {
4299 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4300 v->RequiresDSC[i][k] = false;
4301 v->RequiresFEC[i][k] = false;
4302 if (v->BlendingAndTiming[k] == k) {
4303 if (v->Output[k] == dm_hdmi) {
4304 v->RequiresDSC[i][k] = false;
4305 v->RequiresFEC[i][k] = false;
4306 v->OutputBppPerState[i][k] = TruncToValidBPP(
4307 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4308 3,
4309 v->HTotal[k],
4310 v->HActive[k],
4311 v->PixelClockBackEnd[k],
4312 v->ForcedOutputLinkBPP[k],
4313 false,
4314 v->Output[k],
4315 v->OutputFormat[k],
4316 v->DSCInputBitPerComponent[k],
4317 v->NumberOfDSCSlices[k],
4318 v->AudioSampleRate[k],
4319 v->AudioSampleLayout[k],
4320 v->ODMCombineEnablePerState[i][k]);
4321 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4322 if (v->DSCEnable[k] == true) {
4323 v->RequiresDSC[i][k] = true;
4324 v->LinkDSCEnable = true;
4325 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4326 v->RequiresFEC[i][k] = true;
4327 } else {
4328 v->RequiresFEC[i][k] = false;
4329 }
4330 } else {
4331 v->RequiresDSC[i][k] = false;
4332 v->LinkDSCEnable = false;
4333 if (v->Output[k] == dm_dp2p0) {
4334 v->RequiresFEC[i][k] = true;
4335 } else {
4336 v->RequiresFEC[i][k] = false;
4337 }
4338 }
4339 if (v->Output[k] == dm_dp2p0) {
4340 v->Outbpp = BPP_INVALID;
4341 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4342 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4343 v->Outbpp = TruncToValidBPP(
4344 (1.0 - v->Downspreading / 100.0) * 10000,
4345 v->OutputLinkDPLanes[k],
4346 v->HTotal[k],
4347 v->HActive[k],
4348 v->PixelClockBackEnd[k],
4349 v->ForcedOutputLinkBPP[k],
4350 v->LinkDSCEnable,
4351 v->Output[k],
4352 v->OutputFormat[k],
4353 v->DSCInputBitPerComponent[k],
4354 v->NumberOfDSCSlices[k],
4355 v->AudioSampleRate[k],
4356 v->AudioSampleLayout[k],
4357 v->ODMCombineEnablePerState[i][k]);
4358 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4359 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4360 v->RequiresDSC[i][k] = true;
4361 v->LinkDSCEnable = true;
4362 v->Outbpp = TruncToValidBPP(
4363 (1.0 - v->Downspreading / 100.0) * 10000,
4364 v->OutputLinkDPLanes[k],
4365 v->HTotal[k],
4366 v->HActive[k],
4367 v->PixelClockBackEnd[k],
4368 v->ForcedOutputLinkBPP[k],
4369 v->LinkDSCEnable,
4370 v->Output[k],
4371 v->OutputFormat[k],
4372 v->DSCInputBitPerComponent[k],
4373 v->NumberOfDSCSlices[k],
4374 v->AudioSampleRate[k],
4375 v->AudioSampleLayout[k],
4376 v->ODMCombineEnablePerState[i][k]);
4377 }
4378 v->OutputBppPerState[i][k] = v->Outbpp;
4379 // TODO: Need some other way to handle this nonsense
4380 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4381 }
4382 if (v->Outbpp == BPP_INVALID &&
4383 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4384 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4385 v->Outbpp = TruncToValidBPP(
4386 (1.0 - v->Downspreading / 100.0) * 13500,
4387 v->OutputLinkDPLanes[k],
4388 v->HTotal[k],
4389 v->HActive[k],
4390 v->PixelClockBackEnd[k],
4391 v->ForcedOutputLinkBPP[k],
4392 v->LinkDSCEnable,
4393 v->Output[k],
4394 v->OutputFormat[k],
4395 v->DSCInputBitPerComponent[k],
4396 v->NumberOfDSCSlices[k],
4397 v->AudioSampleRate[k],
4398 v->AudioSampleLayout[k],
4399 v->ODMCombineEnablePerState[i][k]);
4400 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4401 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4402 v->RequiresDSC[i][k] = true;
4403 v->LinkDSCEnable = true;
4404 v->Outbpp = TruncToValidBPP(
4405 (1.0 - v->Downspreading / 100.0) * 13500,
4406 v->OutputLinkDPLanes[k],
4407 v->HTotal[k],
4408 v->HActive[k],
4409 v->PixelClockBackEnd[k],
4410 v->ForcedOutputLinkBPP[k],
4411 v->LinkDSCEnable,
4412 v->Output[k],
4413 v->OutputFormat[k],
4414 v->DSCInputBitPerComponent[k],
4415 v->NumberOfDSCSlices[k],
4416 v->AudioSampleRate[k],
4417 v->AudioSampleLayout[k],
4418 v->ODMCombineEnablePerState[i][k]);
4419 }
4420 v->OutputBppPerState[i][k] = v->Outbpp;
4421 // TODO: Need some other way to handle this nonsense
4422 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4423 }
4424 if (v->Outbpp == BPP_INVALID &&
4425 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4426 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4427 v->Outbpp = TruncToValidBPP(
4428 (1.0 - v->Downspreading / 100.0) * 20000,
4429 v->OutputLinkDPLanes[k],
4430 v->HTotal[k],
4431 v->HActive[k],
4432 v->PixelClockBackEnd[k],
4433 v->ForcedOutputLinkBPP[k],
4434 v->LinkDSCEnable,
4435 v->Output[k],
4436 v->OutputFormat[k],
4437 v->DSCInputBitPerComponent[k],
4438 v->NumberOfDSCSlices[k],
4439 v->AudioSampleRate[k],
4440 v->AudioSampleLayout[k],
4441 v->ODMCombineEnablePerState[i][k]);
4442 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4443 v->ForcedOutputLinkBPP[k] == 0) {
4444 v->RequiresDSC[i][k] = true;
4445 v->LinkDSCEnable = true;
4446 v->Outbpp = TruncToValidBPP(
4447 (1.0 - v->Downspreading / 100.0) * 20000,
4448 v->OutputLinkDPLanes[k],
4449 v->HTotal[k],
4450 v->HActive[k],
4451 v->PixelClockBackEnd[k],
4452 v->ForcedOutputLinkBPP[k],
4453 v->LinkDSCEnable,
4454 v->Output[k],
4455 v->OutputFormat[k],
4456 v->DSCInputBitPerComponent[k],
4457 v->NumberOfDSCSlices[k],
4458 v->AudioSampleRate[k],
4459 v->AudioSampleLayout[k],
4460 v->ODMCombineEnablePerState[i][k]);
4461 }
4462 v->OutputBppPerState[i][k] = v->Outbpp;
4463 // TODO: Need some other way to handle this nonsense
4464 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4465 }
4466 } else {
4467 v->Outbpp = BPP_INVALID;
4468 if (v->PHYCLKPerState[i] >= 270.0) {
4469 v->Outbpp = TruncToValidBPP(
4470 (1.0 - v->Downspreading / 100.0) * 2700,
4471 v->OutputLinkDPLanes[k],
4472 v->HTotal[k],
4473 v->HActive[k],
4474 v->PixelClockBackEnd[k],
4475 v->ForcedOutputLinkBPP[k],
4476 v->LinkDSCEnable,
4477 v->Output[k],
4478 v->OutputFormat[k],
4479 v->DSCInputBitPerComponent[k],
4480 v->NumberOfDSCSlices[k],
4481 v->AudioSampleRate[k],
4482 v->AudioSampleLayout[k],
4483 v->ODMCombineEnablePerState[i][k]);
4484 v->OutputBppPerState[i][k] = v->Outbpp;
4485 // TODO: Need some other way to handle this nonsense
4486 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4487 }
4488 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4489 v->Outbpp = TruncToValidBPP(
4490 (1.0 - v->Downspreading / 100.0) * 5400,
4491 v->OutputLinkDPLanes[k],
4492 v->HTotal[k],
4493 v->HActive[k],
4494 v->PixelClockBackEnd[k],
4495 v->ForcedOutputLinkBPP[k],
4496 v->LinkDSCEnable,
4497 v->Output[k],
4498 v->OutputFormat[k],
4499 v->DSCInputBitPerComponent[k],
4500 v->NumberOfDSCSlices[k],
4501 v->AudioSampleRate[k],
4502 v->AudioSampleLayout[k],
4503 v->ODMCombineEnablePerState[i][k]);
4504 v->OutputBppPerState[i][k] = v->Outbpp;
4505 // TODO: Need some other way to handle this nonsense
4506 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4507 }
4508 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4509 v->Outbpp = TruncToValidBPP(
4510 (1.0 - v->Downspreading / 100.0) * 8100,
4511 v->OutputLinkDPLanes[k],
4512 v->HTotal[k],
4513 v->HActive[k],
4514 v->PixelClockBackEnd[k],
4515 v->ForcedOutputLinkBPP[k],
4516 v->LinkDSCEnable,
4517 v->Output[k],
4518 v->OutputFormat[k],
4519 v->DSCInputBitPerComponent[k],
4520 v->NumberOfDSCSlices[k],
4521 v->AudioSampleRate[k],
4522 v->AudioSampleLayout[k],
4523 v->ODMCombineEnablePerState[i][k]);
4524 v->OutputBppPerState[i][k] = v->Outbpp;
4525 // TODO: Need some other way to handle this nonsense
4526 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4527 }
4528 }
4529 }
4530 } else {
4531 v->OutputBppPerState[i][k] = 0;
4532 }
4533 }
4534 }
4535
4536 for (i = 0; i < v->soc.num_states; i++) {
4537 v->LinkCapacitySupport[i] = true;
4538 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4539 if (v->BlendingAndTiming[k] == k
4540 && (v->Output[k] == dm_dp ||
4541 v->Output[k] == dm_edp ||
4542 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4543 v->LinkCapacitySupport[i] = false;
4544 }
4545 }
4546 }
4547
4548 // UPTO 2172
4549 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4550 if (v->BlendingAndTiming[k] == k
4551 && (v->Output[k] == dm_dp ||
4552 v->Output[k] == dm_edp ||
4553 v->Output[k] == dm_hdmi)) {
4554 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4555 P2IWith420 = true;
4556 }
4557 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4558 && !v->DSC422NativeSupport) {
4559 DSC422NativeNotSupported = true;
4560 }
4561 }
4562 }
4563
4564 for (i = 0; i < v->soc.num_states; ++i) {
4565 v->ODMCombine4To1SupportCheckOK[i] = true;
4566 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4567 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4568 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4569 || v->Output[k] == dm_hdmi)) {
4570 v->ODMCombine4To1SupportCheckOK[i] = false;
4571 }
4572 }
4573 }
4574
4575 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4576
4577 for (i = 0; i < v->soc.num_states; i++) {
4578 v->NotEnoughDSCUnits[i] = false;
4579 v->TotalDSCUnitsRequired = 0.0;
4580 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4581 if (v->RequiresDSC[i][k] == true) {
4582 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4583 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4584 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4585 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4586 } else {
4587 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4588 }
4589 }
4590 }
4591 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4592 v->NotEnoughDSCUnits[i] = true;
4593 }
4594 }
4595 /*DSC Delay per state*/
4596
4597 for (i = 0; i < v->soc.num_states; i++) {
4598 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4599 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4600 v->BPP = 0.0;
4601 } else {
4602 v->BPP = v->OutputBppPerState[i][k];
4603 }
4604 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4605 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4606 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4607 v->DSCInputBitPerComponent[k],
4608 v->BPP,
4609 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4610 v->NumberOfDSCSlices[k],
4611 v->OutputFormat[k],
4612 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4613 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4614 v->DSCDelayPerState[i][k] = 2.0
4615 * (dscceComputeDelay(
4616 v->DSCInputBitPerComponent[k],
4617 v->BPP,
4618 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4619 v->NumberOfDSCSlices[k] / 2,
4620 v->OutputFormat[k],
4621 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4622 } else {
4623 v->DSCDelayPerState[i][k] = 4.0
4624 * (dscceComputeDelay(
4625 v->DSCInputBitPerComponent[k],
4626 v->BPP,
4627 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4628 v->NumberOfDSCSlices[k] / 4,
4629 v->OutputFormat[k],
4630 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4631 }
4632 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4633 } else {
4634 v->DSCDelayPerState[i][k] = 0.0;
4635 }
4636 }
4637 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4638 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4639 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4640 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4641 }
4642 }
4643 }
4644 }
4645
4646 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4647 //
4648 for (i = 0; i < v->soc.num_states; ++i) {
4649 for (j = 0; j <= 1; ++j) {
4650 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4651 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4652 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4653 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4654 }
4655
4656 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
4657 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
4658 CalculateSwathAndDETConfiguration(
4659 false,
4660 v->NumberOfActivePlanes,
4661 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4662 v->DETBufferSizeInKByte,
4663 v->MaximumSwathWidthLuma,
4664 v->MaximumSwathWidthChroma,
4665 v->SourceScan,
4666 v->SourcePixelFormat,
4667 v->SurfaceTiling,
4668 v->ViewportWidth,
4669 v->ViewportHeight,
4670 v->SurfaceWidthY,
4671 v->SurfaceWidthC,
4672 v->SurfaceHeightY,
4673 v->SurfaceHeightC,
4674 v->Read256BlockHeightY,
4675 v->Read256BlockHeightC,
4676 v->Read256BlockWidthY,
4677 v->Read256BlockWidthC,
4678 v->ODMCombineEnableThisState,
4679 v->BlendingAndTiming,
4680 v->BytePerPixelY,
4681 v->BytePerPixelC,
4682 v->BytePerPixelInDETY,
4683 v->BytePerPixelInDETC,
4684 v->HActive,
4685 v->HRatio,
4686 v->HRatioChroma,
4687 v->NoOfDPPThisState,
4688 v->swath_width_luma_ub_this_state,
4689 v->swath_width_chroma_ub_this_state,
4690 v->SwathWidthYThisState,
4691 v->SwathWidthCThisState,
4692 v->SwathHeightYThisState,
4693 v->SwathHeightCThisState,
4694 v->DETBufferSizeYThisState,
4695 v->DETBufferSizeCThisState,
4696 v->dummystring,
4697 &v->ViewportSizeSupport[i][j]);
4698
4699 CalculateDCFCLKDeepSleep(
4700 mode_lib,
4701 v->NumberOfActivePlanes,
4702 v->BytePerPixelY,
4703 v->BytePerPixelC,
4704 v->VRatio,
4705 v->VRatioChroma,
4706 v->SwathWidthYThisState,
4707 v->SwathWidthCThisState,
4708 v->NoOfDPPThisState,
4709 v->HRatio,
4710 v->HRatioChroma,
4711 v->PixelClock,
4712 v->PSCL_FACTOR,
4713 v->PSCL_FACTOR_CHROMA,
4714 v->RequiredDPPCLKThisState,
4715 v->ReadBandwidthLuma,
4716 v->ReadBandwidthChroma,
4717 v->ReturnBusWidth,
4718 &v->ProjectedDCFCLKDeepSleep[i][j]);
4719
4720 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4721 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4722 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4723 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4724 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4725 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4726 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4727 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4728 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4729 }
4730 }
4731 }
4732
4733 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4734 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4735 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4736 }
4737
4738 for (i = 0; i < v->soc.num_states; i++) {
4739 for (j = 0; j < 2; j++) {
4740 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4741
4742 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4743 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4744 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4745 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4746 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4747 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4748 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4749 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4750 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4751 }
4752
4753 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4754 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4755 if (v->DCCEnable[k] == true) {
4756 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4757 }
4758 }
4759
4760 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4761 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4762 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4763
4764 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4765 && v->SourceScan[k] != dm_vert) {
4766 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4767 / 2;
4768 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4769 } else {
4770 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4771 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4772 }
4773
4774 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4775 mode_lib,
4776 v->DCCEnable[k],
4777 v->Read256BlockHeightC[k],
4778 v->Read256BlockWidthC[k],
4779 v->SourcePixelFormat[k],
4780 v->SurfaceTiling[k],
4781 v->BytePerPixelC[k],
4782 v->SourceScan[k],
4783 v->SwathWidthCThisState[k],
4784 v->ViewportHeightChroma[k],
4785 v->GPUVMEnable,
4786 v->HostVMEnable,
4787 v->HostVMMaxNonCachedPageTableLevels,
4788 v->GPUVMMinPageSize,
4789 v->HostVMMinPageSize,
4790 v->PTEBufferSizeInRequestsForChroma,
4791 v->PitchC[k],
4792 0.0,
4793 &v->MacroTileWidthC[k],
4794 &v->MetaRowBytesC,
4795 &v->DPTEBytesPerRowC,
4796 &v->PTEBufferSizeNotExceededC[i][j][k],
4797 &v->dummyinteger7,
4798 &v->dpte_row_height_chroma[k],
4799 &v->dummyinteger28,
4800 &v->dummyinteger26,
4801 &v->dummyinteger23,
4802 &v->meta_row_height_chroma[k],
4803 &v->dummyinteger8,
4804 &v->dummyinteger9,
4805 &v->dummyinteger19,
4806 &v->dummyinteger20,
4807 &v->dummyinteger17,
4808 &v->dummyinteger10,
4809 &v->dummyinteger11);
4810
4811 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4812 mode_lib,
4813 v->VRatioChroma[k],
4814 v->VTAPsChroma[k],
4815 v->Interlace[k],
4816 v->ProgressiveToInterlaceUnitInOPP,
4817 v->SwathHeightCThisState[k],
4818 v->ViewportYStartC[k],
4819 &v->PrefillC[k],
4820 &v->MaxNumSwC[k]);
4821 } else {
4822 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4823 v->PTEBufferSizeInRequestsForChroma = 0;
4824 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4825 v->MetaRowBytesC = 0.0;
4826 v->DPTEBytesPerRowC = 0.0;
4827 v->PrefetchLinesC[i][j][k] = 0.0;
4828 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4829 }
4830 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4831 mode_lib,
4832 v->DCCEnable[k],
4833 v->Read256BlockHeightY[k],
4834 v->Read256BlockWidthY[k],
4835 v->SourcePixelFormat[k],
4836 v->SurfaceTiling[k],
4837 v->BytePerPixelY[k],
4838 v->SourceScan[k],
4839 v->SwathWidthYThisState[k],
4840 v->ViewportHeight[k],
4841 v->GPUVMEnable,
4842 v->HostVMEnable,
4843 v->HostVMMaxNonCachedPageTableLevels,
4844 v->GPUVMMinPageSize,
4845 v->HostVMMinPageSize,
4846 v->PTEBufferSizeInRequestsForLuma,
4847 v->PitchY[k],
4848 v->DCCMetaPitchY[k],
4849 &v->MacroTileWidthY[k],
4850 &v->MetaRowBytesY,
4851 &v->DPTEBytesPerRowY,
4852 &v->PTEBufferSizeNotExceededY[i][j][k],
4853 &v->dummyinteger7,
4854 &v->dpte_row_height[k],
4855 &v->dummyinteger29,
4856 &v->dummyinteger27,
4857 &v->dummyinteger24,
4858 &v->meta_row_height[k],
4859 &v->dummyinteger25,
4860 &v->dpte_group_bytes[k],
4861 &v->dummyinteger21,
4862 &v->dummyinteger22,
4863 &v->dummyinteger18,
4864 &v->dummyinteger5,
4865 &v->dummyinteger6);
4866 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4867 mode_lib,
4868 v->VRatio[k],
4869 v->vtaps[k],
4870 v->Interlace[k],
4871 v->ProgressiveToInterlaceUnitInOPP,
4872 v->SwathHeightYThisState[k],
4873 v->ViewportYStartY[k],
4874 &v->PrefillY[k],
4875 &v->MaxNumSwY[k]);
4876 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4877 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4878 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4879
4880 CalculateRowBandwidth(
4881 v->GPUVMEnable,
4882 v->SourcePixelFormat[k],
4883 v->VRatio[k],
4884 v->VRatioChroma[k],
4885 v->DCCEnable[k],
4886 v->HTotal[k] / v->PixelClock[k],
4887 v->MetaRowBytesY,
4888 v->MetaRowBytesC,
4889 v->meta_row_height[k],
4890 v->meta_row_height_chroma[k],
4891 v->DPTEBytesPerRowY,
4892 v->DPTEBytesPerRowC,
4893 v->dpte_row_height[k],
4894 v->dpte_row_height_chroma[k],
4895 &v->meta_row_bandwidth[i][j][k],
4896 &v->dpte_row_bandwidth[i][j][k]);
4897 }
4898 /*DCCMetaBufferSizeSupport(i, j) = True
4899 For k = 0 To NumberOfActivePlanes - 1
4900 If MetaRowBytes(i, j, k) > 24064 Then
4901 DCCMetaBufferSizeSupport(i, j) = False
4902 End If
4903 Next k*/
4904 v->DCCMetaBufferSizeSupport[i][j] = true;
4905 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4906 if (v->MetaRowBytes[i][j][k] > 24064)
4907 v->DCCMetaBufferSizeSupport[i][j] = false;
4908 }
4909 v->UrgLatency[i] = CalculateUrgentLatency(
4910 v->UrgentLatencyPixelDataOnly,
4911 v->UrgentLatencyPixelMixedWithVMData,
4912 v->UrgentLatencyVMDataOnly,
4913 v->DoUrgentLatencyAdjustment,
4914 v->UrgentLatencyAdjustmentFabricClockComponent,
4915 v->UrgentLatencyAdjustmentFabricClockReference,
4916 v->FabricClockPerState[i]);
4917
4918 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4919 CalculateUrgentBurstFactor(
4920 v->swath_width_luma_ub_this_state[k],
4921 v->swath_width_chroma_ub_this_state[k],
4922 v->SwathHeightYThisState[k],
4923 v->SwathHeightCThisState[k],
4924 v->HTotal[k] / v->PixelClock[k],
4925 v->UrgLatency[i],
4926 v->CursorBufferSize,
4927 v->CursorWidth[k][0],
4928 v->CursorBPP[k][0],
4929 v->VRatio[k],
4930 v->VRatioChroma[k],
4931 v->BytePerPixelInDETY[k],
4932 v->BytePerPixelInDETC[k],
4933 v->DETBufferSizeYThisState[k],
4934 v->DETBufferSizeCThisState[k],
4935 &v->UrgentBurstFactorCursor[k],
4936 &v->UrgentBurstFactorLuma[k],
4937 &v->UrgentBurstFactorChroma[k],
4938 &NotUrgentLatencyHiding[k]);
4939 }
4940
4941 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4942 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4943 if (NotUrgentLatencyHiding[k]) {
4944 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4945 }
4946 }
4947
4948 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4949 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4950 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4951 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4952 }
4953
4954 v->TotalVActivePixelBandwidth[i][j] = 0;
4955 v->TotalVActiveCursorBandwidth[i][j] = 0;
4956 v->TotalMetaRowBandwidth[i][j] = 0;
4957 v->TotalDPTERowBandwidth[i][j] = 0;
4958 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4959 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4960 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4961 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4962 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4963 }
4964 }
4965 }
4966
4967 //Calculate Return BW
4968 for (i = 0; i < v->soc.num_states; ++i) {
4969 for (j = 0; j <= 1; ++j) {
4970 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4971 if (v->BlendingAndTiming[k] == k) {
4972 if (v->WritebackEnable[k] == true) {
4973 v->WritebackDelayTime[k] = v->WritebackLatency
4974 + CalculateWriteBackDelay(
4975 v->WritebackPixelFormat[k],
4976 v->WritebackHRatio[k],
4977 v->WritebackVRatio[k],
4978 v->WritebackVTaps[k],
4979 v->WritebackDestinationWidth[k],
4980 v->WritebackDestinationHeight[k],
4981 v->WritebackSourceHeight[k],
4982 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4983 } else {
4984 v->WritebackDelayTime[k] = 0.0;
4985 }
4986 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4987 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4988 v->WritebackDelayTime[k] = dml_max(
4989 v->WritebackDelayTime[k],
4990 v->WritebackLatency
4991 + CalculateWriteBackDelay(
4992 v->WritebackPixelFormat[m],
4993 v->WritebackHRatio[m],
4994 v->WritebackVRatio[m],
4995 v->WritebackVTaps[m],
4996 v->WritebackDestinationWidth[m],
4997 v->WritebackDestinationHeight[m],
4998 v->WritebackSourceHeight[m],
4999 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5000 }
5001 }
5002 }
5003 }
5004 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5005 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5006 if (v->BlendingAndTiming[k] == m) {
5007 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5008 }
5009 }
5010 }
5011 v->MaxMaxVStartup[i][j] = 0;
5012 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5013 v->MaximumVStartup[i][j][k] =
5014 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5015 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5016 v->VTotal[k] - v->VActive[k]
5017 - dml_max(
5018 1.0,
5019 dml_ceil(
5020 1.0 * v->WritebackDelayTime[k]
5021 / (v->HTotal[k]
5022 / v->PixelClock[k]),
5023 1.0));
5024 if (v->MaximumVStartup[i][j][k] > 1023)
5025 v->MaximumVStartup[i][j][k] = 1023;
5026 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5027 }
5028 }
5029 }
5030
5031 ReorderingBytes = v->NumberOfChannels
5032 * dml_max3(
5033 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5034 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5035 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5036
5037 for (i = 0; i < v->soc.num_states; ++i) {
5038 for (j = 0; j <= 1; ++j) {
5039 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5040 }
5041 }
5042
5043 if (v->UseMinimumRequiredDCFCLK == true)
5044 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5045
5046 for (i = 0; i < v->soc.num_states; ++i) {
5047 for (j = 0; j <= 1; ++j) {
5048 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5049 v->ReturnBusWidth * v->DCFCLKState[i][j],
5050 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5051 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5052 double PixelDataOnlyReturnBWPerState = dml_min(
5053 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5054 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5055 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5056 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5057 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5058
5059 if (v->HostVMEnable != true) {
5060 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5061 } else {
5062 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5063 }
5064 }
5065 }
5066
5067 //Re-ordering Buffer Support Check
5068 for (i = 0; i < v->soc.num_states; ++i) {
5069 for (j = 0; j <= 1; ++j) {
5070 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5071 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5072 v->ROBSupport[i][j] = true;
5073 } else {
5074 v->ROBSupport[i][j] = false;
5075 }
5076 }
5077 }
5078
5079 //Vertical Active BW support check
5080
5081 MaxTotalVActiveRDBandwidth = 0;
5082 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5083 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5084 }
5085
5086 for (i = 0; i < v->soc.num_states; ++i) {
5087 for (j = 0; j <= 1; ++j) {
5088 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5089 dml_min(
5090 v->ReturnBusWidth * v->DCFCLKState[i][j],
5091 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5092 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5093 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5094 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5095
5096 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5097 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5098 } else {
5099 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5100 }
5101 }
5102 }
5103
5104 v->UrgentLatency = CalculateUrgentLatency(
5105 v->UrgentLatencyPixelDataOnly,
5106 v->UrgentLatencyPixelMixedWithVMData,
5107 v->UrgentLatencyVMDataOnly,
5108 v->DoUrgentLatencyAdjustment,
5109 v->UrgentLatencyAdjustmentFabricClockComponent,
5110 v->UrgentLatencyAdjustmentFabricClockReference,
5111 v->FabricClock);
5112 //Prefetch Check
5113 for (i = 0; i < v->soc.num_states; ++i) {
5114 for (j = 0; j <= 1; ++j) {
5115 double VMDataOnlyReturnBWPerState;
5116 double HostVMInefficiencyFactor = 1;
5117 int NextPrefetchModeState = MinPrefetchMode;
5118 bool UnboundedRequestEnabledThisState = false;
5119 int CompressedBufferSizeInkByteThisState = 0;
5120 double dummy;
5121
5122 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5123
5124 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5125 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5126 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5127 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5128 }
5129
5130 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5131 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5132 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5133 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5134 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5135 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5136 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5137 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5138 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5139 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5140 }
5141
5142 VMDataOnlyReturnBWPerState = dml_min(
5143 dml_min(
5144 v->ReturnBusWidth * v->DCFCLKState[i][j],
5145 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5146 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5147 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5148 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5149 if (v->GPUVMEnable && v->HostVMEnable)
5150 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5151
5152 v->ExtraLatency = CalculateExtraLatency(
5153 v->RoundTripPingLatencyCycles,
5154 ReorderingBytes,
5155 v->DCFCLKState[i][j],
5156 v->TotalNumberOfActiveDPP[i][j],
5157 v->PixelChunkSizeInKByte,
5158 v->TotalNumberOfDCCActiveDPP[i][j],
5159 v->MetaChunkSize,
5160 v->ReturnBWPerState[i][j],
5161 v->GPUVMEnable,
5162 v->HostVMEnable,
5163 v->NumberOfActivePlanes,
5164 v->NoOfDPPThisState,
5165 v->dpte_group_bytes,
5166 HostVMInefficiencyFactor,
5167 v->HostVMMinPageSize,
5168 v->HostVMMaxNonCachedPageTableLevels);
5169
5170 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5171 do {
5172 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5173 v->MaxVStartup = v->NextMaxVStartup;
5174
5175 v->TWait = CalculateTWait(
5176 v->PrefetchModePerState[i][j],
5177 v->DRAMClockChangeLatency,
5178 v->UrgLatency[i],
5179 v->SREnterPlusExitTime);
5180
5181 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5182 CalculatePrefetchSchedulePerPlane(mode_lib,
5183 HostVMInefficiencyFactor,
5184 i, j, k);
5185 }
5186
5187 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5188 CalculateUrgentBurstFactor(
5189 v->swath_width_luma_ub_this_state[k],
5190 v->swath_width_chroma_ub_this_state[k],
5191 v->SwathHeightYThisState[k],
5192 v->SwathHeightCThisState[k],
5193 v->HTotal[k] / v->PixelClock[k],
5194 v->UrgLatency[i],
5195 v->CursorBufferSize,
5196 v->CursorWidth[k][0],
5197 v->CursorBPP[k][0],
5198 v->VRatioPreY[i][j][k],
5199 v->VRatioPreC[i][j][k],
5200 v->BytePerPixelInDETY[k],
5201 v->BytePerPixelInDETC[k],
5202 v->DETBufferSizeYThisState[k],
5203 v->DETBufferSizeCThisState[k],
5204 &v->UrgentBurstFactorCursorPre[k],
5205 &v->UrgentBurstFactorLumaPre[k],
5206 &v->UrgentBurstFactorChromaPre[k],
5207 &v->NotUrgentLatencyHidingPre[k]);
5208 }
5209
5210 v->MaximumReadBandwidthWithPrefetch = 0.0;
5211 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5212 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5213 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5214
5215 v->MaximumReadBandwidthWithPrefetch =
5216 v->MaximumReadBandwidthWithPrefetch
5217 + dml_max3(
5218 v->VActivePixelBandwidth[i][j][k]
5219 + v->VActiveCursorBandwidth[i][j][k]
5220 + v->NoOfDPP[i][j][k]
5221 * (v->meta_row_bandwidth[i][j][k]
5222 + v->dpte_row_bandwidth[i][j][k]),
5223 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5224 v->NoOfDPP[i][j][k]
5225 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5226 * v->UrgentBurstFactorLumaPre[k]
5227 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5228 * v->UrgentBurstFactorChromaPre[k])
5229 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5230 }
5231
5232 v->NotEnoughUrgentLatencyHidingPre = false;
5233 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5234 if (v->NotUrgentLatencyHidingPre[k] == true) {
5235 v->NotEnoughUrgentLatencyHidingPre = true;
5236 }
5237 }
5238
5239 v->PrefetchSupported[i][j] = true;
5240 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5241 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5242 v->PrefetchSupported[i][j] = false;
5243 }
5244 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5245 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5246 || v->NoTimeForPrefetch[i][j][k] == true) {
5247 v->PrefetchSupported[i][j] = false;
5248 }
5249 }
5250
5251 v->DynamicMetadataSupported[i][j] = true;
5252 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5253 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5254 v->DynamicMetadataSupported[i][j] = false;
5255 }
5256 }
5257
5258 v->VRatioInPrefetchSupported[i][j] = true;
5259 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5260 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5261 v->VRatioInPrefetchSupported[i][j] = false;
5262 }
5263 }
5264 v->AnyLinesForVMOrRowTooLarge = false;
5265 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5266 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5267 v->AnyLinesForVMOrRowTooLarge = true;
5268 }
5269 }
5270
5271 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5272
5273 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5274 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5275 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5276 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5277 - dml_max(
5278 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5279 v->NoOfDPP[i][j][k]
5280 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5281 * v->UrgentBurstFactorLumaPre[k]
5282 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5283 * v->UrgentBurstFactorChromaPre[k])
5284 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5285 }
5286 v->TotImmediateFlipBytes = 0.0;
5287 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5288 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5289 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5290 + v->DPTEBytesPerRow[i][j][k]);
5291 }
5292
5293 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5294 CalculateFlipSchedule(
5295 mode_lib,
5296 k,
5297 HostVMInefficiencyFactor,
5298 v->ExtraLatency,
5299 v->UrgLatency[i],
5300 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5301 v->MetaRowBytes[i][j][k],
5302 v->DPTEBytesPerRow[i][j][k]);
5303 }
5304 v->total_dcn_read_bw_with_flip = 0.0;
5305 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5306 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5307 + dml_max3(
5308 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5309 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5310 + v->VActiveCursorBandwidth[i][j][k],
5311 v->NoOfDPP[i][j][k]
5312 * (v->final_flip_bw[k]
5313 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5314 * v->UrgentBurstFactorLumaPre[k]
5315 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5316 * v->UrgentBurstFactorChromaPre[k])
5317 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5318 }
5319 v->ImmediateFlipSupportedForState[i][j] = true;
5320 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5321 v->ImmediateFlipSupportedForState[i][j] = false;
5322 }
5323 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5324 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5325 v->ImmediateFlipSupportedForState[i][j] = false;
5326 }
5327 }
5328 } else {
5329 v->ImmediateFlipSupportedForState[i][j] = false;
5330 }
5331
5332 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5333 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5334 NextPrefetchModeState = NextPrefetchModeState + 1;
5335 } else {
5336 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5337 }
5338 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5339 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5340 && ((v->HostVMEnable == false &&
5341 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5342 || v->ImmediateFlipSupportedForState[i][j] == true))
5343 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5344
5345 CalculateUnboundedRequestAndCompressedBufferSize(
5346 v->DETBufferSizeInKByte[0],
5347 v->ConfigReturnBufferSizeInKByte,
5348 v->UseUnboundedRequesting,
5349 v->TotalNumberOfActiveDPP[i][j],
5350 NoChroma,
5351 v->MaxNumDPP,
5352 v->CompressedBufferSegmentSizeInkByte,
5353 v->Output,
5354 &UnboundedRequestEnabledThisState,
5355 &CompressedBufferSizeInkByteThisState);
5356
5357 CalculateWatermarksAndDRAMSpeedChangeSupport(
5358 mode_lib,
5359 v->PrefetchModePerState[i][j],
5360 v->DCFCLKState[i][j],
5361 v->ReturnBWPerState[i][j],
5362 v->UrgLatency[i],
5363 v->ExtraLatency,
5364 v->SOCCLKPerState[i],
5365 v->ProjectedDCFCLKDeepSleep[i][j],
5366 v->DETBufferSizeYThisState,
5367 v->DETBufferSizeCThisState,
5368 v->SwathHeightYThisState,
5369 v->SwathHeightCThisState,
5370 v->SwathWidthYThisState,
5371 v->SwathWidthCThisState,
5372 v->NoOfDPPThisState,
5373 v->BytePerPixelInDETY,
5374 v->BytePerPixelInDETC,
5375 UnboundedRequestEnabledThisState,
5376 CompressedBufferSizeInkByteThisState,
5377 &v->DRAMClockChangeSupport[i][j],
5378 &dummy,
5379 &dummy,
5380 &dummy,
5381 &dummy);
5382 }
5383 }
5384
5385 /*PTE Buffer Size Check*/
5386 for (i = 0; i < v->soc.num_states; i++) {
5387 for (j = 0; j < 2; j++) {
5388 v->PTEBufferSizeNotExceeded[i][j] = true;
5389 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5390 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5391 v->PTEBufferSizeNotExceeded[i][j] = false;
5392 }
5393 }
5394 }
5395 }
5396
5397 /*Cursor Support Check*/
5398 v->CursorSupport = true;
5399 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5400 if (v->CursorWidth[k][0] > 0.0) {
5401 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5402 v->CursorSupport = false;
5403 }
5404 }
5405 }
5406
5407 /*Valid Pitch Check*/
5408 v->PitchSupport = true;
5409 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5410 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5411 if (v->DCCEnable[k] == true) {
5412 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5413 } else {
5414 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5415 }
5416 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5417 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5418 && v->SourcePixelFormat[k] != dm_mono_8) {
5419 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5420 if (v->DCCEnable[k] == true) {
5421 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5422 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5423 64.0 * v->Read256BlockWidthC[k]);
5424 } else {
5425 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5426 }
5427 } else {
5428 v->AlignedCPitch[k] = v->PitchC[k];
5429 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5430 }
5431 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5432 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5433 v->PitchSupport = false;
5434 }
5435 }
5436
5437 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5438 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5439 ViewportExceedsSurface = true;
5440 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5441 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5442 && v->SourcePixelFormat[k] != dm_rgbe) {
5443 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5444 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5445 ViewportExceedsSurface = true;
5446 }
5447 }
5448 }
5449 }
5450
5451 /*Mode Support, Voltage State and SOC Configuration*/
5452 for (i = v->soc.num_states - 1; i >= 0; i--) {
5453 for (j = 0; j < 2; j++) {
5454 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5455 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5456 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5457 && v->DTBCLKRequiredMoreThanSupported[i] == false
5458 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5459 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5460 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5461 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5462 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5463 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5464 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5465 && ((v->HostVMEnable == false
5466 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5467 || v->ImmediateFlipSupportedForState[i][j] == true)
5468 && FMTBufferExceeded == false) {
5469 v->ModeSupport[i][j] = true;
5470 } else {
5471 v->ModeSupport[i][j] = false;
5472 #ifdef __DML_VBA_DEBUG__
5473 if (v->ScaleRatioAndTapsSupport == false)
5474 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed");
5475 if (v->SourceFormatPixelAndScanSupport == false)
5476 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed");
5477 if (v->ViewportSizeSupport[i][j] == false)
5478 dml_print("DML SUPPORT: ViewportSizeSupport failed");
5479 if (v->LinkCapacitySupport[i] == false)
5480 dml_print("DML SUPPORT: LinkCapacitySupport failed");
5481 if (v->ODMCombine4To1SupportCheckOK[i] == false)
5482 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5483 if (v->NotEnoughDSCUnits[i] == true)
5484 dml_print("DML SUPPORT: NotEnoughDSCUnits");
5485 if (v->DTBCLKRequiredMoreThanSupported[i] == true)
5486 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported");
5487 if (v->ROBSupport[i][j] == false)
5488 dml_print("DML SUPPORT: ROBSupport failed");
5489 if (v->DISPCLK_DPPCLK_Support[i][j] == false)
5490 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed");
5491 if (v->TotalAvailablePipesSupport[i][j] == false)
5492 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5493 if (EnoughWritebackUnits == false)
5494 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5495 if (v->WritebackLatencySupport == false)
5496 dml_print("DML SUPPORT: WritebackLatencySupport failed");
5497 if (v->WritebackScaleRatioAndTapsSupport == false)
5498 dml_print("DML SUPPORT: DSC422NativeNotSupported ");
5499 if (v->CursorSupport == false)
5500 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5501 if (v->PitchSupport == false)
5502 dml_print("DML SUPPORT: PitchSupport failed");
5503 if (ViewportExceedsSurface == true)
5504 dml_print("DML SUPPORT: ViewportExceedsSurface failed");
5505 if (v->PrefetchSupported[i][j] == false)
5506 dml_print("DML SUPPORT: PrefetchSupported failed");
5507 if (v->DynamicMetadataSupported[i][j] == false)
5508 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5509 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
5510 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
5511 if (v->VRatioInPrefetchSupported[i][j] == false)
5512 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
5513 if (v->PTEBufferSizeNotExceeded[i][j] == false)
5514 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
5515 if (v->NonsupportedDSCInputBPC == true)
5516 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
5517 if (!((v->HostVMEnable == false
5518 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5519 || v->ImmediateFlipSupportedForState[i][j] == true))
5520 dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
5521 if (FMTBufferExceeded == true)
5522 dml_print("DML SUPPORT: FMTBufferExceeded failed");
5523 #endif
5524 }
5525 }
5526 }
5527
5528 {
5529 unsigned int MaximumMPCCombine = 0;
5530 for (i = v->soc.num_states; i >= 0; i--) {
5531 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5532 v->VoltageLevel = i;
5533 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5534 if (v->ModeSupport[i][0] == true) {
5535 MaximumMPCCombine = 0;
5536 } else {
5537 MaximumMPCCombine = 1;
5538 }
5539 }
5540 }
5541 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5542 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5543 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5544 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5545 }
5546 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5547 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5548 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5549 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5550 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5551 v->maxMpcComb = MaximumMPCCombine;
5552 }
5553 }
5554
5555 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5556 struct display_mode_lib *mode_lib,
5557 unsigned int PrefetchMode,
5558 double DCFCLK,
5559 double ReturnBW,
5560 double UrgentLatency,
5561 double ExtraLatency,
5562 double SOCCLK,
5563 double DCFCLKDeepSleep,
5564 unsigned int DETBufferSizeY[],
5565 unsigned int DETBufferSizeC[],
5566 unsigned int SwathHeightY[],
5567 unsigned int SwathHeightC[],
5568 double SwathWidthY[],
5569 double SwathWidthC[],
5570 unsigned int DPPPerPlane[],
5571 double BytePerPixelDETY[],
5572 double BytePerPixelDETC[],
5573 bool UnboundedRequestEnabled,
5574 int unsigned CompressedBufferSizeInkByte,
5575 enum clock_change_support *DRAMClockChangeSupport,
5576 double *StutterExitWatermark,
5577 double *StutterEnterPlusExitWatermark,
5578 double *Z8StutterExitWatermark,
5579 double *Z8StutterEnterPlusExitWatermark)
5580 {
5581 struct vba_vars_st *v = &mode_lib->vba;
5582 double EffectiveLBLatencyHidingY;
5583 double EffectiveLBLatencyHidingC;
5584 double LinesInDETY[DC__NUM_DPP__MAX];
5585 double LinesInDETC;
5586 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5587 unsigned int LinesInDETCRoundedDownToSwath;
5588 double FullDETBufferingTimeY;
5589 double FullDETBufferingTimeC;
5590 double ActiveDRAMClockChangeLatencyMarginY;
5591 double ActiveDRAMClockChangeLatencyMarginC;
5592 double WritebackDRAMClockChangeLatencyMargin;
5593 double PlaneWithMinActiveDRAMClockChangeMargin;
5594 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5595 double WritebackDRAMClockChangeLatencyHiding;
5596 double TotalPixelBW = 0.0;
5597 int k, j;
5598
5599 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5600
5601 #ifdef __DML_VBA_DEBUG__
5602 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5603 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5604 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5605 #endif
5606
5607 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5608
5609 #ifdef __DML_VBA_DEBUG__
5610 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5611 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5612 #endif
5613
5614 v->TotalActiveWriteback = 0;
5615 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5616 if (v->WritebackEnable[k] == true) {
5617 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5618 }
5619 }
5620
5621 if (v->TotalActiveWriteback <= 1) {
5622 v->WritebackUrgentWatermark = v->WritebackLatency;
5623 } else {
5624 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5625 }
5626
5627 if (v->TotalActiveWriteback <= 1) {
5628 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5629 } else {
5630 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5631 }
5632
5633 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5634 TotalPixelBW = TotalPixelBW
5635 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5636 / (v->HTotal[k] / v->PixelClock[k]);
5637 }
5638
5639 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5640 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5641
5642 v->LBLatencyHidingSourceLinesY = dml_min(
5643 (double) v->MaxLineBufferLines,
5644 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5645
5646 v->LBLatencyHidingSourceLinesC = dml_min(
5647 (double) v->MaxLineBufferLines,
5648 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5649
5650 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5651
5652 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5653
5654 if (UnboundedRequestEnabled) {
5655 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5656 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5657 }
5658
5659 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5660 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5661 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5662 if (BytePerPixelDETC[k] > 0) {
5663 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5664 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5665 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5666 } else {
5667 LinesInDETC = 0;
5668 FullDETBufferingTimeC = 999999;
5669 }
5670
5671 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5672 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5673
5674 if (v->NumberOfActivePlanes > 1) {
5675 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5676 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5677 }
5678
5679 if (BytePerPixelDETC[k] > 0) {
5680 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5681 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5682
5683 if (v->NumberOfActivePlanes > 1) {
5684 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5685 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5686 }
5687 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5688 } else {
5689 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5690 }
5691
5692 if (v->WritebackEnable[k] == true) {
5693 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5694 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5695 if (v->WritebackPixelFormat[k] == dm_444_64) {
5696 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5697 }
5698 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5699 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5700 }
5701 }
5702
5703 v->MinActiveDRAMClockChangeMargin = 999999;
5704 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5705 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5706 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5707 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5708 if (v->BlendingAndTiming[k] == k) {
5709 PlaneWithMinActiveDRAMClockChangeMargin = k;
5710 } else {
5711 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5712 if (v->BlendingAndTiming[k] == j) {
5713 PlaneWithMinActiveDRAMClockChangeMargin = j;
5714 }
5715 }
5716 }
5717 }
5718 }
5719
5720 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5721
5722 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5723 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5724 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5725 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5726 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5727 }
5728 }
5729
5730 v->TotalNumberOfActiveOTG = 0;
5731
5732 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5733 if (v->BlendingAndTiming[k] == k) {
5734 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5735 }
5736 }
5737
5738 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5739 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5740 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5741 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5742 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5743 } else {
5744 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5745 }
5746
5747 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5748 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5749 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5750 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5751
5752 #ifdef __DML_VBA_DEBUG__
5753 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5754 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5755 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5756 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5757 #endif
5758 }
5759
5760 static void CalculateDCFCLKDeepSleep(
5761 struct display_mode_lib *mode_lib,
5762 unsigned int NumberOfActivePlanes,
5763 int BytePerPixelY[],
5764 int BytePerPixelC[],
5765 double VRatio[],
5766 double VRatioChroma[],
5767 double SwathWidthY[],
5768 double SwathWidthC[],
5769 unsigned int DPPPerPlane[],
5770 double HRatio[],
5771 double HRatioChroma[],
5772 double PixelClock[],
5773 double PSCL_THROUGHPUT[],
5774 double PSCL_THROUGHPUT_CHROMA[],
5775 double DPPCLK[],
5776 double ReadBandwidthLuma[],
5777 double ReadBandwidthChroma[],
5778 int ReturnBusWidth,
5779 double *DCFCLKDeepSleep)
5780 {
5781 struct vba_vars_st *v = &mode_lib->vba;
5782 double DisplayPipeLineDeliveryTimeLuma;
5783 double DisplayPipeLineDeliveryTimeChroma;
5784 double ReadBandwidth = 0.0;
5785 int k;
5786
5787 for (k = 0; k < NumberOfActivePlanes; ++k) {
5788
5789 if (VRatio[k] <= 1) {
5790 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5791 } else {
5792 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5793 }
5794 if (BytePerPixelC[k] == 0) {
5795 DisplayPipeLineDeliveryTimeChroma = 0;
5796 } else {
5797 if (VRatioChroma[k] <= 1) {
5798 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5799 } else {
5800 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5801 }
5802 }
5803
5804 if (BytePerPixelC[k] > 0) {
5805 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5806 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5807 } else {
5808 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5809 }
5810 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5811
5812 }
5813
5814 for (k = 0; k < NumberOfActivePlanes; ++k) {
5815 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5816 }
5817
5818 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5819
5820 for (k = 0; k < NumberOfActivePlanes; ++k) {
5821 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5822 }
5823 }
5824
/*
 * Compute the urgent burst factors of one plane for its cursor, luma and
 * chroma data.
 *
 * Each factor is BufferTime / (BufferTime - UrgentLatency), where BufferTime
 * is the time the corresponding buffer (cursor buffer or DET) can feed the
 * display at the given vertical ratio. When a buffer holds no more time than
 * UrgentLatency the factor is set to 0 and *NotEnoughUrgentLatencyHiding is
 * set to 1. A vertical ratio that is not positive yields a factor of 1.
 *
 * Outputs that are left untouched (callers must not rely on them):
 *   - *UrgentBurstFactorCursor when CursorWidth == 0
 *   - *UrgentBurstFactorChroma when BytePerPixelInDETC <= 0
 *
 * The chroma factor is derived from the chroma vertical ratio (VRatioC), in
 * line with how the chroma DET buffering time is computed elsewhere in this
 * file; the luma ratio is only used for luma and cursor.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/*
		 * Largest power-of-two number of cursor lines that fits in
		 * CursorBufferSize * 1024 bytes (CursorWidth * CursorBPP / 8
		 * bytes per line).
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	/* Luma: only whole swaths held in the DET count towards latency hiding. */
	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so the
		 * chroma DET time must be scaled by VRatioC rather than the luma
		 * VRatio (previously VRatioC was accepted but never used).
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5898
5899 static void CalculatePixelDeliveryTimes(
5900 unsigned int NumberOfActivePlanes,
5901 double VRatio[],
5902 double VRatioChroma[],
5903 double VRatioPrefetchY[],
5904 double VRatioPrefetchC[],
5905 unsigned int swath_width_luma_ub[],
5906 unsigned int swath_width_chroma_ub[],
5907 unsigned int DPPPerPlane[],
5908 double HRatio[],
5909 double HRatioChroma[],
5910 double PixelClock[],
5911 double PSCL_THROUGHPUT[],
5912 double PSCL_THROUGHPUT_CHROMA[],
5913 double DPPCLK[],
5914 int BytePerPixelC[],
5915 enum scan_direction_class SourceScan[],
5916 unsigned int NumberOfCursors[],
5917 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5918 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5919 unsigned int BlockWidth256BytesY[],
5920 unsigned int BlockHeight256BytesY[],
5921 unsigned int BlockWidth256BytesC[],
5922 unsigned int BlockHeight256BytesC[],
5923 double DisplayPipeLineDeliveryTimeLuma[],
5924 double DisplayPipeLineDeliveryTimeChroma[],
5925 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5926 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5927 double DisplayPipeRequestDeliveryTimeLuma[],
5928 double DisplayPipeRequestDeliveryTimeChroma[],
5929 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5930 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5931 double CursorRequestDeliveryTime[],
5932 double CursorRequestDeliveryTimePrefetch[])
5933 {
5934 double req_per_swath_ub;
5935 int k;
5936
5937 for (k = 0; k < NumberOfActivePlanes; ++k) {
5938 if (VRatio[k] <= 1) {
5939 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5940 } else {
5941 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5942 }
5943
5944 if (BytePerPixelC[k] == 0) {
5945 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5946 } else {
5947 if (VRatioChroma[k] <= 1) {
5948 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5949 } else {
5950 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5951 }
5952 }
5953
5954 if (VRatioPrefetchY[k] <= 1) {
5955 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5956 } else {
5957 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5958 }
5959
5960 if (BytePerPixelC[k] == 0) {
5961 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5962 } else {
5963 if (VRatioPrefetchC[k] <= 1) {
5964 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5965 } else {
5966 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5967 }
5968 }
5969 }
5970
5971 for (k = 0; k < NumberOfActivePlanes; ++k) {
5972 if (SourceScan[k] != dm_vert) {
5973 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5974 } else {
5975 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5976 }
5977 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5978 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5979 if (BytePerPixelC[k] == 0) {
5980 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5981 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5982 } else {
5983 if (SourceScan[k] != dm_vert) {
5984 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5985 } else {
5986 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5987 }
5988 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5989 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5990 }
5991 #ifdef __DML_VBA_DEBUG__
5992 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5993 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5994 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5995 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5996 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5997 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5998 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5999 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6000 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6001 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6002 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6003 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6004 #endif
6005 }
6006
6007 for (k = 0; k < NumberOfActivePlanes; ++k) {
6008 int cursor_req_per_width;
6009 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6010 if (NumberOfCursors[k] > 0) {
6011 if (VRatio[k] <= 1) {
6012 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6013 } else {
6014 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6015 }
6016 if (VRatioPrefetchY[k] <= 1) {
6017 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6018 } else {
6019 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6020 }
6021 } else {
6022 CursorRequestDeliveryTime[k] = 0;
6023 CursorRequestDeliveryTimePrefetch[k] = 0;
6024 }
6025 #ifdef __DML_VBA_DEBUG__
6026 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6027 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6028 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6029 #endif
6030 }
6031 }
6032
6033 static void CalculateMetaAndPTETimes(
6034 int NumberOfActivePlanes,
6035 bool GPUVMEnable,
6036 int MetaChunkSize,
6037 int MinMetaChunkSizeBytes,
6038 int HTotal[],
6039 double VRatio[],
6040 double VRatioChroma[],
6041 double DestinationLinesToRequestRowInVBlank[],
6042 double DestinationLinesToRequestRowInImmediateFlip[],
6043 bool DCCEnable[],
6044 double PixelClock[],
6045 int BytePerPixelY[],
6046 int BytePerPixelC[],
6047 enum scan_direction_class SourceScan[],
6048 int dpte_row_height[],
6049 int dpte_row_height_chroma[],
6050 int meta_row_width[],
6051 int meta_row_width_chroma[],
6052 int meta_row_height[],
6053 int meta_row_height_chroma[],
6054 int meta_req_width[],
6055 int meta_req_width_chroma[],
6056 int meta_req_height[],
6057 int meta_req_height_chroma[],
6058 int dpte_group_bytes[],
6059 int PTERequestSizeY[],
6060 int PTERequestSizeC[],
6061 int PixelPTEReqWidthY[],
6062 int PixelPTEReqHeightY[],
6063 int PixelPTEReqWidthC[],
6064 int PixelPTEReqHeightC[],
6065 int dpte_row_width_luma_ub[],
6066 int dpte_row_width_chroma_ub[],
6067 double DST_Y_PER_PTE_ROW_NOM_L[],
6068 double DST_Y_PER_PTE_ROW_NOM_C[],
6069 double DST_Y_PER_META_ROW_NOM_L[],
6070 double DST_Y_PER_META_ROW_NOM_C[],
6071 double TimePerMetaChunkNominal[],
6072 double TimePerChromaMetaChunkNominal[],
6073 double TimePerMetaChunkVBlank[],
6074 double TimePerChromaMetaChunkVBlank[],
6075 double TimePerMetaChunkFlip[],
6076 double TimePerChromaMetaChunkFlip[],
6077 double time_per_pte_group_nom_luma[],
6078 double time_per_pte_group_vblank_luma[],
6079 double time_per_pte_group_flip_luma[],
6080 double time_per_pte_group_nom_chroma[],
6081 double time_per_pte_group_vblank_chroma[],
6082 double time_per_pte_group_flip_chroma[])
6083 {
6084 unsigned int meta_chunk_width;
6085 unsigned int min_meta_chunk_width;
6086 unsigned int meta_chunk_per_row_int;
6087 unsigned int meta_row_remainder;
6088 unsigned int meta_chunk_threshold;
6089 unsigned int meta_chunks_per_row_ub;
6090 unsigned int meta_chunk_width_chroma;
6091 unsigned int min_meta_chunk_width_chroma;
6092 unsigned int meta_chunk_per_row_int_chroma;
6093 unsigned int meta_row_remainder_chroma;
6094 unsigned int meta_chunk_threshold_chroma;
6095 unsigned int meta_chunks_per_row_ub_chroma;
6096 unsigned int dpte_group_width_luma;
6097 unsigned int dpte_groups_per_row_luma_ub;
6098 unsigned int dpte_group_width_chroma;
6099 unsigned int dpte_groups_per_row_chroma_ub;
6100 int k;
6101
6102 for (k = 0; k < NumberOfActivePlanes; ++k) {
6103 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6104 if (BytePerPixelC[k] == 0) {
6105 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6106 } else {
6107 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6108 }
6109 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6110 if (BytePerPixelC[k] == 0) {
6111 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6112 } else {
6113 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6114 }
6115 }
6116
6117 for (k = 0; k < NumberOfActivePlanes; ++k) {
6118 if (DCCEnable[k] == true) {
6119 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6120 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6121 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6122 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6123 if (SourceScan[k] != dm_vert) {
6124 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6125 } else {
6126 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6127 }
6128 if (meta_row_remainder <= meta_chunk_threshold) {
6129 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6130 } else {
6131 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6132 }
6133 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6134 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6135 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6136 if (BytePerPixelC[k] == 0) {
6137 TimePerChromaMetaChunkNominal[k] = 0;
6138 TimePerChromaMetaChunkVBlank[k] = 0;
6139 TimePerChromaMetaChunkFlip[k] = 0;
6140 } else {
6141 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6142 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6143 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6144 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6145 if (SourceScan[k] != dm_vert) {
6146 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6147 } else {
6148 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6149 }
6150 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6151 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6152 } else {
6153 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6154 }
6155 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6156 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6157 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6158 }
6159 } else {
6160 TimePerMetaChunkNominal[k] = 0;
6161 TimePerMetaChunkVBlank[k] = 0;
6162 TimePerMetaChunkFlip[k] = 0;
6163 TimePerChromaMetaChunkNominal[k] = 0;
6164 TimePerChromaMetaChunkVBlank[k] = 0;
6165 TimePerChromaMetaChunkFlip[k] = 0;
6166 }
6167 }
6168
6169 for (k = 0; k < NumberOfActivePlanes; ++k) {
6170 if (GPUVMEnable == true) {
6171 if (SourceScan[k] != dm_vert) {
6172 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6173 } else {
6174 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6175 }
6176 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6177 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6178 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6179 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6180 if (BytePerPixelC[k] == 0) {
6181 time_per_pte_group_nom_chroma[k] = 0;
6182 time_per_pte_group_vblank_chroma[k] = 0;
6183 time_per_pte_group_flip_chroma[k] = 0;
6184 } else {
6185 if (SourceScan[k] != dm_vert) {
6186 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6187 } else {
6188 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6189 }
6190 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6191 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6192 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6193 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6194 }
6195 } else {
6196 time_per_pte_group_nom_luma[k] = 0;
6197 time_per_pte_group_vblank_luma[k] = 0;
6198 time_per_pte_group_flip_luma[k] = 0;
6199 time_per_pte_group_nom_chroma[k] = 0;
6200 time_per_pte_group_vblank_chroma[k] = 0;
6201 time_per_pte_group_flip_chroma[k] = 0;
6202 }
6203 }
6204 }
6205
/*
 * Compute, per active plane, the time available per VM group and per VM
 * request during vblank and during an immediate flip.
 *
 * A plane gets non-zero results only when GPUVMEnable is set and either DCC
 * is enabled for it or GPUVMMaxPageTableLevels > 1. Which byte counts feed
 * the group/request totals depends on the configuration:
 *   - DCC disabled:                 dpde0 bytes only
 *   - DCC enabled, one table level: meta PTE bytes only
 *   - DCC enabled, more levels:     dpde0 + meta PTE bytes (plus one extra
 *                                   group per surface)
 * Chroma byte counts are included only when BytePerPixelC[k] > 0. Groups are
 * counted in units of vm_group_bytes[k] (rounded up), requests in units of
 * 64 bytes (integer division). All four times are halved when
 * GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int groups_per_stage;
		int reqs_per_stage;
		double vblank_time;
		double flip_time;
		bool has_chroma = BytePerPixelC[k] > 0;

		/* Nothing to time without GPUVM, or with a single level and no DCC. */
		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			if (has_chroma) {
				groups_per_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (has_chroma) {
				groups_per_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Both page directory entries and meta PTEs, plus one extra group per surface. */
			if (has_chroma) {
				groups_per_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				groups_per_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				reqs_per_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		/* Time budget of each phase, shared by the group and request outputs. */
		vblank_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_time / groups_per_stage;
		TimePerVMGroupFlip[k] = flip_time / groups_per_stage;
		TimePerVMRequestVBlank[k] = vblank_time / reqs_per_stage;
		TimePerVMRequestFlip[k] = flip_time / reqs_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6305
6306 static void CalculateStutterEfficiency(
6307 struct display_mode_lib *mode_lib,
6308 int CompressedBufferSizeInkByte,
6309 bool UnboundedRequestEnabled,
6310 int ConfigReturnBufferSizeInKByte,
6311 int MetaFIFOSizeInKEntries,
6312 int ZeroSizeBufferEntries,
6313 int NumberOfActivePlanes,
6314 int ROBBufferSizeInKByte,
6315 double TotalDataReadBandwidth,
6316 double DCFCLK,
6317 double ReturnBW,
6318 double COMPBUF_RESERVED_SPACE_64B,
6319 double COMPBUF_RESERVED_SPACE_ZS,
6320 double SRExitTime,
6321 double SRExitZ8Time,
6322 bool SynchronizedVBlank,
6323 double Z8StutterEnterPlusExitWatermark,
6324 double StutterEnterPlusExitWatermark,
6325 bool ProgressiveToInterlaceUnitInOPP,
6326 bool Interlace[],
6327 double MinTTUVBlank[],
6328 int DPPPerPlane[],
6329 unsigned int DETBufferSizeY[],
6330 int BytePerPixelY[],
6331 double BytePerPixelDETY[],
6332 double SwathWidthY[],
6333 int SwathHeightY[],
6334 int SwathHeightC[],
6335 double NetDCCRateLuma[],
6336 double NetDCCRateChroma[],
6337 double DCCFractionOfZeroSizeRequestsLuma[],
6338 double DCCFractionOfZeroSizeRequestsChroma[],
6339 int HTotal[],
6340 int VTotal[],
6341 double PixelClock[],
6342 double VRatio[],
6343 enum scan_direction_class SourceScan[],
6344 int BlockHeight256BytesY[],
6345 int BlockWidth256BytesY[],
6346 int BlockHeight256BytesC[],
6347 int BlockWidth256BytesC[],
6348 int DCCYMaxUncompressedBlock[],
6349 int DCCCMaxUncompressedBlock[],
6350 int VActive[],
6351 bool DCCEnable[],
6352 bool WritebackEnable[],
6353 double ReadBandwidthPlaneLuma[],
6354 double ReadBandwidthPlaneChroma[],
6355 double meta_row_bw[],
6356 double dpte_row_bw[],
6357 double *StutterEfficiencyNotIncludingVBlank,
6358 double *StutterEfficiency,
6359 int *NumberOfStutterBurstsPerFrame,
6360 double *Z8StutterEfficiencyNotIncludingVBlank,
6361 double *Z8StutterEfficiency,
6362 int *Z8NumberOfStutterBurstsPerFrame,
6363 double *StutterPeriod)
6364 {
6365 struct vba_vars_st *v = &mode_lib->vba;
6366
6367 double DETBufferingTimeY;
6368 double SwathWidthYCriticalPlane = 0;
6369 double VActiveTimeCriticalPlane = 0;
6370 double FrameTimeCriticalPlane = 0;
6371 int BytePerPixelYCriticalPlane = 0;
6372 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6373 double MinTTUVBlankCriticalPlane = 0;
6374 double TotalCompressedReadBandwidth;
6375 double TotalRowReadBandwidth;
6376 double AverageDCCCompressionRate;
6377 double EffectiveCompressedBufferSize;
6378 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6379 double StutterBurstTime;
6380 int TotalActiveWriteback;
6381 double LinesInDETY;
6382 double LinesInDETYRoundedDownToSwath;
6383 double MaximumEffectiveCompressionLuma;
6384 double MaximumEffectiveCompressionChroma;
6385 double TotalZeroSizeRequestReadBandwidth;
6386 double TotalZeroSizeCompressedReadBandwidth;
6387 double AverageDCCZeroSizeFraction;
6388 double AverageZeroSizeCompressionRate;
6389 int TotalNumberOfActiveOTG = 0;
6390 double LastStutterPeriod = 0.0;
6391 double LastZ8StutterPeriod = 0.0;
6392 int k;
6393
6394 TotalZeroSizeRequestReadBandwidth = 0;
6395 TotalZeroSizeCompressedReadBandwidth = 0;
6396 TotalRowReadBandwidth = 0;
6397 TotalCompressedReadBandwidth = 0;
6398
6399 for (k = 0; k < NumberOfActivePlanes; ++k) {
6400 if (DCCEnable[k] == true) {
6401 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6402 || DCCYMaxUncompressedBlock[k] < 256) {
6403 MaximumEffectiveCompressionLuma = 2;
6404 } else {
6405 MaximumEffectiveCompressionLuma = 4;
6406 }
6407 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6408 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6409 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6410 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6411 if (ReadBandwidthPlaneChroma[k] > 0) {
6412 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6413 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6414 MaximumEffectiveCompressionChroma = 2;
6415 } else {
6416 MaximumEffectiveCompressionChroma = 4;
6417 }
6418 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6419 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6420 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6421 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6422 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6423 }
6424 } else {
6425 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6426 }
6427 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6428 }
6429
6430 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6431 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6432
6433 #ifdef __DML_VBA_DEBUG__
6434 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6435 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6436 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6437 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6438 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6439 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6440 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6441 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6442 #endif
6443
6444 if (AverageDCCZeroSizeFraction == 1) {
6445 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6446 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6447 } else if (AverageDCCZeroSizeFraction > 0) {
6448 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6449 EffectiveCompressedBufferSize = dml_min(
6450 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6451 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6452 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6453 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6454 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6455 dml_print(
6456 "DML::%s: min 2 = %f\n",
6457 __func__,
6458 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6459 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6460 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6461 } else {
6462 EffectiveCompressedBufferSize = dml_min(
6463 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6464 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6465 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6466 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6467 }
6468
6469 #ifdef __DML_VBA_DEBUG__
6470 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6471 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6472 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6473 #endif
6474
6475 *StutterPeriod = 0;
6476 for (k = 0; k < NumberOfActivePlanes; ++k) {
6477 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6478 / BytePerPixelDETY[k] / SwathWidthY[k];
6479 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6480 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6481 #ifdef __DML_VBA_DEBUG__
6482 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6483 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6484 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6485 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6486 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6487 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6488 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6489 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6490 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6491 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6492 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6493 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6494 #endif
6495
6496 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6497 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6498
6499 *StutterPeriod = DETBufferingTimeY;
6500 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6501 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6502 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6503 SwathWidthYCriticalPlane = SwathWidthY[k];
6504 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6505 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6506
6507 #ifdef __DML_VBA_DEBUG__
6508 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6509 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6510 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6511 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6512 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6513 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6514 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6515 #endif
6516 }
6517 }
6518
6519 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6520 #ifdef __DML_VBA_DEBUG__
6521 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6522 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6523 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6524 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6525 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6526 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6527 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6528 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6529 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6530 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6531 #endif
6532
6533 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6534 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6535 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6536 #ifdef __DML_VBA_DEBUG__
6537 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6538 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6539 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6540 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6541 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6542 #endif
6543 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6544
6545 dml_print(
6546 "DML::%s: Time to finish residue swath=%f\n",
6547 __func__,
6548 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6549
6550 TotalActiveWriteback = 0;
6551 for (k = 0; k < NumberOfActivePlanes; ++k) {
6552 if (WritebackEnable[k]) {
6553 TotalActiveWriteback = TotalActiveWriteback + 1;
6554 }
6555 }
6556
6557 if (TotalActiveWriteback == 0) {
6558 #ifdef __DML_VBA_DEBUG__
6559 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6560 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6561 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6562 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6563 #endif
6564 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6565 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6566 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6567 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6568 } else {
6569 *StutterEfficiencyNotIncludingVBlank = 0.;
6570 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6571 *NumberOfStutterBurstsPerFrame = 0;
6572 *Z8NumberOfStutterBurstsPerFrame = 0;
6573 }
6574 #ifdef __DML_VBA_DEBUG__
6575 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6576 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6577 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6578 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6579 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6580 #endif
6581
6582 for (k = 0; k < NumberOfActivePlanes; ++k) {
6583 if (v->BlendingAndTiming[k] == k) {
6584 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6585 }
6586 }
6587
6588 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6589 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6590
6591 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6592 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6593 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6594 } else {
6595 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6596 }
6597 } else {
6598 *StutterEfficiency = 0;
6599 }
6600
6601 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6602 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6603 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6604 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6605 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6606 } else {
6607 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6608 }
6609 } else {
6610 *Z8StutterEfficiency = 0.;
6611 }
6612
6613 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6614 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6615 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6616 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6617 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6618 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6619 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6620 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6621 }
6622
6623 static void CalculateSwathAndDETConfiguration(
6624 bool ForceSingleDPP,
6625 int NumberOfActivePlanes,
6626 bool DETSharedByAllDPP,
6627 unsigned int DETBufferSizeInKByteA[],
6628 double MaximumSwathWidthLuma[],
6629 double MaximumSwathWidthChroma[],
6630 enum scan_direction_class SourceScan[],
6631 enum source_format_class SourcePixelFormat[],
6632 enum dm_swizzle_mode SurfaceTiling[],
6633 int ViewportWidth[],
6634 int ViewportHeight[],
6635 int SurfaceWidthY[],
6636 int SurfaceWidthC[],
6637 int SurfaceHeightY[],
6638 int SurfaceHeightC[],
6639 int Read256BytesBlockHeightY[],
6640 int Read256BytesBlockHeightC[],
6641 int Read256BytesBlockWidthY[],
6642 int Read256BytesBlockWidthC[],
6643 enum odm_combine_mode ODMCombineEnabled[],
6644 int BlendingAndTiming[],
6645 int BytePerPixY[],
6646 int BytePerPixC[],
6647 double BytePerPixDETY[],
6648 double BytePerPixDETC[],
6649 int HActive[],
6650 double HRatio[],
6651 double HRatioChroma[],
6652 int DPPPerPlane[],
6653 int swath_width_luma_ub[],
6654 int swath_width_chroma_ub[],
6655 double SwathWidth[],
6656 double SwathWidthChroma[],
6657 int SwathHeightY[],
6658 int SwathHeightC[],
6659 unsigned int DETBufferSizeY[],
6660 unsigned int DETBufferSizeC[],
6661 bool ViewportSizeSupportPerPlane[],
6662 bool *ViewportSizeSupport)
6663 {
6664 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6665 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6666 int MinimumSwathHeightY;
6667 int MinimumSwathHeightC;
6668 int RoundedUpMaxSwathSizeBytesY;
6669 int RoundedUpMaxSwathSizeBytesC;
6670 int RoundedUpMinSwathSizeBytesY;
6671 int RoundedUpMinSwathSizeBytesC;
6672 int RoundedUpSwathSizeBytesY;
6673 int RoundedUpSwathSizeBytesC;
6674 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6675 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6676 int k;
6677
6678 CalculateSwathWidth(
6679 ForceSingleDPP,
6680 NumberOfActivePlanes,
6681 SourcePixelFormat,
6682 SourceScan,
6683 ViewportWidth,
6684 ViewportHeight,
6685 SurfaceWidthY,
6686 SurfaceWidthC,
6687 SurfaceHeightY,
6688 SurfaceHeightC,
6689 ODMCombineEnabled,
6690 BytePerPixY,
6691 BytePerPixC,
6692 Read256BytesBlockHeightY,
6693 Read256BytesBlockHeightC,
6694 Read256BytesBlockWidthY,
6695 Read256BytesBlockWidthC,
6696 BlendingAndTiming,
6697 HActive,
6698 HRatio,
6699 DPPPerPlane,
6700 SwathWidthSingleDPP,
6701 SwathWidthSingleDPPChroma,
6702 SwathWidth,
6703 SwathWidthChroma,
6704 MaximumSwathHeightY,
6705 MaximumSwathHeightC,
6706 swath_width_luma_ub,
6707 swath_width_chroma_ub);
6708
6709 *ViewportSizeSupport = true;
6710 for (k = 0; k < NumberOfActivePlanes; ++k) {
6711 unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
6712
6713 if (DETSharedByAllDPP && DPPPerPlane[k])
6714 DETBufferSizeInKByte /= DPPPerPlane[k];
6715 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6716 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6717 if (SurfaceTiling[k] == dm_sw_linear
6718 || (SourcePixelFormat[k] == dm_444_64
6719 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6720 && SourceScan[k] != dm_vert)) {
6721 MinimumSwathHeightY = MaximumSwathHeightY[k];
6722 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6723 MinimumSwathHeightY = MaximumSwathHeightY[k];
6724 } else {
6725 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6726 }
6727 MinimumSwathHeightC = MaximumSwathHeightC[k];
6728 } else {
6729 if (SurfaceTiling[k] == dm_sw_linear) {
6730 MinimumSwathHeightY = MaximumSwathHeightY[k];
6731 MinimumSwathHeightC = MaximumSwathHeightC[k];
6732 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6733 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6734 MinimumSwathHeightC = MaximumSwathHeightC[k];
6735 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6736 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6737 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6738 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6739 MinimumSwathHeightY = MaximumSwathHeightY[k];
6740 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6741 } else {
6742 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6743 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6744 }
6745 }
6746
6747 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6748 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6749 if (SourcePixelFormat[k] == dm_420_10) {
6750 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6751 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6752 }
6753 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6754 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6755 if (SourcePixelFormat[k] == dm_420_10) {
6756 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6757 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6758 }
6759
6760 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6761 SwathHeightY[k] = MaximumSwathHeightY[k];
6762 SwathHeightC[k] = MaximumSwathHeightC[k];
6763 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6764 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6765 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6766 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6767 SwathHeightY[k] = MinimumSwathHeightY;
6768 SwathHeightC[k] = MaximumSwathHeightC[k];
6769 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6770 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6771 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6772 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6773 SwathHeightY[k] = MaximumSwathHeightY[k];
6774 SwathHeightC[k] = MinimumSwathHeightC;
6775 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6776 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6777 } else {
6778 SwathHeightY[k] = MinimumSwathHeightY;
6779 SwathHeightC[k] = MinimumSwathHeightC;
6780 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6781 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6782 }
6783 {
6784 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6785 if (SwathHeightC[k] == 0) {
6786 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6787 DETBufferSizeC[k] = 0;
6788 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6789 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6790 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6791 } else {
6792 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6793 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6794 }
6795
6796 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6797 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6798 *ViewportSizeSupport = false;
6799 ViewportSizeSupportPerPlane[k] = false;
6800 } else {
6801 ViewportSizeSupportPerPlane[k] = true;
6802 }
6803 }
6804 }
6805 }
6806
6807 static void CalculateSwathWidth(
6808 bool ForceSingleDPP,
6809 int NumberOfActivePlanes,
6810 enum source_format_class SourcePixelFormat[],
6811 enum scan_direction_class SourceScan[],
6812 int ViewportWidth[],
6813 int ViewportHeight[],
6814 int SurfaceWidthY[],
6815 int SurfaceWidthC[],
6816 int SurfaceHeightY[],
6817 int SurfaceHeightC[],
6818 enum odm_combine_mode ODMCombineEnabled[],
6819 int BytePerPixY[],
6820 int BytePerPixC[],
6821 int Read256BytesBlockHeightY[],
6822 int Read256BytesBlockHeightC[],
6823 int Read256BytesBlockWidthY[],
6824 int Read256BytesBlockWidthC[],
6825 int BlendingAndTiming[],
6826 int HActive[],
6827 double HRatio[],
6828 int DPPPerPlane[],
6829 double SwathWidthSingleDPPY[],
6830 double SwathWidthSingleDPPC[],
6831 double SwathWidthY[],
6832 double SwathWidthC[],
6833 int MaximumSwathHeightY[],
6834 int MaximumSwathHeightC[],
6835 int swath_width_luma_ub[],
6836 int swath_width_chroma_ub[])
6837 {
6838 enum odm_combine_mode MainPlaneODMCombine;
6839 int j, k;
6840
6841 #ifdef __DML_VBA_DEBUG__
6842 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6843 #endif
6844
6845 for (k = 0; k < NumberOfActivePlanes; ++k) {
6846 if (SourceScan[k] != dm_vert) {
6847 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6848 } else {
6849 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6850 }
6851
6852 #ifdef __DML_VBA_DEBUG__
6853 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6854 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6855 #endif
6856
6857 MainPlaneODMCombine = ODMCombineEnabled[k];
6858 for (j = 0; j < NumberOfActivePlanes; ++j) {
6859 if (BlendingAndTiming[k] == j) {
6860 MainPlaneODMCombine = ODMCombineEnabled[j];
6861 }
6862 }
6863
6864 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6865 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6866 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6867 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6868 } else if (DPPPerPlane[k] == 2) {
6869 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6870 } else {
6871 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6872 }
6873
6874 #ifdef __DML_VBA_DEBUG__
6875 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6876 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6877 #endif
6878
6879 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6880 SwathWidthC[k] = SwathWidthY[k] / 2;
6881 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6882 } else {
6883 SwathWidthC[k] = SwathWidthY[k];
6884 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6885 }
6886
6887 if (ForceSingleDPP == true) {
6888 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6889 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6890 }
6891 {
6892 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6893 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6894
6895 #ifdef __DML_VBA_DEBUG__
6896 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6897 #endif
6898
6899 if (SourceScan[k] != dm_vert) {
6900 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6901 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6902 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6903 if (BytePerPixC[k] > 0) {
6904 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6905
6906 swath_width_chroma_ub[k] = dml_min(
6907 surface_width_ub_c,
6908 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6909 } else {
6910 swath_width_chroma_ub[k] = 0;
6911 }
6912 } else {
6913 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6914 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6915 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6916 if (BytePerPixC[k] > 0) {
6917 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6918
6919 swath_width_chroma_ub[k] = dml_min(
6920 surface_height_ub_c,
6921 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6922 } else {
6923 swath_width_chroma_ub[k] = 0;
6924 }
6925 }
6926 }
6927 }
6928 }
6929
6930 static double CalculateExtraLatency(
6931 int RoundTripPingLatencyCycles,
6932 int ReorderingBytes,
6933 double DCFCLK,
6934 int TotalNumberOfActiveDPP,
6935 int PixelChunkSizeInKByte,
6936 int TotalNumberOfDCCActiveDPP,
6937 int MetaChunkSize,
6938 double ReturnBW,
6939 bool GPUVMEnable,
6940 bool HostVMEnable,
6941 int NumberOfActivePlanes,
6942 int NumberOfDPP[],
6943 int dpte_group_bytes[],
6944 double HostVMInefficiencyFactor,
6945 double HostVMMinPageSize,
6946 int HostVMMaxNonCachedPageTableLevels)
6947 {
6948 double ExtraLatencyBytes;
6949 double ExtraLatency;
6950
6951 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6952 ReorderingBytes,
6953 TotalNumberOfActiveDPP,
6954 PixelChunkSizeInKByte,
6955 TotalNumberOfDCCActiveDPP,
6956 MetaChunkSize,
6957 GPUVMEnable,
6958 HostVMEnable,
6959 NumberOfActivePlanes,
6960 NumberOfDPP,
6961 dpte_group_bytes,
6962 HostVMInefficiencyFactor,
6963 HostVMMinPageSize,
6964 HostVMMaxNonCachedPageTableLevels);
6965
6966 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6967
6968 #ifdef __DML_VBA_DEBUG__
6969 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6970 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6971 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6972 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6973 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6974 #endif
6975
6976 return ExtraLatency;
6977 }
6978
/*
 * CalculateExtraLatencyBytes - byte count used for the extra-latency term.
 *
 * The result is
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * and, when GPUVMEnable is set, additionally for every active plane
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *   * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set;
 * in that case it is HostVMMaxNonCachedPageTableLevels, reduced by 1 or 2
 * (through dml_max(0, ...)) according to HostVMMinPageSize.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	int HostVMDynamicLevels = 0;
	int PageTableFetchFactor;
	double TotalBytes;
	int plane;

	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* The per-plane dpte_group_bytes term is only added when GPUVM is enabled. */
	if (!GPUVMEnable)
		return TotalBytes;

	PageTableFetchFactor = 1 + 8 * HostVMDynamicLevels;
	for (plane = 0; plane < NumberOfActivePlanes; ++plane)
		TotalBytes += NumberOfDPP[plane] * dpte_group_bytes[plane] * PageTableFetchFactor * HostVMInefficiencyFactor;

	return TotalBytes;
}
7018
/*
 * CalculateUrgentLatency - combine the three urgent-latency inputs with
 * dml_max3() and, when DoUrgentLatencyAdjustment is set, add
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Returns the resulting latency.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double BaseLatency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return BaseLatency;

	/* Adjustment term is a function of the reference-to-actual fabric clock ratio. */
	return BaseLatency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
7036
/*
 * UseMinimumDCFCLK - fill v->DCFCLKState[i][j] for every voltage state.
 *
 * For each state index i below v->soc.num_states and each j in {0, 1}, two
 * DCFCLK requirements are derived from values already stored in
 * mode_lib->vba: one from average bandwidth and one from peak (prefetch)
 * bandwidth. The value stored is
 *
 *     min(DCFCLKPerState[i], 1.05 * max(average, peak))
 *
 * v->DCFCLKState[i][j] is the only member of *v that this function assigns
 * directly.
 *
 * @mode_lib: library instance; its vba member supplies every input and
 *            receives the result.
 * @MaxPrefetchMode: forwarded unchanged to CalculateTWait().
 * @ReorderingBytes: forwarded unchanged to CalculateExtraLatencyBytes().
 *
 * NOTE(review): what the second index j selects is not visible in this
 * function - confirm against the code that fills the [i][j] tables.
 */
7037 static noinline_for_stack void UseMinimumDCFCLK(
7038 struct display_mode_lib *mode_lib,
7039 int MaxPrefetchMode,
7040 int ReorderingBytes)
7041 {
7042 struct vba_vars_st *v = &mode_lib->vba;
/*
 * dummy1..dummy3 only receive outputs of
 * CalculateVupdateAndDynamicMetadataParameters() that are not used here.
 */
7043 int dummy1, i, j, k;
7044 double NormalEfficiency, dummy2, dummy3;
7045 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7046
/* Percentage converted to a fraction; used as a divisor throughout. */
7047 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7048 for (i = 0; i < v->soc.num_states; ++i) {
7049 for (j = 0; j <= 1; ++j) {
/* Per-plane scratch arrays, indexed by k, rebuilt for every (i, j). */
7050 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7051 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7052 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7053 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7054 double MinimumTWait;
7055 double NonDPTEBandwidth;
7056 double DPTEBandwidth;
7057 double DCFCLKRequiredForAverageBandwidth;
7058 double ExtraLatencyBytes;
7059 double ExtraLatencyCycles;
7060 double DCFCLKRequiredForPeakBandwidth;
7061 int NoOfDPPState[DC__NUM_DPP__MAX];
7062 double MinimumTvmPlus2Tr0;
7063
/*
 * Sum over all active planes of (DPP count * DPTE bytes per row) divided by
 * 15.75 line times, where one line time is HTotal / PixelClock.
 */
7064 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7065 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7066 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7067 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7068 }
7069
/*
 * Flat per-plane copy of NoOfDPP for this (i, j); passed to
 * CalculateExtraLatencyBytes() and reused in the per-plane math below.
 * NOTE(review): `k <= N - 1` covers the same range as the `k < N` loops only
 * if the plane count is signed or non-zero - its declared type is not
 * visible here.
 */
7070 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7071 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7072 }
7073
7074 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7075 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
/*
 * Use the sum computed above when HostVM is enabled or entry 0 of
 * ImmediateFlipRequirement asks for immediate flip; otherwise use the
 * stored TotalDPTERowBandwidth.
 */
7076 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7077 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
/*
 * Average requirement: the largest of the projected deep-sleep DCFCLK and
 * two clocks derived from bandwidth / return bus width (one scaled by the
 * max-average percentage, one by NormalEfficiency).
 */
7078 DCFCLKRequiredForAverageBandwidth = dml_max3(
7079 v->ProjectedDCFCLKDeepSleep[i][j],
7080 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7081 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7082 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7083
7084 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7085 ReorderingBytes,
7086 v->TotalNumberOfActiveDPP[i][j],
7087 v->PixelChunkSizeInKByte,
7088 v->TotalNumberOfDCCActiveDPP[i][j],
7089 v->MetaChunkSize,
7090 v->GPUVMEnable,
7091 v->HostVMEnable,
7092 v->NumberOfActivePlanes,
7093 NoOfDPPState,
7094 v->dpte_group_bytes,
7095 1,
7096 v->HostVMMinPageSize,
7097 v->HostVMMaxNonCachedPageTableLevels);
/*
 * Fixed latency cycles plus the cycles needed to return ExtraLatencyBytes
 * over the return bus at NormalEfficiency.
 */
7098 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
/* Per-plane peak requirement. */
7099 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7100 double DCFCLKCyclesRequiredInPrefetch;
7101 double ExpectedPrefetchBWAcceleration;
7102 double PrefetchTime;
7103
7104 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7105 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
/*
 * Pixel cycles plus extra-latency, page-table and meta-row terms. The
 * PDE/MetaPTE term only contributes when GPUVMMaxPageTableLevels > 2.
 */
7106 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7107 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7108 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7109 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7110 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7111 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7112 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
/* Non-zero only when GPUVM, this plane's dynamic metadata and DynamicMetadataVMEnabled are all set. */
7113 DynamicMetadataVMExtraLatency[k] =
7114 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7115 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
/*
 * Time of (MaximumVStartup - 1) lines, less MinimumTWait, less an
 * urgent-latency term scaled by the page-table levels, less the dynamic
 * metadata VM latency computed just above.
 */
7116 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7117 - v->UrgLatency[i]
7118 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7119 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7120 - DynamicMetadataVMExtraLatency[k];
7121
7122 if (PrefetchTime > 0) {
7123 double ExpectedVRatioPrefetch;
7124 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7125 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
/* Both dml_max(1.0, ...) factors only ever scale the requirement up. */
7126 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7127 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7128 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7129 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7130 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7131 }
/* No positive prefetch time: request this state's full DCFCLK for the plane. */
7132 } else {
7133 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7134 }
7135 if (v->DynamicMetadataEnable[k] == true) {
7136 double TSetupPipe;
7137 double TdmbfPipe;
7138 double TdmsksPipe;
7139 double TdmecPipe;
7140 double AllowedTimeForUrgentExtraLatency;
7141
/*
 * Output arguments are passed in the order TSetupPipe, TdmbfPipe,
 * TdmecPipe, TdmsksPipe; the last three outputs land in the dummies and
 * are discarded.
 */
7142 CalculateVupdateAndDynamicMetadataParameters(
7143 v->MaxInterDCNTileRepeaters,
7144 v->RequiredDPPCLK[i][j][k],
7145 v->RequiredDISPCLK[i][j],
7146 v->ProjectedDCFCLKDeepSleep[i][j],
7147 v->PixelClock[k],
7148 v->HTotal[k],
7149 v->VTotal[k] - v->VActive[k],
7150 v->DynamicMetadataTransmittedBytes[k],
7151 v->DynamicMetadataLinesBeforeActiveRequired[k],
7152 v->Interlace[k],
7153 v->ProgressiveToInterlaceUnitInOPP,
7154 &TSetupPipe,
7155 &TdmbfPipe,
7156 &TdmecPipe,
7157 &TdmsksPipe,
7158 &dummy1,
7159 &dummy2,
7160 &dummy3);
7161 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7162 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
/* With time left, raise the plane's requirement if needed; with none, fall back to the state's full DCFCLK. */
7163 if (AllowedTimeForUrgentExtraLatency > 0) {
7164 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7165 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7166 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7167 } else {
7168 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7169 }
7170 }
7171 }
/* Peak requirement starts as the sum of the per-plane values. */
7172 DCFCLKRequiredForPeakBandwidth = 0;
7173 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7174 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7175 }
/* UrgLatency[i] times a factor built from the page-table levels; zero when GPUVM is disabled. */
7176 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7177 * (v->GPUVMEnable == true ?
7178 (v->HostVMEnable == true ?
7179 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7180 0);
/*
 * Second pass over the planes. When the available time does not exceed the
 * minimum, the running value is overwritten with DCFCLKPerState[i];
 * otherwise it is raised via dml_max3(). The dml_min() after the loop caps
 * the stored result at DCFCLKPerState[i] in either case.
 */
7181 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7182 double MaximumTvmPlus2Tr0PlusTsw;
7183 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7184 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7185 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7186 } else {
7187 DCFCLKRequiredForPeakBandwidth = dml_max3(
7188 DCFCLKRequiredForPeakBandwidth,
7189 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7190 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7191 }
7192 }
/* Larger of the two requirements scaled by 1.05, capped at this state's DCFCLK. */
7193 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7194 }
7195 }
7196 }
7197
7198 static void CalculateUnboundedRequestAndCompressedBufferSize(
7199 unsigned int DETBufferSizeInKByte,
7200 int ConfigReturnBufferSizeInKByte,
7201 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7202 int TotalActiveDPP,
7203 bool NoChromaPlanes,
7204 int MaxNumDPP,
7205 int CompressedBufferSegmentSizeInkByteFinal,
7206 enum output_encoder_class *Output,
7207 bool *UnboundedRequestEnabled,
7208 int *CompressedBufferSizeInkByte)
7209 {
7210 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7211
7212 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7213 *CompressedBufferSizeInkByte = (
7214 *UnboundedRequestEnabled == true ?
7215 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7216 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7217 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7218
7219 #ifdef __DML_VBA_DEBUG__
7220 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7221 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7222 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7223 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7224 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7225 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7226 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7227 #endif
7228 }
7229
7230 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7231 {
7232 bool ret_val = false;
7233
7234 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7235 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7236 ret_val = false;
7237 }
7238 return (ret_val);
7239 }
7240
7241