2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
46 // For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
49 // Move these to ip parameters/constants
51 // The vstartup value at which DML starts checking whether the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
57 // fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
/*
 * Per-pipe fields. CalculatePrefetchSchedule() reads several of these through
 * its `myPipe` pointer (DCFCLKDeepSleep, DPPPerPlane, InterlaceEnable,
 * NumberOfCursors, DCCEnable, ODMCombineIsEnabled, SourcePixelFormat,
 * ProgressiveToInterlaceUnitInOPP).
 * NOTE(review): the enclosing struct's opening/closing lines and some of its
 * members are not visible in this listing -- confirm against the full file.
 */
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
69 enum scan_direction_class SourceScan;
/* 256-byte request block dimensions, luma (Y) then chroma (C). */
70 unsigned int BlockWidth256BytesY;
71 unsigned int BlockHeight256BytesY;
72 unsigned int BlockWidth256BytesC;
73 unsigned int BlockHeight256BytesC;
74 unsigned int InterlaceEnable;
75 unsigned int NumberOfCursors;
78 unsigned int DCCEnable;
79 bool ODMCombineIsEnabled;
80 enum source_format_class SourcePixelFormat;
83 bool ProgressiveToInterlaceUnitInOPP;
87 #define BPP_BLENDED_PIPE 0xffffffff
89 static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102 static unsigned int dscceComputeDelay(
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110 static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
165 double *prefetch_vmrow_bw,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174 static void CalculateDCCConfiguration(
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197 static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207 static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244 static void CalculateRowBandwidth(
246 enum source_format_class SourcePixelFormat,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
260 double *dpte_row_bw);
262 static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
264 double HostVMInefficiencyFactor,
265 double UrgentExtraLatency,
266 double UrgentLatency,
267 unsigned int GPUVMMaxPageTableLevels,
269 unsigned int HostVMMaxNonCachedPageTableLevels,
271 double HostVMMinPageSize,
272 double PDEAndMetaPTEBytesPerFrame,
274 double DPTEBytesPerRow,
275 double BandwidthAvailableForImmediateFlip,
276 unsigned int TotImmediateFlipBytes,
277 enum source_format_class SourcePixelFormat,
283 unsigned int dpte_row_height,
284 unsigned int meta_row_height,
285 unsigned int dpte_row_height_chroma,
286 unsigned int meta_row_height_chroma,
287 double *DestinationLinesToRequestVMInImmediateFlip,
288 double *DestinationLinesToRequestRowInImmediateFlip,
289 double *final_flip_bw,
290 bool *ImmediateFlipSupportedForPipe);
291 static double CalculateWriteBackDelay(
292 enum source_format_class WritebackPixelFormat,
293 double WritebackHRatio,
294 double WritebackVRatio,
295 unsigned int WritebackVTaps,
296 int WritebackDestinationWidth,
297 int WritebackDestinationHeight,
298 int WritebackSourceHeight,
299 unsigned int HTotal);
301 static void CalculateVupdateAndDynamicMetadataParameters(
302 int MaxInterDCNTileRepeaters,
305 double DCFClkDeepSleep,
309 int DynamicMetadataTransmittedBytes,
310 int DynamicMetadataLinesBeforeActiveRequired,
312 bool ProgressiveToInterlaceUnitInOPP,
317 int *VUpdateOffsetPix,
318 double *VUpdateWidthPix,
319 double *VReadyOffsetPix);
321 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
322 struct display_mode_lib *mode_lib,
323 unsigned int PrefetchMode,
324 unsigned int NumberOfActivePlanes,
325 unsigned int MaxLineBufferLines,
326 unsigned int LineBufferSize,
327 unsigned int WritebackInterfaceBufferSize,
330 bool SynchronizedVBlank,
331 unsigned int dpte_group_bytes[],
332 unsigned int MetaChunkSize,
333 double UrgentLatency,
335 double WritebackLatency,
336 double WritebackChunkSize,
338 double DRAMClockChangeLatency,
340 double SREnterPlusExitTime,
342 double SREnterPlusExitZ8Time,
343 double DCFCLKDeepSleep,
344 unsigned int DETBufferSizeY[],
345 unsigned int DETBufferSizeC[],
346 unsigned int SwathHeightY[],
347 unsigned int SwathHeightC[],
348 unsigned int LBBitPerPixel[],
349 double SwathWidthY[],
350 double SwathWidthC[],
352 double HRatioChroma[],
353 unsigned int vtaps[],
354 unsigned int VTAPsChroma[],
356 double VRatioChroma[],
357 unsigned int HTotal[],
359 unsigned int BlendingAndTiming[],
360 unsigned int DPPPerPlane[],
361 double BytePerPixelDETY[],
362 double BytePerPixelDETC[],
363 double DSTXAfterScaler[],
364 double DSTYAfterScaler[],
365 bool WritebackEnable[],
366 enum source_format_class WritebackPixelFormat[],
367 double WritebackDestinationWidth[],
368 double WritebackDestinationHeight[],
369 double WritebackSourceHeight[],
370 bool UnboundedRequestEnabled,
371 int unsigned CompressedBufferSizeInkByte,
372 enum clock_change_support *DRAMClockChangeSupport,
373 double *UrgentWatermark,
374 double *WritebackUrgentWatermark,
375 double *DRAMClockChangeWatermark,
376 double *WritebackDRAMClockChangeWatermark,
377 double *StutterExitWatermark,
378 double *StutterEnterPlusExitWatermark,
379 double *Z8StutterExitWatermark,
380 double *Z8StutterEnterPlusExitWatermark,
381 double *MinActiveDRAMClockChangeLatencySupported);
383 static void CalculateDCFCLKDeepSleep(
384 struct display_mode_lib *mode_lib,
385 unsigned int NumberOfActivePlanes,
389 double VRatioChroma[],
390 double SwathWidthY[],
391 double SwathWidthC[],
392 unsigned int DPPPerPlane[],
394 double HRatioChroma[],
396 double PSCL_THROUGHPUT[],
397 double PSCL_THROUGHPUT_CHROMA[],
399 double ReadBandwidthLuma[],
400 double ReadBandwidthChroma[],
402 double *DCFCLKDeepSleep);
404 static void CalculateUrgentBurstFactor(
405 int swath_width_luma_ub,
406 int swath_width_chroma_ub,
407 unsigned int SwathHeightY,
408 unsigned int SwathHeightC,
410 double UrgentLatency,
411 double CursorBufferSize,
412 unsigned int CursorWidth,
413 unsigned int CursorBPP,
416 double BytePerPixelInDETY,
417 double BytePerPixelInDETC,
418 double DETBufferSizeY,
419 double DETBufferSizeC,
420 double *UrgentBurstFactorCursor,
421 double *UrgentBurstFactorLuma,
422 double *UrgentBurstFactorChroma,
423 bool *NotEnoughUrgentLatencyHiding);
425 static void UseMinimumDCFCLK(
426 struct display_mode_lib *mode_lib,
428 int ReorderingBytes);
430 static void CalculatePixelDeliveryTimes(
431 unsigned int NumberOfActivePlanes,
433 double VRatioChroma[],
434 double VRatioPrefetchY[],
435 double VRatioPrefetchC[],
436 unsigned int swath_width_luma_ub[],
437 unsigned int swath_width_chroma_ub[],
438 unsigned int DPPPerPlane[],
440 double HRatioChroma[],
442 double PSCL_THROUGHPUT[],
443 double PSCL_THROUGHPUT_CHROMA[],
446 enum scan_direction_class SourceScan[],
447 unsigned int NumberOfCursors[],
448 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
449 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
450 unsigned int BlockWidth256BytesY[],
451 unsigned int BlockHeight256BytesY[],
452 unsigned int BlockWidth256BytesC[],
453 unsigned int BlockHeight256BytesC[],
454 double DisplayPipeLineDeliveryTimeLuma[],
455 double DisplayPipeLineDeliveryTimeChroma[],
456 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
457 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
458 double DisplayPipeRequestDeliveryTimeLuma[],
459 double DisplayPipeRequestDeliveryTimeChroma[],
460 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
461 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
462 double CursorRequestDeliveryTime[],
463 double CursorRequestDeliveryTimePrefetch[]);
465 static void CalculateMetaAndPTETimes(
466 int NumberOfActivePlanes,
469 int MinMetaChunkSizeBytes,
472 double VRatioChroma[],
473 double DestinationLinesToRequestRowInVBlank[],
474 double DestinationLinesToRequestRowInImmediateFlip[],
479 enum scan_direction_class SourceScan[],
480 int dpte_row_height[],
481 int dpte_row_height_chroma[],
482 int meta_row_width[],
483 int meta_row_width_chroma[],
484 int meta_row_height[],
485 int meta_row_height_chroma[],
486 int meta_req_width[],
487 int meta_req_width_chroma[],
488 int meta_req_height[],
489 int meta_req_height_chroma[],
490 int dpte_group_bytes[],
491 int PTERequestSizeY[],
492 int PTERequestSizeC[],
493 int PixelPTEReqWidthY[],
494 int PixelPTEReqHeightY[],
495 int PixelPTEReqWidthC[],
496 int PixelPTEReqHeightC[],
497 int dpte_row_width_luma_ub[],
498 int dpte_row_width_chroma_ub[],
499 double DST_Y_PER_PTE_ROW_NOM_L[],
500 double DST_Y_PER_PTE_ROW_NOM_C[],
501 double DST_Y_PER_META_ROW_NOM_L[],
502 double DST_Y_PER_META_ROW_NOM_C[],
503 double TimePerMetaChunkNominal[],
504 double TimePerChromaMetaChunkNominal[],
505 double TimePerMetaChunkVBlank[],
506 double TimePerChromaMetaChunkVBlank[],
507 double TimePerMetaChunkFlip[],
508 double TimePerChromaMetaChunkFlip[],
509 double time_per_pte_group_nom_luma[],
510 double time_per_pte_group_vblank_luma[],
511 double time_per_pte_group_flip_luma[],
512 double time_per_pte_group_nom_chroma[],
513 double time_per_pte_group_vblank_chroma[],
514 double time_per_pte_group_flip_chroma[]);
516 static void CalculateVMGroupAndRequestTimes(
517 unsigned int NumberOfActivePlanes,
519 unsigned int GPUVMMaxPageTableLevels,
520 unsigned int HTotal[],
522 double DestinationLinesToRequestVMInVBlank[],
523 double DestinationLinesToRequestVMInImmediateFlip[],
526 int dpte_row_width_luma_ub[],
527 int dpte_row_width_chroma_ub[],
528 int vm_group_bytes[],
529 unsigned int dpde0_bytes_per_frame_ub_l[],
530 unsigned int dpde0_bytes_per_frame_ub_c[],
531 int meta_pte_bytes_per_frame_ub_l[],
532 int meta_pte_bytes_per_frame_ub_c[],
533 double TimePerVMGroupVBlank[],
534 double TimePerVMGroupFlip[],
535 double TimePerVMRequestVBlank[],
536 double TimePerVMRequestFlip[]);
538 static void CalculateStutterEfficiency(
539 struct display_mode_lib *mode_lib,
540 int CompressedBufferSizeInkByte,
541 bool UnboundedRequestEnabled,
542 int ConfigReturnBufferSizeInKByte,
543 int MetaFIFOSizeInKEntries,
544 int ZeroSizeBufferEntries,
545 int NumberOfActivePlanes,
546 int ROBBufferSizeInKByte,
547 double TotalDataReadBandwidth,
550 double COMPBUF_RESERVED_SPACE_64B,
551 double COMPBUF_RESERVED_SPACE_ZS,
554 bool SynchronizedVBlank,
555 double Z8StutterEnterPlusExitWatermark,
556 double StutterEnterPlusExitWatermark,
557 bool ProgressiveToInterlaceUnitInOPP,
559 double MinTTUVBlank[],
561 unsigned int DETBufferSizeY[],
563 double BytePerPixelDETY[],
564 double SwathWidthY[],
567 double NetDCCRateLuma[],
568 double NetDCCRateChroma[],
569 double DCCFractionOfZeroSizeRequestsLuma[],
570 double DCCFractionOfZeroSizeRequestsChroma[],
575 enum scan_direction_class SourceScan[],
576 int BlockHeight256BytesY[],
577 int BlockWidth256BytesY[],
578 int BlockHeight256BytesC[],
579 int BlockWidth256BytesC[],
580 int DCCYMaxUncompressedBlock[],
581 int DCCCMaxUncompressedBlock[],
584 bool WritebackEnable[],
585 double ReadBandwidthPlaneLuma[],
586 double ReadBandwidthPlaneChroma[],
587 double meta_row_bw[],
588 double dpte_row_bw[],
589 double *StutterEfficiencyNotIncludingVBlank,
590 double *StutterEfficiency,
591 int *NumberOfStutterBurstsPerFrame,
592 double *Z8StutterEfficiencyNotIncludingVBlank,
593 double *Z8StutterEfficiency,
594 int *Z8NumberOfStutterBurstsPerFrame,
595 double *StutterPeriod);
597 static void CalculateSwathAndDETConfiguration(
599 int NumberOfActivePlanes,
600 unsigned int DETBufferSizeInKByte,
601 double MaximumSwathWidthLuma[],
602 double MaximumSwathWidthChroma[],
603 enum scan_direction_class SourceScan[],
604 enum source_format_class SourcePixelFormat[],
605 enum dm_swizzle_mode SurfaceTiling[],
607 int ViewportHeight[],
610 int SurfaceHeightY[],
611 int SurfaceHeightC[],
612 int Read256BytesBlockHeightY[],
613 int Read256BytesBlockHeightC[],
614 int Read256BytesBlockWidthY[],
615 int Read256BytesBlockWidthC[],
616 enum odm_combine_mode ODMCombineEnabled[],
617 int BlendingAndTiming[],
620 double BytePerPixDETY[],
621 double BytePerPixDETC[],
624 double HRatioChroma[],
626 int swath_width_luma_ub[],
627 int swath_width_chroma_ub[],
629 double SwathWidthChroma[],
632 unsigned int DETBufferSizeY[],
633 unsigned int DETBufferSizeC[],
634 bool ViewportSizeSupportPerPlane[],
635 bool *ViewportSizeSupport);
636 static void CalculateSwathWidth(
638 int NumberOfActivePlanes,
639 enum source_format_class SourcePixelFormat[],
640 enum scan_direction_class SourceScan[],
642 int ViewportHeight[],
645 int SurfaceHeightY[],
646 int SurfaceHeightC[],
647 enum odm_combine_mode ODMCombineEnabled[],
650 int Read256BytesBlockHeightY[],
651 int Read256BytesBlockHeightC[],
652 int Read256BytesBlockWidthY[],
653 int Read256BytesBlockWidthC[],
654 int BlendingAndTiming[],
658 double SwathWidthSingleDPPY[],
659 double SwathWidthSingleDPPC[],
660 double SwathWidthY[],
661 double SwathWidthC[],
662 int MaximumSwathHeightY[],
663 int MaximumSwathHeightC[],
664 int swath_width_luma_ub[],
665 int swath_width_chroma_ub[]);
667 static double CalculateExtraLatency(
668 int RoundTripPingLatencyCycles,
671 int TotalNumberOfActiveDPP,
672 int PixelChunkSizeInKByte,
673 int TotalNumberOfDCCActiveDPP,
678 int NumberOfActivePlanes,
680 int dpte_group_bytes[],
681 double HostVMInefficiencyFactor,
682 double HostVMMinPageSize,
683 int HostVMMaxNonCachedPageTableLevels);
685 static double CalculateExtraLatencyBytes(
687 int TotalNumberOfActiveDPP,
688 int PixelChunkSizeInKByte,
689 int TotalNumberOfDCCActiveDPP,
693 int NumberOfActivePlanes,
695 int dpte_group_bytes[],
696 double HostVMInefficiencyFactor,
697 double HostVMMinPageSize,
698 int HostVMMaxNonCachedPageTableLevels);
700 static double CalculateUrgentLatency(
701 double UrgentLatencyPixelDataOnly,
702 double UrgentLatencyPixelMixedWithVMData,
703 double UrgentLatencyVMDataOnly,
704 bool DoUrgentLatencyAdjustment,
705 double UrgentLatencyAdjustmentFabricClockComponent,
706 double UrgentLatencyAdjustmentFabricClockReference,
707 double FabricClockSingle);
709 static void CalculateUnboundedRequestAndCompressedBufferSize(
710 unsigned int DETBufferSizeInKByte,
711 int ConfigReturnBufferSizeInKByte,
712 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
716 int CompressedBufferSegmentSizeInkByteFinal,
717 enum output_encoder_class *Output,
718 bool *UnboundedRequestEnabled,
719 int *CompressedBufferSizeInkByte);
721 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
/*
 * dml31_recalculate - non-static entry point that re-runs the dml31 VBA
 * calculation chain on @mode_lib.
 *
 * Calls, in this order and all with the same mode_lib pointer:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Returns nothing; results are left in whatever state the callees update.
 */
723 void dml31_recalculate(struct display_mode_lib *mode_lib)
725 ModeSupportAndSystemConfiguration(mode_lib);
726 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
727 DisplayPipeConfiguration(mode_lib);
/*
 * Debug-only trace emitted before the final step.
 * NOTE(review): the matching #endif is not visible in this listing;
 * presumably only the dml_print() below is conditional -- confirm.
 */
728 #ifdef __DML_VBA_DEBUG__
729 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
731 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
/*
 * dscceComputeDelay - compute the delay of the DSC encoder ("dscce").
 *
 * Visible steps:
 *   - pixelsPerClock is selected from @pixelFormat (dm_420 / dm_444 / dm_n422
 *     are tested explicitly; the values assigned are not visible here).
 *   - initalXmitDelay = round(rcModelSize / 2 / BPP / pixelsPerClock), with
 *     rcModelSize fixed at 8192.
 *   - w is the slice width expressed in DSC clocks (sliceWidth / pixelsPerClock).
 *   - Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22 (in DSC cycles).
 *   - pixels = Delay * 3 * pixelsPerClock converts cycles to pixels.
 *
 * NOTE(review): several parameters (the body uses BPP; the comments mention
 * bpc), most intermediate assignments and the return statement are not
 * visible in this listing; presumably `pixels` is the return value -- confirm
 * against the full file.
 */
734 static unsigned int dscceComputeDelay(
737 unsigned int sliceWidth,
738 unsigned int numSlices,
739 enum output_format_class pixelFormat,
740 enum output_encoder_class Output)
742 // valid bpc = source bits per component in the set of {8, 10, 12}
743 // valid bpp = increments of 1/16 of a bit
744 // min = 6/7/8 in N420/N422/444, respectively
745 // max = such that compression is 1:1
746 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
747 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
748 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
751 unsigned int rcModelSize = 8192;
753 // N422/N420 operate at 2 pixels per clock
754 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
756 if (pixelFormat == dm_420)
758 else if (pixelFormat == dm_444)
760 else if (pixelFormat == dm_n422)
762 // #all other modes operate at 1 pixel per clock
766 //initial transmit delay as per PPS
767 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
777 //divide by pixel per cycle to compute slice width as seen by DSC
778 w = sliceWidth / pixelsPerClock;
780 //422 mode has an additional cycle of delay
781 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
786 //main calculation for the dscce
787 ix = initalXmitDelay + 45;
792 ax = (a + 2) / 3 + D + 6 + 1;
// L = ceil(ax / wx) using integer arithmetic
793 L = (ax + wx - 1) / wx;
794 if ((ix % w) == 0 && P != 0)
798 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
800 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
801 pixels = Delay * 3 * pixelsPerClock;
/*
 * dscComputeDelay - accumulate the delay of the DSC controller ("dscc")
 * stages into Delay, starting from 0.
 *
 * Three stage sequences are selected by @pixelFormat: dm_420, dm_n422, and
 * everything else. Each sequence walks the stages named in the comments below
 * (input deserializer, input cdc fifo, cdc uncertainty, output cdc fifo,
 * output serializer); only the dm_420 path has the extra "pixels every other
 * cycle" steps.
 *
 * NOTE(review): the per-stage increments and the return statement are not
 * visible in this listing, and @Output is not referenced by any visible line;
 * presumably Delay is returned -- confirm against the full file.
 */
805 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
807 unsigned int Delay = 0;
809 if (pixelFormat == dm_420) {
814 // dscc - input deserializer
816 // dscc gets pixels every other cycle
818 // dscc - input cdc fifo
820 // dscc gets pixels every other cycle
822 // dscc - cdc uncertainty
824 // dscc - output cdc fifo
826 // dscc gets pixels every other cycle
828 // dscc - cdc uncertainty
830 // dscc - output serializer
834 } else if (pixelFormat == dm_n422) {
839 // dscc - input deserializer
841 // dscc - input cdc fifo
843 // dscc - cdc uncertainty
845 // dscc - output cdc fifo
847 // dscc - cdc uncertainty
849 // dscc - output serializer
858 // dscc - input deserializer
860 // dscc - input cdc fifo
862 // dscc - cdc uncertainty
864 // dscc - output cdc fifo
866 // dscc - output serializer
868 // dscc - cdc uncertainty
877 static bool CalculatePrefetchSchedule(
878 struct display_mode_lib *mode_lib,
879 double HostVMInefficiencyFactor,
881 unsigned int DSCDelay,
882 double DPPCLKDelaySubtotalPlusCNVCFormater,
883 double DPPCLKDelaySCL,
884 double DPPCLKDelaySCLLBOnly,
885 double DPPCLKDelayCNVCCursor,
886 double DISPCLKDelaySubtotal,
887 unsigned int DPP_RECOUT_WIDTH,
888 enum output_format_class OutputFormat,
889 unsigned int MaxInterDCNTileRepeaters,
890 unsigned int VStartup,
891 unsigned int MaxVStartup,
892 unsigned int GPUVMPageTableLevels,
895 unsigned int HostVMMaxNonCachedPageTableLevels,
896 double HostVMMinPageSize,
897 bool DynamicMetadataEnable,
898 bool DynamicMetadataVMEnabled,
899 int DynamicMetadataLinesBeforeActiveRequired,
900 unsigned int DynamicMetadataTransmittedBytes,
901 double UrgentLatency,
902 double UrgentExtraLatency,
904 unsigned int PDEAndMetaPTEBytesFrame,
905 unsigned int MetaRowByte,
906 unsigned int PixelPTEBytesPerRow,
907 double PrefetchSourceLinesY,
908 unsigned int SwathWidthY,
909 double VInitPreFillY,
910 unsigned int MaxNumSwathY,
911 double PrefetchSourceLinesC,
912 unsigned int SwathWidthC,
913 double VInitPreFillC,
914 unsigned int MaxNumSwathC,
915 int swath_width_luma_ub,
916 int swath_width_chroma_ub,
917 unsigned int SwathHeightY,
918 unsigned int SwathHeightC,
920 double *DSTXAfterScaler,
921 double *DSTYAfterScaler,
922 double *DestinationLinesForPrefetch,
923 double *PrefetchBandwidth,
924 double *DestinationLinesToRequestVMInVBlank,
925 double *DestinationLinesToRequestRowInVBlank,
926 double *VRatioPrefetchY,
927 double *VRatioPrefetchC,
928 double *RequiredPrefetchPixDataBWLuma,
929 double *RequiredPrefetchPixDataBWChroma,
930 bool *NotEnoughTimeForDynamicMetadata,
932 double *prefetch_vmrow_bw,
936 int *VUpdateOffsetPix,
937 double *VUpdateWidthPix,
938 double *VReadyOffsetPix)
940 bool MyError = false;
941 unsigned int DPPCycles, DISPCLKCycles;
942 double DSTTotalPixelsAfterScaler;
944 double dst_y_prefetch_equ;
946 double prefetch_bw_oto;
947 double prefetch_bw_pr;
950 double Tvm_oto_lines;
951 double Tr0_oto_lines;
952 double dst_y_prefetch_oto;
953 double TimeForFetchingMetaPTE = 0;
954 double TimeForFetchingRowInVBlank = 0;
955 double LinesToRequestPrefetchPixelData = 0;
956 unsigned int HostVMDynamicLevelsTrips;
960 double Tvm_trips_rounded;
961 double Tr0_trips_rounded;
964 double prefetch_bw_equ;
970 double prefetch_sw_bytes;
973 int max_vratio_pre = 4;
979 if (GPUVMEnable == true && HostVMEnable == true) {
980 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
982 HostVMDynamicLevelsTrips = 0;
984 #ifdef __DML_VBA_DEBUG__
985 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
987 CalculateVupdateAndDynamicMetadataParameters(
988 MaxInterDCNTileRepeaters,
991 myPipe->DCFCLKDeepSleep,
995 DynamicMetadataTransmittedBytes,
996 DynamicMetadataLinesBeforeActiveRequired,
997 myPipe->InterlaceEnable,
998 myPipe->ProgressiveToInterlaceUnitInOPP,
1007 LineTime = myPipe->HTotal / myPipe->PixelClock;
1008 trip_to_mem = UrgentLatency;
1009 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
1011 #ifdef __DML_VBA_ALLOW_DELTA__
1012 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
1014 if (DynamicMetadataVMEnabled == true) {
1016 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
1018 *Tdmdl = TWait + UrgentExtraLatency;
1021 #ifdef __DML_VBA_ALLOW_DELTA__
1022 if (DynamicMetadataEnable == false) {
1027 if (DynamicMetadataEnable == true) {
1028 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
1029 *NotEnoughTimeForDynamicMetadata = true;
1030 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
1031 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
1032 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
1033 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
1034 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1036 *NotEnoughTimeForDynamicMetadata = false;
1039 *NotEnoughTimeForDynamicMetadata = false;
1042 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1044 if (myPipe->ScalerEnabled)
1045 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1047 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1049 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1051 DISPCLKCycles = DISPCLKDelaySubtotal;
1053 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1056 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1058 #ifdef __DML_VBA_DEBUG__
1059 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1060 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1061 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1062 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1063 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1064 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1065 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1066 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1069 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1071 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1072 *DSTYAfterScaler = 1;
1074 *DSTYAfterScaler = 0;
1076 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1077 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1078 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1080 #ifdef __DML_VBA_DEBUG__
1081 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1086 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1087 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1088 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1090 #ifdef __DML_VBA_ALLOW_DELTA__
1091 if (!myPipe->DCCEnable) {
1093 Tr0_trips_rounded = 0.0;
1099 Tvm_trips_rounded = 0.0;
1103 if (GPUVMPageTableLevels >= 3) {
1104 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1108 } else if (!myPipe->DCCEnable) {
1111 *Tno_bw = LineTime / 4;
1114 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1115 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1117 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1119 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1120 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1121 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1122 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1123 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1125 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1126 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1127 Tsw_oto = Lsw_oto * LineTime;
1129 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1131 #ifdef __DML_VBA_DEBUG__
1132 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1133 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1134 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1135 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1136 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1137 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1140 if (GPUVMEnable == true)
1141 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1143 Tvm_oto = LineTime / 4.0;
1145 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1146 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1150 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1153 #ifdef __DML_VBA_DEBUG__
1154 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1155 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1156 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1157 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1158 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1159 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1160 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1161 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1162 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1165 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1166 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1167 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1168 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1169 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1170 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1172 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1174 if (prefetch_sw_bytes < dep_bytes)
1175 prefetch_sw_bytes = 2 * dep_bytes;
1177 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1178 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1179 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1180 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1181 dml_print("DML: LineTime: %f\n", LineTime);
1182 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1184 dml_print("DML: LineTime: %f\n", LineTime);
1185 dml_print("DML: VStartup: %d\n", VStartup);
1186 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1187 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1188 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1189 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1190 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1191 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1192 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1193 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1194 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1195 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1196 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1198 *PrefetchBandwidth = 0;
1199 *DestinationLinesToRequestVMInVBlank = 0;
1200 *DestinationLinesToRequestRowInVBlank = 0;
1201 *VRatioPrefetchY = 0;
1202 *VRatioPrefetchC = 0;
1203 *RequiredPrefetchPixDataBWLuma = 0;
1204 if (dst_y_prefetch_equ > 1) {
1205 double PrefetchBandwidth1;
1206 double PrefetchBandwidth2;
1207 double PrefetchBandwidth3;
1208 double PrefetchBandwidth4;
1210 if (Tpre_rounded - *Tno_bw > 0) {
1211 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1212 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1213 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1215 PrefetchBandwidth1 = 0;
1218 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1219 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1220 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1223 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1224 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1226 PrefetchBandwidth2 = 0;
1228 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1229 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1230 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1231 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1233 PrefetchBandwidth3 = 0;
1236 #ifdef __DML_VBA_DEBUG__
1237 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1238 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1239 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1241 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1242 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1243 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1246 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1247 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1249 PrefetchBandwidth4 = 0;
1256 if (PrefetchBandwidth1 > 0) {
1257 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1258 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1267 if (PrefetchBandwidth2 > 0) {
1268 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1269 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1278 if (PrefetchBandwidth3 > 0) {
1279 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1280 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1290 prefetch_bw_equ = PrefetchBandwidth1;
1291 } else if (Case2OK) {
1292 prefetch_bw_equ = PrefetchBandwidth2;
1293 } else if (Case3OK) {
1294 prefetch_bw_equ = PrefetchBandwidth3;
1296 prefetch_bw_equ = PrefetchBandwidth4;
1299 #ifdef __DML_VBA_DEBUG__
1300 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1301 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1302 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1303 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1306 if (prefetch_bw_equ > 0) {
1307 if (GPUVMEnable == true) {
1308 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1310 Tvm_equ = LineTime / 4;
1313 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1315 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1317 (LineTime - Tvm_equ) / 2,
1320 Tr0_equ = (LineTime - Tvm_equ) / 2;
1325 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1329 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1330 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1331 TimeForFetchingMetaPTE = Tvm_oto;
1332 TimeForFetchingRowInVBlank = Tr0_oto;
1333 *PrefetchBandwidth = prefetch_bw_oto;
1335 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1336 TimeForFetchingMetaPTE = Tvm_equ;
1337 TimeForFetchingRowInVBlank = Tr0_equ;
1338 *PrefetchBandwidth = prefetch_bw_equ;
1341 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1343 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1345 #ifdef __DML_VBA_ALLOW_DELTA__
1346 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1347 // See note above dated 5/30/2018
1348 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1349 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1351 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1354 #ifdef __DML_VBA_DEBUG__
1355 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1356 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1357 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1358 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1359 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1360 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1361 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1364 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1366 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1367 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1368 #ifdef __DML_VBA_DEBUG__
1369 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1370 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1371 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1373 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1374 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1375 *VRatioPrefetchY = dml_max(
1376 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1377 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1378 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1381 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1382 *VRatioPrefetchY = 0;
1384 #ifdef __DML_VBA_DEBUG__
1385 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1386 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1387 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1391 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1392 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1394 #ifdef __DML_VBA_DEBUG__
1395 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1396 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1397 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1399 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1400 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1401 *VRatioPrefetchC = dml_max(
1403 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1404 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1407 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1408 *VRatioPrefetchC = 0;
1410 #ifdef __DML_VBA_DEBUG__
1411 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1412 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1413 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1417 #ifdef __DML_VBA_DEBUG__
1418 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1419 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1420 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1423 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1425 #ifdef __DML_VBA_DEBUG__
1426 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1429 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1433 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1434 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1435 *VRatioPrefetchY = 0;
1436 *VRatioPrefetchC = 0;
1437 *RequiredPrefetchPixDataBWLuma = 0;
1438 *RequiredPrefetchPixDataBWChroma = 0;
1442 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1443 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1444 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1445 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1447 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1448 (double) LinesToRequestPrefetchPixelData * LineTime);
1449 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1450 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1451 (double) myPipe->HTotal)) * LineTime);
1452 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1453 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1454 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1455 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1456 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1460 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1464 double prefetch_vm_bw;
1465 double prefetch_row_bw;
1467 if (PDEAndMetaPTEBytesFrame == 0) {
1469 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1470 #ifdef __DML_VBA_DEBUG__
1471 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1472 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1473 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1474 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1476 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1477 #ifdef __DML_VBA_DEBUG__
1478 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1483 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1486 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1487 prefetch_row_bw = 0;
1488 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1489 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1491 #ifdef __DML_VBA_DEBUG__
1492 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1493 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1494 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1495 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1498 prefetch_row_bw = 0;
1500 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1503 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1507 *PrefetchBandwidth = 0;
1508 TimeForFetchingMetaPTE = 0;
1509 TimeForFetchingRowInVBlank = 0;
1510 *DestinationLinesToRequestVMInVBlank = 0;
1511 *DestinationLinesToRequestRowInVBlank = 0;
1512 *DestinationLinesForPrefetch = 0;
1513 LinesToRequestPrefetchPixelData = 0;
1514 *VRatioPrefetchY = 0;
1515 *VRatioPrefetchC = 0;
1516 *RequiredPrefetchPixDataBWLuma = 0;
1517 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Snap Clock onto the DFS grid derived from VCOSpeed, in the "up" direction.
 *
 * The divider (VCOSpeed * 4 / Clock) is passed through dml_floor() with a
 * granularity of 1 and then divided back into VCOSpeed * 4.  Assuming
 * dml_floor() rounds down, the divider can only shrink, so the returned clock
 * is never lower than Clock.  No guard exists for a divider of 0.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * Snap Clock onto the DFS grid derived from VCOSpeed, in the "down" direction.
 *
 * The divider (VCOSpeed * 4.0 / Clock) is passed through dml_ceil() with a
 * granularity of 1 and then divided back into VCOSpeed * 4.  Assuming
 * dml_ceil() rounds up, the divider can only grow, so the returned clock is
 * never higher than Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);
	const double vco_x4 = VCOSpeed * 4;

	return vco_x4 / divider;
}
/*
 * CalculateDCCConfiguration() - choose the DCC request type for the luma and
 * chroma planes and report the corresponding block sizes.
 *
 * Steps, in order:
 *   1. Derive viewport width/height limits from DETBufferSize and the
 *      256-byte request heights, cap them with fixed "MAS" limits, and clamp
 *      the luma surface size to them (chroma is luma / (1 + yuv420)).
 *   2. Compute the bytes of a full swath for each plane in horizontal and in
 *      vertical scan (full_swath_bytes_*).
 *   3. Compare those byte counts against DETBufferSize to decide, per plane
 *      and per scan direction, whether 128-byte requests are needed (req128_*).
 *   4. Derive the segment_order_*_contiguous_* flags from bytes-per-pixel and
 *      TilingFormat.
 *   5. Combine (3) and (4) into RequestLuma / RequestChroma, using both scan
 *      directions when DCCProgrammingAssumesScanDirectionUnknown is true and
 *      only the direction selected by ScanOrientation otherwise.
 *   6. Translate the request types into the six output values, then zero them
 *      when DCC is disabled (and the chroma ones when BytePerPixelC == 0).
 *
 * Outputs are written through MaxUncompressedBlock{Luma,Chroma},
 * MaxCompressedBlock{Luma,Chroma} and IndependentBlock{Luma,Chroma}.
 *
 * NOTE(review): SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC do not appear to be read by this function -- confirm.
 */
1533 static void CalculateDCCConfiguration(
1535 bool DCCProgrammingAssumesScanDirectionUnknown,
1536 enum source_format_class SourcePixelFormat,
1537 unsigned int SurfaceWidthLuma,
1538 unsigned int SurfaceWidthChroma,
1539 unsigned int SurfaceHeightLuma,
1540 unsigned int SurfaceHeightChroma,
1541 double DETBufferSize,
1542 unsigned int RequestHeight256ByteLuma,
1543 unsigned int RequestHeight256ByteChroma,
1544 enum dm_swizzle_mode TilingFormat,
1545 unsigned int BytePerPixelY,
1546 unsigned int BytePerPixelC,
1547 double BytePerPixelDETY,
1548 double BytePerPixelDETC,
1549 enum scan_direction_class ScanOrientation,
1550 unsigned int *MaxUncompressedBlockLuma,
1551 unsigned int *MaxUncompressedBlockChroma,
1552 unsigned int *MaxCompressedBlockLuma,
1553 unsigned int *MaxCompressedBlockChroma,
1554 unsigned int *IndependentBlockLuma,
1555 unsigned int *IndependentBlockChroma)
1564 double detile_buf_vp_horz_limit;
1565 double detile_buf_vp_vert_limit;
1567 int MAS_vp_horz_limit;
1568 int MAS_vp_vert_limit;
1569 int max_vp_horz_width;
1570 int max_vp_vert_height;
1571 int eff_surf_width_l;
1572 int eff_surf_width_c;
1573 int eff_surf_height_l;
1574 int eff_surf_height_c;
1576 int full_swath_bytes_horz_wc_l;
1577 int full_swath_bytes_horz_wc_c;
1578 int full_swath_bytes_vert_wc_l;
1579 int full_swath_bytes_vert_wc_c;
1580 int req128_horz_wc_l;
1581 int req128_horz_wc_c;
1582 int req128_vert_wc_l;
1583 int req128_vert_wc_c;
1584 int segment_order_horz_contiguous_luma;
1585 int segment_order_horz_contiguous_chroma;
1586 int segment_order_vert_contiguous_luma;
1587 int segment_order_vert_contiguous_chroma;
// Possible request types; REQ_NA is not assigned anywhere in this function.
1590 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1592 RequestType RequestLuma;
1593 RequestType RequestChroma;
// yuv420 is 1 for the three 4:2:0 source formats and 0 for everything else.
1595 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
// Special cases keyed on 1-byte pixels and on 8-byte pixels with one of the
// dm_sw_64kb_s* swizzle modes, checked separately for luma and chroma.
// NOTE(review): presumably these adjust the horz_div_* / vert_div_* terms used
// in the limit formulas below -- confirm.
1601 if (BytePerPixelY == 1)
1603 if (BytePerPixelC == 1)
1605 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1607 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
// Detile-buffer viewport limits.  Without a chroma plane (BytePerPixelC == 0)
// the swath buffer is DETBufferSize / 2 minus 2 * 256 and only the luma cost
// appears in the divisor.
1610 if (BytePerPixelC == 0) {
1611 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1612 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1613 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
// Two-plane variant: 2 * 2 * 256 is subtracted instead, and the divisor adds
// the chroma cost, which is further divided by (1 + yuv420).
1615 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1616 detile_buf_vp_horz_limit = (double) swath_buf_size
1617 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1618 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1619 detile_buf_vp_vert_limit = (double) swath_buf_size
1620 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
// dm_420_10 scales both detile limits by 1.5.
1623 if (SourcePixelFormat == dm_420_10) {
1624 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1625 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
// (limit - 1) passed through dml_floor() with granularity 16; presumably this
// rounds down to a multiple of 16 -- confirm against dml_floor().
1628 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1629 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
// Fixed caps: horizontal 3840 for dm_rgbe_alpha, 5760 otherwise; vertical
// 2880 when a chroma plane exists, 5760 otherwise.
1631 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1632 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
// Effective limit is the smaller of the fixed cap and the detile limit; the
// double result is truncated into an int.
1633 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1634 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
// Clamp the luma surface size to the limits; chroma is derived from the
// clamped luma size (halved for 4:2:0, unchanged otherwise).
1635 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1636 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1637 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1638 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
// Bytes in one full swath: horizontal scan uses width * request height *
// bytes per pixel, vertical scan uses height * 256 / request height.
1640 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1641 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1642 if (BytePerPixelC > 0) {
1643 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1644 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
// No chroma plane: chroma swath sizes are zero.
1646 full_swath_bytes_horz_wc_c = 0;
1647 full_swath_bytes_vert_wc_c = 0;
// dm_420_10: scale every swath size by 2/3.  The operands are int, so
// "* 2 / 3" is integer arithmetic that truncates before dml_ceil(..., 256)
// is applied.
1650 if (SourcePixelFormat == dm_420_10) {
1651 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1652 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1653 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1654 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
// Horizontal scan: decide which planes need 128-byte requests.
//   - two luma + two chroma swaths fit in DETBufferSize -> neither plane
//   - luma < 1.5 * chroma and 2 * luma + chroma fits     -> chroma only
//   - luma >= 1.5 * chroma and luma + 2 * chroma fits    -> luma only
//   - remaining case                                     -> both planes
1657 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1658 req128_horz_wc_l = 0;
1659 req128_horz_wc_c = 0;
1660 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1661 req128_horz_wc_l = 0;
1662 req128_horz_wc_c = 1;
1663 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1664 req128_horz_wc_l = 1;
1665 req128_horz_wc_c = 0;
1667 req128_horz_wc_l = 1;
1668 req128_horz_wc_c = 1;
// Vertical scan: same four-way decision using the vertical swath sizes.
1671 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1672 req128_vert_wc_l = 0;
1673 req128_vert_wc_c = 0;
1674 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1675 req128_vert_wc_l = 0;
1676 req128_vert_wc_c = 1;
1677 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1678 req128_vert_wc_l = 1;
1679 req128_vert_wc_c = 0;
1681 req128_vert_wc_l = 1;
1682 req128_vert_wc_c = 1;
// Luma segment order.  Horizontal is 0 for 2 bytes/pixel, or for 4 bytes/pixel
// unless tiling is dm_sw_64kb_r_x; the other assignment sets 1.
1685 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1686 segment_order_horz_contiguous_luma = 0;
1688 segment_order_horz_contiguous_luma = 1;
// Vertical is 0 for 8 bytes/pixel with a dm_sw_64kb_d* or dm_sw_64kb_r_x
// tiling, or for 4 bytes/pixel with dm_sw_64kb_r_x; the other assignment
// sets 1.
1690 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1691 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1692 segment_order_vert_contiguous_luma = 0;
1694 segment_order_vert_contiguous_luma = 1;
// Chroma segment order: identical rules applied to BytePerPixelC.
1696 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1697 segment_order_horz_contiguous_chroma = 0;
1699 segment_order_horz_contiguous_chroma = 1;
1701 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1702 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1703 segment_order_vert_contiguous_chroma = 0;
1705 segment_order_vert_contiguous_chroma = 1;
// Scan direction unknown: 256-byte requests only if neither direction needs
// 128-byte requests; non-contiguous if any direction that needs them has a
// segment order of 0; contiguous 128-byte requests in the remaining case.
1708 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1709 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1710 RequestLuma = REQ_256Bytes;
1711 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1712 RequestLuma = REQ_128BytesNonContiguous;
1714 RequestLuma = REQ_128BytesContiguous;
1716 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1717 RequestChroma = REQ_256Bytes;
1718 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1719 RequestChroma = REQ_128BytesNonContiguous;
1721 RequestChroma = REQ_128BytesContiguous;
// Known scan direction, not dm_vert: only the horizontal results are used.
1723 } else if (ScanOrientation != dm_vert) {
1724 if (req128_horz_wc_l == 0) {
1725 RequestLuma = REQ_256Bytes;
1726 } else if (segment_order_horz_contiguous_luma == 0) {
1727 RequestLuma = REQ_128BytesNonContiguous;
1729 RequestLuma = REQ_128BytesContiguous;
1731 if (req128_horz_wc_c == 0) {
1732 RequestChroma = REQ_256Bytes;
1733 } else if (segment_order_horz_contiguous_chroma == 0) {
1734 RequestChroma = REQ_128BytesNonContiguous;
1736 RequestChroma = REQ_128BytesContiguous;
// Remaining scan-direction case: only the vertical results are used.
1739 if (req128_vert_wc_l == 0) {
1740 RequestLuma = REQ_256Bytes;
1741 } else if (segment_order_vert_contiguous_luma == 0) {
1742 RequestLuma = REQ_128BytesNonContiguous;
1744 RequestLuma = REQ_128BytesContiguous;
1746 if (req128_vert_wc_c == 0) {
1747 RequestChroma = REQ_256Bytes;
1748 } else if (segment_order_vert_contiguous_chroma == 0) {
1749 RequestChroma = REQ_128BytesNonContiguous;
1751 RequestChroma = REQ_128BytesContiguous;
// Luma outputs as (uncompressed, compressed, independent):
//   REQ_256Bytes -> 256 / 256 / 0
//   REQ_128BytesContiguous -> 256 / 128 / 128
//   remaining request types -> 256 / 64 / 64
1755 if (RequestLuma == REQ_256Bytes) {
1756 *MaxUncompressedBlockLuma = 256;
1757 *MaxCompressedBlockLuma = 256;
1758 *IndependentBlockLuma = 0;
1759 } else if (RequestLuma == REQ_128BytesContiguous) {
1760 *MaxUncompressedBlockLuma = 256;
1761 *MaxCompressedBlockLuma = 128;
1762 *IndependentBlockLuma = 128;
1764 *MaxUncompressedBlockLuma = 256;
1765 *MaxCompressedBlockLuma = 64;
1766 *IndependentBlockLuma = 64;
// Chroma outputs follow the same mapping from RequestChroma.
1769 if (RequestChroma == REQ_256Bytes) {
1770 *MaxUncompressedBlockChroma = 256;
1771 *MaxCompressedBlockChroma = 256;
1772 *IndependentBlockChroma = 0;
1773 } else if (RequestChroma == REQ_128BytesContiguous) {
1774 *MaxUncompressedBlockChroma = 256;
1775 *MaxCompressedBlockChroma = 128;
1776 *IndependentBlockChroma = 128;
1778 *MaxUncompressedBlockChroma = 256;
1779 *MaxCompressedBlockChroma = 64;
1780 *IndependentBlockChroma = 64;
// Final override: chroma outputs are zeroed when DCC is off or there is no
// chroma plane; luma outputs are zeroed when DCC is off.
1783 if (DCCEnabled != true || BytePerPixelC == 0) {
1784 *MaxUncompressedBlockChroma = 0;
1785 *MaxCompressedBlockChroma = 0;
1786 *IndependentBlockChroma = 0;
1789 if (DCCEnabled != true) {
1790 *MaxUncompressedBlockLuma = 0;
1791 *MaxCompressedBlockLuma = 0;
1792 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines() - number of source lines to prefetch for one
 * plane.
 *
 * Writes *VInitPreFill and *MaxNumSwath, and returns
 * *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * Two formulas exist for each intermediate value; which one applies depends
 * on ProgressiveToInterlaceUnitInOPP (for *VInitPreFill) and on
 * mode_lib->vba.IgnoreViewportPositioning (for *MaxNumSwath and
 * MaxPartialSwath).
 *
 * SwathHeight is used as a divisor and as a modulus without a zero check.
 */
1796 static double CalculatePrefetchSourceLines(
1797 struct display_mode_lib *mode_lib,
1801 bool ProgressiveToInterlaceUnitInOPP,
1802 unsigned int SwathHeight,
1803 unsigned int ViewportYStart,
1804 double *VInitPreFill,
1805 unsigned int *MaxNumSwath)
1807 struct vba_vars_st *v = &mode_lib->vba;
1808 unsigned int MaxPartialSwath;
// *VInitPreFill is (VRatio + vtaps + 1) / 2 passed through dml_floor(..., 1);
// the second formula adds Interlace * 0.5 * VRatio to the numerator.
1810 if (ProgressiveToInterlaceUnitInOPP)
1811 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1813 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
// Viewport positioning honoured: swath count is based on (*VInitPreFill - 1)
// plus one extra swath, and the partial swath uses an offset of 2.
1815 if (!v->IgnoreViewportPositioning) {
1817 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
// Adding SwathHeight in the second formula keeps the unsigned operand of %
// from being computed from a negative value when *VInitPreFill <= 1.
1819 if (*VInitPreFill > 1.0)
1820 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1822 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
// Enforce a partial swath of at least 1 line on this path only.
1823 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
// Viewport positioning ignored: a y position of 0 is assumed; a non-zero
// ViewportYStart only produces the warning below.
1827 if (ViewportYStart != 0)
1828 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1830 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
// Same partial-swath computation as above but with an offset of 1 and no
// lower clamp.
1832 if (*VInitPreFill > 1.0)
1833 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1835 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
// Debug-only dump of the inputs and results.
1838 #ifdef __DML_VBA_DEBUG__
1839 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1840 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1841 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1842 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1843 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1844 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1845 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1846 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1847 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
// Full swaths plus the trailing partial swath; the unsigned sum is converted
// to the double return type.
1849 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1852 static unsigned int CalculateVMAndRowBytes(
1853 struct display_mode_lib *mode_lib,
1855 unsigned int BlockHeight256Bytes,
1856 unsigned int BlockWidth256Bytes,
1857 enum source_format_class SourcePixelFormat,
1858 unsigned int SurfaceTiling,
1859 unsigned int BytePerPixel,
1860 enum scan_direction_class ScanDirection,
1861 unsigned int SwathWidth,
1862 unsigned int ViewportHeight,
1865 unsigned int HostVMMaxNonCachedPageTableLevels,
1866 unsigned int GPUVMMinPageSize,
1867 unsigned int HostVMMinPageSize,
1868 unsigned int PTEBufferSizeInRequests,
1870 unsigned int DCCMetaPitch,
1871 unsigned int *MacroTileWidth,
1872 unsigned int *MetaRowByte,
1873 unsigned int *PixelPTEBytesPerRow,
1874 bool *PTEBufferSizeNotExceeded,
1875 int *dpte_row_width_ub,
1876 unsigned int *dpte_row_height,
1877 unsigned int *MetaRequestWidth,
1878 unsigned int *MetaRequestHeight,
1879 unsigned int *meta_row_width,
1880 unsigned int *meta_row_height,
1881 int *vm_group_bytes,
1882 unsigned int *dpte_group_bytes,
1883 unsigned int *PixelPTEReqWidth,
1884 unsigned int *PixelPTEReqHeight,
1885 unsigned int *PTERequestSize,
1886 int *DPDE0BytesFrame,
1887 int *MetaPTEBytesFrame)
1889 struct vba_vars_st *v = &mode_lib->vba;
1890 unsigned int MPDEBytesFrame;
1891 unsigned int DCCMetaSurfaceBytes;
1892 unsigned int MacroTileSizeBytes;
1893 unsigned int MacroTileHeight;
1894 unsigned int ExtraDPDEBytesFrame;
1895 unsigned int PDEAndMetaPTEBytesFrame;
1896 unsigned int PixelPTEReqHeightPTEs = 0;
1897 unsigned int HostVMDynamicLevels = 0;
1898 double FractionOfPTEReturnDrop;
1900 if (GPUVMEnable == true && HostVMEnable == true) {
1901 if (HostVMMinPageSize < 2048) {
1902 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1903 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1904 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1906 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1910 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1911 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1912 if (ScanDirection != dm_vert) {
1913 *meta_row_height = *MetaRequestHeight;
1914 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1915 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1917 *meta_row_height = *MetaRequestWidth;
1918 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1919 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1921 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1922 if (GPUVMEnable == true) {
1923 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1924 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1926 *MetaPTEBytesFrame = 0;
1930 if (DCCEnable != true) {
1931 *MetaPTEBytesFrame = 0;
1936 if (SurfaceTiling == dm_sw_linear) {
1937 MacroTileSizeBytes = 256;
1938 MacroTileHeight = BlockHeight256Bytes;
1940 MacroTileSizeBytes = 65536;
1941 MacroTileHeight = 16 * BlockHeight256Bytes;
1943 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1945 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1946 if (ScanDirection != dm_vert) {
1947 *DPDE0BytesFrame = 64
1949 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1953 *DPDE0BytesFrame = 64
1955 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1959 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1961 *DPDE0BytesFrame = 0;
1962 ExtraDPDEBytesFrame = 0;
1965 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1967 #ifdef __DML_VBA_DEBUG__
1968 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1969 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1970 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1971 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1972 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1975 if (HostVMEnable == true) {
1976 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1978 #ifdef __DML_VBA_DEBUG__
1979 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1982 if (SurfaceTiling == dm_sw_linear) {
1983 PixelPTEReqHeightPTEs = 1;
1984 *PixelPTEReqHeight = 1;
1985 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1986 *PTERequestSize = 64;
1987 FractionOfPTEReturnDrop = 0;
1988 } else if (MacroTileSizeBytes == 4096) {
1989 PixelPTEReqHeightPTEs = 1;
1990 *PixelPTEReqHeight = MacroTileHeight;
1991 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1992 *PTERequestSize = 64;
1993 if (ScanDirection != dm_vert)
1994 FractionOfPTEReturnDrop = 0;
1996 FractionOfPTEReturnDrop = 7 / 8;
1997 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1998 PixelPTEReqHeightPTEs = 16;
1999 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2000 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2001 *PTERequestSize = 128;
2002 FractionOfPTEReturnDrop = 0;
2004 PixelPTEReqHeightPTEs = 1;
2005 *PixelPTEReqHeight = MacroTileHeight;
2006 *PixelPTEReqWidth = 8 * *MacroTileWidth;
2007 *PTERequestSize = 64;
2008 FractionOfPTEReturnDrop = 0;
2011 if (SurfaceTiling == dm_sw_linear) {
2012 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2013 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2014 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2015 } else if (ScanDirection != dm_vert) {
2016 *dpte_row_height = *PixelPTEReqHeight;
2017 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2018 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2020 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
2021 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
2022 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2025 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
2026 *PTEBufferSizeNotExceeded = true;
2028 *PTEBufferSizeNotExceeded = false;
2031 if (GPUVMEnable != true) {
2032 *PixelPTEBytesPerRow = 0;
2033 *PTEBufferSizeNotExceeded = true;
2036 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2038 if (HostVMEnable == true) {
2039 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2042 if (HostVMEnable == true) {
2043 *vm_group_bytes = 512;
2044 *dpte_group_bytes = 512;
2045 } else if (GPUVMEnable == true) {
2046 *vm_group_bytes = 2048;
2047 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2048 *dpte_group_bytes = 512;
2050 *dpte_group_bytes = 2048;
2053 *vm_group_bytes = 0;
2054 *dpte_group_bytes = 0;
2056 return PDEAndMetaPTEBytesFrame;
2059 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2061 struct vba_vars_st *v = &mode_lib->vba;
2063 double HostVMInefficiencyFactor = 1.0;
2064 bool NoChromaPlanes = true;
2066 double VMDataOnlyReturnBW;
2067 double MaxTotalRDBandwidth = 0;
2068 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2070 v->WritebackDISPCLK = 0.0;
2071 v->DISPCLKWithRamping = 0;
2072 v->DISPCLKWithoutRamping = 0;
2073 v->GlobalDPPCLK = 0.0;
2074 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2076 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2077 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2078 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2079 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2080 if (v->HostVMEnable != true) {
2081 v->ReturnBW = dml_min(
2082 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2083 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2085 v->ReturnBW = dml_min(
2086 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2087 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2090 /* End DAL custom code */
2092 // DISPCLK and DPPCLK Calculation
2094 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2095 if (v->WritebackEnable[k]) {
2096 v->WritebackDISPCLK = dml_max(
2097 v->WritebackDISPCLK,
2098 dml31_CalculateWriteBackDISPCLK(
2099 v->WritebackPixelFormat[k],
2101 v->WritebackHRatio[k],
2102 v->WritebackVRatio[k],
2103 v->WritebackHTaps[k],
2104 v->WritebackVTaps[k],
2105 v->WritebackSourceWidth[k],
2106 v->WritebackDestinationWidth[k],
2108 v->WritebackLineBufferSize));
2112 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2113 if (v->HRatio[k] > 1) {
2114 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2115 v->MaxDCHUBToPSCLThroughput,
2116 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2118 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2121 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2123 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2124 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2126 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2127 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2130 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2131 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2132 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2133 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2135 if (v->HRatioChroma[k] > 1) {
2136 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2137 v->MaxDCHUBToPSCLThroughput,
2138 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2140 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2142 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2144 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2145 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2148 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2149 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2152 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2156 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2157 if (v->BlendingAndTiming[k] != k)
2159 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2160 v->DISPCLKWithRamping = dml_max(
2161 v->DISPCLKWithRamping,
2162 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2163 * (1 + v->DISPCLKRampingMargin / 100));
2164 v->DISPCLKWithoutRamping = dml_max(
2165 v->DISPCLKWithoutRamping,
2166 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2167 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2168 v->DISPCLKWithRamping = dml_max(
2169 v->DISPCLKWithRamping,
2170 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2171 * (1 + v->DISPCLKRampingMargin / 100));
2172 v->DISPCLKWithoutRamping = dml_max(
2173 v->DISPCLKWithoutRamping,
2174 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2176 v->DISPCLKWithRamping = dml_max(
2177 v->DISPCLKWithRamping,
2178 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2179 v->DISPCLKWithoutRamping = dml_max(
2180 v->DISPCLKWithoutRamping,
2181 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2185 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2186 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2188 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2189 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2190 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2191 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2192 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2193 v->DISPCLKDPPCLKVCOSpeed);
2194 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2195 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2196 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2197 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2199 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2201 v->DISPCLK = v->DISPCLK_calculated;
2202 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2204 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2205 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2206 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2208 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2209 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2210 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2211 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2214 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2215 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2218 // Urgent and B P-State/DRAM Clock Change Watermark
2219 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2220 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2222 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2223 CalculateBytePerPixelAnd256BBlockSizes(
2224 v->SourcePixelFormat[k],
2225 v->SurfaceTiling[k],
2226 &v->BytePerPixelY[k],
2227 &v->BytePerPixelC[k],
2228 &v->BytePerPixelDETY[k],
2229 &v->BytePerPixelDETC[k],
2230 &v->BlockHeight256BytesY[k],
2231 &v->BlockHeight256BytesC[k],
2232 &v->BlockWidth256BytesY[k],
2233 &v->BlockWidth256BytesC[k]);
2236 CalculateSwathWidth(
2238 v->NumberOfActivePlanes,
2239 v->SourcePixelFormat,
2247 v->ODMCombineEnabled,
2250 v->BlockHeight256BytesY,
2251 v->BlockHeight256BytesC,
2252 v->BlockWidth256BytesY,
2253 v->BlockWidth256BytesC,
2254 v->BlendingAndTiming,
2258 v->SwathWidthSingleDPPY,
2259 v->SwathWidthSingleDPPC,
2264 v->swath_width_luma_ub,
2265 v->swath_width_chroma_ub);
2267 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2268 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2270 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2271 * v->VRatioChroma[k];
2272 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2275 // DCFCLK Deep Sleep
2276 CalculateDCFCLKDeepSleep(
2278 v->NumberOfActivePlanes,
2289 v->PSCL_THROUGHPUT_LUMA,
2290 v->PSCL_THROUGHPUT_CHROMA,
2292 v->ReadBandwidthPlaneLuma,
2293 v->ReadBandwidthPlaneChroma,
2295 &v->DCFCLKDeepSleep);
2298 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2299 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2300 v->DSCCLK_calculated[k] = 0.0;
2302 if (v->OutputFormat[k] == dm_420)
2303 v->DSCFormatFactor = 2;
2304 else if (v->OutputFormat[k] == dm_444)
2305 v->DSCFormatFactor = 1;
2306 else if (v->OutputFormat[k] == dm_n422)
2307 v->DSCFormatFactor = 2;
2309 v->DSCFormatFactor = 1;
2310 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2311 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2312 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2313 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2314 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2315 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2317 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2318 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2323 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2324 double BPP = v->OutputBpp[k];
2326 if (v->DSCEnabled[k] && BPP != 0) {
2327 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2328 v->DSCDelay[k] = dscceComputeDelay(
2329 v->DSCInputBitPerComponent[k],
2331 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2332 v->NumberOfDSCSlices[k],
2334 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2335 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2337 * (dscceComputeDelay(
2338 v->DSCInputBitPerComponent[k],
2340 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2341 v->NumberOfDSCSlices[k] / 2.0,
2343 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2346 * (dscceComputeDelay(
2347 v->DSCInputBitPerComponent[k],
2349 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2350 v->NumberOfDSCSlices[k] / 4.0,
2352 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2354 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2360 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2361 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2362 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2363 v->DSCDelay[k] = v->DSCDelay[j];
2366 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2367 unsigned int PDEAndMetaPTEBytesFrameY;
2368 unsigned int PixelPTEBytesPerRowY;
2369 unsigned int MetaRowByteY;
2370 unsigned int MetaRowByteC;
2371 unsigned int PDEAndMetaPTEBytesFrameC;
2372 unsigned int PixelPTEBytesPerRowC;
2373 bool PTEBufferSizeNotExceededY;
2374 bool PTEBufferSizeNotExceededC;
2376 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2377 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2378 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2379 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2380 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2382 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2383 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2386 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2389 v->BlockHeight256BytesC[k],
2390 v->BlockWidth256BytesC[k],
2391 v->SourcePixelFormat[k],
2392 v->SurfaceTiling[k],
2393 v->BytePerPixelC[k],
2396 v->ViewportHeightChroma[k],
2399 v->HostVMMaxNonCachedPageTableLevels,
2400 v->GPUVMMinPageSize,
2401 v->HostVMMinPageSize,
2402 v->PTEBufferSizeInRequestsForChroma,
2404 v->DCCMetaPitchC[k],
2405 &v->MacroTileWidthC[k],
2407 &PixelPTEBytesPerRowC,
2408 &PTEBufferSizeNotExceededC,
2409 &v->dpte_row_width_chroma_ub[k],
2410 &v->dpte_row_height_chroma[k],
2411 &v->meta_req_width_chroma[k],
2412 &v->meta_req_height_chroma[k],
2413 &v->meta_row_width_chroma[k],
2414 &v->meta_row_height_chroma[k],
2417 &v->PixelPTEReqWidthC[k],
2418 &v->PixelPTEReqHeightC[k],
2419 &v->PTERequestSizeC[k],
2420 &v->dpde0_bytes_per_frame_ub_c[k],
2421 &v->meta_pte_bytes_per_frame_ub_c[k]);
2423 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2428 v->ProgressiveToInterlaceUnitInOPP,
2430 v->ViewportYStartC[k],
2431 &v->VInitPreFillC[k],
2432 &v->MaxNumSwathC[k]);
2434 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2435 v->PTEBufferSizeInRequestsForChroma = 0;
2436 PixelPTEBytesPerRowC = 0;
2437 PDEAndMetaPTEBytesFrameC = 0;
2439 v->MaxNumSwathC[k] = 0;
2440 v->PrefetchSourceLinesC[k] = 0;
2443 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2446 v->BlockHeight256BytesY[k],
2447 v->BlockWidth256BytesY[k],
2448 v->SourcePixelFormat[k],
2449 v->SurfaceTiling[k],
2450 v->BytePerPixelY[k],
2453 v->ViewportHeight[k],
2456 v->HostVMMaxNonCachedPageTableLevels,
2457 v->GPUVMMinPageSize,
2458 v->HostVMMinPageSize,
2459 v->PTEBufferSizeInRequestsForLuma,
2461 v->DCCMetaPitchY[k],
2462 &v->MacroTileWidthY[k],
2464 &PixelPTEBytesPerRowY,
2465 &PTEBufferSizeNotExceededY,
2466 &v->dpte_row_width_luma_ub[k],
2467 &v->dpte_row_height[k],
2468 &v->meta_req_width[k],
2469 &v->meta_req_height[k],
2470 &v->meta_row_width[k],
2471 &v->meta_row_height[k],
2472 &v->vm_group_bytes[k],
2473 &v->dpte_group_bytes[k],
2474 &v->PixelPTEReqWidthY[k],
2475 &v->PixelPTEReqHeightY[k],
2476 &v->PTERequestSizeY[k],
2477 &v->dpde0_bytes_per_frame_ub_l[k],
2478 &v->meta_pte_bytes_per_frame_ub_l[k]);
2480 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2485 v->ProgressiveToInterlaceUnitInOPP,
2487 v->ViewportYStartY[k],
2488 &v->VInitPreFillY[k],
2489 &v->MaxNumSwathY[k]);
2490 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2491 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2492 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2494 CalculateRowBandwidth(
2496 v->SourcePixelFormat[k],
2500 v->HTotal[k] / v->PixelClock[k],
2503 v->meta_row_height[k],
2504 v->meta_row_height_chroma[k],
2505 PixelPTEBytesPerRowY,
2506 PixelPTEBytesPerRowC,
2507 v->dpte_row_height[k],
2508 v->dpte_row_height_chroma[k],
2510 &v->dpte_row_bw[k]);
2513 v->TotalDCCActiveDPP = 0;
2514 v->TotalActiveDPP = 0;
2515 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2516 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2517 if (v->DCCEnable[k])
2518 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2519 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2520 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2521 NoChromaPlanes = false;
2524 ReorderBytes = v->NumberOfChannels
2526 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2527 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2528 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2530 VMDataOnlyReturnBW = dml_min(
2531 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2532 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2533 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2534 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2536 #ifdef __DML_VBA_DEBUG__
2537 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2538 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2539 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2540 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2541 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2542 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2543 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2544 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2545 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2546 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2547 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2550 if (v->GPUVMEnable && v->HostVMEnable)
2551 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2553 v->UrgentExtraLatency = CalculateExtraLatency(
2554 v->RoundTripPingLatencyCycles,
2558 v->PixelChunkSizeInKByte,
2559 v->TotalDCCActiveDPP,
2564 v->NumberOfActivePlanes,
2566 v->dpte_group_bytes,
2567 HostVMInefficiencyFactor,
2568 v->HostVMMinPageSize,
2569 v->HostVMMaxNonCachedPageTableLevels);
2571 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2574 if (v->BlendingAndTiming[k] == k) {
2575 if (v->WritebackEnable[k] == true) {
2576 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2577 + CalculateWriteBackDelay(
2578 v->WritebackPixelFormat[k],
2579 v->WritebackHRatio[k],
2580 v->WritebackVRatio[k],
2581 v->WritebackVTaps[k],
2582 v->WritebackDestinationWidth[k],
2583 v->WritebackDestinationHeight[k],
2584 v->WritebackSourceHeight[k],
2585 v->HTotal[k]) / v->DISPCLK;
2587 v->WritebackDelay[v->VoltageLevel][k] = 0;
2588 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2589 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2590 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2591 v->WritebackDelay[v->VoltageLevel][k],
2593 + CalculateWriteBackDelay(
2594 v->WritebackPixelFormat[j],
2595 v->WritebackHRatio[j],
2596 v->WritebackVRatio[j],
2597 v->WritebackVTaps[j],
2598 v->WritebackDestinationWidth[j],
2599 v->WritebackDestinationHeight[j],
2600 v->WritebackSourceHeight[j],
2601 v->HTotal[k]) / v->DISPCLK);
2607 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2608 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2609 if (v->BlendingAndTiming[k] == j)
2610 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2612 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2613 v->MaxVStartupLines[k] =
2614 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2615 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2616 v->VTotal[k] - v->VActive[k]
2620 (double) v->WritebackDelay[v->VoltageLevel][k]
2621 / (v->HTotal[k] / v->PixelClock[k]),
2623 if (v->MaxVStartupLines[k] > 1023)
2624 v->MaxVStartupLines[k] = 1023;
2626 #ifdef __DML_VBA_DEBUG__
2627 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2628 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2629 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2633 v->MaximumMaxVStartupLines = 0;
2634 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2635 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2638 // We don't really care to iterate between the various prefetch modes
2639 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2641 v->UrgentLatency = CalculateUrgentLatency(
2642 v->UrgentLatencyPixelDataOnly,
2643 v->UrgentLatencyPixelMixedWithVMData,
2644 v->UrgentLatencyVMDataOnly,
2645 v->DoUrgentLatencyAdjustment,
2646 v->UrgentLatencyAdjustmentFabricClockComponent,
2647 v->UrgentLatencyAdjustmentFabricClockReference,
2650 v->FractionOfUrgentBandwidth = 0.0;
2651 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2653 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2656 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2657 bool DestinationLineTimesForPrefetchLessThan2 = false;
2658 bool VRatioPrefetchMoreThan4 = false;
2659 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2660 MaxTotalRDBandwidth = 0;
2662 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2664 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2667 myPipe.DPPCLK = v->DPPCLK[k];
2668 myPipe.DISPCLK = v->DISPCLK;
2669 myPipe.PixelClock = v->PixelClock[k];
2670 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2671 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2672 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2673 myPipe.VRatio = v->VRatio[k];
2674 myPipe.VRatioChroma = v->VRatioChroma[k];
2675 myPipe.SourceScan = v->SourceScan[k];
2676 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2677 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2678 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2679 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2680 myPipe.InterlaceEnable = v->Interlace[k];
2681 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2682 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2683 myPipe.HTotal = v->HTotal[k];
2684 myPipe.DCCEnable = v->DCCEnable[k];
2685 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2686 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2687 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2688 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2689 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2690 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2691 v->ErrorResult[k] = CalculatePrefetchSchedule(
2693 HostVMInefficiencyFactor,
2696 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2698 v->DPPCLKDelaySCLLBOnly,
2699 v->DPPCLKDelayCNVCCursor,
2700 v->DISPCLKDelaySubtotal,
2701 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2703 v->MaxInterDCNTileRepeaters,
2704 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2705 v->MaxVStartupLines[k],
2706 v->GPUVMMaxPageTableLevels,
2709 v->HostVMMaxNonCachedPageTableLevels,
2710 v->HostVMMinPageSize,
2711 v->DynamicMetadataEnable[k],
2712 v->DynamicMetadataVMEnabled,
2713 v->DynamicMetadataLinesBeforeActiveRequired[k],
2714 v->DynamicMetadataTransmittedBytes[k],
2716 v->UrgentExtraLatency,
2718 v->PDEAndMetaPTEBytesFrame[k],
2720 v->PixelPTEBytesPerRow[k],
2721 v->PrefetchSourceLinesY[k],
2723 v->VInitPreFillY[k],
2725 v->PrefetchSourceLinesC[k],
2727 v->VInitPreFillC[k],
2729 v->swath_width_luma_ub[k],
2730 v->swath_width_chroma_ub[k],
2734 &v->DSTXAfterScaler[k],
2735 &v->DSTYAfterScaler[k],
2736 &v->DestinationLinesForPrefetch[k],
2737 &v->PrefetchBandwidth[k],
2738 &v->DestinationLinesToRequestVMInVBlank[k],
2739 &v->DestinationLinesToRequestRowInVBlank[k],
2740 &v->VRatioPrefetchY[k],
2741 &v->VRatioPrefetchC[k],
2742 &v->RequiredPrefetchPixDataBWLuma[k],
2743 &v->RequiredPrefetchPixDataBWChroma[k],
2744 &v->NotEnoughTimeForDynamicMetadata[k],
2746 &v->prefetch_vmrow_bw[k],
2750 &v->VUpdateOffsetPix[k],
2751 &v->VUpdateWidthPix[k],
2752 &v->VReadyOffsetPix[k]);
2754 #ifdef __DML_VBA_DEBUG__
2755 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2757 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2760 v->NoEnoughUrgentLatencyHiding = false;
2761 v->NoEnoughUrgentLatencyHidingPre = false;
2763 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2764 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2765 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2766 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2767 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2769 CalculateUrgentBurstFactor(
2770 v->swath_width_luma_ub[k],
2771 v->swath_width_chroma_ub[k],
2774 v->HTotal[k] / v->PixelClock[k],
2776 v->CursorBufferSize,
2777 v->CursorWidth[k][0],
2781 v->BytePerPixelDETY[k],
2782 v->BytePerPixelDETC[k],
2783 v->DETBufferSizeY[k],
2784 v->DETBufferSizeC[k],
2785 &v->UrgBurstFactorCursor[k],
2786 &v->UrgBurstFactorLuma[k],
2787 &v->UrgBurstFactorChroma[k],
2788 &v->NoUrgentLatencyHiding[k]);
2790 CalculateUrgentBurstFactor(
2791 v->swath_width_luma_ub[k],
2792 v->swath_width_chroma_ub[k],
2795 v->HTotal[k] / v->PixelClock[k],
2797 v->CursorBufferSize,
2798 v->CursorWidth[k][0],
2800 v->VRatioPrefetchY[k],
2801 v->VRatioPrefetchC[k],
2802 v->BytePerPixelDETY[k],
2803 v->BytePerPixelDETC[k],
2804 v->DETBufferSizeY[k],
2805 v->DETBufferSizeC[k],
2806 &v->UrgBurstFactorCursorPre[k],
2807 &v->UrgBurstFactorLumaPre[k],
2808 &v->UrgBurstFactorChromaPre[k],
2809 &v->NoUrgentLatencyHidingPre[k]);
2811 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2813 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2814 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2815 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2816 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2817 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2819 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2820 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2821 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2823 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2825 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2826 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2827 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2828 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2829 + v->cursor_bw_pre[k]);
2831 #ifdef __DML_VBA_DEBUG__
2832 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2833 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2834 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2835 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2836 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2838 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2839 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2841 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2842 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2843 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2844 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2845 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2846 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2847 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2848 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2849 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2850 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2853 if (v->DestinationLinesForPrefetch[k] < 2)
2854 DestinationLineTimesForPrefetchLessThan2 = true;
2856 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2857 VRatioPrefetchMoreThan4 = true;
2859 if (v->NoUrgentLatencyHiding[k] == true)
2860 v->NoEnoughUrgentLatencyHiding = true;
2862 if (v->NoUrgentLatencyHidingPre[k] == true)
2863 v->NoEnoughUrgentLatencyHidingPre = true;
2866 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2868 #ifdef __DML_VBA_DEBUG__
2869 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2870 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2871 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2874 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2875 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2876 v->PrefetchModeSupported = true;
2878 v->PrefetchModeSupported = false;
2879 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2880 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2881 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2882 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2886 // This error result check was done after the PrefetchModeSupported. So we will
2887 // still try to calculate flip schedule even prefetch mode not supported
2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2889 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2890 v->PrefetchModeSupported = false;
2891 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2895 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2896 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2897 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2898 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2900 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2901 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2902 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2904 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2905 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2906 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2909 v->TotImmediateFlipBytes = 0;
2910 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2911 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2912 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2914 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2915 CalculateFlipSchedule(
2917 HostVMInefficiencyFactor,
2918 v->UrgentExtraLatency,
2920 v->GPUVMMaxPageTableLevels,
2922 v->HostVMMaxNonCachedPageTableLevels,
2924 v->HostVMMinPageSize,
2925 v->PDEAndMetaPTEBytesFrame[k],
2927 v->PixelPTEBytesPerRow[k],
2928 v->BandwidthAvailableForImmediateFlip,
2929 v->TotImmediateFlipBytes,
2930 v->SourcePixelFormat[k],
2931 v->HTotal[k] / v->PixelClock[k],
2936 v->dpte_row_height[k],
2937 v->meta_row_height[k],
2938 v->dpte_row_height_chroma[k],
2939 v->meta_row_height_chroma[k],
2940 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2941 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2942 &v->final_flip_bw[k],
2943 &v->ImmediateFlipSupportedForPipe[k]);
2946 v->total_dcn_read_bw_with_flip = 0.0;
2947 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2948 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2949 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2951 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2952 v->DPPPerPlane[k] * v->final_flip_bw[k]
2953 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2954 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2955 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2957 * (v->final_flip_bw[k]
2958 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2959 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2960 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2961 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2963 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2964 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2965 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2967 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2968 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2970 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2972 v->ImmediateFlipSupported = true;
2973 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2974 #ifdef __DML_VBA_DEBUG__
2975 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2977 v->ImmediateFlipSupported = false;
2978 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2980 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2981 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2982 #ifdef __DML_VBA_DEBUG__
2983 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2986 v->ImmediateFlipSupported = false;
2990 v->ImmediateFlipSupported = false;
2993 v->PrefetchAndImmediateFlipSupported =
2994 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2995 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2996 v->ImmediateFlipSupported)) ? true : false;
2997 #ifdef __DML_VBA_DEBUG__
2998 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2999 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
3000 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
3001 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
3002 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
3003 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
3005 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
3007 v->VStartupLines = v->VStartupLines + 1;
3008 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
3009 ASSERT(v->PrefetchAndImmediateFlipSupported);
3011 // Unbounded Request Enabled
3012 CalculateUnboundedRequestAndCompressedBufferSize(
3013 v->DETBufferSizeInKByte[0],
3014 v->ConfigReturnBufferSizeInKByte,
3015 v->UseUnboundedRequesting,
3019 v->CompressedBufferSegmentSizeInkByte,
3021 &v->UnboundedRequestEnabled,
3022 &v->CompressedBufferSizeInkByte);
3024 //Watermarks and NB P-State/DRAM Clock Change Support
3026 enum clock_change_support DRAMClockChangeSupport; // dummy
3027 CalculateWatermarksAndDRAMSpeedChangeSupport(
3030 v->NumberOfActivePlanes,
3031 v->MaxLineBufferLines,
3033 v->WritebackInterfaceBufferSize,
3036 v->SynchronizedVBlank,
3037 v->dpte_group_bytes,
3040 v->UrgentExtraLatency,
3041 v->WritebackLatency,
3042 v->WritebackChunkSize,
3044 v->DRAMClockChangeLatency,
3046 v->SREnterPlusExitTime,
3048 v->SREnterPlusExitZ8Time,
3065 v->BlendingAndTiming,
3067 v->BytePerPixelDETY,
3068 v->BytePerPixelDETC,
3072 v->WritebackPixelFormat,
3073 v->WritebackDestinationWidth,
3074 v->WritebackDestinationHeight,
3075 v->WritebackSourceHeight,
3076 v->UnboundedRequestEnabled,
3077 v->CompressedBufferSizeInkByte,
3078 &DRAMClockChangeSupport,
3079 &v->UrgentWatermark,
3080 &v->WritebackUrgentWatermark,
3081 &v->DRAMClockChangeWatermark,
3082 &v->WritebackDRAMClockChangeWatermark,
3083 &v->StutterExitWatermark,
3084 &v->StutterEnterPlusExitWatermark,
3085 &v->Z8StutterExitWatermark,
3086 &v->Z8StutterEnterPlusExitWatermark,
3087 &v->MinActiveDRAMClockChangeLatencySupported);
3089 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3090 if (v->WritebackEnable[k] == true) {
3091 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3093 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3095 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3100 //Display Pipeline Delivery Time in Prefetch, Groups
3101 CalculatePixelDeliveryTimes(
3102 v->NumberOfActivePlanes,
3107 v->swath_width_luma_ub,
3108 v->swath_width_chroma_ub,
3113 v->PSCL_THROUGHPUT_LUMA,
3114 v->PSCL_THROUGHPUT_CHROMA,
3121 v->BlockWidth256BytesY,
3122 v->BlockHeight256BytesY,
3123 v->BlockWidth256BytesC,
3124 v->BlockHeight256BytesC,
3125 v->DisplayPipeLineDeliveryTimeLuma,
3126 v->DisplayPipeLineDeliveryTimeChroma,
3127 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3128 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3129 v->DisplayPipeRequestDeliveryTimeLuma,
3130 v->DisplayPipeRequestDeliveryTimeChroma,
3131 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3132 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3133 v->CursorRequestDeliveryTime,
3134 v->CursorRequestDeliveryTimePrefetch);
3136 CalculateMetaAndPTETimes(
3137 v->NumberOfActivePlanes,
3140 v->MinMetaChunkSizeBytes,
3144 v->DestinationLinesToRequestRowInVBlank,
3145 v->DestinationLinesToRequestRowInImmediateFlip,
3152 v->dpte_row_height_chroma,
3154 v->meta_row_width_chroma,
3156 v->meta_row_height_chroma,
3158 v->meta_req_width_chroma,
3160 v->meta_req_height_chroma,
3161 v->dpte_group_bytes,
3164 v->PixelPTEReqWidthY,
3165 v->PixelPTEReqHeightY,
3166 v->PixelPTEReqWidthC,
3167 v->PixelPTEReqHeightC,
3168 v->dpte_row_width_luma_ub,
3169 v->dpte_row_width_chroma_ub,
3170 v->DST_Y_PER_PTE_ROW_NOM_L,
3171 v->DST_Y_PER_PTE_ROW_NOM_C,
3172 v->DST_Y_PER_META_ROW_NOM_L,
3173 v->DST_Y_PER_META_ROW_NOM_C,
3174 v->TimePerMetaChunkNominal,
3175 v->TimePerChromaMetaChunkNominal,
3176 v->TimePerMetaChunkVBlank,
3177 v->TimePerChromaMetaChunkVBlank,
3178 v->TimePerMetaChunkFlip,
3179 v->TimePerChromaMetaChunkFlip,
3180 v->time_per_pte_group_nom_luma,
3181 v->time_per_pte_group_vblank_luma,
3182 v->time_per_pte_group_flip_luma,
3183 v->time_per_pte_group_nom_chroma,
3184 v->time_per_pte_group_vblank_chroma,
3185 v->time_per_pte_group_flip_chroma);
3187 CalculateVMGroupAndRequestTimes(
3188 v->NumberOfActivePlanes,
3190 v->GPUVMMaxPageTableLevels,
3193 v->DestinationLinesToRequestVMInVBlank,
3194 v->DestinationLinesToRequestVMInImmediateFlip,
3197 v->dpte_row_width_luma_ub,
3198 v->dpte_row_width_chroma_ub,
3200 v->dpde0_bytes_per_frame_ub_l,
3201 v->dpde0_bytes_per_frame_ub_c,
3202 v->meta_pte_bytes_per_frame_ub_l,
3203 v->meta_pte_bytes_per_frame_ub_c,
3204 v->TimePerVMGroupVBlank,
3205 v->TimePerVMGroupFlip,
3206 v->TimePerVMRequestVBlank,
3207 v->TimePerVMRequestFlip);
3210 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3211 if (PrefetchMode == 0) {
3212 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3213 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3214 v->MinTTUVBlank[k] = dml_max(
3215 v->DRAMClockChangeWatermark,
3216 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3217 } else if (PrefetchMode == 1) {
3218 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3219 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3220 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3222 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3223 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3224 v->MinTTUVBlank[k] = v->UrgentWatermark;
3226 if (!v->DynamicMetadataEnable[k])
3227 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3230 // DCC Configuration
3232 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3233 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3234 v->SourcePixelFormat[k],
3235 v->SurfaceWidthY[k],
3236 v->SurfaceWidthC[k],
3237 v->SurfaceHeightY[k],
3238 v->SurfaceHeightC[k],
3239 v->DETBufferSizeInKByte[0] * 1024,
3240 v->BlockHeight256BytesY[k],
3241 v->BlockHeight256BytesC[k],
3242 v->SurfaceTiling[k],
3243 v->BytePerPixelY[k],
3244 v->BytePerPixelC[k],
3245 v->BytePerPixelDETY[k],
3246 v->BytePerPixelDETC[k],
3248 &v->DCCYMaxUncompressedBlock[k],
3249 &v->DCCCMaxUncompressedBlock[k],
3250 &v->DCCYMaxCompressedBlock[k],
3251 &v->DCCCMaxCompressedBlock[k],
3252 &v->DCCYIndependentBlock[k],
3253 &v->DCCCIndependentBlock[k]);
3256 // VStartup Adjustment
3257 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3258 bool isInterlaceTiming;
3259 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3260 #ifdef __DML_VBA_DEBUG__
3261 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3264 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3266 #ifdef __DML_VBA_DEBUG__
3267 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3268 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3269 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3270 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3273 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3274 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3275 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3278 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3280 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3281 - v->VFrontPorch[k])
3282 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3283 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3285 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3287 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3288 <= (isInterlaceTiming ?
3289 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3290 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3291 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3293 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3295 #ifdef __DML_VBA_DEBUG__
3296 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3297 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3298 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3299 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3300 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3301 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3302 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3303 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3304 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3305 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3306 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3311 //Maximum Bandwidth Used
3312 double TotalWRBandwidth = 0;
3313 double MaxPerPlaneVActiveWRBandwidth = 0;
3314 double WRBandwidth = 0;
3315 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3316 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3317 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3318 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3319 } else if (v->WritebackEnable[k] == true) {
3320 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3321 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3323 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3324 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3327 v->TotalDataReadBandwidth = 0;
3328 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3329 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3332 // Stutter Efficiency
3333 CalculateStutterEfficiency(
3335 v->CompressedBufferSizeInkByte,
3336 v->UnboundedRequestEnabled,
3337 v->ConfigReturnBufferSizeInKByte,
3338 v->MetaFIFOSizeInKEntries,
3339 v->ZeroSizeBufferEntries,
3340 v->NumberOfActivePlanes,
3341 v->ROBBufferSizeInKByte,
3342 v->TotalDataReadBandwidth,
3345 v->COMPBUF_RESERVED_SPACE_64B,
3346 v->COMPBUF_RESERVED_SPACE_ZS,
3349 v->SynchronizedVBlank,
3350 v->StutterEnterPlusExitWatermark,
3351 v->Z8StutterEnterPlusExitWatermark,
3352 v->ProgressiveToInterlaceUnitInOPP,
3358 v->BytePerPixelDETY,
3364 v->DCCFractionOfZeroSizeRequestsLuma,
3365 v->DCCFractionOfZeroSizeRequestsChroma,
3371 v->BlockHeight256BytesY,
3372 v->BlockWidth256BytesY,
3373 v->BlockHeight256BytesC,
3374 v->BlockWidth256BytesC,
3375 v->DCCYMaxUncompressedBlock,
3376 v->DCCCMaxUncompressedBlock,
3380 v->ReadBandwidthPlaneLuma,
3381 v->ReadBandwidthPlaneChroma,
3384 &v->StutterEfficiencyNotIncludingVBlank,
3385 &v->StutterEfficiency,
3386 &v->NumberOfStutterBurstsPerFrame,
3387 &v->Z8StutterEfficiencyNotIncludingVBlank,
3388 &v->Z8StutterEfficiency,
3389 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - look up the 256-byte block geometry of every
 * active plane, then run the swath/DET configuration pass.
 *
 * Operates on mode_lib->vba (aliased as v). The per-plane lookup results are
 * stored in local arrays; the Read256Bytes* arrays are then handed to
 * CalculateSwathAndDETConfiguration() together with fields of v.
 *
 * NOTE(review): several argument lines of both calls are not visible in this
 * view, so the full argument lists cannot be documented from here.
 */
3393 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3395 struct vba_vars_st *v = &mode_lib->vba;
3396 // Display Pipe Configuration
// Per-plane scratch arrays, one slot per possible DPP.
3397 double BytePerPixDETY[DC__NUM_DPP__MAX];
3398 double BytePerPixDETC[DC__NUM_DPP__MAX];
3399 int BytePerPixY[DC__NUM_DPP__MAX];
3400 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these are int arrays, but CalculateBytePerPixelAnd256BBlockSizes()
// declares its block-size outputs as unsigned int * -- confirm the signedness
// mismatch is intended.
3401 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3402 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3403 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3404 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// dummy1..dummy7 / dummysinglestring: presumably sinks for outputs this caller
// does not use; only &dummysinglestring is visibly passed below -- confirm.
3405 double dummy1[DC__NUM_DPP__MAX];
3406 double dummy2[DC__NUM_DPP__MAX];
3407 double dummy3[DC__NUM_DPP__MAX];
3408 double dummy4[DC__NUM_DPP__MAX];
3409 int dummy5[DC__NUM_DPP__MAX];
3410 int dummy6[DC__NUM_DPP__MAX];
3411 bool dummy7[DC__NUM_DPP__MAX];
3412 bool dummysinglestring;
// Step 1: per-plane block geometry from the plane's pixel format and tiling.
3416 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3418 CalculateBytePerPixelAnd256BBlockSizes(
3419 v->SourcePixelFormat[k],
3420 v->SurfaceTiling[k],
3425 &Read256BytesBlockHeightY[k],
3426 &Read256BytesBlockHeightC[k],
3427 &Read256BytesBlockWidthY[k],
3428 &Read256BytesBlockWidthC[k]);
// Step 2: one swath/DET configuration call covering all active planes; it
// receives whole arrays rather than per-plane elements.
3431 CalculateSwathAndDETConfiguration(
3433 v->NumberOfActivePlanes,
// DET buffer size is taken from index 0 only.
3434 v->DETBufferSizeInKByte[0],
3438 v->SourcePixelFormat,
3446 Read256BytesBlockHeightY,
3447 Read256BytesBlockHeightC,
3448 Read256BytesBlockWidthY,
3449 Read256BytesBlockWidthC,
3450 v->ODMCombineEnabled,
3451 v->BlendingAndTiming,
3469 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - derive bytes-per-pixel values and
 * the width/height of a 256-byte block from a source pixel format and tiling.
 *
 * Inputs:  SourcePixelFormat, SurfaceTiling.
 * Outputs: everything is written through the pointer parameters.
 *
 * Block width is always computed as 256 / bytes-per-pixel / block-height.
 *
 * NOTE(review): the assignments to *BytePerPixelY / *BytePerPixelC and the
 * function's return statement are not visible in this view; the block-width
 * divisions below rely on those values being set (and non-zero) first.
 */
3472 static bool CalculateBytePerPixelAnd256BBlockSizes(
3473 enum source_format_class SourcePixelFormat,
3474 enum dm_swizzle_mode SurfaceTiling,
3475 unsigned int *BytePerPixelY,
3476 unsigned int *BytePerPixelC,
3477 double *BytePerPixelDETY,
3478 double *BytePerPixelDETC,
3479 unsigned int *BlockHeight256BytesY,
3480 unsigned int *BlockHeight256BytesC,
3481 unsigned int *BlockWidth256BytesY,
3482 unsigned int *BlockWidth256BytesC)
// Part 1: DET bytes per pixel, selected by pixel format. A DETC of 0 marks
// formats handled here without a second (chroma) plane.
3484 if (SourcePixelFormat == dm_444_64) {
3485 *BytePerPixelDETY = 8;
3486 *BytePerPixelDETC = 0;
3489 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3490 *BytePerPixelDETY = 4;
3491 *BytePerPixelDETC = 0;
3494 } else if (SourcePixelFormat == dm_444_16) {
3495 *BytePerPixelDETY = 2;
3496 *BytePerPixelDETC = 0;
3499 } else if (SourcePixelFormat == dm_444_8) {
3500 *BytePerPixelDETY = 1;
3501 *BytePerPixelDETC = 0;
3504 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3505 *BytePerPixelDETY = 4;
3506 *BytePerPixelDETC = 1;
3509 } else if (SourcePixelFormat == dm_420_8) {
3510 *BytePerPixelDETY = 1;
3511 *BytePerPixelDETC = 2;
3514 } else if (SourcePixelFormat == dm_420_12) {
3515 *BytePerPixelDETY = 2;
3516 *BytePerPixelDETC = 4;
// Fallback for every format not matched above: fractional DET sizes
// (4/3 and 8/3 bytes). Presumably the 10-bit 4:2:0 case -- confirm.
3520 *BytePerPixelDETY = 4.0 / 3;
3521 *BytePerPixelDETC = 8.0 / 3;
// Part 2: 256-byte block geometry.
// Formats in this list only get a luma block; chroma height/width are set to 0.
3526 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3527 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
// Linear surfaces use a block height of 1; otherwise the height depends on
// the format (4 for 64bpp, 16 for 8bpp, 8 for the remaining ones).
3528 if (SurfaceTiling == dm_sw_linear) {
3529 *BlockHeight256BytesY = 1;
3530 } else if (SourcePixelFormat == dm_444_64) {
3531 *BlockHeight256BytesY = 4;
3532 } else if (SourcePixelFormat == dm_444_8) {
3533 *BlockHeight256BytesY = 16;
3535 *BlockHeight256BytesY = 8;
3537 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3538 *BlockHeight256BytesC = 0;
3539 *BlockWidth256BytesC = 0;
// All other formats get both a luma and a chroma block.
3541 if (SurfaceTiling == dm_sw_linear) {
3542 *BlockHeight256BytesY = 1;
3543 *BlockHeight256BytesC = 1;
3544 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3545 *BlockHeight256BytesY = 8;
3546 *BlockHeight256BytesC = 16;
3547 } else if (SourcePixelFormat == dm_420_8) {
3548 *BlockHeight256BytesY = 16;
3549 *BlockHeight256BytesC = 8;
3551 *BlockHeight256BytesY = 8;
3552 *BlockHeight256BytesC = 8;
// Unsigned integer division; *BytePerPixelC must be non-zero on this path.
3554 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3555 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - pick the wait time that applies to a given prefetch mode.
 *
 *   PrefetchMode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                       SREnterPlusExitTime, UrgentLatency)
 *   PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency)
 *   otherwise:      UrgentLatency
 *
 * Returns the selected value in the same unit as its inputs.
 */
3560 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
3562 if (PrefetchMode == 0) {
// Mode 0 considers all three latency sources.
3563 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
3564 } else if (PrefetchMode == 1) {
// Mode 1 drops the DRAM clock change term.
3565 return dml_max(SREnterPlusExitTime, UrgentLatency);
// Any other mode: only urgent latency counts.
3567 return UrgentLatency;
/*
 * dml31_CalculateWriteBackDISPCLK - DISPCLK needed by a writeback
 * configuration.
 *
 * Three candidate clocks are derived from the pixel clock (a horizontal-tap
 * term, a vertical-tap term and a line-buffer term) and the largest one is
 * returned.
 *
 * WritebackPixelFormat and WritebackVRatio are not referenced by the visible
 * body.
 *
 * NOTE(review): PixelClock is used below, but its parameter declaration is
 * not visible in this view.
 */
3571 double dml31_CalculateWriteBackDISPCLK(
3572 enum source_format_class WritebackPixelFormat,
3574 double WritebackHRatio,
3575 double WritebackVRatio,
3576 unsigned int WritebackHTaps,
3577 unsigned int WritebackVTaps,
3578 long WritebackSourceWidth,
3579 long WritebackDestinationWidth,
3580 unsigned int HTotal,
3581 unsigned int WritebackLineBufferSize)
3583 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
// Horizontal term: taps handled in groups of 8, scaled by 1 / WritebackHRatio.
3585 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
// Vertical term: per-line work (VTaps * ceil(width / 6) + 8) relative to HTotal.
3586 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
// Line-buffer term: depends on destination width, taps and line buffer size,
// relative to the source width.
3587 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3588 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - delay contributed by the writeback path.
 *
 * Computes how many output lines remain after the vertical scaler has
 * consumed the source (Output_lines_last_notclamped). If that count is
 * negative the delay is 0; otherwise it is
 *   lines * Line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not referenced by the visible
 * body.
 *
 * NOTE(review): the declaration of Line_length is not visible in this view.
 */
3591 static double CalculateWriteBackDelay(
3592 enum source_format_class WritebackPixelFormat,
3593 double WritebackHRatio,
3594 double WritebackVRatio,
3595 unsigned int WritebackVTaps,
3596 int WritebackDestinationWidth,
3597 int WritebackDestinationHeight,
3598 int WritebackSourceHeight,
3599 unsigned int HTotal)
3601 double CalculateWriteBackDelay;
3603 double Output_lines_last_notclamped;
3604 double WritebackVInit;
// Initial vertical position derived from the ratio and the tap count.
3606 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
// Line length is the larger of the destination width and ceil(width / 6) * VTaps.
3607 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3608 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3609 if (Output_lines_last_notclamped < 0) {
3610 CalculateWriteBackDelay = 0;
// NOTE(review): HTotal is unsigned, so (HTotal - WritebackDestinationWidth) is
// evaluated in unsigned arithmetic and would wrap if the width exceeded
// HTotal -- presumably never the case for valid timings; confirm.
3612 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3614 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive the VUPDATE/VREADY
 * pixel offsets and the dynamic-metadata timing terms for one pipe.
 *
 * Outputs written through pointers:
 *   *VUpdateWidthPix, *VReadyOffsetPix, *VUpdateOffsetPix - pixel counts,
 *       each rounded up with dml_ceil().
 *   *TSetup - sum of the three pixel counts divided by PixelClock.
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / DISPCLK.
 *   *Tdmec  - one line time (HTotal / PixelClock).
 *   *Tdmsks - half of the vblank time when no "lines before active" value is
 *       required, otherwise that many line times; halved again for interlaced
 *       timing that is not converted in the OPP.
 *
 * NOTE(review): the declarations of DPPCLK, DISPCLK, PixelClock, HTotal,
 * VBlank, TSetup, Tdmbf, Tdmec and Tdmsks are not visible in this view.
 */
3617 static void CalculateVupdateAndDynamicMetadataParameters(
3618 int MaxInterDCNTileRepeaters,
3621 double DCFClkDeepSleep,
3625 int DynamicMetadataTransmittedBytes,
3626 int DynamicMetadataLinesBeforeActiveRequired,
3627 int InterlaceEnable,
3628 bool ProgressiveToInterlaceUnitInOPP,
3633 int *VUpdateOffsetPix,
3634 double *VUpdateWidthPix,
3635 double *VReadyOffsetPix)
3637 double TotalRepeaterDelayTime;
// Delay through the inter-tile repeaters: 2 DPPCLK + 3 DISPCLK cycles each.
3639 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
3640 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3641 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
// VUPDATE offset is a quarter of the line, rounded up.
3642 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
3643 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3644 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
3645 *Tdmec = HTotal / PixelClock;
3646 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3647 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3649 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3651 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3652 *Tdmsks = *Tdmsks / 2;
3654 #ifdef __DML_VBA_DEBUG__
// *VUpdateWidthPix and *VReadyOffsetPix are doubles, so they are printed with
// %f; passing a double to %d is a format/argument mismatch.
3655 dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
3656 dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
3657 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * CalculateRowBandwidth - bandwidth needed to fetch DCC meta rows and page
 * table (dpte) rows for one plane.
 *
 * Outputs:
 *   *meta_row_bw - VRatio * MetaRowBytes / (meta_row_height * LineTime),
 *       plus the matching chroma term for the two-plane formats.
 *   *dpte_row_bw - same shape, using PixelPTEBytesPerRow and dpte_row_height.
 *
 * Formats treated as two-plane here: dm_420_8, dm_420_10, dm_420_12 and
 * dm_rgbe_alpha.
 *
 * NOTE(review): the bodies of the "DCCEnable != true" and
 * "GPUVMEnable != true" branches, and the declarations of GPUVMEnable,
 * DCCEnable, VRatio and LineTime, are not visible in this view.
 */
3661 static void CalculateRowBandwidth(
3663 enum source_format_class SourcePixelFormat,
3665 double VRatioChroma,
3668 unsigned int MetaRowByteLuma,
3669 unsigned int MetaRowByteChroma,
3670 unsigned int meta_row_height_luma,
3671 unsigned int meta_row_height_chroma,
3672 unsigned int PixelPTEBytesPerRowLuma,
3673 unsigned int PixelPTEBytesPerRowChroma,
3674 unsigned int dpte_row_height_luma,
3675 unsigned int dpte_row_height_chroma,
3676 double *meta_row_bw,
3677 double *dpte_row_bw)
// Meta row bandwidth: only computed from the formulas below when DCC is on.
3679 if (DCCEnable != true) {
3681 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
// Two-plane format: luma term plus chroma term.
3682 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
// Single-plane format: luma term only.
3684 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// Page-table row bandwidth: only computed from the formulas below when GPUVM is on.
3687 if (GPUVMEnable != true) {
3689 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3690 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3691 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3693 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - decide whether one pipe can service an immediate
 * flip and how much bandwidth that flip needs.
 *
 * Outputs:
 *   *DestinationLinesToRequestVMInImmediateFlip  - lines needed to fetch the
 *       PDE / meta PTE data.
 *   *DestinationLinesToRequestRowInImmediateFlip - lines needed to fetch one
 *       meta row plus one dpte row.
 *   *final_flip_bw - bandwidth required to fetch the above in those lines.
 *   *ImmediateFlipSupportedForPipe - false when either line count is too
 *       large or the fetch does not fit in the minimum row time.
 *
 * NOTE(review): the declarations of GPUVMEnable, HostVMEnable, DCCEnable,
 * LineTime, VRatio and Tno_bw, some max3() arguments and some else-branch
 * bodies are not visible in this view.
 */
3697 static void CalculateFlipSchedule(
3698 struct display_mode_lib *mode_lib,
3699 double HostVMInefficiencyFactor,
3700 double UrgentExtraLatency,
3701 double UrgentLatency,
3702 unsigned int GPUVMMaxPageTableLevels,
3704 unsigned int HostVMMaxNonCachedPageTableLevels,
3706 double HostVMMinPageSize,
3707 double PDEAndMetaPTEBytesPerFrame,
3708 double MetaRowBytes,
3709 double DPTEBytesPerRow,
3710 double BandwidthAvailableForImmediateFlip,
3711 unsigned int TotImmediateFlipBytes,
3712 enum source_format_class SourcePixelFormat,
3715 double VRatioChroma,
3718 unsigned int dpte_row_height,
3719 unsigned int meta_row_height,
3720 unsigned int dpte_row_height_chroma,
3721 unsigned int meta_row_height_chroma,
3722 double *DestinationLinesToRequestVMInImmediateFlip,
3723 double *DestinationLinesToRequestRowInImmediateFlip,
3724 double *final_flip_bw,
3725 bool *ImmediateFlipSupportedForPipe)
3727 double min_row_time = 0.0;
3728 unsigned int HostVMDynamicLevelsTrips;
3729 double TimeForFetchingMetaPTEImmediateFlip;
3730 double TimeForFetchingRowInVBlankImmediateFlip;
3731 double ImmediateFlipBW;
// Host VM page-table trips only count when both GPUVM and HostVM are enabled.
3733 if (GPUVMEnable == true && HostVMEnable == true) {
3734 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3736 HostVMDynamicLevelsTrips = 0;
// This pipe's share of the flip bandwidth, proportional to its flip bytes
// out of TotImmediateFlipBytes.
3739 if (GPUVMEnable == true || DCCEnable == true) {
3740 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
// Time to fetch the PDE / meta PTE data: the larger of the bandwidth-limited
// time and the latency-limited time (further max3 arguments not visible).
3743 if (GPUVMEnable == true) {
3744 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3745 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3746 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3749 TimeForFetchingMetaPTEImmediateFlip = 0;
// Convert to lines in quarter-line steps (assuming dml_ceil(x, 1) rounds up
// to a whole number).
3752 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Time to fetch one meta row plus one dpte row.
3753 if ((GPUVMEnable == true || DCCEnable == true)) {
3754 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3755 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3756 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3759 TimeForFetchingRowInVBlankImmediateFlip = 0;
3762 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Bandwidth actually needed once the line counts have been rounded.
3764 if (GPUVMEnable == true) {
3765 *final_flip_bw = dml_max(
3766 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3767 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
// Only reachable with GPUVMEnable false, i.e. when DCC alone is enabled.
3768 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3769 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
// Minimum time one row stays on screen; two-plane formats also consider chroma.
// NOTE(review): dm_420_12 is absent from this list although
// CalculateRowBandwidth() treats it as a two-plane format -- confirm intended.
3774 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3775 if (GPUVMEnable == true && DCCEnable != true) {
3776 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3777 } else if (GPUVMEnable != true && DCCEnable == true) {
3778 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3780 min_row_time = dml_min4(
3781 dpte_row_height * LineTime / VRatio,
3782 meta_row_height * LineTime / VRatio,
3783 dpte_row_height_chroma * LineTime / VRatioChroma,
3784 meta_row_height_chroma * LineTime / VRatioChroma);
3787 if (GPUVMEnable == true && DCCEnable != true) {
3788 min_row_time = dpte_row_height * LineTime / VRatio;
3789 } else if (GPUVMEnable != true && DCCEnable == true) {
3790 min_row_time = meta_row_height * LineTime / VRatio;
3792 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
// Unsupported when the VM fetch needs 32+ lines, the row fetch needs 16+
// lines, or VM time plus two row times exceeds the minimum row time.
3796 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3797 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3798 *ImmediateFlipSupportedForPipe = false;
3800 *ImmediateFlipSupportedForPipe = true;
3803 #ifdef __DML_VBA_DEBUG__
3804 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3805 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3806 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3807 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3808 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3809 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
/*
 * TruncToValidBPP - pick or validate an output bits-per-pixel value against
 * what the link and (optionally) DSC can carry.
 *
 * The output Format selects the DSC upper bound MaxDSCBPP, which sits 1/16
 * bpp below a multiple of DSCInputBitPerComponent. MaxLinkBPP is derived
 * from the link rate, lane count and pixel clock, reduced by 2.4% when DSC is
 * used on a DP output, and compared against 16 / 32 for 4:1 / 2:1 ODM
 * combine. With DesiredBPP == 0 a value is chosen from those limits;
 * otherwise DesiredBPP is checked against them.
 *
 * NOTE(review): most parameter and local declarations, the non-DSC bpp
 * constants and the return statements are not visible in this view.
 */
3814 static double TruncToValidBPP(
3822 enum output_encoder_class Output,
3823 enum output_format_class Format,
3824 unsigned int DSCInputBitPerComponent,
3828 enum odm_combine_mode ODMCombine)
3837 if (Format == dm_420) {
// 1.0 / 16 keeps the 1/16 bpp term in floating point, matching the other
// Format branches; the integer expression 1 / 16 evaluates to 0.
3842 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3843 } else if (Format == dm_444) {
3848 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3855 if (Format == dm_n422) {
3857 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3860 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link capacity in bits per pixel; DSC over DP gives up 2.4% of it.
3864 if (DSCEnable && Output == dm_dp) {
3865 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3867 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM combine thresholds on the link bpp (branch bodies not visible here).
3870 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3872 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
// DesiredBPP == 0: choose a bpp from the limits computed above.
3876 if (DesiredBPP == 0) {
3878 if (MaxLinkBPP < MinDSCBPP) {
3880 } else if (MaxLinkBPP >= MaxDSCBPP) {
// Link-limited DSC case: round the link bpp down to a multiple of 1/16.
3883 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3886 if (MaxLinkBPP >= NonDSCBPP2) {
3888 } else if (MaxLinkBPP >= NonDSCBPP1) {
3890 } else if (MaxLinkBPP >= NonDSCBPP0) {
// Explicit DesiredBPP: without DSC it must equal NonDSCBPP2 or NonDSCBPP1, or
// be at most NonDSCBPP0; with DSC it must lie within [MinDSCBPP, MaxDSCBPP].
3897 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3898 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
/*
 * CalculatePrefetchSchedulePerPlane - run CalculatePrefetchSchedule() for one
 * (i, j, k) combination.
 *
 * Gathers inputs from mode_lib->vba into the local myPipe, calls
 * CalculatePrefetchSchedule() and stores its return value in
 * v->NoTimeForPrefetch[i][j][k]. Addresses of further vba fields are passed
 * to the call, presumably as outputs it fills in - confirm against
 * CalculatePrefetchSchedule().
 *
 * NOTE(review): i, j and k look like the voltage-state, MPC-combine-option and
 * plane indices used elsewhere in this file - confirm against the callers.
 *
 * @mode_lib: display mode library instance; its vba member is read and written
 * @HostVMInefficiencyFactor: forwarded unchanged to CalculatePrefetchSchedule()
 */
3907 static noinline void CalculatePrefetchSchedulePerPlane(
3908 struct display_mode_lib *mode_lib,
3909 double HostVMInefficiencyFactor,
3914 struct vba_vars_st *v = &mode_lib->vba;
/* Per-state clocks and DPP count for this (i, j, k); remaining fields are per-plane. */
3917 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3918 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3919 myPipe.PixelClock = v->PixelClock[k];
3920 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3921 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3922 myPipe.ScalerEnabled = v->ScalerEnabled[k];
/* mode_lib->vba.X is the same storage as v->X (v == &mode_lib->vba). */
3923 myPipe.VRatio = mode_lib->vba.VRatio[k];
3924 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3926 myPipe.SourceScan = v->SourceScan[k];
3927 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3928 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3929 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3930 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3931 myPipe.InterlaceEnable = v->Interlace[k];
3932 myPipe.NumberOfCursors = v->NumberOfCursors[k];
/* VBlank is the vertical total minus the active lines. */
3933 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3934 myPipe.HTotal = v->HTotal[k];
3935 myPipe.DCCEnable = v->DCCEnable[k];
/* Either 4to1 or 2to1 ODM combine counts as "enabled". */
3936 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3937 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3938 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3939 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3940 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3941 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
/* The call's return value is recorded per (i, j, k) in NoTimeForPrefetch. */
3942 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3944 HostVMInefficiencyFactor,
3946 v->DSCDelayPerState[i][k],
3947 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3949 v->DPPCLKDelaySCLLBOnly,
3950 v->DPPCLKDelayCNVCCursor,
3951 v->DISPCLKDelaySubtotal,
3952 v->SwathWidthYThisState[k] / v->HRatio[k],
3954 v->MaxInterDCNTileRepeaters,
/* First the per-plane maximum clamped to v->MaxVStartup, then the unclamped maximum. */
3955 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3956 v->MaximumVStartup[i][j][k],
3957 v->GPUVMMaxPageTableLevels,
3960 v->HostVMMaxNonCachedPageTableLevels,
3961 v->HostVMMinPageSize,
3962 v->DynamicMetadataEnable[k],
3963 v->DynamicMetadataVMEnabled,
3964 v->DynamicMetadataLinesBeforeActiveRequired[k],
3965 v->DynamicMetadataTransmittedBytes[k],
3969 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3970 v->MetaRowBytes[i][j][k],
3971 v->DPTEBytesPerRow[i][j][k],
3972 v->PrefetchLinesY[i][j][k],
3973 v->SwathWidthYThisState[k],
3976 v->PrefetchLinesC[i][j][k],
3977 v->SwathWidthCThisState[k],
3980 v->swath_width_luma_ub_this_state[k],
3981 v->swath_width_chroma_ub_this_state[k],
3982 v->SwathHeightYThisState[k],
3983 v->SwathHeightCThisState[k],
/* From here on, addresses of vba fields are passed rather than values. */
3985 &v->DSTXAfterScaler[k],
3986 &v->DSTYAfterScaler[k],
3987 &v->LineTimesForPrefetch[k],
3989 &v->LinesForMetaPTE[k],
3990 &v->LinesForMetaAndDPTERow[k],
3991 &v->VRatioPreY[i][j][k],
3992 &v->VRatioPreC[i][j][k],
3993 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3994 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3995 &v->NoTimeForDynamicMetadata[i][j][k],
3997 &v->prefetch_vmrow_bw[k],
4001 &v->VUpdateOffsetPix[k],
4002 &v->VUpdateWidthPix[k],
4003 &v->VReadyOffsetPix[k]);
4006 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
4008 struct vba_vars_st *v = &mode_lib->vba;
4012 int ReorderingBytes;
4013 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
4014 bool NoChroma = true;
4015 bool EnoughWritebackUnits = true;
4016 bool P2IWith420 = false;
4017 bool DSCOnlyIfNecessaryWithBPP = false;
4018 bool DSC422NativeNotSupported = false;
4019 double MaxTotalVActiveRDBandwidth;
4020 bool ViewportExceedsSurface = false;
4021 bool FMTBufferExceeded = false;
4023 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
4025 CalculateMinAndMaxPrefetchMode(
4026 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
4027 &MinPrefetchMode, &MaxPrefetchMode);
4029 /*Scale Ratio, taps Support Check*/
4031 v->ScaleRatioAndTapsSupport = true;
4032 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4033 if (v->ScalerEnabled[k] == false
4034 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
4035 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
4036 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
4037 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
4038 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
4039 v->ScaleRatioAndTapsSupport = false;
4040 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
4041 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
4042 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
4043 || v->VRatio[k] > v->vtaps[k]
4044 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
4045 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
4046 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
4047 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
4048 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
4049 || v->HRatioChroma[k] > v->MaxHSCLRatio
4050 || v->VRatioChroma[k] > v->MaxVSCLRatio
4051 || v->HRatioChroma[k] > v->HTAPsChroma[k]
4052 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
4053 v->ScaleRatioAndTapsSupport = false;
4056 /*Source Format, Pixel Format and Scan Support Check*/
4058 v->SourceFormatPixelAndScanSupport = true;
4059 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4060 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
4061 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
4062 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
4063 v->SourceFormatPixelAndScanSupport = false;
4066 /*Bandwidth Support Check*/
4068 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4069 CalculateBytePerPixelAnd256BBlockSizes(
4070 v->SourcePixelFormat[k],
4071 v->SurfaceTiling[k],
4072 &v->BytePerPixelY[k],
4073 &v->BytePerPixelC[k],
4074 &v->BytePerPixelInDETY[k],
4075 &v->BytePerPixelInDETC[k],
4076 &v->Read256BlockHeightY[k],
4077 &v->Read256BlockHeightC[k],
4078 &v->Read256BlockWidthY[k],
4079 &v->Read256BlockWidthC[k]);
4081 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4082 if (v->SourceScan[k] != dm_vert) {
4083 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4084 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4086 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4087 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4090 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4091 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4092 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4093 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4094 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4096 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4097 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4098 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4099 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4100 } else if (v->WritebackEnable[k] == true) {
4101 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4102 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4104 v->WriteBandwidth[k] = 0.0;
4108 /*Writeback Latency support check*/
4110 v->WritebackLatencySupport = true;
4111 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4112 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4113 v->WritebackLatencySupport = false;
4117 /*Writeback Mode Support Check*/
4119 v->TotalNumberOfActiveWriteback = 0;
4120 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4121 if (v->WritebackEnable[k] == true) {
4122 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4126 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4127 EnoughWritebackUnits = false;
4130 /*Writeback Scale Ratio and Taps Support Check*/
4132 v->WritebackScaleRatioAndTapsSupport = true;
4133 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4134 if (v->WritebackEnable[k] == true) {
4135 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4136 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4137 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4138 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4139 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4140 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4141 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4142 v->WritebackScaleRatioAndTapsSupport = false;
4144 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4145 v->WritebackScaleRatioAndTapsSupport = false;
4149 /*Maximum DISPCLK/DPPCLK Support check*/
4151 v->WritebackRequiredDISPCLK = 0.0;
4152 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4153 if (v->WritebackEnable[k] == true) {
4154 v->WritebackRequiredDISPCLK = dml_max(
4155 v->WritebackRequiredDISPCLK,
4156 dml31_CalculateWriteBackDISPCLK(
4157 v->WritebackPixelFormat[k],
4159 v->WritebackHRatio[k],
4160 v->WritebackVRatio[k],
4161 v->WritebackHTaps[k],
4162 v->WritebackVTaps[k],
4163 v->WritebackSourceWidth[k],
4164 v->WritebackDestinationWidth[k],
4166 v->WritebackLineBufferSize));
4169 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4170 if (v->HRatio[k] > 1.0) {
4171 v->PSCL_FACTOR[k] = dml_min(
4172 v->MaxDCHUBToPSCLThroughput,
4173 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4175 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4177 if (v->BytePerPixelC[k] == 0.0) {
4178 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4179 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4181 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4182 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4184 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4185 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4188 if (v->HRatioChroma[k] > 1.0) {
4189 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4190 v->MaxDCHUBToPSCLThroughput,
4191 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4193 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4195 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4197 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4198 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4199 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4200 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4202 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4203 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4204 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4208 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4209 int MaximumSwathWidthSupportLuma;
4210 int MaximumSwathWidthSupportChroma;
4212 if (v->SurfaceTiling[k] == dm_sw_linear) {
4213 MaximumSwathWidthSupportLuma = 8192.0;
4214 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4215 MaximumSwathWidthSupportLuma = 2880.0;
4216 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4217 MaximumSwathWidthSupportLuma = 3840.0;
4219 MaximumSwathWidthSupportLuma = 5760.0;
4222 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4223 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4225 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4227 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4228 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4229 if (v->BytePerPixelC[k] == 0.0) {
4230 v->MaximumSwathWidthInLineBufferChroma = 0;
4232 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4233 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4235 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4236 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4239 CalculateSwathAndDETConfiguration(
4241 v->NumberOfActivePlanes,
4242 v->DETBufferSizeInKByte[0],
4243 v->MaximumSwathWidthLuma,
4244 v->MaximumSwathWidthChroma,
4246 v->SourcePixelFormat,
4254 v->Read256BlockHeightY,
4255 v->Read256BlockHeightC,
4256 v->Read256BlockWidthY,
4257 v->Read256BlockWidthC,
4258 v->odm_combine_dummy,
4259 v->BlendingAndTiming,
4262 v->BytePerPixelInDETY,
4263 v->BytePerPixelInDETC,
4267 v->NoOfDPPThisState,
4268 v->swath_width_luma_ub_this_state,
4269 v->swath_width_chroma_ub_this_state,
4270 v->SwathWidthYThisState,
4271 v->SwathWidthCThisState,
4272 v->SwathHeightYThisState,
4273 v->SwathHeightCThisState,
4274 v->DETBufferSizeYThisState,
4275 v->DETBufferSizeCThisState,
4276 v->SingleDPPViewportSizeSupportPerPlane,
4277 &v->ViewportSizeSupport[0][0]);
4279 for (i = 0; i < v->soc.num_states; i++) {
4280 for (j = 0; j < 2; j++) {
4281 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4282 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4283 v->RequiredDISPCLK[i][j] = 0.0;
4284 v->DISPCLK_DPPCLK_Support[i][j] = true;
4285 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4286 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4287 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4288 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4289 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4290 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4291 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4292 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4294 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4295 * (1 + v->DISPCLKRampingMargin / 100.0);
4296 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4297 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4298 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4299 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4300 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4302 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4303 * (1 + v->DISPCLKRampingMargin / 100.0);
4304 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4305 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4306 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4307 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4308 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4311 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4312 || !(v->Output[k] == dm_dp ||
4313 v->Output[k] == dm_edp)) {
4314 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4315 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4317 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4318 FMTBufferExceeded = true;
4319 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4320 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4321 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4322 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4323 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4324 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4325 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4326 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4327 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4328 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4330 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4331 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4333 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4334 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4335 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4336 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4337 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4339 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4340 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4343 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4344 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4345 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4346 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4347 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4349 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4350 FMTBufferExceeded = true;
4352 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4353 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4356 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4357 v->MPCCombine[i][j][k] = false;
4358 v->NoOfDPP[i][j][k] = 4;
4359 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4360 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4361 v->MPCCombine[i][j][k] = false;
4362 v->NoOfDPP[i][j][k] = 2;
4363 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4364 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4365 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4366 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4367 v->MPCCombine[i][j][k] = false;
4368 v->NoOfDPP[i][j][k] = 1;
4369 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4371 v->MPCCombine[i][j][k] = true;
4372 v->NoOfDPP[i][j][k] = 2;
4373 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4375 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4376 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4377 > v->MaxDppclkRoundedDownToDFSGranularity)
4378 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4379 v->DISPCLK_DPPCLK_Support[i][j] = false;
4382 v->TotalNumberOfActiveDPP[i][j] = 0;
4383 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4384 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4385 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4386 if (v->NoOfDPP[i][j][k] == 1)
4387 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4388 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4389 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4394 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4395 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4396 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4397 double BWOfNonSplitPlaneOfMaximumBandwidth;
4398 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4399 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4400 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4401 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4402 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4403 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4404 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4405 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4408 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4409 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4410 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4411 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4412 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4413 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4414 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4417 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4418 v->RequiredDISPCLK[i][j] = 0.0;
4419 v->DISPCLK_DPPCLK_Support[i][j] = true;
4420 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4421 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4422 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4423 v->MPCCombine[i][j][k] = true;
4424 v->NoOfDPP[i][j][k] = 2;
4425 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4426 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4428 v->MPCCombine[i][j][k] = false;
4429 v->NoOfDPP[i][j][k] = 1;
4430 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4431 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4433 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4434 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4435 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4436 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4438 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4440 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4441 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4442 > v->MaxDppclkRoundedDownToDFSGranularity)
4443 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4444 v->DISPCLK_DPPCLK_Support[i][j] = false;
4447 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4448 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4449 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4452 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4453 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4454 v->DISPCLK_DPPCLK_Support[i][j] = false;
4459 /*Total Available Pipes Support Check*/
4461 for (i = 0; i < v->soc.num_states; i++) {
4462 for (j = 0; j < 2; j++) {
4463 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4464 v->TotalAvailablePipesSupport[i][j] = true;
4466 v->TotalAvailablePipesSupport[i][j] = false;
4470 /*Display IO and DSC Support Check*/
4472 v->NonsupportedDSCInputBPC = false;
4473 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4474 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4475 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4476 v->NonsupportedDSCInputBPC = true;
4480 /*Number Of DSC Slices*/
4481 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4482 if (v->BlendingAndTiming[k] == k) {
4483 if (v->PixelClockBackEnd[k] > 3200) {
4484 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4485 } else if (v->PixelClockBackEnd[k] > 1360) {
4486 v->NumberOfDSCSlices[k] = 8;
4487 } else if (v->PixelClockBackEnd[k] > 680) {
4488 v->NumberOfDSCSlices[k] = 4;
4489 } else if (v->PixelClockBackEnd[k] > 340) {
4490 v->NumberOfDSCSlices[k] = 2;
4492 v->NumberOfDSCSlices[k] = 1;
4495 v->NumberOfDSCSlices[k] = 0;
4499 for (i = 0; i < v->soc.num_states; i++) {
4500 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4501 v->RequiresDSC[i][k] = false;
4502 v->RequiresFEC[i][k] = false;
4503 if (v->BlendingAndTiming[k] == k) {
4504 if (v->Output[k] == dm_hdmi) {
4505 v->RequiresDSC[i][k] = false;
4506 v->RequiresFEC[i][k] = false;
4507 v->OutputBppPerState[i][k] = TruncToValidBPP(
4508 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4512 v->PixelClockBackEnd[k],
4513 v->ForcedOutputLinkBPP[k],
4517 v->DSCInputBitPerComponent[k],
4518 v->NumberOfDSCSlices[k],
4519 v->AudioSampleRate[k],
4520 v->AudioSampleLayout[k],
4521 v->ODMCombineEnablePerState[i][k]);
4522 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4523 if (v->DSCEnable[k] == true) {
4524 v->RequiresDSC[i][k] = true;
4525 v->LinkDSCEnable = true;
4526 if (v->Output[k] == dm_dp) {
4527 v->RequiresFEC[i][k] = true;
4529 v->RequiresFEC[i][k] = false;
4532 v->RequiresDSC[i][k] = false;
4533 v->LinkDSCEnable = false;
4534 v->RequiresFEC[i][k] = false;
4537 v->Outbpp = BPP_INVALID;
4538 if (v->PHYCLKPerState[i] >= 270.0) {
4539 v->Outbpp = TruncToValidBPP(
4540 (1.0 - v->Downspreading / 100.0) * 2700,
4541 v->OutputLinkDPLanes[k],
4544 v->PixelClockBackEnd[k],
4545 v->ForcedOutputLinkBPP[k],
4549 v->DSCInputBitPerComponent[k],
4550 v->NumberOfDSCSlices[k],
4551 v->AudioSampleRate[k],
4552 v->AudioSampleLayout[k],
4553 v->ODMCombineEnablePerState[i][k]);
4554 v->OutputBppPerState[i][k] = v->Outbpp;
4555 // TODO: Need some other way to handle this nonsense
4556 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4558 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4559 v->Outbpp = TruncToValidBPP(
4560 (1.0 - v->Downspreading / 100.0) * 5400,
4561 v->OutputLinkDPLanes[k],
4564 v->PixelClockBackEnd[k],
4565 v->ForcedOutputLinkBPP[k],
4569 v->DSCInputBitPerComponent[k],
4570 v->NumberOfDSCSlices[k],
4571 v->AudioSampleRate[k],
4572 v->AudioSampleLayout[k],
4573 v->ODMCombineEnablePerState[i][k]);
4574 v->OutputBppPerState[i][k] = v->Outbpp;
4575 // TODO: Need some other way to handle this nonsense
4576 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4578 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4579 v->Outbpp = TruncToValidBPP(
4580 (1.0 - v->Downspreading / 100.0) * 8100,
4581 v->OutputLinkDPLanes[k],
4584 v->PixelClockBackEnd[k],
4585 v->ForcedOutputLinkBPP[k],
4589 v->DSCInputBitPerComponent[k],
4590 v->NumberOfDSCSlices[k],
4591 v->AudioSampleRate[k],
4592 v->AudioSampleLayout[k],
4593 v->ODMCombineEnablePerState[i][k]);
4594 v->OutputBppPerState[i][k] = v->Outbpp;
4595 // TODO: Need some other way to handle this nonsense
4596 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4598 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4599 v->Outbpp = TruncToValidBPP(
4600 (1.0 - v->Downspreading / 100.0) * 10000,
4604 v->PixelClockBackEnd[k],
4605 v->ForcedOutputLinkBPP[k],
4609 v->DSCInputBitPerComponent[k],
4610 v->NumberOfDSCSlices[k],
4611 v->AudioSampleRate[k],
4612 v->AudioSampleLayout[k],
4613 v->ODMCombineEnablePerState[i][k]);
4614 v->OutputBppPerState[i][k] = v->Outbpp;
4615 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4617 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4618 v->Outbpp = TruncToValidBPP(
4623 v->PixelClockBackEnd[k],
4624 v->ForcedOutputLinkBPP[k],
4628 v->DSCInputBitPerComponent[k],
4629 v->NumberOfDSCSlices[k],
4630 v->AudioSampleRate[k],
4631 v->AudioSampleLayout[k],
4632 v->ODMCombineEnablePerState[i][k]);
4633 v->OutputBppPerState[i][k] = v->Outbpp;
4634 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4638 v->OutputBppPerState[i][k] = 0;
4643 for (i = 0; i < v->soc.num_states; i++) {
4644 v->LinkCapacitySupport[i] = true;
4645 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4646 if (v->BlendingAndTiming[k] == k
4647 && (v->Output[k] == dm_dp ||
4648 v->Output[k] == dm_edp ||
4649 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4650 v->LinkCapacitySupport[i] = false;
4656 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4657 if (v->BlendingAndTiming[k] == k
4658 && (v->Output[k] == dm_dp ||
4659 v->Output[k] == dm_edp ||
4660 v->Output[k] == dm_hdmi)) {
4661 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4664 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4665 && !v->DSC422NativeSupport) {
4666 DSC422NativeNotSupported = true;
4671 for (i = 0; i < v->soc.num_states; ++i) {
4672 v->ODMCombine4To1SupportCheckOK[i] = true;
4673 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4674 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4675 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4676 || v->Output[k] == dm_hdmi)) {
4677 v->ODMCombine4To1SupportCheckOK[i] = false;
4682 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4684 for (i = 0; i < v->soc.num_states; i++) {
4685 v->NotEnoughDSCUnits[i] = false;
4686 v->TotalDSCUnitsRequired = 0.0;
4687 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4688 if (v->RequiresDSC[i][k] == true) {
4689 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4690 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4691 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4692 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4694 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4698 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4699 v->NotEnoughDSCUnits[i] = true;
4702 /*DSC Delay per state*/
4704 for (i = 0; i < v->soc.num_states; i++) {
4705 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4706 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4709 v->BPP = v->OutputBppPerState[i][k];
4711 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4712 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4713 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4714 v->DSCInputBitPerComponent[k],
4716 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4717 v->NumberOfDSCSlices[k],
4719 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4720 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4721 v->DSCDelayPerState[i][k] = 2.0
4722 * (dscceComputeDelay(
4723 v->DSCInputBitPerComponent[k],
4725 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4726 v->NumberOfDSCSlices[k] / 2,
4728 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4730 v->DSCDelayPerState[i][k] = 4.0
4731 * (dscceComputeDelay(
4732 v->DSCInputBitPerComponent[k],
4734 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4735 v->NumberOfDSCSlices[k] / 4,
4737 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4739 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4741 v->DSCDelayPerState[i][k] = 0.0;
4744 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4745 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4746 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4747 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4753 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4755 for (i = 0; i < v->soc.num_states; ++i) {
4756 for (j = 0; j <= 1; ++j) {
4757 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4758 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4759 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4760 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4763 CalculateSwathAndDETConfiguration(
4765 v->NumberOfActivePlanes,
4766 v->DETBufferSizeInKByte[0],
4767 v->MaximumSwathWidthLuma,
4768 v->MaximumSwathWidthChroma,
4770 v->SourcePixelFormat,
4778 v->Read256BlockHeightY,
4779 v->Read256BlockHeightC,
4780 v->Read256BlockWidthY,
4781 v->Read256BlockWidthC,
4782 v->ODMCombineEnableThisState,
4783 v->BlendingAndTiming,
4786 v->BytePerPixelInDETY,
4787 v->BytePerPixelInDETC,
4791 v->NoOfDPPThisState,
4792 v->swath_width_luma_ub_this_state,
4793 v->swath_width_chroma_ub_this_state,
4794 v->SwathWidthYThisState,
4795 v->SwathWidthCThisState,
4796 v->SwathHeightYThisState,
4797 v->SwathHeightCThisState,
4798 v->DETBufferSizeYThisState,
4799 v->DETBufferSizeCThisState,
4801 &v->ViewportSizeSupport[i][j]);
4803 CalculateDCFCLKDeepSleep(
4805 v->NumberOfActivePlanes,
4810 v->SwathWidthYThisState,
4811 v->SwathWidthCThisState,
4812 v->NoOfDPPThisState,
4817 v->PSCL_FACTOR_CHROMA,
4818 v->RequiredDPPCLKThisState,
4819 v->ReadBandwidthLuma,
4820 v->ReadBandwidthChroma,
4822 &v->ProjectedDCFCLKDeepSleep[i][j]);
4824 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4825 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4826 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4827 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4828 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4829 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4830 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4831 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4832 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4837 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4838 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4839 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4842 for (i = 0; i < v->soc.num_states; i++) {
4843 for (j = 0; j < 2; j++) {
4844 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4846 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4847 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4848 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4849 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4850 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4851 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4852 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4853 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4854 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4857 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4859 if (v->DCCEnable[k] == true) {
4860 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4864 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4865 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4866 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4868 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4869 && v->SourceScan[k] != dm_vert) {
4870 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4872 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4874 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4875 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4878 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4881 v->Read256BlockHeightC[k],
4882 v->Read256BlockWidthC[k],
4883 v->SourcePixelFormat[k],
4884 v->SurfaceTiling[k],
4885 v->BytePerPixelC[k],
4887 v->SwathWidthCThisState[k],
4888 v->ViewportHeightChroma[k],
4891 v->HostVMMaxNonCachedPageTableLevels,
4892 v->GPUVMMinPageSize,
4893 v->HostVMMinPageSize,
4894 v->PTEBufferSizeInRequestsForChroma,
4897 &v->MacroTileWidthC[k],
4899 &v->DPTEBytesPerRowC,
4900 &v->PTEBufferSizeNotExceededC[i][j][k],
4902 &v->dpte_row_height_chroma[k],
4906 &v->meta_row_height_chroma[k],
4913 &v->dummyinteger11);
4915 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4920 v->ProgressiveToInterlaceUnitInOPP,
4921 v->SwathHeightCThisState[k],
4922 v->ViewportYStartC[k],
4926 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4927 v->PTEBufferSizeInRequestsForChroma = 0;
4928 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4929 v->MetaRowBytesC = 0.0;
4930 v->DPTEBytesPerRowC = 0.0;
4931 v->PrefetchLinesC[i][j][k] = 0.0;
4932 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4934 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4937 v->Read256BlockHeightY[k],
4938 v->Read256BlockWidthY[k],
4939 v->SourcePixelFormat[k],
4940 v->SurfaceTiling[k],
4941 v->BytePerPixelY[k],
4943 v->SwathWidthYThisState[k],
4944 v->ViewportHeight[k],
4947 v->HostVMMaxNonCachedPageTableLevels,
4948 v->GPUVMMinPageSize,
4949 v->HostVMMinPageSize,
4950 v->PTEBufferSizeInRequestsForLuma,
4952 v->DCCMetaPitchY[k],
4953 &v->MacroTileWidthY[k],
4955 &v->DPTEBytesPerRowY,
4956 &v->PTEBufferSizeNotExceededY[i][j][k],
4958 &v->dpte_row_height[k],
4962 &v->meta_row_height[k],
4964 &v->dpte_group_bytes[k],
4970 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4975 v->ProgressiveToInterlaceUnitInOPP,
4976 v->SwathHeightYThisState[k],
4977 v->ViewportYStartY[k],
4980 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4981 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4982 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4984 CalculateRowBandwidth(
4986 v->SourcePixelFormat[k],
4990 v->HTotal[k] / v->PixelClock[k],
4993 v->meta_row_height[k],
4994 v->meta_row_height_chroma[k],
4995 v->DPTEBytesPerRowY,
4996 v->DPTEBytesPerRowC,
4997 v->dpte_row_height[k],
4998 v->dpte_row_height_chroma[k],
4999 &v->meta_row_bandwidth[i][j][k],
5000 &v->dpte_row_bandwidth[i][j][k]);
5002 /*DCCMetaBufferSizeSupport(i, j) = True
5003 For k = 0 To NumberOfActivePlanes - 1
5004 If MetaRowBytes(i, j, k) > 24064 Then
5005 DCCMetaBufferSizeSupport(i, j) = False
5008 v->DCCMetaBufferSizeSupport[i][j] = true;
5009 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5010 if (v->MetaRowBytes[i][j][k] > 24064)
5011 v->DCCMetaBufferSizeSupport[i][j] = false;
5013 v->UrgLatency[i] = CalculateUrgentLatency(
5014 v->UrgentLatencyPixelDataOnly,
5015 v->UrgentLatencyPixelMixedWithVMData,
5016 v->UrgentLatencyVMDataOnly,
5017 v->DoUrgentLatencyAdjustment,
5018 v->UrgentLatencyAdjustmentFabricClockComponent,
5019 v->UrgentLatencyAdjustmentFabricClockReference,
5020 v->FabricClockPerState[i]);
5022 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5023 CalculateUrgentBurstFactor(
5024 v->swath_width_luma_ub_this_state[k],
5025 v->swath_width_chroma_ub_this_state[k],
5026 v->SwathHeightYThisState[k],
5027 v->SwathHeightCThisState[k],
5028 v->HTotal[k] / v->PixelClock[k],
5030 v->CursorBufferSize,
5031 v->CursorWidth[k][0],
5035 v->BytePerPixelInDETY[k],
5036 v->BytePerPixelInDETC[k],
5037 v->DETBufferSizeYThisState[k],
5038 v->DETBufferSizeCThisState[k],
5039 &v->UrgentBurstFactorCursor[k],
5040 &v->UrgentBurstFactorLuma[k],
5041 &v->UrgentBurstFactorChroma[k],
5042 &NotUrgentLatencyHiding[k]);
5045 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5046 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5047 if (NotUrgentLatencyHiding[k]) {
5048 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5052 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5053 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5054 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5055 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5058 v->TotalVActivePixelBandwidth[i][j] = 0;
5059 v->TotalVActiveCursorBandwidth[i][j] = 0;
5060 v->TotalMetaRowBandwidth[i][j] = 0;
5061 v->TotalDPTERowBandwidth[i][j] = 0;
5062 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5063 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5064 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5065 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5066 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5071 //Calculate Return BW
5072 for (i = 0; i < v->soc.num_states; ++i) {
5073 for (j = 0; j <= 1; ++j) {
5074 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5075 if (v->BlendingAndTiming[k] == k) {
5076 if (v->WritebackEnable[k] == true) {
5077 v->WritebackDelayTime[k] = v->WritebackLatency
5078 + CalculateWriteBackDelay(
5079 v->WritebackPixelFormat[k],
5080 v->WritebackHRatio[k],
5081 v->WritebackVRatio[k],
5082 v->WritebackVTaps[k],
5083 v->WritebackDestinationWidth[k],
5084 v->WritebackDestinationHeight[k],
5085 v->WritebackSourceHeight[k],
5086 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5088 v->WritebackDelayTime[k] = 0.0;
5090 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5091 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5092 v->WritebackDelayTime[k] = dml_max(
5093 v->WritebackDelayTime[k],
5095 + CalculateWriteBackDelay(
5096 v->WritebackPixelFormat[m],
5097 v->WritebackHRatio[m],
5098 v->WritebackVRatio[m],
5099 v->WritebackVTaps[m],
5100 v->WritebackDestinationWidth[m],
5101 v->WritebackDestinationHeight[m],
5102 v->WritebackSourceHeight[m],
5103 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5108 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5109 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5110 if (v->BlendingAndTiming[k] == m) {
5111 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5115 v->MaxMaxVStartup[i][j] = 0;
5116 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5117 v->MaximumVStartup[i][j][k] =
5118 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5119 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5120 v->VTotal[k] - v->VActive[k]
5124 1.0 * v->WritebackDelayTime[k]
5126 / v->PixelClock[k]),
5128 if (v->MaximumVStartup[i][j][k] > 1023)
5129 v->MaximumVStartup[i][j][k] = 1023;
5130 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5135 ReorderingBytes = v->NumberOfChannels
5137 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5138 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5139 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5141 for (i = 0; i < v->soc.num_states; ++i) {
5142 for (j = 0; j <= 1; ++j) {
5143 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5147 if (v->UseMinimumRequiredDCFCLK == true)
5148 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5150 for (i = 0; i < v->soc.num_states; ++i) {
5151 for (j = 0; j <= 1; ++j) {
5152 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5153 v->ReturnBusWidth * v->DCFCLKState[i][j],
5154 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5155 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5156 double PixelDataOnlyReturnBWPerState = dml_min(
5157 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5158 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5159 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5160 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5161 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5163 if (v->HostVMEnable != true) {
5164 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5166 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5171 //Re-ordering Buffer Support Check
5172 for (i = 0; i < v->soc.num_states; ++i) {
5173 for (j = 0; j <= 1; ++j) {
5174 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5175 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5176 v->ROBSupport[i][j] = true;
5178 v->ROBSupport[i][j] = false;
5183 //Vertical Active BW support check
5185 MaxTotalVActiveRDBandwidth = 0;
5186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5187 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5190 for (i = 0; i < v->soc.num_states; ++i) {
5191 for (j = 0; j <= 1; ++j) {
5192 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5194 v->ReturnBusWidth * v->DCFCLKState[i][j],
5195 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5196 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5197 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5198 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5200 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5201 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5203 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5208 v->UrgentLatency = CalculateUrgentLatency(
5209 v->UrgentLatencyPixelDataOnly,
5210 v->UrgentLatencyPixelMixedWithVMData,
5211 v->UrgentLatencyVMDataOnly,
5212 v->DoUrgentLatencyAdjustment,
5213 v->UrgentLatencyAdjustmentFabricClockComponent,
5214 v->UrgentLatencyAdjustmentFabricClockReference,
5217 for (i = 0; i < v->soc.num_states; ++i) {
5218 for (j = 0; j <= 1; ++j) {
5219 double VMDataOnlyReturnBWPerState;
5220 double HostVMInefficiencyFactor = 1;
5221 int NextPrefetchModeState = MinPrefetchMode;
5222 bool UnboundedRequestEnabledThisState = false;
5223 int CompressedBufferSizeInkByteThisState = 0;
5226 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5228 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5229 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5230 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5231 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5234 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5235 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5236 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5237 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5238 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5239 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5240 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5241 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5242 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5243 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5246 VMDataOnlyReturnBWPerState = dml_min(
5248 v->ReturnBusWidth * v->DCFCLKState[i][j],
5249 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5250 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5251 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5252 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5253 if (v->GPUVMEnable && v->HostVMEnable)
5254 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5256 v->ExtraLatency = CalculateExtraLatency(
5257 v->RoundTripPingLatencyCycles,
5259 v->DCFCLKState[i][j],
5260 v->TotalNumberOfActiveDPP[i][j],
5261 v->PixelChunkSizeInKByte,
5262 v->TotalNumberOfDCCActiveDPP[i][j],
5264 v->ReturnBWPerState[i][j],
5267 v->NumberOfActivePlanes,
5268 v->NoOfDPPThisState,
5269 v->dpte_group_bytes,
5270 HostVMInefficiencyFactor,
5271 v->HostVMMinPageSize,
5272 v->HostVMMaxNonCachedPageTableLevels);
5274 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5276 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5277 v->MaxVStartup = v->NextMaxVStartup;
5279 v->TWait = CalculateTWait(
5280 v->PrefetchModePerState[i][j],
5281 v->DRAMClockChangeLatency,
5283 v->SREnterPlusExitTime);
5285 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5286 CalculatePrefetchSchedulePerPlane(mode_lib,
5287 HostVMInefficiencyFactor,
5291 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5292 CalculateUrgentBurstFactor(
5293 v->swath_width_luma_ub_this_state[k],
5294 v->swath_width_chroma_ub_this_state[k],
5295 v->SwathHeightYThisState[k],
5296 v->SwathHeightCThisState[k],
5297 v->HTotal[k] / v->PixelClock[k],
5299 v->CursorBufferSize,
5300 v->CursorWidth[k][0],
5302 v->VRatioPreY[i][j][k],
5303 v->VRatioPreC[i][j][k],
5304 v->BytePerPixelInDETY[k],
5305 v->BytePerPixelInDETC[k],
5306 v->DETBufferSizeYThisState[k],
5307 v->DETBufferSizeCThisState[k],
5308 &v->UrgentBurstFactorCursorPre[k],
5309 &v->UrgentBurstFactorLumaPre[k],
5310 &v->UrgentBurstFactorChroma[k],
5311 &v->NotUrgentLatencyHidingPre[k]);
5314 v->MaximumReadBandwidthWithPrefetch = 0.0;
5315 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5316 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5317 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5319 v->MaximumReadBandwidthWithPrefetch =
5320 v->MaximumReadBandwidthWithPrefetch
5322 v->VActivePixelBandwidth[i][j][k]
5323 + v->VActiveCursorBandwidth[i][j][k]
5324 + v->NoOfDPP[i][j][k]
5325 * (v->meta_row_bandwidth[i][j][k]
5326 + v->dpte_row_bandwidth[i][j][k]),
5327 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5329 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5330 * v->UrgentBurstFactorLumaPre[k]
5331 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5332 * v->UrgentBurstFactorChromaPre[k])
5333 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5336 v->NotEnoughUrgentLatencyHidingPre = false;
5337 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5338 if (v->NotUrgentLatencyHidingPre[k] == true) {
5339 v->NotEnoughUrgentLatencyHidingPre = true;
5343 v->PrefetchSupported[i][j] = true;
5344 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5345 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5346 v->PrefetchSupported[i][j] = false;
5348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5349 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5350 || v->NoTimeForPrefetch[i][j][k] == true) {
5351 v->PrefetchSupported[i][j] = false;
5355 v->DynamicMetadataSupported[i][j] = true;
5356 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5357 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5358 v->DynamicMetadataSupported[i][j] = false;
5362 v->VRatioInPrefetchSupported[i][j] = true;
5363 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5364 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5365 v->VRatioInPrefetchSupported[i][j] = false;
5368 v->AnyLinesForVMOrRowTooLarge = false;
5369 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5370 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5371 v->AnyLinesForVMOrRowTooLarge = true;
5375 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5377 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5378 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5379 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5380 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5382 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5384 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5385 * v->UrgentBurstFactorLumaPre[k]
5386 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5387 * v->UrgentBurstFactorChromaPre[k])
5388 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5390 v->TotImmediateFlipBytes = 0.0;
5391 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5392 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5393 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5394 + v->DPTEBytesPerRow[i][j][k];
5397 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5398 CalculateFlipSchedule(
5400 HostVMInefficiencyFactor,
5403 v->GPUVMMaxPageTableLevels,
5405 v->HostVMMaxNonCachedPageTableLevels,
5407 v->HostVMMinPageSize,
5408 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5409 v->MetaRowBytes[i][j][k],
5410 v->DPTEBytesPerRow[i][j][k],
5411 v->BandwidthAvailableForImmediateFlip,
5412 v->TotImmediateFlipBytes,
5413 v->SourcePixelFormat[k],
5414 v->HTotal[k] / v->PixelClock[k],
5419 v->dpte_row_height[k],
5420 v->meta_row_height[k],
5421 v->dpte_row_height_chroma[k],
5422 v->meta_row_height_chroma[k],
5423 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5424 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5425 &v->final_flip_bw[k],
5426 &v->ImmediateFlipSupportedForPipe[k]);
5428 v->total_dcn_read_bw_with_flip = 0.0;
5429 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5430 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5432 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5433 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5434 + v->VActiveCursorBandwidth[i][j][k],
5436 * (v->final_flip_bw[k]
5437 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5438 * v->UrgentBurstFactorLumaPre[k]
5439 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5440 * v->UrgentBurstFactorChromaPre[k])
5441 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5443 v->ImmediateFlipSupportedForState[i][j] = true;
5444 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5445 v->ImmediateFlipSupportedForState[i][j] = false;
5447 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5448 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5449 v->ImmediateFlipSupportedForState[i][j] = false;
5453 v->ImmediateFlipSupportedForState[i][j] = false;
5456 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5457 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5458 NextPrefetchModeState = NextPrefetchModeState + 1;
5460 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5462 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5463 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5464 && ((v->HostVMEnable == false &&
5465 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5466 || v->ImmediateFlipSupportedForState[i][j] == true))
5467 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5469 CalculateUnboundedRequestAndCompressedBufferSize(
5470 v->DETBufferSizeInKByte[0],
5471 v->ConfigReturnBufferSizeInKByte,
5472 v->UseUnboundedRequesting,
5473 v->TotalNumberOfActiveDPP[i][j],
5476 v->CompressedBufferSegmentSizeInkByte,
5478 &UnboundedRequestEnabledThisState,
5479 &CompressedBufferSizeInkByteThisState);
5481 CalculateWatermarksAndDRAMSpeedChangeSupport(
5483 v->PrefetchModePerState[i][j],
5484 v->NumberOfActivePlanes,
5485 v->MaxLineBufferLines,
5487 v->WritebackInterfaceBufferSize,
5488 v->DCFCLKState[i][j],
5489 v->ReturnBWPerState[i][j],
5490 v->SynchronizedVBlank,
5491 v->dpte_group_bytes,
5495 v->WritebackLatency,
5496 v->WritebackChunkSize,
5497 v->SOCCLKPerState[i],
5498 v->DRAMClockChangeLatency,
5500 v->SREnterPlusExitTime,
5502 v->SREnterPlusExitZ8Time,
5503 v->ProjectedDCFCLKDeepSleep[i][j],
5504 v->DETBufferSizeYThisState,
5505 v->DETBufferSizeCThisState,
5506 v->SwathHeightYThisState,
5507 v->SwathHeightCThisState,
5509 v->SwathWidthYThisState,
5510 v->SwathWidthCThisState,
5519 v->BlendingAndTiming,
5520 v->NoOfDPPThisState,
5521 v->BytePerPixelInDETY,
5522 v->BytePerPixelInDETC,
5526 v->WritebackPixelFormat,
5527 v->WritebackDestinationWidth,
5528 v->WritebackDestinationHeight,
5529 v->WritebackSourceHeight,
5530 UnboundedRequestEnabledThisState,
5531 CompressedBufferSizeInkByteThisState,
5532 &v->DRAMClockChangeSupport[i][j],
5533 &v->UrgentWatermark,
5534 &v->WritebackUrgentWatermark,
5535 &v->DRAMClockChangeWatermark,
5536 &v->WritebackDRAMClockChangeWatermark,
5541 &v->MinActiveDRAMClockChangeLatencySupported);
5545 /*PTE Buffer Size Check*/
5546 for (i = 0; i < v->soc.num_states; i++) {
5547 for (j = 0; j < 2; j++) {
5548 v->PTEBufferSizeNotExceeded[i][j] = true;
5549 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5550 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5551 v->PTEBufferSizeNotExceeded[i][j] = false;
5557 /*Cursor Support Check*/
5558 v->CursorSupport = true;
5559 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5560 if (v->CursorWidth[k][0] > 0.0) {
5561 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5562 v->CursorSupport = false;
5567 /*Valid Pitch Check*/
5568 v->PitchSupport = true;
5569 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5570 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5571 if (v->DCCEnable[k] == true) {
5572 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5574 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5576 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5577 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5578 && v->SourcePixelFormat[k] != dm_mono_8) {
5579 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5580 if (v->DCCEnable[k] == true) {
5581 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5582 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5583 64.0 * v->Read256BlockWidthC[k]);
5585 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5588 v->AlignedCPitch[k] = v->PitchC[k];
5589 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5591 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5592 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5593 v->PitchSupport = false;
5597 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5598 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5599 ViewportExceedsSurface = true;
5600 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5601 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5602 && v->SourcePixelFormat[k] != dm_rgbe) {
5603 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5604 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5605 ViewportExceedsSurface = true;
5611 /*Mode Support, Voltage State and SOC Configuration*/
5612 for (i = v->soc.num_states - 1; i >= 0; i--) {
5613 for (j = 0; j < 2; j++) {
5614 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5615 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5616 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5617 && v->DTBCLKRequiredMoreThanSupported[i] == false
5618 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5619 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5620 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5621 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5622 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5623 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5624 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5625 && ((v->HostVMEnable == false
5626 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5627 || v->ImmediateFlipSupportedForState[i][j] == true)
5628 && FMTBufferExceeded == false) {
5629 v->ModeSupport[i][j] = true;
5631 v->ModeSupport[i][j] = false;
5637 unsigned int MaximumMPCCombine = 0;
5638 for (i = v->soc.num_states; i >= 0; i--) {
5639 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5640 v->VoltageLevel = i;
5641 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5642 if (v->ModeSupport[i][0] == true) {
5643 MaximumMPCCombine = 0;
5645 MaximumMPCCombine = 1;
5649 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5650 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5651 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5652 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5654 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5655 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5656 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5657 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5658 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5659 v->maxMpcComb = MaximumMPCCombine;
/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport
 *
 * Writes the urgent, DRAM clock change, writeback and stutter watermarks
 * through the output pointers, fills v->ActiveDRAMClockChangeLatencyMargin[]
 * for every active plane, and classifies DRAM clock change support as
 * dm_dram_clock_change_vactive, dm_dram_clock_change_vblank or
 * dm_dram_clock_change_unsupported.
 *
 * Outputs (all written unconditionally in the visible code):
 *   *UrgentWatermark, *DRAMClockChangeWatermark,
 *   *WritebackUrgentWatermark, *WritebackDRAMClockChangeWatermark,
 *   *StutterExitWatermark, *StutterEnterPlusExitWatermark,
 *   *Z8StutterExitWatermark, *Z8StutterEnterPlusExitWatermark,
 *   *MinActiveDRAMClockChangeLatencySupported, *DRAMClockChangeSupport.
 *
 * Side effects on mode_lib->vba: TotalActiveWriteback,
 * LBLatencyHidingSourceLinesY/C, ActiveDRAMClockChangeLatencyMargin[],
 * MinActiveDRAMClockChangeMargin and TotalNumberOfActiveOTG are overwritten.
 *
 * NOTE(review): k, j, SOCCLK, HRatio, VRatio, SRExitTime and LinesInDETC are
 * used below but are not declared in the lines visible here; confirm their
 * declarations against the full file.
 */
5663 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5664 struct display_mode_lib *mode_lib,
5665 unsigned int PrefetchMode,
5666 unsigned int NumberOfActivePlanes,
5667 unsigned int MaxLineBufferLines,
5668 unsigned int LineBufferSize,
5669 unsigned int WritebackInterfaceBufferSize,
5672 bool SynchronizedVBlank,
5673 unsigned int dpte_group_bytes[],
5674 unsigned int MetaChunkSize,
5675 double UrgentLatency,
5676 double ExtraLatency,
5677 double WritebackLatency,
5678 double WritebackChunkSize,
5680 double DRAMClockChangeLatency,
5682 double SREnterPlusExitTime,
5683 double SRExitZ8Time,
5684 double SREnterPlusExitZ8Time,
5685 double DCFCLKDeepSleep,
5686 unsigned int DETBufferSizeY[],
5687 unsigned int DETBufferSizeC[],
5688 unsigned int SwathHeightY[],
5689 unsigned int SwathHeightC[],
5690 unsigned int LBBitPerPixel[],
5691 double SwathWidthY[],
5692 double SwathWidthC[],
5694 double HRatioChroma[],
5695 unsigned int vtaps[],
5696 unsigned int VTAPsChroma[],
5698 double VRatioChroma[],
5699 unsigned int HTotal[],
5700 double PixelClock[],
5701 unsigned int BlendingAndTiming[],
5702 unsigned int DPPPerPlane[],
5703 double BytePerPixelDETY[],
5704 double BytePerPixelDETC[],
5705 double DSTXAfterScaler[],
5706 double DSTYAfterScaler[],
5707 bool WritebackEnable[],
5708 enum source_format_class WritebackPixelFormat[],
5709 double WritebackDestinationWidth[],
5710 double WritebackDestinationHeight[],
5711 double WritebackSourceHeight[],
5712 bool UnboundedRequestEnabled,
5713 int unsigned CompressedBufferSizeInkByte,
5714 enum clock_change_support *DRAMClockChangeSupport,
5715 double *UrgentWatermark,
5716 double *WritebackUrgentWatermark,
5717 double *DRAMClockChangeWatermark,
5718 double *WritebackDRAMClockChangeWatermark,
5719 double *StutterExitWatermark,
5720 double *StutterEnterPlusExitWatermark,
5721 double *Z8StutterExitWatermark,
5722 double *Z8StutterEnterPlusExitWatermark,
5723 double *MinActiveDRAMClockChangeLatencySupported)
5725 struct vba_vars_st *v = &mode_lib->vba;
5726 double EffectiveLBLatencyHidingY;
5727 double EffectiveLBLatencyHidingC;
5728 double LinesInDETY[DC__NUM_DPP__MAX];
5730 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5731 unsigned int LinesInDETCRoundedDownToSwath;
5732 double FullDETBufferingTimeY;
5733 double FullDETBufferingTimeC;
5734 double ActiveDRAMClockChangeLatencyMarginY;
5735 double ActiveDRAMClockChangeLatencyMarginC;
5736 double WritebackDRAMClockChangeLatencyMargin;
5737 double PlaneWithMinActiveDRAMClockChangeMargin;
5738 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5739 double WritebackDRAMClockChangeLatencyHiding;
5740 double TotalPixelBW = 0.0;
// Urgent watermark is the sum of the urgent latency and the extra latency.
5743 *UrgentWatermark = UrgentLatency + ExtraLatency;
5745 #ifdef __DML_VBA_DEBUG__
5746 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5747 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5748 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark);
// DRAM clock change watermark stacks the clock change latency on top of the
// urgent watermark computed above.
5751 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark;
5753 #ifdef __DML_VBA_DEBUG__
5754 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency);
5755 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark);
// Count the planes that have writeback enabled.
5758 v->TotalActiveWriteback = 0;
5759 for (k = 0; k < NumberOfActivePlanes; ++k) {
5760 if (WritebackEnable[k] == true) {
5761 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
// Writeback urgent watermark: WritebackLatency alone when at most one
// writeback is active; the second assignment is the alternative that adds
// WritebackChunkSize * 1024 / 32 / SOCCLK.
5765 if (v->TotalActiveWriteback <= 1) {
5766 *WritebackUrgentWatermark = WritebackLatency;
5768 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
// Same two alternatives for the writeback DRAM clock change watermark, each
// with DRAMClockChangeLatency added.
5771 if (v->TotalActiveWriteback <= 1) {
5772 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency;
5774 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
// Accumulate, over all planes, the luma plus chroma byte rate
// (width * bytes per pixel * vertical ratio) scaled by DPPPerPlane and divided
// by the HTotal / PixelClock term.
5777 for (k = 0; k < NumberOfActivePlanes; ++k) {
5778 TotalPixelBW = TotalPixelBW
5779 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k])
5780 / (HTotal[k] / PixelClock[k]);
// Per-plane DRAM clock change latency margin.
5783 for (k = 0; k < NumberOfActivePlanes; ++k) {
5784 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
// Source lines the line buffer can hide: capped at MaxLineBufferLines, then
// reduced by (taps - 1), for luma and chroma separately.
5786 v->LBLatencyHidingSourceLinesY = dml_min(
5787 (double) MaxLineBufferLines,
5788 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1);
5790 v->LBLatencyHidingSourceLinesC = dml_min(
5791 (double) MaxLineBufferLines,
5792 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1);
// Convert the hidden source lines to a duration: lines / vertical ratio
// multiplied by HTotal / PixelClock.
5794 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]);
5796 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
// With unbounded requests, the luma DET size is increased by the compressed
// buffer size weighted by this plane's luma share of TotalPixelBW.
5798 if (UnboundedRequestEnabled) {
5799 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5800 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW;
// Lines held by the luma DET, floored to a multiple of the swath height, then
// converted to a buffering duration.
5803 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5804 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5805 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
5806 if (BytePerPixelDETC[k] > 0) {
// NOTE(review): this reads v->DETBufferSizeC[k] rather than the
// DETBufferSizeC[] parameter; confirm the two are the same array at the call
// site.
5807 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5808 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5809 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k];
// Large sentinel; presumably the branch taken when the plane has no chroma.
5812 FullDETBufferingTimeC = 999999;
// Luma margin: line buffer hiding plus DET buffering, minus the
// DSTX/DSTY-after-scaler term and both watermarks.
5815 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5816 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
// With more than one active plane, subtract (1 - 1/N) swath heights worth of
// time from the margin.
5818 if (NumberOfActivePlanes > 1) {
5819 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5820 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k];
// Chroma margin mirrors the luma computation; the plane margin is the smaller
// of the two, or the luma margin alone in the alternative assignment.
5823 if (BytePerPixelDETC[k] > 0) {
5824 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5825 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark;
5827 if (NumberOfActivePlanes > 1) {
5828 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5829 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k];
5831 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5833 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
// Writeback planes: the margin is additionally capped by the writeback hiding
// time (halved for dm_444_64) minus the writeback DRAM clock change watermark.
5836 if (WritebackEnable[k] == true) {
5837 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024
5838 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
5839 if (WritebackPixelFormat[k] == dm_444_64) {
5840 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
// NOTE(review): the watermark is read from v->WritebackDRAMClockChangeWatermark,
// not through the *WritebackDRAMClockChangeWatermark parameter written above;
// confirm the caller passes the address of that same field.
5842 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5843 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
// Find the smallest per-plane margin and remember the plane index named by
// BlendingAndTiming[] for the plane that holds it.
5847 v->MinActiveDRAMClockChangeMargin = 999999;
5848 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5849 for (k = 0; k < NumberOfActivePlanes; ++k) {
5850 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5851 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5852 if (BlendingAndTiming[k] == k) {
5853 PlaneWithMinActiveDRAMClockChangeMargin = k;
5855 for (j = 0; j < NumberOfActivePlanes; ++j) {
5856 if (BlendingAndTiming[k] == j) {
5857 PlaneWithMinActiveDRAMClockChangeMargin = j;
5864 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency;
// Second-smallest margin, taken over planes that are neither the plane found
// above nor blended onto it.
5866 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5867 for (k = 0; k < NumberOfActivePlanes; ++k) {
5868 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5869 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5870 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
// Count planes whose BlendingAndTiming entry refers to themselves.
5874 v->TotalNumberOfActiveOTG = 0;
5876 for (k = 0; k < NumberOfActivePlanes; ++k) {
5877 if (BlendingAndTiming[k] == k) {
5878 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
// Classification (both supported outcomes require PrefetchMode == 0):
// positive minimum margin -> vactive; otherwise synchronized vblank, a single
// OTG, or a positive second-minimum margin -> vblank; the remaining
// assignment marks the change as unsupported.
5882 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5883 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5884 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5885 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5886 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5888 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
// Stutter watermarks: the respective exit / enter-plus-exit time plus
// ExtraLatency plus 10 / DCFCLKDeepSleep.
5891 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5892 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5893 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5894 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5896 #ifdef __DML_VBA_DEBUG__
5897 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5898 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5899 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5900 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep
 *
 * Fills v->DCFCLKDeepSleepPerPlane[] for every active plane from the luma and
 * chroma line delivery times, then writes *DCFCLKDeepSleep as the maximum of
 * 8.0, __DML_MIN_DCFCLK_FACTOR__ * total read bandwidth / ReturnBusWidth, and
 * every per-plane value.
 *
 * NOTE(review): k, VRatio, HRatio, DPPCLK and ReturnBusWidth are used below
 * but are not declared in the lines visible here; confirm against the full
 * file.
 */
5904 static void CalculateDCFCLKDeepSleep(
5905 struct display_mode_lib *mode_lib,
5906 unsigned int NumberOfActivePlanes,
5907 int BytePerPixelY[],
5908 int BytePerPixelC[],
5910 double VRatioChroma[],
5911 double SwathWidthY[],
5912 double SwathWidthC[],
5913 unsigned int DPPPerPlane[],
5915 double HRatioChroma[],
5916 double PixelClock[],
5917 double PSCL_THROUGHPUT[],
5918 double PSCL_THROUGHPUT_CHROMA[],
5920 double ReadBandwidthLuma[],
5921 double ReadBandwidthChroma[],
5923 double *DCFCLKDeepSleep)
5925 struct vba_vars_st *v = &mode_lib->vba;
5926 double DisplayPipeLineDeliveryTimeLuma;
5927 double DisplayPipeLineDeliveryTimeChroma;
5928 double ReadBandwidth = 0.0;
5931 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Luma line delivery time: pixel-clock based when VRatio <= 1; the second
// assignment is the alternative based on PSCL_THROUGHPUT and DPPCLK.
5933 if (VRatio[k] <= 1) {
5934 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5936 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
// Chroma line delivery time: zero when the plane has no chroma bytes,
// otherwise the same two alternatives as luma using the chroma inputs.
5938 if (BytePerPixelC[k] == 0) {
5939 DisplayPipeLineDeliveryTimeChroma = 0;
5941 if (VRatioChroma[k] <= 1) {
5942 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5944 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// With chroma: the larger of the luma and chroma rates, each divided by 32.
// The luma-only assignment further down divides by 64 instead.
5948 if (BytePerPixelC[k] > 0) {
5949 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5950 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5952 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
// Never let the per-plane value drop below PixelClock / 16.
5954 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
// Total read bandwidth is the sum of luma and chroma bandwidth of all planes.
5958 for (k = 0; k < NumberOfActivePlanes; ++k) {
5959 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
// Start from the bandwidth-derived value with a lower bound of 8.0, then
// raise it to the largest per-plane requirement.
5962 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5964 for (k = 0; k < NumberOfActivePlanes; ++k) {
5965 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor
 *
 * Computes the urgent burst factors for the cursor, luma and chroma buffers.
 * For each buffer the visible code converts the buffer size to a duration and
 * then either
 *   - sets *NotEnoughUrgentLatencyHiding to 1 and the factor to 0 when that
 *     duration does not exceed UrgentLatency, or
 *   - sets the factor to duration / (duration - UrgentLatency).
 * *NotEnoughUrgentLatencyHiding is cleared to 0 on entry.
 *
 * NOTE(review): LineTime and VRatio are used below but are not declared in
 * the lines visible here, and the conditions selecting the "factor = 1"
 * assignments are not visible either; confirm against the full file.
 */
5969 static void CalculateUrgentBurstFactor(
5970 int swath_width_luma_ub,
5971 int swath_width_chroma_ub,
5972 unsigned int SwathHeightY,
5973 unsigned int SwathHeightC,
5975 double UrgentLatency,
5976 double CursorBufferSize,
5977 unsigned int CursorWidth,
5978 unsigned int CursorBPP,
5981 double BytePerPixelInDETY,
5982 double BytePerPixelInDETC,
5983 double DETBufferSizeY,
5984 double DETBufferSizeC,
5985 double *UrgentBurstFactorCursor,
5986 double *UrgentBurstFactorLuma,
5987 double *UrgentBurstFactorChroma,
5988 bool *NotEnoughUrgentLatencyHiding)
5990 double LinesInDETLuma;
5991 double LinesInDETChroma;
5992 unsigned int LinesInCursorBuffer;
5993 double CursorBufferSizeInTime;
5994 double DETBufferSizeInTimeLuma;
5995 double DETBufferSizeInTimeChroma;
5997 *NotEnoughUrgentLatencyHiding = 0;
// Cursor: only evaluated when a cursor width is given.
5999 if (CursorWidth > 0) {
// Lines that fit in the cursor buffer, rounded down to a power of two:
// 1 << floor(log2(buffer bytes / bytes per cursor line)).
6000 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
6002 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
6003 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
6004 *NotEnoughUrgentLatencyHiding = 1;
6005 *UrgentBurstFactorCursor = 0;
6007 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
6010 *UrgentBurstFactorCursor = 1;
// Luma: lines held in the DET, floored to a multiple of SwathHeightY, then
// converted to a duration.
6014 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
6016 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
6017 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
6018 *NotEnoughUrgentLatencyHiding = 1;
6019 *UrgentBurstFactorLuma = 0;
6021 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
6024 *UrgentBurstFactorLuma = 1;
// Chroma: only evaluated when the plane has chroma bytes in the DET.
6027 if (BytePerPixelInDETC > 0) {
6028 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
// NOTE(review): the chroma duration is divided by VRatio, the same ratio the
// luma path uses; confirm whether a chroma-specific ratio was intended.
6030 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
6031 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
6032 *NotEnoughUrgentLatencyHiding = 1;
6033 *UrgentBurstFactorChroma = 0;
6035 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
6038 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes
 *
 * For every active plane, fills the output arrays with
 *   - line delivery times for luma/chroma, in the nominal and prefetch cases
 *     (DisplayPipeLineDeliveryTime*),
 *   - request delivery times, i.e. the line delivery time divided by the
 *     number of requests per swath (DisplayPipeRequestDeliveryTime*),
 *   - cursor request delivery times (CursorRequestDeliveryTime*), based on
 *     the first cursor entry ([k][0]) only.
 *
 * NOTE(review): k, VRatio, HRatio and DPPCLK are used below but are not
 * declared in the lines visible here; confirm against the full file.
 */
6043 static void CalculatePixelDeliveryTimes(
6044 unsigned int NumberOfActivePlanes,
6046 double VRatioChroma[],
6047 double VRatioPrefetchY[],
6048 double VRatioPrefetchC[],
6049 unsigned int swath_width_luma_ub[],
6050 unsigned int swath_width_chroma_ub[],
6051 unsigned int DPPPerPlane[],
6053 double HRatioChroma[],
6054 double PixelClock[],
6055 double PSCL_THROUGHPUT[],
6056 double PSCL_THROUGHPUT_CHROMA[],
6058 int BytePerPixelC[],
6059 enum scan_direction_class SourceScan[],
6060 unsigned int NumberOfCursors[],
6061 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6062 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6063 unsigned int BlockWidth256BytesY[],
6064 unsigned int BlockHeight256BytesY[],
6065 unsigned int BlockWidth256BytesC[],
6066 unsigned int BlockHeight256BytesC[],
6067 double DisplayPipeLineDeliveryTimeLuma[],
6068 double DisplayPipeLineDeliveryTimeChroma[],
6069 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6070 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6071 double DisplayPipeRequestDeliveryTimeLuma[],
6072 double DisplayPipeRequestDeliveryTimeChroma[],
6073 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6074 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6075 double CursorRequestDeliveryTime[],
6076 double CursorRequestDeliveryTimePrefetch[])
6078 double req_per_swath_ub;
// Pass 1: line delivery times. Each quantity has two alternatives: a
// pixel-clock based form when the relevant vertical ratio is <= 1, and a
// PSCL throughput / DPPCLK based form otherwise.
6081 for (k = 0; k < NumberOfActivePlanes; ++k) {
6082 if (VRatio[k] <= 1) {
6083 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6085 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
// Planes without chroma bytes get a chroma delivery time of 0.
6088 if (BytePerPixelC[k] == 0) {
6089 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6091 if (VRatioChroma[k] <= 1) {
6092 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6094 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Prefetch variants use the same formulas but select on the prefetch
// vertical ratios.
6098 if (VRatioPrefetchY[k] <= 1) {
6099 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6101 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6104 if (BytePerPixelC[k] == 0) {
6105 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6107 if (VRatioPrefetchC[k] <= 1) {
6108 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6110 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Pass 2: request delivery times. Requests per swath is the swath width
// divided by the 256-byte block width, or by the block height for dm_vert
// scans. Both operands are unsigned int, so the quotient is truncated before
// it is stored in the double.
6115 for (k = 0; k < NumberOfActivePlanes; ++k) {
6116 if (SourceScan[k] != dm_vert) {
6117 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6119 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6121 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6122 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6123 if (BytePerPixelC[k] == 0) {
6124 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6125 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6127 if (SourceScan[k] != dm_vert) {
6128 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6130 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6132 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6133 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6135 #ifdef __DML_VBA_DEBUG__
6136 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6137 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6138 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6139 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6140 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6141 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6142 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6143 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6144 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6145 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6146 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6147 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
// Pass 3: cursor request delivery times, using cursor entry 0 of each plane.
6151 for (k = 0; k < NumberOfActivePlanes; ++k) {
6152 int cursor_req_per_width;
// NOTE(review): CursorWidth and CursorBPP are unsigned int and 256 and 8 are
// integer literals, so the argument is computed with truncating integer
// division before dml_ceil() is called. Any width * bpp product below 2048
// evaluates to 0 here, and cursor_req_per_width is then used as a divisor
// below. Confirm whether a floating-point division was intended.
6153 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6154 if (NumberOfCursors[k] > 0) {
6155 if (VRatio[k] <= 1) {
6156 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6158 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6160 if (VRatioPrefetchY[k] <= 1) {
6161 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6163 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
// Zero values; presumably the branch for planes without a cursor.
6166 CursorRequestDeliveryTime[k] = 0;
6167 CursorRequestDeliveryTimePrefetch[k] = 0;
6169 #ifdef __DML_VBA_DEBUG__
6170 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6171 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6172 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes
 *
 * For every active plane, fills the output arrays in three passes:
 *   1. DST_Y_PER_PTE_ROW_NOM_L/C and DST_Y_PER_META_ROW_NOM_L/C: row height
 *      divided by the vertical ratio (chroma entries are 0 when
 *      BytePerPixelC is 0).
 *   2. TimePerMetaChunk* and TimePerChromaMetaChunk* for the nominal, vblank
 *      and flip cases, when DCCEnable[k] is true.
 *   3. time_per_pte_group_* for luma and chroma in the nominal, vblank and
 *      flip cases, when GPUVMEnable is true.
 *
 * NOTE(review): k, GPUVMEnable, MetaChunkSize, VRatio, HTotal and DCCEnable
 * are used below but are not declared in the lines visible here; confirm
 * against the full file.
 */
6177 static void CalculateMetaAndPTETimes(
6178 int NumberOfActivePlanes,
6181 int MinMetaChunkSizeBytes,
6184 double VRatioChroma[],
6185 double DestinationLinesToRequestRowInVBlank[],
6186 double DestinationLinesToRequestRowInImmediateFlip[],
6188 double PixelClock[],
6189 int BytePerPixelY[],
6190 int BytePerPixelC[],
6191 enum scan_direction_class SourceScan[],
6192 int dpte_row_height[],
6193 int dpte_row_height_chroma[],
6194 int meta_row_width[],
6195 int meta_row_width_chroma[],
6196 int meta_row_height[],
6197 int meta_row_height_chroma[],
6198 int meta_req_width[],
6199 int meta_req_width_chroma[],
6200 int meta_req_height[],
6201 int meta_req_height_chroma[],
6202 int dpte_group_bytes[],
6203 int PTERequestSizeY[],
6204 int PTERequestSizeC[],
6205 int PixelPTEReqWidthY[],
6206 int PixelPTEReqHeightY[],
6207 int PixelPTEReqWidthC[],
6208 int PixelPTEReqHeightC[],
6209 int dpte_row_width_luma_ub[],
6210 int dpte_row_width_chroma_ub[],
6211 double DST_Y_PER_PTE_ROW_NOM_L[],
6212 double DST_Y_PER_PTE_ROW_NOM_C[],
6213 double DST_Y_PER_META_ROW_NOM_L[],
6214 double DST_Y_PER_META_ROW_NOM_C[],
6215 double TimePerMetaChunkNominal[],
6216 double TimePerChromaMetaChunkNominal[],
6217 double TimePerMetaChunkVBlank[],
6218 double TimePerChromaMetaChunkVBlank[],
6219 double TimePerMetaChunkFlip[],
6220 double TimePerChromaMetaChunkFlip[],
6221 double time_per_pte_group_nom_luma[],
6222 double time_per_pte_group_vblank_luma[],
6223 double time_per_pte_group_flip_luma[],
6224 double time_per_pte_group_nom_chroma[],
6225 double time_per_pte_group_vblank_chroma[],
6226 double time_per_pte_group_flip_chroma[])
6228 unsigned int meta_chunk_width;
6229 unsigned int min_meta_chunk_width;
6230 unsigned int meta_chunk_per_row_int;
6231 unsigned int meta_row_remainder;
6232 unsigned int meta_chunk_threshold;
6233 unsigned int meta_chunks_per_row_ub;
6234 unsigned int meta_chunk_width_chroma;
6235 unsigned int min_meta_chunk_width_chroma;
6236 unsigned int meta_chunk_per_row_int_chroma;
6237 unsigned int meta_row_remainder_chroma;
6238 unsigned int meta_chunk_threshold_chroma;
6239 unsigned int meta_chunks_per_row_ub_chroma;
6240 unsigned int dpte_group_width_luma;
6241 unsigned int dpte_groups_per_row_luma_ub;
6242 unsigned int dpte_group_width_chroma;
6243 unsigned int dpte_groups_per_row_chroma_ub;
// Pass 1: nominal destination lines per PTE row and per meta row.
6246 for (k = 0; k < NumberOfActivePlanes; ++k) {
6247 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6248 if (BytePerPixelC[k] == 0) {
6249 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6251 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6253 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6254 if (BytePerPixelC[k] == 0) {
6255 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6257 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
// Pass 2: time per meta chunk.
6261 for (k = 0; k < NumberOfActivePlanes; ++k) {
6262 if (DCCEnable[k] == true) {
// Chunk widths derived from the chunk size in bytes, the bytes per pixel and
// the meta row height; whole chunks per row and the leftover width follow by
// integer division and modulo.
6263 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6264 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6265 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6266 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
// Threshold uses the meta request width, or the request height for dm_vert
// scans.
6267 if (SourceScan[k] != dm_vert) {
6268 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6270 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
// Upper bound on chunks per row: one extra chunk when the remainder is within
// the threshold; the "+ 2" assignment is the alternative.
6272 if (meta_row_remainder <= meta_chunk_threshold) {
6273 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6275 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
// Divide the available row time (nominal, vblank, flip) by the chunk count.
6277 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6278 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6279 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6280 if (BytePerPixelC[k] == 0) {
6281 TimePerChromaMetaChunkNominal[k] = 0;
6282 TimePerChromaMetaChunkVBlank[k] = 0;
6283 TimePerChromaMetaChunkFlip[k] = 0;
// Chroma repeats the luma computation with the chroma inputs.
6285 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6286 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
// The quotient is computed in double and truncated on assignment to the
// unsigned int.
6287 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6288 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6289 if (SourceScan[k] != dm_vert) {
6290 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6292 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6294 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6295 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6297 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6299 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6300 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6301 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
// All meta chunk times are zeroed; presumably the branch for planes without
// DCC.
6304 TimePerMetaChunkNominal[k] = 0;
6305 TimePerMetaChunkVBlank[k] = 0;
6306 TimePerMetaChunkFlip[k] = 0;
6307 TimePerChromaMetaChunkNominal[k] = 0;
6308 TimePerChromaMetaChunkVBlank[k] = 0;
6309 TimePerChromaMetaChunkFlip[k] = 0;
// Pass 3: time per PTE group.
6313 for (k = 0; k < NumberOfActivePlanes; ++k) {
6314 if (GPUVMEnable == true) {
// Group width: requests per group (group bytes / request size) multiplied by
// the PTE request width, or by the request height for dm_vert scans.
6315 if (SourceScan[k] != dm_vert) {
6316 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6318 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
// Groups per row, rounded up, then the row time is split evenly across them.
6320 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6321 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6322 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6323 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6324 if (BytePerPixelC[k] == 0) {
6325 time_per_pte_group_nom_chroma[k] = 0;
6326 time_per_pte_group_vblank_chroma[k] = 0;
6327 time_per_pte_group_flip_chroma[k] = 0;
6329 if (SourceScan[k] != dm_vert) {
6330 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6332 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6334 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6335 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6336 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6337 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
// All PTE group times are zeroed; presumably the branch taken when GPUVM is
// disabled.
6340 time_per_pte_group_nom_luma[k] = 0;
6341 time_per_pte_group_vblank_luma[k] = 0;
6342 time_per_pte_group_flip_luma[k] = 0;
6343 time_per_pte_group_nom_chroma[k] = 0;
6344 time_per_pte_group_vblank_chroma[k] = 0;
6345 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes
 *
 * For every active plane, fills TimePerVMGroupVBlank/Flip and
 * TimePerVMRequestVBlank/Flip. When GPUVMEnable is true and either
 * DCCEnable[k] is true or GPUVMMaxPageTableLevels > 1, the available vblank
 * and immediate-flip time is divided by the number of VM groups and by the
 * number of 64-byte VM requests; with more than two page table levels the
 * results are halved. The four outputs are also assigned 0 in the visible
 * code, presumably when that condition does not hold.
 *
 * NOTE(review): k, GPUVMEnable and DCCEnable are used below but are not
 * declared in the lines visible here; confirm against the full file.
 */
6350 static void CalculateVMGroupAndRequestTimes(
6351 unsigned int NumberOfActivePlanes,
6353 unsigned int GPUVMMaxPageTableLevels,
6354 unsigned int HTotal[],
6355 int BytePerPixelC[],
6356 double DestinationLinesToRequestVMInVBlank[],
6357 double DestinationLinesToRequestVMInImmediateFlip[],
6359 double PixelClock[],
6360 int dpte_row_width_luma_ub[],
6361 int dpte_row_width_chroma_ub[],
6362 int vm_group_bytes[],
6363 unsigned int dpde0_bytes_per_frame_ub_l[],
6364 unsigned int dpde0_bytes_per_frame_ub_c[],
6365 int meta_pte_bytes_per_frame_ub_l[],
6366 int meta_pte_bytes_per_frame_ub_c[],
6367 double TimePerVMGroupVBlank[],
6368 double TimePerVMGroupFlip[],
6369 double TimePerVMRequestVBlank[],
6370 double TimePerVMRequestFlip[])
6372 int num_group_per_lower_vm_stage;
6373 int num_req_per_lower_vm_stage;
6376 for (k = 0; k < NumberOfActivePlanes; ++k) {
6377 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
// Group count: bytes per frame divided by vm_group_bytes, rounded up per
// surface. Without DCC only the dpde0 bytes are counted; chroma is added when
// the plane has chroma bytes.
6378 if (DCCEnable[k] == false) {
6379 if (BytePerPixelC[k] > 0) {
6380 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6381 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6383 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// With a single page table level only the meta PTE bytes are counted.
6386 if (GPUVMMaxPageTableLevels == 1) {
6387 if (BytePerPixelC[k] > 0) {
6388 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6389 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6391 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Remaining alternative: dpde0 and meta PTE groups are both counted, plus a
// constant of 2 (with chroma) or 1 (luma only).
6394 if (BytePerPixelC[k] > 0) {
6395 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6396 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6397 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6398 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6400 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6401 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Request count follows the same case split, with each byte count divided by
// 64 using integer division.
6406 if (DCCEnable[k] == false) {
6407 if (BytePerPixelC[k] > 0) {
6408 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6410 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6413 if (GPUVMMaxPageTableLevels == 1) {
6414 if (BytePerPixelC[k] > 0) {
6415 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6417 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6420 if (BytePerPixelC[k] > 0) {
6421 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6422 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6424 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
// Split the vblank / immediate-flip VM request time evenly across the groups
// and across the requests.
6429 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6430 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6431 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6432 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
// More than two page table levels: every time budget is halved.
6434 if (GPUVMMaxPageTableLevels > 2) {
6435 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6436 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6437 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6438 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
// Zero values; presumably the branch taken when the outer condition is false.
6442 TimePerVMGroupVBlank[k] = 0;
6443 TimePerVMGroupFlip[k] = 0;
6444 TimePerVMRequestVBlank[k] = 0;
6445 TimePerVMRequestFlip[k] = 0;
/*
 * CalculateStutterEfficiency - derive the stutter period, the number of
 * stutter bursts per frame and the stutter efficiencies (regular and Z8).
 *
 * Results are written through the trailing pointer parameters:
 *   *StutterPeriod                          smallest per-plane DETBufferingTimeY
 *   *StutterEfficiencyNotIncludingVBlank    dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100
 *   *Z8StutterEfficiencyNotIncludingVBlank  same expression with SRExitZ8Time
 *   *NumberOfStutterBurstsPerFrame          dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1), or 0
 *   *Z8NumberOfStutterBurstsPerFrame        same, gated on the Z8 efficiency being > 0
 *   *StutterEfficiency, *Z8StutterEfficiency
 *
 * Flow of the visible code:
 *   1. Sum per-plane bandwidths (compressed, zero-size request, row).
 *   2. Derive average DCC rates and EffectiveCompressedBufferSize.
 *   3. Find the plane with the smallest DETBufferingTimeY and latch its
 *      timing/size values into the *CriticalPlane locals.
 *   4. Compute StutterBurstTime, then the efficiencies and burst counts.
 *
 * NOTE(review): this listing does not show every source line (the embedded
 * numbering has gaps). Identifiers used below without a visible declaration
 * (k, LinesInDETY, DCFCLK, ReturnBW, SRExitTime, Interlace, DPPPerPlane,
 * DCCEnable, SwathHeightY, SwathHeightC, HTotal, VTotal, VActive, VRatio)
 * are presumably declared on omitted lines - confirm against the full file.
 * ConfigReturnBufferSizeInKByte is not referenced in the visible body.
 */
6450 static void CalculateStutterEfficiency(
6451 struct display_mode_lib *mode_lib,
6452 int CompressedBufferSizeInkByte,
6453 bool UnboundedRequestEnabled,
6454 int ConfigReturnBufferSizeInKByte,
6455 int MetaFIFOSizeInKEntries,
6456 int ZeroSizeBufferEntries,
6457 int NumberOfActivePlanes,
6458 int ROBBufferSizeInKByte,
6459 double TotalDataReadBandwidth,
6462 double COMPBUF_RESERVED_SPACE_64B,
6463 double COMPBUF_RESERVED_SPACE_ZS,
6465 double SRExitZ8Time,
6466 bool SynchronizedVBlank,
6467 double Z8StutterEnterPlusExitWatermark,
6468 double StutterEnterPlusExitWatermark,
6469 bool ProgressiveToInterlaceUnitInOPP,
6471 double MinTTUVBlank[],
6473 unsigned int DETBufferSizeY[],
6474 int BytePerPixelY[],
6475 double BytePerPixelDETY[],
6476 double SwathWidthY[],
6479 double NetDCCRateLuma[],
6480 double NetDCCRateChroma[],
6481 double DCCFractionOfZeroSizeRequestsLuma[],
6482 double DCCFractionOfZeroSizeRequestsChroma[],
6485 double PixelClock[],
6487 enum scan_direction_class SourceScan[],
6488 int BlockHeight256BytesY[],
6489 int BlockWidth256BytesY[],
6490 int BlockHeight256BytesC[],
6491 int BlockWidth256BytesC[],
6492 int DCCYMaxUncompressedBlock[],
6493 int DCCCMaxUncompressedBlock[],
6496 bool WritebackEnable[],
6497 double ReadBandwidthPlaneLuma[],
6498 double ReadBandwidthPlaneChroma[],
6499 double meta_row_bw[],
6500 double dpte_row_bw[],
6501 double *StutterEfficiencyNotIncludingVBlank,
6502 double *StutterEfficiency,
6503 int *NumberOfStutterBurstsPerFrame,
6504 double *Z8StutterEfficiencyNotIncludingVBlank,
6505 double *Z8StutterEfficiency,
6506 int *Z8NumberOfStutterBurstsPerFrame,
6507 double *StutterPeriod)
// v is only used below to read v->BlendingAndTiming[] when counting OTGs.
6509 struct vba_vars_st *v = &mode_lib->vba;
6511 double DETBufferingTimeY;
// The *CriticalPlane locals hold values latched from the plane that has the
// smallest DETBufferingTimeY (see the second per-plane loop).
6512 double SwathWidthYCriticalPlane = 0;
6513 double VActiveTimeCriticalPlane = 0;
6514 double FrameTimeCriticalPlane = 0;
6515 int BytePerPixelYCriticalPlane = 0;
6516 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6517 double MinTTUVBlankCriticalPlane = 0;
6518 double TotalCompressedReadBandwidth;
6519 double TotalRowReadBandwidth;
6520 double AverageDCCCompressionRate;
6521 double EffectiveCompressedBufferSize;
6522 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6523 double StutterBurstTime;
6524 int TotalActiveWriteback;
6526 double LinesInDETYRoundedDownToSwath;
6527 double MaximumEffectiveCompressionLuma;
6528 double MaximumEffectiveCompressionChroma;
6529 double TotalZeroSizeRequestReadBandwidth;
6530 double TotalZeroSizeCompressedReadBandwidth;
6531 double AverageDCCZeroSizeFraction;
6532 double AverageZeroSizeCompressionRate;
6533 int TotalNumberOfActiveOTG = 0;
6534 double LastStutterPeriod = 0.0;
6535 double LastZ8StutterPeriod = 0.0;
// Step 1: reset the accumulators, then sum the per-plane bandwidths.
6538 TotalZeroSizeRequestReadBandwidth = 0;
6539 TotalZeroSizeCompressedReadBandwidth = 0;
6540 TotalRowReadBandwidth = 0;
6541 TotalCompressedReadBandwidth = 0;
6543 for (k = 0; k < NumberOfActivePlanes; ++k) {
6544 if (DCCEnable[k] == true) {
// Luma: the compression cap is 2 when the 256-byte block dimension that is
// compared against the swath height (width for dm_vert, height otherwise)
// exceeds SwathHeightY, or when DCCYMaxUncompressedBlock < 256. The
// assignment of 4 is presumably the else arm (branch line not in this view).
6545 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6546 || DCCYMaxUncompressedBlock[k] < 256) {
6547 MaximumEffectiveCompressionLuma = 2;
6549 MaximumEffectiveCompressionLuma = 4;
// Compressed bandwidth uses the smaller of the net DCC rate and the cap.
6551 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6552 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6553 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6554 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
// Chroma: same accounting, only when the plane has chroma read bandwidth.
6555 if (ReadBandwidthPlaneChroma[k] > 0) {
6556 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6557 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6558 MaximumEffectiveCompressionChroma = 2;
6560 MaximumEffectiveCompressionChroma = 4;
6562 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6563 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6564 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6565 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6566 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
// Luma + chroma bandwidth added uncompressed; presumably the DCC-disabled
// arm (its branch line is not in this view).
6569 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6571 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
// Step 2: averages over all planes.
6574 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6575 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
// NOTE(review): MaximumEffectiveCompressionLuma/Chroma are only assigned
// inside the DCCEnable[k] branch above but are printed here regardless -
// confirm they cannot be read uninitialized in debug builds.
6577 #ifdef __DML_VBA_DEBUG__
6578 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6579 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6580 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6581 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6582 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6583 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6584 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6585 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
// EffectiveCompressedBufferSize has three forms, selected by
// AverageDCCZeroSizeFraction: exactly 1, greater than 0, and a third form
// that is presumably the remaining case (its branch line is not in this view).
6588 if (AverageDCCZeroSizeFraction == 1) {
6589 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6590 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6591 } else if (AverageDCCZeroSizeFraction > 0) {
6592 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6593 EffectiveCompressedBufferSize = dml_min(
6594 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6595 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6596 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6597 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6598 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6600 "DML::%s: min 2 = %f\n",
6602 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6603 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6604 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6606 EffectiveCompressedBufferSize = dml_min(
6607 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6608 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6609 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6610 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
// NOTE(review): AverageZeroSizeCompressionRate is assigned only in the two
// branches that test AverageDCCZeroSizeFraction above; the print below may
// read it uninitialized on the third path - confirm.
6613 #ifdef __DML_VBA_DEBUG__
6614 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6615 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6616 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
// Step 3: per plane, compute how many luma lines the DET buffer holds. With
// UnboundedRequestEnabled the plane's bandwidth share of
// EffectiveCompressedBufferSize is added to the buffer size first. The line
// count is then passed through dml_floor(..., SwathHeightY[k]) (presumably
// rounding down to a whole number of swaths) and converted to a time.
6620 for (k = 0; k < NumberOfActivePlanes; ++k) {
6621 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6622 / BytePerPixelDETY[k] / SwathWidthY[k];
6623 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6624 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
// NOTE(review): DETBufferSizeY is declared `unsigned int []` in the parameter
// list but is printed with %f in the first debug print below - confirm the
// intended format specifier.
6625 #ifdef __DML_VBA_DEBUG__
6626 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6627 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6628 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6629 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6630 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6631 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6632 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6633 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6634 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6635 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6636 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6637 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
// Plane 0 seeds *StutterPeriod; afterwards a plane replaces it only when its
// DETBufferingTimeY is strictly smaller. The winning plane's values are
// latched as the "critical plane".
6640 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
// Interlaced timing not converted in OPP: VTotal and VActive are halved
// (and floored) in the two time computations below.
6641 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6643 *StutterPeriod = DETBufferingTimeY;
6644 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6645 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6646 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6647 SwathWidthYCriticalPlane = SwathWidthY[k];
// Lines still needed to complete the swath that was rounded off above.
6648 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6649 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6651 #ifdef __DML_VBA_DEBUG__
6652 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6653 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6654 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6655 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6656 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6657 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6658 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
// Step 4: split the data read during one stutter period into the part that
// fits in EffectiveCompressedBufferSize and the remainder.
6663 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6664 #ifdef __DML_VBA_DEBUG__
6665 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6666 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6667 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6668 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6669 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6670 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6671 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6672 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6673 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6674 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
// Burst time is the sum of three terms (printed below as Part 1..3):
//   1) the part that fits, divided by the average DCC rate and ReturnBW
//   2) the remainder, divided by DCFCLK * 64
//   3) row data for the period, divided by ReturnBW
6677 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6678 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6679 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6680 #ifdef __DML_VBA_DEBUG__
6681 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6682 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6683 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6684 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6685 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
// The burst can be no shorter than the time to fetch the critical plane's
// residual swath lines at ReturnBW.
6687 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6690 "DML::%s: Time to finish residue swath=%f\n",
6692 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
// Count planes with writeback enabled; the efficiencies below are computed
// only when the count is zero.
6694 TotalActiveWriteback = 0;
6695 for (k = 0; k < NumberOfActivePlanes; ++k) {
6696 if (WritebackEnable[k]) {
6697 TotalActiveWriteback = TotalActiveWriteback + 1;
6701 if (TotalActiveWriteback == 0) {
6702 #ifdef __DML_VBA_DEBUG__
6703 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6704 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6705 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6706 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
// Efficiency in percent, floored at 0 via dml_max; a burst count is reported
// only when the matching efficiency is positive.
6708 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6709 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6710 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6711 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
// All four outputs zeroed; presumably the arm taken when any writeback is
// active (the branch line is not in this view).
6713 *StutterEfficiencyNotIncludingVBlank = 0.;
6714 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6715 *NumberOfStutterBurstsPerFrame = 0;
6716 *Z8NumberOfStutterBurstsPerFrame = 0;
6718 #ifdef __DML_VBA_DEBUG__
6719 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6720 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6721 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6722 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6723 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
// A plane whose BlendingAndTiming entry equals its own index is counted as
// one active OTG.
6726 for (k = 0; k < NumberOfActivePlanes; ++k) {
6727 if (v->BlendingAndTiming[k] == k) {
6728 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
// Efficiency including VBlank: when vblanks are synchronized (or there is a
// single OTG) and the last stutter period plus the critical plane's
// MinTTUVBlank exceeds the enter+exit watermark, the frame-level formula is
// used; the following assignment from the NotIncludingVBlank value is
// presumably the else arm, and the assignment of 0 presumably covers a
// non-positive NotIncludingVBlank efficiency (branch lines not in this view).
6732 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6733 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6735 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6736 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6737 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6739 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6742 *StutterEfficiency = 0;
// NOTE(review): the Z8 computation below reads *NumberOfStutterBurstsPerFrame
// rather than *Z8NumberOfStutterBurstsPerFrame. The two are computed from the
// same expression above but are gated on different efficiencies, so they can
// differ - confirm this is intended.
6745 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6746 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6747 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6748 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6749 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6751 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6754 *Z8StutterEfficiency = 0.;
6757 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6758 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6759 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6760 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6761 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6762 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6763 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6764 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/*
 * CalculateSwathAndDETConfiguration - choose per-plane swath heights, split
 * the DET buffer between luma and chroma, and report whether each plane's
 * viewport is supported.
 *
 * Per-plane outputs written in the visible code: SwathHeightY[],
 * SwathHeightC[], DETBufferSizeY[], DETBufferSizeC[],
 * ViewportSizeSupportPerPlane[]. *ViewportSizeSupport starts as true and is
 * cleared if any plane fails the check at the end of the loop.
 * SwathWidth[], SwathWidthChroma[], swath_width_luma_ub[] and
 * swath_width_chroma_ub[] are read here; they are presumably filled by the
 * CalculateSwathWidth() call (its full argument list is not in this view).
 *
 * NOTE(review): this listing does not show every source line (the embedded
 * numbering has gaps). k, SwathHeightY and SwathHeightC have no visible
 * declaration and are presumably declared on omitted lines.
 */
6767 static void CalculateSwathAndDETConfiguration(
6768 bool ForceSingleDPP,
6769 int NumberOfActivePlanes,
6770 unsigned int DETBufferSizeInKByte,
6771 double MaximumSwathWidthLuma[],
6772 double MaximumSwathWidthChroma[],
6773 enum scan_direction_class SourceScan[],
6774 enum source_format_class SourcePixelFormat[],
6775 enum dm_swizzle_mode SurfaceTiling[],
6776 int ViewportWidth[],
6777 int ViewportHeight[],
6778 int SurfaceWidthY[],
6779 int SurfaceWidthC[],
6780 int SurfaceHeightY[],
6781 int SurfaceHeightC[],
6782 int Read256BytesBlockHeightY[],
6783 int Read256BytesBlockHeightC[],
6784 int Read256BytesBlockWidthY[],
6785 int Read256BytesBlockWidthC[],
6786 enum odm_combine_mode ODMCombineEnabled[],
6787 int BlendingAndTiming[],
6790 double BytePerPixDETY[],
6791 double BytePerPixDETC[],
6794 double HRatioChroma[],
6796 int swath_width_luma_ub[],
6797 int swath_width_chroma_ub[],
6798 double SwathWidth[],
6799 double SwathWidthChroma[],
6802 unsigned int DETBufferSizeY[],
6803 unsigned int DETBufferSizeC[],
6804 bool ViewportSizeSupportPerPlane[],
6805 bool *ViewportSizeSupport)
// Scratch arrays handed to CalculateSwathWidth() below.
6807 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6808 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6809 int MinimumSwathHeightY;
6810 int MinimumSwathHeightC;
6811 int RoundedUpMaxSwathSizeBytesY;
6812 int RoundedUpMaxSwathSizeBytesC;
6813 int RoundedUpMinSwathSizeBytesY;
6814 int RoundedUpMinSwathSizeBytesC;
6815 int RoundedUpSwathSizeBytesY;
6816 int RoundedUpSwathSizeBytesC;
6817 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6818 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6821 CalculateSwathWidth(
6823 NumberOfActivePlanes,
6835 Read256BytesBlockHeightY,
6836 Read256BytesBlockHeightC,
6837 Read256BytesBlockWidthY,
6838 Read256BytesBlockWidthC,
6843 SwathWidthSingleDPP,
6844 SwathWidthSingleDPPChroma,
6847 MaximumSwathHeightY,
6848 MaximumSwathHeightC,
6849 swath_width_luma_ub,
6850 swath_width_chroma_ub);
6852 *ViewportSizeSupport = true;
6853 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Minimum swath heights: either the full maximum or half of it, chosen by
// pixel format, tiling mode and scan direction.
6854 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6855 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6856 if (SurfaceTiling[k] == dm_sw_linear
6857 || (SourcePixelFormat[k] == dm_444_64
6858 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6859 && SourceScan[k] != dm_vert)) {
6860 MinimumSwathHeightY = MaximumSwathHeightY[k];
// NOTE(review): dm_444_8 is not among the formats tested by the format
// condition above; if this else-if is nested under that condition it can
// never be taken - confirm against the full source.
6861 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6862 MinimumSwathHeightY = MaximumSwathHeightY[k];
6864 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6866 MinimumSwathHeightC = MaximumSwathHeightC[k];
// Second group of cases; presumably the arm for formats not listed in the
// first condition (the branch line is not in this view).
6868 if (SurfaceTiling[k] == dm_sw_linear) {
6869 MinimumSwathHeightY = MaximumSwathHeightY[k];
6870 MinimumSwathHeightC = MaximumSwathHeightC[k];
6871 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6872 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6873 MinimumSwathHeightC = MaximumSwathHeightC[k];
6874 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6875 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6876 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6877 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6878 MinimumSwathHeightY = MaximumSwathHeightY[k];
6879 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6881 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6882 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
// Swath sizes in bytes for the maximum and minimum heights. The product
// involves the double BytePerPixDETY/C value and is stored into an int.
// For dm_420_10 the sizes are additionally passed through dml_ceil(..., 256).
6886 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6887 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6888 if (SourcePixelFormat[k] == dm_420_10) {
6889 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6890 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6892 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6893 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6894 if (SourcePixelFormat[k] == dm_420_10) {
6895 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6896 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
// Pick the tallest swaths whose combined Y + C bytes fit in half of the DET
// (DETBufferSizeInKByte * 1024 / 2):
//   - both maximum, if they fit;
//   - else minimum Y with maximum C, when max Y >= 1.5 * max C and that fits;
//   - else maximum Y with minimum C, when max Y < 1.5 * max C and that fits;
//   - the last four assignments use both minimums (presumably the final
//     else arm; the branch line is not in this view).
6899 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6900 SwathHeightY[k] = MaximumSwathHeightY[k];
6901 SwathHeightC[k] = MaximumSwathHeightC[k];
6902 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6903 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6904 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6905 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6906 SwathHeightY[k] = MinimumSwathHeightY;
6907 SwathHeightC[k] = MaximumSwathHeightC[k];
6908 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6909 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6910 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6911 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6912 SwathHeightY[k] = MaximumSwathHeightY[k];
6913 SwathHeightC[k] = MinimumSwathHeightC;
6914 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6915 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6917 SwathHeightY[k] = MinimumSwathHeightY;
6918 SwathHeightC[k] = MinimumSwathHeightC;
6919 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6920 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
// DET split between luma and chroma uses the DET size after
// dml_ceil(DETBufferSizeInKByte, 64):
//   - no chroma swath (SwathHeightC == 0): all of it goes to Y;
//   - Y swath bytes <= 1.5 * C swath bytes: half each;
//   - the last two assignments give Y two thirds (through
//     dml_floor(..., 1024)) and C one third; presumably the else arm.
// NOTE(review): the fit checks above use the raw DETBufferSizeInKByte while
// the split and the support check below use the dml_ceil'ed value - confirm
// this difference is intended.
6923 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6924 if (SwathHeightC[k] == 0) {
6925 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6926 DETBufferSizeC[k] = 0;
6927 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6928 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6929 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6931 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6932 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
// The plane is unsupported when even the minimum swaths exceed half of the
// DET, or when the luma (or, with chroma present, chroma) swath width exceeds
// its maximum. A single failing plane clears *ViewportSizeSupport.
6935 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6936 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6937 *ViewportSizeSupport = false;
6938 ViewportSizeSupportPerPlane[k] = false;
6940 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth - compute per-plane swath widths and maximum swath
 * heights.
 *
 * Per-plane outputs written in the visible code:
 *   SwathWidthSingleDPPY[] / SwathWidthSingleDPPC[]  width for one DPP
 *   SwathWidthY[] / SwathWidthC[]                    width after ODM combine / DPP split
 *   MaximumSwathHeightY[] / MaximumSwathHeightC[]    a 256-byte block dimension
 *   swath_width_luma_ub[] / swath_width_chroma_ub[]  upper-bound widths
 *
 * NOTE(review): this listing does not show every source line (the embedded
 * numbering has gaps). k, j, HActive, HRatio, DPPPerPlane and BytePerPixC
 * have no visible declaration and are presumably declared on omitted lines.
 */
6946 static void CalculateSwathWidth(
6947 bool ForceSingleDPP,
6948 int NumberOfActivePlanes,
6949 enum source_format_class SourcePixelFormat[],
6950 enum scan_direction_class SourceScan[],
6951 int ViewportWidth[],
6952 int ViewportHeight[],
6953 int SurfaceWidthY[],
6954 int SurfaceWidthC[],
6955 int SurfaceHeightY[],
6956 int SurfaceHeightC[],
6957 enum odm_combine_mode ODMCombineEnabled[],
6960 int Read256BytesBlockHeightY[],
6961 int Read256BytesBlockHeightC[],
6962 int Read256BytesBlockWidthY[],
6963 int Read256BytesBlockWidthC[],
6964 int BlendingAndTiming[],
6968 double SwathWidthSingleDPPY[],
6969 double SwathWidthSingleDPPC[],
6970 double SwathWidthY[],
6971 double SwathWidthC[],
6972 int MaximumSwathHeightY[],
6973 int MaximumSwathHeightC[],
6974 int swath_width_luma_ub[],
6975 int swath_width_chroma_ub[])
6977 enum odm_combine_mode MainPlaneODMCombine;
6980 #ifdef __DML_VBA_DEBUG__
6981 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6984 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Single-DPP luma width: viewport width when the scan is not dm_vert; the
// viewport-height assignment is presumably the dm_vert arm (branch line not
// in this view).
6985 if (SourceScan[k] != dm_vert) {
6986 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6988 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6991 #ifdef __DML_VBA_DEBUG__
6992 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6993 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
// Start from this plane's ODM mode, then take the mode of plane j when
// BlendingAndTiming[k] == j.
6996 MainPlaneODMCombine = ODMCombineEnabled[k];
6997 for (j = 0; j < NumberOfActivePlanes; ++j) {
6998 if (BlendingAndTiming[k] == j) {
6999 MainPlaneODMCombine = ODMCombineEnabled[j];
// Luma swath width:
//   4to1 ODM: min(single-DPP width, round(HActive / 4 * HRatio))
//   2to1 ODM: min(single-DPP width, round(HActive / 2 * HRatio))
//   two DPPs per plane: half of the single-DPP width
//   the last assignment uses the full single-DPP width (presumably the else arm)
7003 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
7004 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
7005 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
7006 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
7007 } else if (DPPPerPlane[k] == 2) {
7008 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
7010 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7013 #ifdef __DML_VBA_DEBUG__
7014 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
7015 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
// Chroma widths are half the luma widths for the three 4:2:0 formats; the
// following pair of assignments copies the luma widths (presumably the else arm).
7018 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
7019 SwathWidthC[k] = SwathWidthY[k] / 2;
7020 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
7022 SwathWidthC[k] = SwathWidthY[k];
7023 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
// ForceSingleDPP overrides the widths computed above with the single-DPP ones.
7026 if (ForceSingleDPP == true) {
7027 SwathWidthY[k] = SwathWidthSingleDPPY[k];
7028 SwathWidthC[k] = SwathWidthSingleDPPC[k];
// Surface dimensions passed through dml_ceil with the matching 256-byte block
// dimension (presumably rounding up to a whole number of blocks).
7031 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
7032 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
7033 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
7034 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
7036 #ifdef __DML_VBA_DEBUG__
7037 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
// Non-vertical scan: maximum swath height is the block height, and the
// upper-bound width is
//   min(surface bound, dml_ceil(SwathWidth - 1, block width) + block width).
// Chroma upper bound is set to 0 when BytePerPixC[k] is not > 0 (the
// assignment of 0 is presumably that else arm).
7040 if (SourceScan[k] != dm_vert) {
7041 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
7042 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
7043 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
7044 if (BytePerPixC[k] > 0) {
7045 swath_width_chroma_ub[k] = dml_min(
7047 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
7049 swath_width_chroma_ub[k] = 0;
// Same computation with block width and height swapped; presumably the
// dm_vert arm (branch line not in this view).
7052 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
7053 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
7054 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
7055 if (BytePerPixC[k] > 0) {
7056 swath_width_chroma_ub[k] = dml_min(
7057 surface_height_ub_c,
7058 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7060 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency - return the extra latency built from a fixed
 * cycle count and a byte count:
 *
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *       + ExtraLatencyBytes / ReturnBW
 *
 * ExtraLatencyBytes comes from CalculateExtraLatencyBytes().
 *
 * NOTE(review): this listing does not show every source line (the embedded
 * numbering has gaps). DCFCLK and ReturnBW have no visible declaration, and
 * part of the CalculateExtraLatencyBytes() argument list is not shown; they
 * are presumably on omitted lines.
 */
7067 static double CalculateExtraLatency(
7068 int RoundTripPingLatencyCycles,
7069 int ReorderingBytes,
7071 int TotalNumberOfActiveDPP,
7072 int PixelChunkSizeInKByte,
7073 int TotalNumberOfDCCActiveDPP,
7078 int NumberOfActivePlanes,
7080 int dpte_group_bytes[],
7081 double HostVMInefficiencyFactor,
7082 double HostVMMinPageSize,
7083 int HostVMMaxNonCachedPageTableLevels)
7085 double ExtraLatencyBytes;
7086 double ExtraLatency;
7088 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7090 TotalNumberOfActiveDPP,
7091 PixelChunkSizeInKByte,
7092 TotalNumberOfDCCActiveDPP,
7096 NumberOfActivePlanes,
7099 HostVMInefficiencyFactor,
7101 HostVMMaxNonCachedPageTableLevels);
// Cycle part is divided by DCFCLK; byte part is divided by ReturnBW.
7103 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7105 #ifdef __DML_VBA_DEBUG__
7106 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7107 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7108 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7109 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7110 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7113 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes - accumulate a byte count in `ret` from:
 *   - ReorderingBytes,
 *   - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024,
 *   - when GPUVMEnable is set, for every active plane:
 *     NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *     * HostVMInefficiencyFactor.
 *
 * NOTE(review): this listing does not show every source line (the embedded
 * numbering has gaps). ret, GPUVMEnable, HostVMEnable, MetaChunkSize and
 * NumberOfDPP have no visible declaration, and the return statement is not
 * shown; presumably `ret` is what is returned - confirm.
 */
7116 static double CalculateExtraLatencyBytes(
7117 int ReorderingBytes,
7118 int TotalNumberOfActiveDPP,
7119 int PixelChunkSizeInKByte,
7120 int TotalNumberOfDCCActiveDPP,
7124 int NumberOfActivePlanes,
7126 int dpte_group_bytes[],
7127 double HostVMInefficiencyFactor,
7128 double HostVMMinPageSize,
7129 int HostVMMaxNonCachedPageTableLevels)
7132 int HostVMDynamicLevels = 0, k;
// With both GPUVM and HostVM enabled, HostVMDynamicLevels is derived from
// HostVMMaxNonCachedPageTableLevels by page-size bucket, floored at 0:
//   HostVMMinPageSize < 2048              -> all levels
//   2048 <= HostVMMinPageSize < 1048576   -> levels - 1
//   the "- 2" assignment is presumably the remaining (larger) case
// The assignment of 0 is presumably the arm taken when either is disabled.
7134 if (GPUVMEnable == true && HostVMEnable == true) {
7135 if (HostVMMinPageSize < 2048) {
7136 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
7137 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
7138 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
7140 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
7143 HostVMDynamicLevels = 0;
// Chunk sizes are multiplied by 1024.0, so the sum is evaluated as a double.
7146 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
7148 if (GPUVMEnable == true) {
7149 for (k = 0; k < NumberOfActivePlanes; ++k) {
7150 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - take the largest of the three urgent latencies
 * (pixel data only, pixel mixed with VM data, VM data only) via dml_max3 and,
 * when DoUrgentLatencyAdjustment is set, add
 *
 *   UrgentLatencyAdjustmentFabricClockComponent
 *       * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 *
 * NOTE(review): this listing does not show every source line (the embedded
 * numbering has gaps). FabricClock and ret have no visible declaration and
 * the return statement is not shown; presumably `ret` is what is returned -
 * confirm.
 */
7156 static double CalculateUrgentLatency(
7157 double UrgentLatencyPixelDataOnly,
7158 double UrgentLatencyPixelMixedWithVMData,
7159 double UrgentLatencyVMDataOnly,
7160 bool DoUrgentLatencyAdjustment,
7161 double UrgentLatencyAdjustmentFabricClockComponent,
7162 double UrgentLatencyAdjustmentFabricClockReference,
7167 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
7168 if (DoUrgentLatencyAdjustment == true) {
// The added term is zero when FabricClock equals the reference clock.
7169 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
// UseMinimumDCFCLK: for every state i in [0, v->soc.num_states) and every
// j in {0, 1}, estimate the DCFCLK required for average and for peak
// (prefetch) bandwidth and store the result in v->DCFCLKState[i][j], capped at
// v->DCFCLKPerState[i].
//
// Parameters:
//   mode_lib         - supplies v = &mode_lib->vba; all inputs are read from it
//                      and v->DCFCLKState[i][j] is written.
//   MaxPrefetchMode  - forwarded to CalculateTWait().
//   ReorderingBytes  - not referenced on any visible line.
//                      NOTE(review): presumably passed to
//                      CalculateExtraLatencyBytes() on an elided line -- confirm.
//
// NOTE(review): this listing has gaps in its embedded numbering. Brace-only
// lines, `else` lines, some declarations and some call arguments are not
// visible, so the comments below describe only what the visible lines show.
// The meaning of index j is not established here (presumably the MPC-combine
// option -- confirm).
7174 static void UseMinimumDCFCLK(
7175 struct display_mode_lib *mode_lib,
7176 int MaxPrefetchMode,
7177 int ReorderingBytes)
7179 struct vba_vars_st *v = &mode_lib->vba;
// dummy1..dummy3 are not referenced on any visible line; presumably they
// receive unused outputs of CalculateVupdateAndDynamicMetadataParameters().
7180 int dummy1, i, j, k;
7181 double NormalEfficiency, dummy2, dummy3;
7182 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
// Convert the percentage into a fraction; used as a divisor throughout.
7184 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7185 for (i = 0; i < v->soc.num_states; ++i) {
7186 for (j = 0; j <= 1; ++j) {
// Per-(i, j) scratch values; the arrays are indexed by plane k.
7187 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7188 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7189 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7190 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7191 double MinimumTWait;
7192 double NonDPTEBandwidth;
7193 double DPTEBandwidth;
7194 double DCFCLKRequiredForAverageBandwidth;
7195 double ExtraLatencyBytes;
7196 double ExtraLatencyCycles;
7197 double DCFCLKRequiredForPeakBandwidth;
7198 int NoOfDPPState[DC__NUM_DPP__MAX];
7199 double MinimumTvmPlus2Tr0;
// Accumulate, over all active planes, NoOfDPP * DPTEBytesPerRow divided by
// 15.75 * HTotal / PixelClock.
7201 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7202 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7203 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7204 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
// Snapshot the per-plane DPP count for this (i, j) into a flat array.
7207 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7208 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7211 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7212 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
// Use the accumulated prefetch/flip DPTE bandwidth when HostVM is enabled or
// immediate flip is required; otherwise use TotalDPTERowBandwidth. Only
// element 0 of ImmediateFlipRequirement is inspected.
7213 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7214 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
// Average requirement = max of three candidates: the projected deep-sleep
// DCFCLK, a term limited by the "max average percent" setting (which always
// uses TotalDPTERowBandwidth), and a term derated by NormalEfficiency (which
// uses the DPTEBandwidth selected above).
7215 DCFCLKRequiredForAverageBandwidth = dml_max3(
7216 v->ProjectedDCFCLKDeepSleep[i][j],
7217 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7218 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7219 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
// NOTE(review): several arguments of this call are elided in the listing
// (embedded numbers 7222, 7226-7228, 7230 and 7232 are skipped).
7221 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7223 v->TotalNumberOfActiveDPP[i][j],
7224 v->PixelChunkSizeInKByte,
7225 v->TotalNumberOfDCCActiveDPP[i][j],
7229 v->NumberOfActivePlanes,
7231 v->dpte_group_bytes,
7233 v->HostVMMinPageSize,
7234 v->HostVMMaxNonCachedPageTableLevels);
// Extra latency in cycles: round-trip ping latency, the ARB-to-RET delay
// constant, and the extra bytes converted via efficiency and bus width.
7235 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
// Per-plane peak (prefetch) requirement.
7236 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7237 double DCFCLKCyclesRequiredInPrefetch;
7238 double ExpectedPrefetchBWAcceleration;
7239 double PrefetchTime;
// Cycles needed for the luma + chroma prefetch pixel data alone.
7241 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7242 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
// Total prefetch cycles: twice the extra latency shared across the plane's
// DPPs, the PDE/meta-PTE bytes (counted only when GPUVMMaxPageTableLevels > 2),
// two DPTE rows, two meta rows, and the pixel cycles computed above.
// No guard against NoOfDPPState[k] == 0 is visible in this listing.
7243 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7244 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7245 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7246 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7247 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
// Ratio of the per-state active pixel + cursor bandwidth to the luma + chroma
// read bandwidth of the plane.
7248 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7249 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
// Non-zero only when GPUVM, this plane's dynamic metadata and
// DynamicMetadataVMEnabled are all on.
7250 DynamicMetadataVMExtraLatency[k] =
7251 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7252 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
// NOTE(review): one line of this expression (embedded number 7254) is elided;
// the operand that the page-table-level factor below multiplies is not
// visible here.
7253 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7255 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7256 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7257 - DynamicMetadataVMExtraLatency[k];
7259 if (PrefetchTime > 0) {
7260 double ExpectedVRatioPrefetch;
// Pixel prefetch time divided by the share of PrefetchTime that the pixel
// cycles take out of the total prefetch cycles.
7261 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7262 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
// The base rate is scaled up once the expected ratio exceeds 1, scaled again
// once it exceeds 4, and then multiplied by the bandwidth acceleration.
7263 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7264 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7265 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7266 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7267 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
// NOTE(review): presumably the else-branch of `PrefetchTime > 0` (the `else`
// line is elided): with no time left to prefetch, fall back to the state's
// DCFCLK.
7270 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7272 if (v->DynamicMetadataEnable[k] == true) {
// NOTE(review): TSetupPipe, TdmbfPipe, TdmecPipe and TdmsksPipe are used
// below, but their declarations and several arguments of the following call
// are elided (embedded numbers 7273-7276, 7284-7285, 7289 and 7291-7297 are
// skipped).
7277 double AllowedTimeForUrgentExtraLatency;
7279 CalculateVupdateAndDynamicMetadataParameters(
7280 v->MaxInterDCNTileRepeaters,
7281 v->RequiredDPPCLK[i][j][k],
7282 v->RequiredDISPCLK[i][j],
7283 v->ProjectedDCFCLKDeepSleep[i][j],
7286 v->VTotal[k] - v->VActive[k],
7287 v->DynamicMetadataTransmittedBytes[k],
7288 v->DynamicMetadataLinesBeforeActiveRequired[k],
7290 v->ProgressiveToInterlaceUnitInOPP,
// Time left for the urgent extra latency after the wait time, the four pipe
// timing terms and the dynamic-metadata VM latency are subtracted.
7298 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7299 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7300 if (AllowedTimeForUrgentExtraLatency > 0) {
7301 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7302 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7303 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
// NOTE(review): presumably the else-branch of the check above (the `else`
// line is elided).
7305 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
// The peak requirement starts as the sum of the per-plane requirements.
7309 DCFCLKRequiredForPeakBandwidth = 0;
7310 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7311 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
// UrgLatency scaled by a page-table-level factor that depends on GPUVM/HostVM.
// NOTE(review): the final alternative (GPUVM disabled) of this expression is
// on an elided line (embedded number 7317).
7313 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7314 * (v->GPUVMEnable == true ?
7315 (v->HostVMEnable == true ?
7316 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
// Second pass over the planes: tighten the peak requirement against the time
// actually available before vstartup.
7318 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7319 double MaximumTvmPlus2Tr0PlusTsw;
7320 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
// If the available window does not exceed the minimum plus a quarter of the
// pixel prefetch time, use the state's DCFCLK directly.
7321 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7322 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
// NOTE(review): presumably the else-branch (the `else` line is elided); both
// denominators below are positive whenever the condition above is false.
7324 DCFCLKRequiredForPeakBandwidth = dml_max3(
7325 DCFCLKRequiredForPeakBandwidth,
7326 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7327 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
// Final answer for (i, j): the larger of the average and peak requirements
// with a 1.05 factor applied, never above the state's DCFCLK.
7330 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
// CalculateUnboundedRequestAndCompressedBufferSize: decide whether unbounded
// requesting is enabled and derive the compressed buffer size from what is
// left of the return buffer after the DET buffers are subtracted.
//
// Outputs (written through pointers):
//   *UnboundedRequestEnabled     - result of UnboundedRequest().
//   *CompressedBufferSizeInkByte - computed size, see below.
//
// NOTE(review): this listing has gaps. `TotalActiveDPP` and `MaxNumDPP` are
// used below but their declarations are not visible (embedded numbers 7339 and
// 7341 are skipped; presumably they are parameters). The braces and the
// `#endif` closing the debug section are not visible either.
7335 static void CalculateUnboundedRequestAndCompressedBufferSize(
7336 unsigned int DETBufferSizeInKByte,
7337 int ConfigReturnBufferSizeInKByte,
7338 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7340 bool NoChromaPlanes,
7342 int CompressedBufferSegmentSizeInkByteFinal,
7343 enum output_encoder_class *Output,
7344 bool *UnboundedRequestEnabled,
7345 int *CompressedBufferSizeInkByte)
// presumably rounds the DET size up to a multiple of 64 -- confirm dml_ceil()
// semantics.
7347 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
// Only the first element of Output is consulted.
7349 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// Subtract the DET space of the active DPPs when unbounded requesting is
// enabled, otherwise the DET space of MaxNumDPP DPPs. The double-valued
// expression is converted to int on assignment.
7350 *CompressedBufferSizeInkByte = (
7351 *UnboundedRequestEnabled == true ?
7352 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7353 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Rescale by CompressedBufferSegmentSizeInkByteFinal / 64. Both visible
// operands are int, so the multiply happens first and the division truncates.
7354 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only dump of inputs and results.
// NOTE(review): DETBufferSizeInKByte is declared unsigned int but is printed
// with %d below.
7356 #ifdef __DML_VBA_DEBUG__
7357 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7358 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7359 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7360 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7361 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7362 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7363 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7367 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7369 bool ret_val = false;
7371 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7372 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {