2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
46 // For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
49 // Move these to ip parameters/constant
51 // At which vstartup the DML start to try if the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
57 // fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
// Per-pipe parameter members. Several of these (DCFCLKDeepSleep, DPPPerPlane,
// InterlaceEnable, NumberOfCursors, DCCEnable, ODMCombineIsEnabled,
// SourcePixelFormat, ProgressiveToInterlaceUnitInOPP) are read through the
// `myPipe` pointer in CalculatePrefetchSchedule() later in this file.
// NOTE(review): the enclosing struct's opening/closing lines and some members
// are not visible in this view -- confirm the full layout against the file.
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
69 enum scan_direction_class SourceScan;
// 256-byte block dimensions, kept separately for luma (Y) and chroma (C).
70 unsigned int BlockWidth256BytesY;
71 unsigned int BlockHeight256BytesY;
72 unsigned int BlockWidth256BytesC;
73 unsigned int BlockHeight256BytesC;
74 unsigned int InterlaceEnable;
75 unsigned int NumberOfCursors;
78 unsigned int DCCEnable;
79 bool ODMCombineIsEnabled;
80 enum source_format_class SourcePixelFormat;
83 bool ProgressiveToInterlaceUnitInOPP;
// NOTE(review): BPP_BLENDED_PIPE is already defined with the same value near
// the top of this file. An identical redefinition is legal C, but this second
// copy is redundant.
87 #define BPP_BLENDED_PIPE 0xffffffff
89 static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102 static unsigned int dscceComputeDelay(
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110 static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
165 double *prefetch_vmrow_bw,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174 static void CalculateDCCConfiguration(
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197 static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207 static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244 static void CalculateRowBandwidth(
246 enum source_format_class SourcePixelFormat,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
260 double *dpte_row_bw);
262 static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
264 double HostVMInefficiencyFactor,
265 double UrgentExtraLatency,
266 double UrgentLatency,
267 unsigned int GPUVMMaxPageTableLevels,
269 unsigned int HostVMMaxNonCachedPageTableLevels,
271 double HostVMMinPageSize,
272 double PDEAndMetaPTEBytesPerFrame,
274 double DPTEBytesPerRow,
275 double BandwidthAvailableForImmediateFlip,
276 unsigned int TotImmediateFlipBytes,
277 enum source_format_class SourcePixelFormat,
283 unsigned int dpte_row_height,
284 unsigned int meta_row_height,
285 unsigned int dpte_row_height_chroma,
286 unsigned int meta_row_height_chroma,
287 double *DestinationLinesToRequestVMInImmediateFlip,
288 double *DestinationLinesToRequestRowInImmediateFlip,
289 double *final_flip_bw,
290 bool *ImmediateFlipSupportedForPipe);
291 static double CalculateWriteBackDelay(
292 enum source_format_class WritebackPixelFormat,
293 double WritebackHRatio,
294 double WritebackVRatio,
295 unsigned int WritebackVTaps,
296 int WritebackDestinationWidth,
297 int WritebackDestinationHeight,
298 int WritebackSourceHeight,
299 unsigned int HTotal);
301 static void CalculateVupdateAndDynamicMetadataParameters(
302 int MaxInterDCNTileRepeaters,
305 double DCFClkDeepSleep,
309 int DynamicMetadataTransmittedBytes,
310 int DynamicMetadataLinesBeforeActiveRequired,
312 bool ProgressiveToInterlaceUnitInOPP,
317 int *VUpdateOffsetPix,
318 double *VUpdateWidthPix,
319 double *VReadyOffsetPix);
321 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
322 struct display_mode_lib *mode_lib,
323 unsigned int PrefetchMode,
326 double UrgentLatency,
329 double DCFCLKDeepSleep,
330 unsigned int DETBufferSizeY[],
331 unsigned int DETBufferSizeC[],
332 unsigned int SwathHeightY[],
333 unsigned int SwathHeightC[],
334 double SwathWidthY[],
335 double SwathWidthC[],
336 unsigned int DPPPerPlane[],
337 double BytePerPixelDETY[],
338 double BytePerPixelDETC[],
339 bool UnboundedRequestEnabled,
340 int unsigned CompressedBufferSizeInkByte,
341 enum clock_change_support *DRAMClockChangeSupport,
342 double *StutterExitWatermark,
343 double *StutterEnterPlusExitWatermark,
344 double *Z8StutterExitWatermark,
345 double *Z8StutterEnterPlusExitWatermark);
347 static void CalculateDCFCLKDeepSleep(
348 struct display_mode_lib *mode_lib,
349 unsigned int NumberOfActivePlanes,
353 double VRatioChroma[],
354 double SwathWidthY[],
355 double SwathWidthC[],
356 unsigned int DPPPerPlane[],
358 double HRatioChroma[],
360 double PSCL_THROUGHPUT[],
361 double PSCL_THROUGHPUT_CHROMA[],
363 double ReadBandwidthLuma[],
364 double ReadBandwidthChroma[],
366 double *DCFCLKDeepSleep);
368 static void CalculateUrgentBurstFactor(
369 int swath_width_luma_ub,
370 int swath_width_chroma_ub,
371 unsigned int SwathHeightY,
372 unsigned int SwathHeightC,
374 double UrgentLatency,
375 double CursorBufferSize,
376 unsigned int CursorWidth,
377 unsigned int CursorBPP,
380 double BytePerPixelInDETY,
381 double BytePerPixelInDETC,
382 double DETBufferSizeY,
383 double DETBufferSizeC,
384 double *UrgentBurstFactorCursor,
385 double *UrgentBurstFactorLuma,
386 double *UrgentBurstFactorChroma,
387 bool *NotEnoughUrgentLatencyHiding);
389 static void UseMinimumDCFCLK(
390 struct display_mode_lib *mode_lib,
392 int ReorderingBytes);
394 static void CalculatePixelDeliveryTimes(
395 unsigned int NumberOfActivePlanes,
397 double VRatioChroma[],
398 double VRatioPrefetchY[],
399 double VRatioPrefetchC[],
400 unsigned int swath_width_luma_ub[],
401 unsigned int swath_width_chroma_ub[],
402 unsigned int DPPPerPlane[],
404 double HRatioChroma[],
406 double PSCL_THROUGHPUT[],
407 double PSCL_THROUGHPUT_CHROMA[],
410 enum scan_direction_class SourceScan[],
411 unsigned int NumberOfCursors[],
412 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
413 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
414 unsigned int BlockWidth256BytesY[],
415 unsigned int BlockHeight256BytesY[],
416 unsigned int BlockWidth256BytesC[],
417 unsigned int BlockHeight256BytesC[],
418 double DisplayPipeLineDeliveryTimeLuma[],
419 double DisplayPipeLineDeliveryTimeChroma[],
420 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
421 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
422 double DisplayPipeRequestDeliveryTimeLuma[],
423 double DisplayPipeRequestDeliveryTimeChroma[],
424 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
425 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
426 double CursorRequestDeliveryTime[],
427 double CursorRequestDeliveryTimePrefetch[]);
429 static void CalculateMetaAndPTETimes(
430 int NumberOfActivePlanes,
433 int MinMetaChunkSizeBytes,
436 double VRatioChroma[],
437 double DestinationLinesToRequestRowInVBlank[],
438 double DestinationLinesToRequestRowInImmediateFlip[],
443 enum scan_direction_class SourceScan[],
444 int dpte_row_height[],
445 int dpte_row_height_chroma[],
446 int meta_row_width[],
447 int meta_row_width_chroma[],
448 int meta_row_height[],
449 int meta_row_height_chroma[],
450 int meta_req_width[],
451 int meta_req_width_chroma[],
452 int meta_req_height[],
453 int meta_req_height_chroma[],
454 int dpte_group_bytes[],
455 int PTERequestSizeY[],
456 int PTERequestSizeC[],
457 int PixelPTEReqWidthY[],
458 int PixelPTEReqHeightY[],
459 int PixelPTEReqWidthC[],
460 int PixelPTEReqHeightC[],
461 int dpte_row_width_luma_ub[],
462 int dpte_row_width_chroma_ub[],
463 double DST_Y_PER_PTE_ROW_NOM_L[],
464 double DST_Y_PER_PTE_ROW_NOM_C[],
465 double DST_Y_PER_META_ROW_NOM_L[],
466 double DST_Y_PER_META_ROW_NOM_C[],
467 double TimePerMetaChunkNominal[],
468 double TimePerChromaMetaChunkNominal[],
469 double TimePerMetaChunkVBlank[],
470 double TimePerChromaMetaChunkVBlank[],
471 double TimePerMetaChunkFlip[],
472 double TimePerChromaMetaChunkFlip[],
473 double time_per_pte_group_nom_luma[],
474 double time_per_pte_group_vblank_luma[],
475 double time_per_pte_group_flip_luma[],
476 double time_per_pte_group_nom_chroma[],
477 double time_per_pte_group_vblank_chroma[],
478 double time_per_pte_group_flip_chroma[]);
480 static void CalculateVMGroupAndRequestTimes(
481 unsigned int NumberOfActivePlanes,
483 unsigned int GPUVMMaxPageTableLevels,
484 unsigned int HTotal[],
486 double DestinationLinesToRequestVMInVBlank[],
487 double DestinationLinesToRequestVMInImmediateFlip[],
490 int dpte_row_width_luma_ub[],
491 int dpte_row_width_chroma_ub[],
492 int vm_group_bytes[],
493 unsigned int dpde0_bytes_per_frame_ub_l[],
494 unsigned int dpde0_bytes_per_frame_ub_c[],
495 int meta_pte_bytes_per_frame_ub_l[],
496 int meta_pte_bytes_per_frame_ub_c[],
497 double TimePerVMGroupVBlank[],
498 double TimePerVMGroupFlip[],
499 double TimePerVMRequestVBlank[],
500 double TimePerVMRequestFlip[]);
502 static void CalculateStutterEfficiency(
503 struct display_mode_lib *mode_lib,
504 int CompressedBufferSizeInkByte,
505 bool UnboundedRequestEnabled,
506 int ConfigReturnBufferSizeInKByte,
507 int MetaFIFOSizeInKEntries,
508 int ZeroSizeBufferEntries,
509 int NumberOfActivePlanes,
510 int ROBBufferSizeInKByte,
511 double TotalDataReadBandwidth,
514 double COMPBUF_RESERVED_SPACE_64B,
515 double COMPBUF_RESERVED_SPACE_ZS,
518 bool SynchronizedVBlank,
519 double Z8StutterEnterPlusExitWatermark,
520 double StutterEnterPlusExitWatermark,
521 bool ProgressiveToInterlaceUnitInOPP,
523 double MinTTUVBlank[],
525 unsigned int DETBufferSizeY[],
527 double BytePerPixelDETY[],
528 double SwathWidthY[],
531 double NetDCCRateLuma[],
532 double NetDCCRateChroma[],
533 double DCCFractionOfZeroSizeRequestsLuma[],
534 double DCCFractionOfZeroSizeRequestsChroma[],
539 enum scan_direction_class SourceScan[],
540 int BlockHeight256BytesY[],
541 int BlockWidth256BytesY[],
542 int BlockHeight256BytesC[],
543 int BlockWidth256BytesC[],
544 int DCCYMaxUncompressedBlock[],
545 int DCCCMaxUncompressedBlock[],
548 bool WritebackEnable[],
549 double ReadBandwidthPlaneLuma[],
550 double ReadBandwidthPlaneChroma[],
551 double meta_row_bw[],
552 double dpte_row_bw[],
553 double *StutterEfficiencyNotIncludingVBlank,
554 double *StutterEfficiency,
555 int *NumberOfStutterBurstsPerFrame,
556 double *Z8StutterEfficiencyNotIncludingVBlank,
557 double *Z8StutterEfficiency,
558 int *Z8NumberOfStutterBurstsPerFrame,
559 double *StutterPeriod);
561 static void CalculateSwathAndDETConfiguration(
563 int NumberOfActivePlanes,
564 unsigned int DETBufferSizeInKByte,
565 double MaximumSwathWidthLuma[],
566 double MaximumSwathWidthChroma[],
567 enum scan_direction_class SourceScan[],
568 enum source_format_class SourcePixelFormat[],
569 enum dm_swizzle_mode SurfaceTiling[],
571 int ViewportHeight[],
574 int SurfaceHeightY[],
575 int SurfaceHeightC[],
576 int Read256BytesBlockHeightY[],
577 int Read256BytesBlockHeightC[],
578 int Read256BytesBlockWidthY[],
579 int Read256BytesBlockWidthC[],
580 enum odm_combine_mode ODMCombineEnabled[],
581 int BlendingAndTiming[],
584 double BytePerPixDETY[],
585 double BytePerPixDETC[],
588 double HRatioChroma[],
590 int swath_width_luma_ub[],
591 int swath_width_chroma_ub[],
593 double SwathWidthChroma[],
596 unsigned int DETBufferSizeY[],
597 unsigned int DETBufferSizeC[],
598 bool ViewportSizeSupportPerPlane[],
599 bool *ViewportSizeSupport);
600 static void CalculateSwathWidth(
602 int NumberOfActivePlanes,
603 enum source_format_class SourcePixelFormat[],
604 enum scan_direction_class SourceScan[],
606 int ViewportHeight[],
609 int SurfaceHeightY[],
610 int SurfaceHeightC[],
611 enum odm_combine_mode ODMCombineEnabled[],
614 int Read256BytesBlockHeightY[],
615 int Read256BytesBlockHeightC[],
616 int Read256BytesBlockWidthY[],
617 int Read256BytesBlockWidthC[],
618 int BlendingAndTiming[],
622 double SwathWidthSingleDPPY[],
623 double SwathWidthSingleDPPC[],
624 double SwathWidthY[],
625 double SwathWidthC[],
626 int MaximumSwathHeightY[],
627 int MaximumSwathHeightC[],
628 int swath_width_luma_ub[],
629 int swath_width_chroma_ub[]);
631 static double CalculateExtraLatency(
632 int RoundTripPingLatencyCycles,
635 int TotalNumberOfActiveDPP,
636 int PixelChunkSizeInKByte,
637 int TotalNumberOfDCCActiveDPP,
642 int NumberOfActivePlanes,
644 int dpte_group_bytes[],
645 double HostVMInefficiencyFactor,
646 double HostVMMinPageSize,
647 int HostVMMaxNonCachedPageTableLevels);
649 static double CalculateExtraLatencyBytes(
651 int TotalNumberOfActiveDPP,
652 int PixelChunkSizeInKByte,
653 int TotalNumberOfDCCActiveDPP,
657 int NumberOfActivePlanes,
659 int dpte_group_bytes[],
660 double HostVMInefficiencyFactor,
661 double HostVMMinPageSize,
662 int HostVMMaxNonCachedPageTableLevels);
664 static double CalculateUrgentLatency(
665 double UrgentLatencyPixelDataOnly,
666 double UrgentLatencyPixelMixedWithVMData,
667 double UrgentLatencyVMDataOnly,
668 bool DoUrgentLatencyAdjustment,
669 double UrgentLatencyAdjustmentFabricClockComponent,
670 double UrgentLatencyAdjustmentFabricClockReference,
671 double FabricClockSingle);
673 static void CalculateUnboundedRequestAndCompressedBufferSize(
674 unsigned int DETBufferSizeInKByte,
675 int ConfigReturnBufferSizeInKByte,
676 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
680 int CompressedBufferSegmentSizeInkByteFinal,
681 enum output_encoder_class *Output,
682 bool *UnboundedRequestEnabled,
683 int *CompressedBufferSizeInkByte);
685 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
// Recalculation entry point for a display_mode_lib instance.
// Runs the following stages in order, all on the same mode_lib:
//   1. ModeSupportAndSystemConfiguration()
//   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
//   3. DisplayPipeConfiguration()
//   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
// Returns nothing.
687 void dml31_recalculate(struct display_mode_lib *mode_lib)
689 ModeSupportAndSystemConfiguration(mode_lib);
690 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
691 DisplayPipeConfiguration(mode_lib);
// Trace message guarded by __DML_VBA_DEBUG__.
// NOTE(review): the matching #endif is not visible in this view.
692 #ifdef __DML_VBA_DEBUG__
693 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
695 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
// Computes the DSC encoder (dscce) delay and converts it to a pixel count.
//
// Visible inputs: sliceWidth, numSlices, pixelFormat and Output; the body also
// uses BPP. The result of the main calculation (Delay) is scaled by
// 3 * pixelsPerClock into `pixels`.
// NOTE(review): several parameters, the branch bodies that set pixelsPerClock,
// s, D, wx, P, a and lstall, and the return statement are not visible in this
// view; presumably `pixels` is the return value -- confirm against the file.
698 static unsigned int dscceComputeDelay(
701 unsigned int sliceWidth,
702 unsigned int numSlices,
703 enum output_format_class pixelFormat,
704 enum output_encoder_class Output)
706 // valid bpc = source bits per component in the set of {8, 10, 12}
707 // valid bpp = increments of 1/16 of a bit
708 // min = 6/7/8 in N420/N422/444, respectively
709 // max = such that compression is 1:1
710 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
711 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
712 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
715 unsigned int rcModelSize = 8192;
717 // N422/N420 operate at 2 pixels per clock
// pixelsPerClock starts at 0 and is used as a divisor below, so the format
// checks that follow must assign it a non-zero value on every path.
718 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
720 if (pixelFormat == dm_420)
722 else if (pixelFormat == dm_444)
724 else if (pixelFormat == dm_n422)
726 // #all other modes operate at 1 pixel per clock
730 //initial transmit delay as per PPS
731 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
741 //divide by pixel per cycle to compute slice width as seen by DSC
742 w = sliceWidth / pixelsPerClock;
744 //422 mode has an additional cycle of delay
745 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
750 //main calculation for the dscce
751 ix = initalXmitDelay + 45;
756 ax = (a + 2) / 3 + D + 6 + 1;
// Integer ceiling of ax / wx.
757 L = (ax + wx - 1) / wx;
758 if ((ix % w) == 0 && P != 0)
762 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
764 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
765 pixels = Delay * 3 * pixelsPerClock;
// Computes a DSC pipeline delay for the given pixel format.
//
// Delay starts at 0. The function then takes one of three paths: dm_420,
// dm_n422, or everything else. Per the stage comments, each path accounts for
// the input deserializer, input CDC FIFO, CDC uncertainty, output CDC FIFO and
// output serializer.
// NOTE(review): the statements that add each stage's contribution and the
// return statement are not visible in this view; `Output` is not referenced in
// the visible lines. Presumably the accumulated Delay is returned -- confirm.
769 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
771 unsigned int Delay = 0;
773 if (pixelFormat == dm_420) {
778 // dscc - input deserializer
780 // dscc gets pixels every other cycle
782 // dscc - input cdc fifo
784 // dscc gets pixels every other cycle
786 // dscc - cdc uncertainty
788 // dscc - output cdc fifo
790 // dscc gets pixels every other cycle
792 // dscc - cdc uncertainty
794 // dscc - output serializer
798 } else if (pixelFormat == dm_n422) {
803 // dscc - input deserializer
805 // dscc - input cdc fifo
807 // dscc - cdc uncertainty
809 // dscc - output cdc fifo
811 // dscc - cdc uncertainty
813 // dscc - output serializer
// Remaining pixel formats (neither dm_420 nor dm_n422).
822 // dscc - input deserializer
824 // dscc - input cdc fifo
826 // dscc - cdc uncertainty
828 // dscc - output cdc fifo
830 // dscc - output serializer
832 // dscc - cdc uncertainty
841 static bool CalculatePrefetchSchedule(
842 struct display_mode_lib *mode_lib,
843 double HostVMInefficiencyFactor,
845 unsigned int DSCDelay,
846 double DPPCLKDelaySubtotalPlusCNVCFormater,
847 double DPPCLKDelaySCL,
848 double DPPCLKDelaySCLLBOnly,
849 double DPPCLKDelayCNVCCursor,
850 double DISPCLKDelaySubtotal,
851 unsigned int DPP_RECOUT_WIDTH,
852 enum output_format_class OutputFormat,
853 unsigned int MaxInterDCNTileRepeaters,
854 unsigned int VStartup,
855 unsigned int MaxVStartup,
856 unsigned int GPUVMPageTableLevels,
859 unsigned int HostVMMaxNonCachedPageTableLevels,
860 double HostVMMinPageSize,
861 bool DynamicMetadataEnable,
862 bool DynamicMetadataVMEnabled,
863 int DynamicMetadataLinesBeforeActiveRequired,
864 unsigned int DynamicMetadataTransmittedBytes,
865 double UrgentLatency,
866 double UrgentExtraLatency,
868 unsigned int PDEAndMetaPTEBytesFrame,
869 unsigned int MetaRowByte,
870 unsigned int PixelPTEBytesPerRow,
871 double PrefetchSourceLinesY,
872 unsigned int SwathWidthY,
873 double VInitPreFillY,
874 unsigned int MaxNumSwathY,
875 double PrefetchSourceLinesC,
876 unsigned int SwathWidthC,
877 double VInitPreFillC,
878 unsigned int MaxNumSwathC,
879 int swath_width_luma_ub,
880 int swath_width_chroma_ub,
881 unsigned int SwathHeightY,
882 unsigned int SwathHeightC,
884 double *DSTXAfterScaler,
885 double *DSTYAfterScaler,
886 double *DestinationLinesForPrefetch,
887 double *PrefetchBandwidth,
888 double *DestinationLinesToRequestVMInVBlank,
889 double *DestinationLinesToRequestRowInVBlank,
890 double *VRatioPrefetchY,
891 double *VRatioPrefetchC,
892 double *RequiredPrefetchPixDataBWLuma,
893 double *RequiredPrefetchPixDataBWChroma,
894 bool *NotEnoughTimeForDynamicMetadata,
896 double *prefetch_vmrow_bw,
900 int *VUpdateOffsetPix,
901 double *VUpdateWidthPix,
902 double *VReadyOffsetPix)
904 bool MyError = false;
905 unsigned int DPPCycles, DISPCLKCycles;
906 double DSTTotalPixelsAfterScaler;
908 double dst_y_prefetch_equ;
910 double prefetch_bw_oto;
911 double prefetch_bw_pr;
914 double Tvm_oto_lines;
915 double Tr0_oto_lines;
916 double dst_y_prefetch_oto;
917 double TimeForFetchingMetaPTE = 0;
918 double TimeForFetchingRowInVBlank = 0;
919 double LinesToRequestPrefetchPixelData = 0;
920 unsigned int HostVMDynamicLevelsTrips;
924 double Tvm_trips_rounded;
925 double Tr0_trips_rounded;
928 double prefetch_bw_equ;
934 double prefetch_sw_bytes;
937 int max_vratio_pre = 4;
943 if (GPUVMEnable == true && HostVMEnable == true) {
944 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
946 HostVMDynamicLevelsTrips = 0;
948 #ifdef __DML_VBA_DEBUG__
949 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
951 CalculateVupdateAndDynamicMetadataParameters(
952 MaxInterDCNTileRepeaters,
955 myPipe->DCFCLKDeepSleep,
959 DynamicMetadataTransmittedBytes,
960 DynamicMetadataLinesBeforeActiveRequired,
961 myPipe->InterlaceEnable,
962 myPipe->ProgressiveToInterlaceUnitInOPP,
971 LineTime = myPipe->HTotal / myPipe->PixelClock;
972 trip_to_mem = UrgentLatency;
973 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
975 #ifdef __DML_VBA_ALLOW_DELTA__
976 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
978 if (DynamicMetadataVMEnabled == true) {
980 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
982 *Tdmdl = TWait + UrgentExtraLatency;
985 #ifdef __DML_VBA_ALLOW_DELTA__
986 if (DynamicMetadataEnable == false) {
991 if (DynamicMetadataEnable == true) {
992 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
993 *NotEnoughTimeForDynamicMetadata = true;
994 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
995 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
996 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
997 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
998 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
1000 *NotEnoughTimeForDynamicMetadata = false;
1003 *NotEnoughTimeForDynamicMetadata = false;
1006 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
1008 if (myPipe->ScalerEnabled)
1009 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1011 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1013 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1015 DISPCLKCycles = DISPCLKDelaySubtotal;
1017 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1020 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1022 #ifdef __DML_VBA_DEBUG__
1023 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1024 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1025 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1026 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1027 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1028 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1029 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1030 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1033 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1035 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1036 *DSTYAfterScaler = 1;
1038 *DSTYAfterScaler = 0;
1040 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1041 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1042 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1044 #ifdef __DML_VBA_DEBUG__
1045 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1050 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1051 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1052 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1054 #ifdef __DML_VBA_ALLOW_DELTA__
1055 if (!myPipe->DCCEnable) {
1057 Tr0_trips_rounded = 0.0;
1063 Tvm_trips_rounded = 0.0;
1067 if (GPUVMPageTableLevels >= 3) {
1068 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1072 } else if (!myPipe->DCCEnable) {
1075 *Tno_bw = LineTime / 4;
1078 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1079 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1081 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1083 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1084 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1085 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1086 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1087 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1089 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1090 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1091 Tsw_oto = Lsw_oto * LineTime;
1093 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1095 #ifdef __DML_VBA_DEBUG__
1096 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1097 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1098 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1099 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1100 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1101 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1104 if (GPUVMEnable == true)
1105 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1107 Tvm_oto = LineTime / 4.0;
1109 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1110 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1114 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1117 #ifdef __DML_VBA_DEBUG__
1118 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1119 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1120 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1121 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1122 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1123 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1124 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1125 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1126 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1129 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1130 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1131 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1132 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1133 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1134 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1136 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1138 if (prefetch_sw_bytes < dep_bytes)
1139 prefetch_sw_bytes = 2 * dep_bytes;
1141 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1142 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1143 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1144 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1145 dml_print("DML: LineTime: %f\n", LineTime);
1146 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1148 dml_print("DML: LineTime: %f\n", LineTime);
1149 dml_print("DML: VStartup: %d\n", VStartup);
1150 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1151 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1152 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1153 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1154 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1155 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1156 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1157 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1158 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1159 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1160 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1162 *PrefetchBandwidth = 0;
1163 *DestinationLinesToRequestVMInVBlank = 0;
1164 *DestinationLinesToRequestRowInVBlank = 0;
1165 *VRatioPrefetchY = 0;
1166 *VRatioPrefetchC = 0;
1167 *RequiredPrefetchPixDataBWLuma = 0;
1168 if (dst_y_prefetch_equ > 1) {
1169 double PrefetchBandwidth1;
1170 double PrefetchBandwidth2;
1171 double PrefetchBandwidth3;
1172 double PrefetchBandwidth4;
1174 if (Tpre_rounded - *Tno_bw > 0) {
1175 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1176 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1177 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1179 PrefetchBandwidth1 = 0;
1182 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1183 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1184 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1187 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1188 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1190 PrefetchBandwidth2 = 0;
1192 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1193 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1194 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1195 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1197 PrefetchBandwidth3 = 0;
1200 #ifdef __DML_VBA_DEBUG__
1201 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1202 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1203 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1205 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1206 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1207 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1210 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1211 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1213 PrefetchBandwidth4 = 0;
1220 if (PrefetchBandwidth1 > 0) {
1221 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1222 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1231 if (PrefetchBandwidth2 > 0) {
1232 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1233 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1242 if (PrefetchBandwidth3 > 0) {
1243 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1244 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1254 prefetch_bw_equ = PrefetchBandwidth1;
1255 } else if (Case2OK) {
1256 prefetch_bw_equ = PrefetchBandwidth2;
1257 } else if (Case3OK) {
1258 prefetch_bw_equ = PrefetchBandwidth3;
1260 prefetch_bw_equ = PrefetchBandwidth4;
1263 #ifdef __DML_VBA_DEBUG__
1264 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1265 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1266 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1267 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1270 if (prefetch_bw_equ > 0) {
1271 if (GPUVMEnable == true) {
1272 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1274 Tvm_equ = LineTime / 4;
1277 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1279 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1281 (LineTime - Tvm_equ) / 2,
1284 Tr0_equ = (LineTime - Tvm_equ) / 2;
1289 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1293 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1294 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1295 TimeForFetchingMetaPTE = Tvm_oto;
1296 TimeForFetchingRowInVBlank = Tr0_oto;
1297 *PrefetchBandwidth = prefetch_bw_oto;
1299 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1300 TimeForFetchingMetaPTE = Tvm_equ;
1301 TimeForFetchingRowInVBlank = Tr0_equ;
1302 *PrefetchBandwidth = prefetch_bw_equ;
1305 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1307 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1309 #ifdef __DML_VBA_ALLOW_DELTA__
1310 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1311 // See note above dated 5/30/2018
1312 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1313 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1315 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1318 #ifdef __DML_VBA_DEBUG__
1319 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1320 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1321 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1322 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1323 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1324 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1325 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1328 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1330 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1331 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1332 #ifdef __DML_VBA_DEBUG__
1333 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1334 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1335 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1337 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1338 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1339 *VRatioPrefetchY = dml_max(
1340 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1341 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1342 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1345 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1346 *VRatioPrefetchY = 0;
1348 #ifdef __DML_VBA_DEBUG__
1349 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1350 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1351 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1355 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1356 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1358 #ifdef __DML_VBA_DEBUG__
1359 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1360 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1361 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1363 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1364 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1365 *VRatioPrefetchC = dml_max(
1367 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1368 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1371 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1372 *VRatioPrefetchC = 0;
1374 #ifdef __DML_VBA_DEBUG__
1375 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1376 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1377 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1381 #ifdef __DML_VBA_DEBUG__
1382 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1383 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1384 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1387 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1389 #ifdef __DML_VBA_DEBUG__
1390 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1393 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1397 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1398 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1399 *VRatioPrefetchY = 0;
1400 *VRatioPrefetchC = 0;
1401 *RequiredPrefetchPixDataBWLuma = 0;
1402 *RequiredPrefetchPixDataBWChroma = 0;
1406 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1407 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1408 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1409 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1411 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1412 (double) LinesToRequestPrefetchPixelData * LineTime);
1413 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1414 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1415 (double) myPipe->HTotal)) * LineTime);
1416 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1417 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1418 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1419 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1420 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1424 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1428 double prefetch_vm_bw;
1429 double prefetch_row_bw;
1431 if (PDEAndMetaPTEBytesFrame == 0) {
1433 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1434 #ifdef __DML_VBA_DEBUG__
1435 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1436 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1437 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1438 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1440 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1441 #ifdef __DML_VBA_DEBUG__
1442 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1447 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1450 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1451 prefetch_row_bw = 0;
1452 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1453 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1455 #ifdef __DML_VBA_DEBUG__
1456 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1457 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1458 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1459 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1462 prefetch_row_bw = 0;
1464 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1467 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1471 *PrefetchBandwidth = 0;
1472 TimeForFetchingMetaPTE = 0;
1473 TimeForFetchingRowInVBlank = 0;
1474 *DestinationLinesToRequestVMInVBlank = 0;
1475 *DestinationLinesToRequestRowInVBlank = 0;
1476 *DestinationLinesForPrefetch = 0;
1477 LinesToRequestPrefetchPixelData = 0;
1478 *VRatioPrefetchY = 0;
1479 *VRatioPrefetchC = 0;
1480 *RequiredPrefetchPixDataBWLuma = 0;
1481 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Snap Clock onto the divider grid of 4 * VCOSpeed, rounding upward.
 *
 * The divider is (4 * VCOSpeed) / Clock passed through dml_floor(..., 1);
 * the result is 4 * VCOSpeed divided by that divider. Assuming dml_floor
 * rounds down to a whole number, the divider never exceeds the exact ratio,
 * so for positive inputs the returned clock is not below Clock.
 *
 * NOTE(review): if Clock exceeds 4 * VCOSpeed the divider would be 0 and the
 * division below is by zero - confirm callers never pass such a value.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * Snap Clock onto the divider grid of 4 * VCOSpeed, rounding downward.
 *
 * The divider is (4 * VCOSpeed) / Clock passed through dml_ceil(..., 1);
 * the result is 4 * VCOSpeed divided by that divider. Assuming dml_ceil
 * rounds up to a whole number, the divider is never smaller than the exact
 * ratio, so for positive inputs the returned clock does not exceed Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double exact_ratio = VCOSpeed * 4.0 / Clock;
	const double divider = dml_ceil(exact_ratio, 1);

	return VCOSpeed * 4 / divider;
}
/*
 * CalculateDCCConfiguration - pick the DCC request type for the luma and
 * chroma planes and report the matching block sizes.
 *
 * Results are written through the six output pointers
 * (MaxUncompressedBlock*, MaxCompressedBlock*, IndependentBlock*); nothing is
 * returned.
 *
 * Visible flow:
 *   1. Derive horizontal/vertical viewport limits from DETBufferSize and the
 *      256-byte request heights, cap them with the fixed MAS limits, and
 *      clamp the luma surface size to those limits.
 *   2. Compute the bytes of one full swath per plane for horizontal and
 *      vertical access.
 *   3. Compare those sizes with DETBufferSize to decide, per plane and per
 *      direction, whether 128-byte requests are required (req128_*).
 *   4. Decide, per plane and per direction, whether the segment order is
 *      contiguous, from bytes-per-pixel and the tiling format.
 *   5. Combine 3 and 4 into RequestLuma / RequestChroma, depending on
 *      DCCProgrammingAssumesScanDirectionUnknown and ScanOrientation.
 *   6. Translate the request types into the output block sizes, then zero
 *      the outputs for planes that have no DCC.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced by any line visible below.
 *
 * NOTE(review): DCCEnabled, yuv420, horz_div_l/c, vert_div_l/c and
 * swath_buf_size are used below but their declarations/initial values are
 * not among the visible lines - confirm them before changing this logic.
 */
1497 static void CalculateDCCConfiguration(
1499 bool DCCProgrammingAssumesScanDirectionUnknown,
1500 enum source_format_class SourcePixelFormat,
1501 unsigned int SurfaceWidthLuma,
1502 unsigned int SurfaceWidthChroma,
1503 unsigned int SurfaceHeightLuma,
1504 unsigned int SurfaceHeightChroma,
1505 double DETBufferSize,
1506 unsigned int RequestHeight256ByteLuma,
1507 unsigned int RequestHeight256ByteChroma,
1508 enum dm_swizzle_mode TilingFormat,
1509 unsigned int BytePerPixelY,
1510 unsigned int BytePerPixelC,
1511 double BytePerPixelDETY,
1512 double BytePerPixelDETC,
1513 enum scan_direction_class ScanOrientation,
1514 unsigned int *MaxUncompressedBlockLuma,
1515 unsigned int *MaxUncompressedBlockChroma,
1516 unsigned int *MaxCompressedBlockLuma,
1517 unsigned int *MaxCompressedBlockChroma,
1518 unsigned int *IndependentBlockLuma,
1519 unsigned int *IndependentBlockChroma)
/* Viewport limits imposed by the detile buffer, before rounding/capping. */
1528 double detile_buf_vp_horz_limit;
1529 double detile_buf_vp_vert_limit;
/* Fixed caps, final viewport limits, and the clamped surface dimensions. */
1531 int MAS_vp_horz_limit;
1532 int MAS_vp_vert_limit;
1533 int max_vp_horz_width;
1534 int max_vp_vert_height;
1535 int eff_surf_width_l;
1536 int eff_surf_width_c;
1537 int eff_surf_height_l;
1538 int eff_surf_height_c;
/* Full-swath sizes and the per-plane/per-direction decisions derived from them. */
1540 int full_swath_bytes_horz_wc_l;
1541 int full_swath_bytes_horz_wc_c;
1542 int full_swath_bytes_vert_wc_l;
1543 int full_swath_bytes_vert_wc_c;
1544 int req128_horz_wc_l;
1545 int req128_horz_wc_c;
1546 int req128_vert_wc_l;
1547 int req128_vert_wc_c;
1548 int segment_order_horz_contiguous_luma;
1549 int segment_order_horz_contiguous_chroma;
1550 int segment_order_vert_contiguous_luma;
1551 int segment_order_vert_contiguous_chroma;
/* Request kinds; REQ_NA is never assigned in the visible code. */
1554 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1556 RequestType RequestLuma;
1557 RequestType RequestChroma;
/* yuv420 is 1 for the three 4:2:0 formats and 0 otherwise; (1 + yuv420) is used as a divisor below. */
1559 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
/*
 * Special cases keyed on 1 or 8 bytes per pixel (the latter only for the
 * dm_sw_64kb_s* tilings). NOTE(review): the statements these conditions
 * guard are not among the visible lines; presumably they adjust the
 * vert_div_* / horz_div_* divisors used right below - confirm.
 */
1565 if (BytePerPixelY == 1)
1567 if (BytePerPixelC == 1)
1569 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1571 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
/* No chroma plane: half of DETBufferSize minus 2 * 256 is divided by the luma per-unit cost only. */
1574 if (BytePerPixelC == 0) {
1575 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1576 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1577 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
/* With a chroma plane: 2 * 2 * 256 is subtracted and the chroma cost (halved again for 4:2:0) is added to the divisor. */
1579 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1580 detile_buf_vp_horz_limit = (double) swath_buf_size
1581 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1582 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1583 detile_buf_vp_vert_limit = (double) swath_buf_size
1584 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
/* dm_420_10 gets limits scaled by 1.5 (mirrors the 2/3 scaling of the swath bytes further down). */
1587 if (SourcePixelFormat == dm_420_10) {
1588 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1589 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
/* Subtract one, then apply dml_floor with granularity 16 (presumably rounds down to a multiple of 16). */
1592 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1593 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
/* Fixed caps: horizontal 3840 for dm_rgbe_alpha, else 5760; vertical 2880 when a chroma plane exists, else 5760. */
1595 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1596 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1597 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1598 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
/*
 * Clamp the luma surface size to the limits; chroma is derived from the
 * clamped luma size (halved for 4:2:0), not from SurfaceWidthChroma /
 * SurfaceHeightChroma. The comparisons mix unsigned and int operands, so the
 * int limit is converted to unsigned for the comparison.
 */
1599 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1600 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1601 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1602 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
/* Bytes in one full swath: horizontal uses width * request height * bytes per pixel, vertical uses height * 256 / request height. */
1604 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1605 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1606 if (BytePerPixelC > 0) {
1607 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1608 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
/* No chroma plane: chroma swath sizes are zero. */
1610 full_swath_bytes_horz_wc_c = 0;
1611 full_swath_bytes_vert_wc_c = 0;
/*
 * dm_420_10: scale the swath sizes by 2/3 and apply dml_ceil with
 * granularity 256 (presumably rounds up to a multiple of 256).
 * NOTE(review): "* 2 / 3" is evaluated in int, so the fraction is dropped
 * before dml_ceil sees the value - confirm this truncation is intended.
 */
1614 if (SourcePixelFormat == dm_420_10) {
1615 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1616 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1617 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1618 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
/*
 * Horizontal access: decide which planes need 128-byte requests.
 *  - two full swaths of both planes fit in DETBufferSize: neither plane;
 *  - luma swath < 1.5 x chroma swath and (2 x luma + chroma) fits: chroma only;
 *  - luma swath >= 1.5 x chroma swath and (luma + 2 x chroma) fits: luma only;
 *  - remaining case: both planes.
 */
1621 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1622 req128_horz_wc_l = 0;
1623 req128_horz_wc_c = 0;
1624 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1625 req128_horz_wc_l = 0;
1626 req128_horz_wc_c = 1;
1627 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1628 req128_horz_wc_l = 1;
1629 req128_horz_wc_c = 0;
1631 req128_horz_wc_l = 1;
1632 req128_horz_wc_c = 1;
/* Vertical access: same four-way decision using the vertical swath sizes. */
1635 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1636 req128_vert_wc_l = 0;
1637 req128_vert_wc_c = 0;
1638 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1639 req128_vert_wc_l = 0;
1640 req128_vert_wc_c = 1;
1641 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1642 req128_vert_wc_l = 1;
1643 req128_vert_wc_c = 0;
1645 req128_vert_wc_l = 1;
1646 req128_vert_wc_c = 1;
/*
 * Segment-order flags (1 = contiguous, 0 = not), luma first.
 * Horizontal: 0 for 2 bytes/pixel, or 4 bytes/pixel unless tiling is dm_sw_64kb_r_x.
 * Vertical: 0 for 8 bytes/pixel with a dm_sw_64kb_d* or dm_sw_64kb_r_x tiling,
 * or 4 bytes/pixel with dm_sw_64kb_r_x.
 */
1649 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1650 segment_order_horz_contiguous_luma = 0;
1652 segment_order_horz_contiguous_luma = 1;
1654 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1655 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1656 segment_order_vert_contiguous_luma = 0;
1658 segment_order_vert_contiguous_luma = 1;
/* Same rules applied to the chroma plane using BytePerPixelC. */
1660 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1661 segment_order_horz_contiguous_chroma = 0;
1663 segment_order_horz_contiguous_chroma = 1;
1665 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1666 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1667 segment_order_vert_contiguous_chroma = 0;
1669 segment_order_vert_contiguous_chroma = 1;
/*
 * Scan direction unknown: both directions must be satisfied. 256-byte
 * requests only if neither direction needs 128-byte requests;
 * non-contiguous if any direction that needs them has a non-contiguous
 * segment order; contiguous in the remaining case.
 */
1672 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1673 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1674 RequestLuma = REQ_256Bytes;
1675 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1676 RequestLuma = REQ_128BytesNonContiguous;
1678 RequestLuma = REQ_128BytesContiguous;
1680 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1681 RequestChroma = REQ_256Bytes;
1682 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1683 RequestChroma = REQ_128BytesNonContiguous;
1685 RequestChroma = REQ_128BytesContiguous;
/* Known scan direction other than dm_vert: only the horizontal results matter. */
1687 } else if (ScanOrientation != dm_vert) {
1688 if (req128_horz_wc_l == 0) {
1689 RequestLuma = REQ_256Bytes;
1690 } else if (segment_order_horz_contiguous_luma == 0) {
1691 RequestLuma = REQ_128BytesNonContiguous;
1693 RequestLuma = REQ_128BytesContiguous;
1695 if (req128_horz_wc_c == 0) {
1696 RequestChroma = REQ_256Bytes;
1697 } else if (segment_order_horz_contiguous_chroma == 0) {
1698 RequestChroma = REQ_128BytesNonContiguous;
1700 RequestChroma = REQ_128BytesContiguous;
/* Presumably the remaining (dm_vert) branch: only the vertical results matter. */
1703 if (req128_vert_wc_l == 0) {
1704 RequestLuma = REQ_256Bytes;
1705 } else if (segment_order_vert_contiguous_luma == 0) {
1706 RequestLuma = REQ_128BytesNonContiguous;
1708 RequestLuma = REQ_128BytesContiguous;
1710 if (req128_vert_wc_c == 0) {
1711 RequestChroma = REQ_256Bytes;
1712 } else if (segment_order_vert_contiguous_chroma == 0) {
1713 RequestChroma = REQ_128BytesNonContiguous;
1715 RequestChroma = REQ_128BytesContiguous;
/*
 * Map the luma request type to (uncompressed, compressed, independent):
 * REQ_256Bytes -> 256/256/0, REQ_128BytesContiguous -> 256/128/128,
 * anything else -> 256/64/64.
 */
1719 if (RequestLuma == REQ_256Bytes) {
1720 *MaxUncompressedBlockLuma = 256;
1721 *MaxCompressedBlockLuma = 256;
1722 *IndependentBlockLuma = 0;
1723 } else if (RequestLuma == REQ_128BytesContiguous) {
1724 *MaxUncompressedBlockLuma = 256;
1725 *MaxCompressedBlockLuma = 128;
1726 *IndependentBlockLuma = 128;
1728 *MaxUncompressedBlockLuma = 256;
1729 *MaxCompressedBlockLuma = 64;
1730 *IndependentBlockLuma = 64;
/* Same mapping for the chroma plane. */
1733 if (RequestChroma == REQ_256Bytes) {
1734 *MaxUncompressedBlockChroma = 256;
1735 *MaxCompressedBlockChroma = 256;
1736 *IndependentBlockChroma = 0;
1737 } else if (RequestChroma == REQ_128BytesContiguous) {
1738 *MaxUncompressedBlockChroma = 256;
1739 *MaxCompressedBlockChroma = 128;
1740 *IndependentBlockChroma = 128;
1742 *MaxUncompressedBlockChroma = 256;
1743 *MaxCompressedBlockChroma = 64;
1744 *IndependentBlockChroma = 64;
/* Final override: chroma outputs are zeroed when DCC is off or there is no chroma plane. */
1747 if (DCCEnabled != true || BytePerPixelC == 0) {
1748 *MaxUncompressedBlockChroma = 0;
1749 *MaxCompressedBlockChroma = 0;
1750 *IndependentBlockChroma = 0;
/* Final override: luma outputs are zeroed when DCC is off. */
1753 if (DCCEnabled != true) {
1754 *MaxUncompressedBlockLuma = 0;
1755 *MaxCompressedBlockLuma = 0;
1756 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - compute the prefetch source line count for
 * one plane.
 *
 * Writes *VInitPreFill and *MaxNumSwath, and returns
 * *MaxNumSwath * SwathHeight + MaxPartialSwath (the value the debug print
 * labels "Prefetch source lines").
 *
 * mode_lib:      only vba.IgnoreViewportPositioning is read here.
 * ProgressiveToInterlaceUnitInOPP: selects which VInitPreFill formula is used.
 * SwathHeight:   divisor/modulus for the swath computations; a value of 0
 *                would make the modulo operations below invalid.
 * ViewportYStart: only checked to emit a warning when positioning is ignored.
 *
 * NOTE(review): VRatio, vtaps and Interlace are used below but their
 * parameter declarations are not among the visible lines - confirm their
 * types and position in the signature against the full file.
 */
1760 static double CalculatePrefetchSourceLines(
1761 struct display_mode_lib *mode_lib,
1765 bool ProgressiveToInterlaceUnitInOPP,
1766 unsigned int SwathHeight,
1767 unsigned int ViewportYStart,
1768 double *VInitPreFill,
1769 unsigned int *MaxNumSwath)
1771 struct vba_vars_st *v = &mode_lib->vba;
1772 unsigned int MaxPartialSwath;
/*
 * VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2, 1) in the first case;
 * the second formula (presumably the else branch) adds an
 * Interlace * 0.5 * VRatio term before halving.
 */
1774 if (ProgressiveToInterlaceUnitInOPP)
1775 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1777 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
/* Viewport positioning honoured: one extra swath is always counted and the partial swath is at least 1. */
1779 if (!v->IgnoreViewportPositioning) {
1781 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
/* The cast to unsigned int applies to the parenthesised value before the % operator. */
1783 if (*VInitPreFill > 1.0)
1784 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1786 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1787 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
/* Presumably the IgnoreViewportPositioning branch: warn if the real viewport y start is non-zero. */
1791 if (ViewportYStart != 0)
1792 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
/* Here no extra swath is added and the partial swath uses an offset of 1 instead of 2, with no lower bound applied. */
1794 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1796 if (*VInitPreFill > 1.0)
1797 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1799 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
/* Debug-only dump of inputs and results. */
1802 #ifdef __DML_VBA_DEBUG__
1803 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1804 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1805 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1806 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1807 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1808 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1809 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1810 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1811 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
/* The sum is computed in unsigned int and converted to double on return. */
1813 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1816 static unsigned int CalculateVMAndRowBytes(
1817 struct display_mode_lib *mode_lib,
1819 unsigned int BlockHeight256Bytes,
1820 unsigned int BlockWidth256Bytes,
1821 enum source_format_class SourcePixelFormat,
1822 unsigned int SurfaceTiling,
1823 unsigned int BytePerPixel,
1824 enum scan_direction_class ScanDirection,
1825 unsigned int SwathWidth,
1826 unsigned int ViewportHeight,
1829 unsigned int HostVMMaxNonCachedPageTableLevels,
1830 unsigned int GPUVMMinPageSize,
1831 unsigned int HostVMMinPageSize,
1832 unsigned int PTEBufferSizeInRequests,
1834 unsigned int DCCMetaPitch,
1835 unsigned int *MacroTileWidth,
1836 unsigned int *MetaRowByte,
1837 unsigned int *PixelPTEBytesPerRow,
1838 bool *PTEBufferSizeNotExceeded,
1839 int *dpte_row_width_ub,
1840 unsigned int *dpte_row_height,
1841 unsigned int *MetaRequestWidth,
1842 unsigned int *MetaRequestHeight,
1843 unsigned int *meta_row_width,
1844 unsigned int *meta_row_height,
1845 int *vm_group_bytes,
1846 unsigned int *dpte_group_bytes,
1847 unsigned int *PixelPTEReqWidth,
1848 unsigned int *PixelPTEReqHeight,
1849 unsigned int *PTERequestSize,
1850 int *DPDE0BytesFrame,
1851 int *MetaPTEBytesFrame)
1853 struct vba_vars_st *v = &mode_lib->vba;
1854 unsigned int MPDEBytesFrame;
1855 unsigned int DCCMetaSurfaceBytes;
1856 unsigned int MacroTileSizeBytes;
1857 unsigned int MacroTileHeight;
1858 unsigned int ExtraDPDEBytesFrame;
1859 unsigned int PDEAndMetaPTEBytesFrame;
1860 unsigned int PixelPTEReqHeightPTEs = 0;
1861 unsigned int HostVMDynamicLevels = 0;
1862 double FractionOfPTEReturnDrop;
1864 if (GPUVMEnable == true && HostVMEnable == true) {
1865 if (HostVMMinPageSize < 2048) {
1866 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1867 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1868 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1870 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1874 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1875 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1876 if (ScanDirection != dm_vert) {
1877 *meta_row_height = *MetaRequestHeight;
1878 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1879 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1881 *meta_row_height = *MetaRequestWidth;
1882 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1883 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1885 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1886 if (GPUVMEnable == true) {
1887 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1888 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1890 *MetaPTEBytesFrame = 0;
1894 if (DCCEnable != true) {
1895 *MetaPTEBytesFrame = 0;
1900 if (SurfaceTiling == dm_sw_linear) {
1901 MacroTileSizeBytes = 256;
1902 MacroTileHeight = BlockHeight256Bytes;
1904 MacroTileSizeBytes = 65536;
1905 MacroTileHeight = 16 * BlockHeight256Bytes;
1907 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1909 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1910 if (ScanDirection != dm_vert) {
1911 *DPDE0BytesFrame = 64
1913 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1917 *DPDE0BytesFrame = 64
1919 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1923 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1925 *DPDE0BytesFrame = 0;
1926 ExtraDPDEBytesFrame = 0;
1929 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1931 #ifdef __DML_VBA_DEBUG__
1932 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1933 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1934 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1935 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1936 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1939 if (HostVMEnable == true) {
1940 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1942 #ifdef __DML_VBA_DEBUG__
1943 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1946 if (SurfaceTiling == dm_sw_linear) {
1947 PixelPTEReqHeightPTEs = 1;
1948 *PixelPTEReqHeight = 1;
1949 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1950 *PTERequestSize = 64;
1951 FractionOfPTEReturnDrop = 0;
1952 } else if (MacroTileSizeBytes == 4096) {
1953 PixelPTEReqHeightPTEs = 1;
1954 *PixelPTEReqHeight = MacroTileHeight;
1955 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1956 *PTERequestSize = 64;
1957 if (ScanDirection != dm_vert)
1958 FractionOfPTEReturnDrop = 0;
1960 FractionOfPTEReturnDrop = 7 / 8;
1961 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1962 PixelPTEReqHeightPTEs = 16;
1963 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1964 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1965 *PTERequestSize = 128;
1966 FractionOfPTEReturnDrop = 0;
1968 PixelPTEReqHeightPTEs = 1;
1969 *PixelPTEReqHeight = MacroTileHeight;
1970 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1971 *PTERequestSize = 64;
1972 FractionOfPTEReturnDrop = 0;
1975 if (SurfaceTiling == dm_sw_linear) {
1976 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1977 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1978 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1979 } else if (ScanDirection != dm_vert) {
1980 *dpte_row_height = *PixelPTEReqHeight;
1981 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1982 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1984 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1985 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1986 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1989 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1990 *PTEBufferSizeNotExceeded = true;
1992 *PTEBufferSizeNotExceeded = false;
1995 if (GPUVMEnable != true) {
1996 *PixelPTEBytesPerRow = 0;
1997 *PTEBufferSizeNotExceeded = true;
2000 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
2002 if (HostVMEnable == true) {
2003 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2006 if (HostVMEnable == true) {
2007 *vm_group_bytes = 512;
2008 *dpte_group_bytes = 512;
2009 } else if (GPUVMEnable == true) {
2010 *vm_group_bytes = 2048;
2011 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
2012 *dpte_group_bytes = 512;
2014 *dpte_group_bytes = 2048;
2017 *vm_group_bytes = 0;
2018 *dpte_group_bytes = 0;
2020 return PDEAndMetaPTEBytesFrame;
2023 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2025 struct vba_vars_st *v = &mode_lib->vba;
2027 double HostVMInefficiencyFactor = 1.0;
2028 bool NoChromaPlanes = true;
2030 double VMDataOnlyReturnBW;
2031 double MaxTotalRDBandwidth = 0;
2032 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2034 v->WritebackDISPCLK = 0.0;
2035 v->DISPCLKWithRamping = 0;
2036 v->DISPCLKWithoutRamping = 0;
2037 v->GlobalDPPCLK = 0.0;
2038 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2040 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2041 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2042 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2043 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2044 if (v->HostVMEnable != true) {
2045 v->ReturnBW = dml_min(
2046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2049 v->ReturnBW = dml_min(
2050 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2051 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2054 /* End DAL custom code */
2056 // DISPCLK and DPPCLK Calculation
2058 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2059 if (v->WritebackEnable[k]) {
2060 v->WritebackDISPCLK = dml_max(
2061 v->WritebackDISPCLK,
2062 dml31_CalculateWriteBackDISPCLK(
2063 v->WritebackPixelFormat[k],
2065 v->WritebackHRatio[k],
2066 v->WritebackVRatio[k],
2067 v->WritebackHTaps[k],
2068 v->WritebackVTaps[k],
2069 v->WritebackSourceWidth[k],
2070 v->WritebackDestinationWidth[k],
2072 v->WritebackLineBufferSize));
2076 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2077 if (v->HRatio[k] > 1) {
2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2079 v->MaxDCHUBToPSCLThroughput,
2080 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2082 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2085 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2087 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2088 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2090 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2091 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2094 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2095 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2096 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2097 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2099 if (v->HRatioChroma[k] > 1) {
2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2101 v->MaxDCHUBToPSCLThroughput,
2102 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2104 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2106 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2108 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2109 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2112 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2113 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2116 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2120 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2121 if (v->BlendingAndTiming[k] != k)
2123 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2124 v->DISPCLKWithRamping = dml_max(
2125 v->DISPCLKWithRamping,
2126 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2127 * (1 + v->DISPCLKRampingMargin / 100));
2128 v->DISPCLKWithoutRamping = dml_max(
2129 v->DISPCLKWithoutRamping,
2130 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2131 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2132 v->DISPCLKWithRamping = dml_max(
2133 v->DISPCLKWithRamping,
2134 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2135 * (1 + v->DISPCLKRampingMargin / 100));
2136 v->DISPCLKWithoutRamping = dml_max(
2137 v->DISPCLKWithoutRamping,
2138 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2140 v->DISPCLKWithRamping = dml_max(
2141 v->DISPCLKWithRamping,
2142 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2143 v->DISPCLKWithoutRamping = dml_max(
2144 v->DISPCLKWithoutRamping,
2145 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2149 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2150 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2152 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2153 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2154 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2155 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2156 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2157 v->DISPCLKDPPCLKVCOSpeed);
2158 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2159 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2160 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2161 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2163 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2165 v->DISPCLK = v->DISPCLK_calculated;
2166 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2168 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2169 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2170 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2172 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2173 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2174 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2175 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2178 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2179 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2182 // Urgent and B P-State/DRAM Clock Change Watermark
2183 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2184 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2186 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2187 CalculateBytePerPixelAnd256BBlockSizes(
2188 v->SourcePixelFormat[k],
2189 v->SurfaceTiling[k],
2190 &v->BytePerPixelY[k],
2191 &v->BytePerPixelC[k],
2192 &v->BytePerPixelDETY[k],
2193 &v->BytePerPixelDETC[k],
2194 &v->BlockHeight256BytesY[k],
2195 &v->BlockHeight256BytesC[k],
2196 &v->BlockWidth256BytesY[k],
2197 &v->BlockWidth256BytesC[k]);
2200 CalculateSwathWidth(
2202 v->NumberOfActivePlanes,
2203 v->SourcePixelFormat,
2211 v->ODMCombineEnabled,
2214 v->BlockHeight256BytesY,
2215 v->BlockHeight256BytesC,
2216 v->BlockWidth256BytesY,
2217 v->BlockWidth256BytesC,
2218 v->BlendingAndTiming,
2222 v->SwathWidthSingleDPPY,
2223 v->SwathWidthSingleDPPC,
2228 v->swath_width_luma_ub,
2229 v->swath_width_chroma_ub);
2231 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2232 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2234 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2235 * v->VRatioChroma[k];
2236 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2239 // DCFCLK Deep Sleep
2240 CalculateDCFCLKDeepSleep(
2242 v->NumberOfActivePlanes,
2253 v->PSCL_THROUGHPUT_LUMA,
2254 v->PSCL_THROUGHPUT_CHROMA,
2256 v->ReadBandwidthPlaneLuma,
2257 v->ReadBandwidthPlaneChroma,
2259 &v->DCFCLKDeepSleep);
2262 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2263 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2264 v->DSCCLK_calculated[k] = 0.0;
2266 if (v->OutputFormat[k] == dm_420)
2267 v->DSCFormatFactor = 2;
2268 else if (v->OutputFormat[k] == dm_444)
2269 v->DSCFormatFactor = 1;
2270 else if (v->OutputFormat[k] == dm_n422)
2271 v->DSCFormatFactor = 2;
2273 v->DSCFormatFactor = 1;
2274 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2275 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2276 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2277 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2278 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2279 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2281 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2282 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2287 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2288 double BPP = v->OutputBpp[k];
2290 if (v->DSCEnabled[k] && BPP != 0) {
2291 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2292 v->DSCDelay[k] = dscceComputeDelay(
2293 v->DSCInputBitPerComponent[k],
2295 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2296 v->NumberOfDSCSlices[k],
2298 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2299 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2301 * (dscceComputeDelay(
2302 v->DSCInputBitPerComponent[k],
2304 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2305 v->NumberOfDSCSlices[k] / 2.0,
2307 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2310 * (dscceComputeDelay(
2311 v->DSCInputBitPerComponent[k],
2313 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2314 v->NumberOfDSCSlices[k] / 4.0,
2316 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2318 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2324 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2325 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2326 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2327 v->DSCDelay[k] = v->DSCDelay[j];
2330 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2331 unsigned int PDEAndMetaPTEBytesFrameY;
2332 unsigned int PixelPTEBytesPerRowY;
2333 unsigned int MetaRowByteY;
2334 unsigned int MetaRowByteC;
2335 unsigned int PDEAndMetaPTEBytesFrameC;
2336 unsigned int PixelPTEBytesPerRowC;
2337 bool PTEBufferSizeNotExceededY;
2338 bool PTEBufferSizeNotExceededC;
2340 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2341 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2342 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2343 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2344 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2346 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2347 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2350 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2353 v->BlockHeight256BytesC[k],
2354 v->BlockWidth256BytesC[k],
2355 v->SourcePixelFormat[k],
2356 v->SurfaceTiling[k],
2357 v->BytePerPixelC[k],
2360 v->ViewportHeightChroma[k],
2363 v->HostVMMaxNonCachedPageTableLevels,
2364 v->GPUVMMinPageSize,
2365 v->HostVMMinPageSize,
2366 v->PTEBufferSizeInRequestsForChroma,
2368 v->DCCMetaPitchC[k],
2369 &v->MacroTileWidthC[k],
2371 &PixelPTEBytesPerRowC,
2372 &PTEBufferSizeNotExceededC,
2373 &v->dpte_row_width_chroma_ub[k],
2374 &v->dpte_row_height_chroma[k],
2375 &v->meta_req_width_chroma[k],
2376 &v->meta_req_height_chroma[k],
2377 &v->meta_row_width_chroma[k],
2378 &v->meta_row_height_chroma[k],
2381 &v->PixelPTEReqWidthC[k],
2382 &v->PixelPTEReqHeightC[k],
2383 &v->PTERequestSizeC[k],
2384 &v->dpde0_bytes_per_frame_ub_c[k],
2385 &v->meta_pte_bytes_per_frame_ub_c[k]);
2387 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2392 v->ProgressiveToInterlaceUnitInOPP,
2394 v->ViewportYStartC[k],
2395 &v->VInitPreFillC[k],
2396 &v->MaxNumSwathC[k]);
2398 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2399 v->PTEBufferSizeInRequestsForChroma = 0;
2400 PixelPTEBytesPerRowC = 0;
2401 PDEAndMetaPTEBytesFrameC = 0;
2403 v->MaxNumSwathC[k] = 0;
2404 v->PrefetchSourceLinesC[k] = 0;
2407 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2410 v->BlockHeight256BytesY[k],
2411 v->BlockWidth256BytesY[k],
2412 v->SourcePixelFormat[k],
2413 v->SurfaceTiling[k],
2414 v->BytePerPixelY[k],
2417 v->ViewportHeight[k],
2420 v->HostVMMaxNonCachedPageTableLevels,
2421 v->GPUVMMinPageSize,
2422 v->HostVMMinPageSize,
2423 v->PTEBufferSizeInRequestsForLuma,
2425 v->DCCMetaPitchY[k],
2426 &v->MacroTileWidthY[k],
2428 &PixelPTEBytesPerRowY,
2429 &PTEBufferSizeNotExceededY,
2430 &v->dpte_row_width_luma_ub[k],
2431 &v->dpte_row_height[k],
2432 &v->meta_req_width[k],
2433 &v->meta_req_height[k],
2434 &v->meta_row_width[k],
2435 &v->meta_row_height[k],
2436 &v->vm_group_bytes[k],
2437 &v->dpte_group_bytes[k],
2438 &v->PixelPTEReqWidthY[k],
2439 &v->PixelPTEReqHeightY[k],
2440 &v->PTERequestSizeY[k],
2441 &v->dpde0_bytes_per_frame_ub_l[k],
2442 &v->meta_pte_bytes_per_frame_ub_l[k]);
2444 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2449 v->ProgressiveToInterlaceUnitInOPP,
2451 v->ViewportYStartY[k],
2452 &v->VInitPreFillY[k],
2453 &v->MaxNumSwathY[k]);
2454 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2455 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2456 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2458 CalculateRowBandwidth(
2460 v->SourcePixelFormat[k],
2464 v->HTotal[k] / v->PixelClock[k],
2467 v->meta_row_height[k],
2468 v->meta_row_height_chroma[k],
2469 PixelPTEBytesPerRowY,
2470 PixelPTEBytesPerRowC,
2471 v->dpte_row_height[k],
2472 v->dpte_row_height_chroma[k],
2474 &v->dpte_row_bw[k]);
2477 v->TotalDCCActiveDPP = 0;
2478 v->TotalActiveDPP = 0;
2479 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2480 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2481 if (v->DCCEnable[k])
2482 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2483 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2484 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2485 NoChromaPlanes = false;
2488 ReorderBytes = v->NumberOfChannels
2490 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2491 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2492 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2494 VMDataOnlyReturnBW = dml_min(
2495 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2496 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2497 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2498 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2500 #ifdef __DML_VBA_DEBUG__
2501 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2502 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2503 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2504 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2505 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2506 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2507 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2508 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2509 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2510 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2511 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2514 if (v->GPUVMEnable && v->HostVMEnable)
2515 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2517 v->UrgentExtraLatency = CalculateExtraLatency(
2518 v->RoundTripPingLatencyCycles,
2522 v->PixelChunkSizeInKByte,
2523 v->TotalDCCActiveDPP,
2528 v->NumberOfActivePlanes,
2530 v->dpte_group_bytes,
2531 HostVMInefficiencyFactor,
2532 v->HostVMMinPageSize,
2533 v->HostVMMaxNonCachedPageTableLevels);
2535 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2537 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2538 if (v->BlendingAndTiming[k] == k) {
2539 if (v->WritebackEnable[k] == true) {
2540 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2541 + CalculateWriteBackDelay(
2542 v->WritebackPixelFormat[k],
2543 v->WritebackHRatio[k],
2544 v->WritebackVRatio[k],
2545 v->WritebackVTaps[k],
2546 v->WritebackDestinationWidth[k],
2547 v->WritebackDestinationHeight[k],
2548 v->WritebackSourceHeight[k],
2549 v->HTotal[k]) / v->DISPCLK;
2551 v->WritebackDelay[v->VoltageLevel][k] = 0;
2552 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2553 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2554 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2555 v->WritebackDelay[v->VoltageLevel][k],
2557 + CalculateWriteBackDelay(
2558 v->WritebackPixelFormat[j],
2559 v->WritebackHRatio[j],
2560 v->WritebackVRatio[j],
2561 v->WritebackVTaps[j],
2562 v->WritebackDestinationWidth[j],
2563 v->WritebackDestinationHeight[j],
2564 v->WritebackSourceHeight[j],
2565 v->HTotal[k]) / v->DISPCLK);
2571 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2572 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2573 if (v->BlendingAndTiming[k] == j)
2574 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2576 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2577 v->MaxVStartupLines[k] =
2578 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2579 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2580 v->VTotal[k] - v->VActive[k]
2584 (double) v->WritebackDelay[v->VoltageLevel][k]
2585 / (v->HTotal[k] / v->PixelClock[k]),
2587 if (v->MaxVStartupLines[k] > 1023)
2588 v->MaxVStartupLines[k] = 1023;
2590 #ifdef __DML_VBA_DEBUG__
2591 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2592 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2593 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2597 v->MaximumMaxVStartupLines = 0;
2598 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2599 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2602 // We don't really care to iterate between the various prefetch modes
2603 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2605 v->UrgentLatency = CalculateUrgentLatency(
2606 v->UrgentLatencyPixelDataOnly,
2607 v->UrgentLatencyPixelMixedWithVMData,
2608 v->UrgentLatencyVMDataOnly,
2609 v->DoUrgentLatencyAdjustment,
2610 v->UrgentLatencyAdjustmentFabricClockComponent,
2611 v->UrgentLatencyAdjustmentFabricClockReference,
2614 v->FractionOfUrgentBandwidth = 0.0;
2615 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2617 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2620 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2621 bool DestinationLineTimesForPrefetchLessThan2 = false;
2622 bool VRatioPrefetchMoreThan4 = false;
2623 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2624 MaxTotalRDBandwidth = 0;
2626 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2628 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2631 myPipe.DPPCLK = v->DPPCLK[k];
2632 myPipe.DISPCLK = v->DISPCLK;
2633 myPipe.PixelClock = v->PixelClock[k];
2634 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2635 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2636 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2637 myPipe.VRatio = v->VRatio[k];
2638 myPipe.VRatioChroma = v->VRatioChroma[k];
2639 myPipe.SourceScan = v->SourceScan[k];
2640 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2641 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2642 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2643 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2644 myPipe.InterlaceEnable = v->Interlace[k];
2645 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2646 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2647 myPipe.HTotal = v->HTotal[k];
2648 myPipe.DCCEnable = v->DCCEnable[k];
2649 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2650 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2651 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2652 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2653 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2654 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2655 v->ErrorResult[k] = CalculatePrefetchSchedule(
2657 HostVMInefficiencyFactor,
2660 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2662 v->DPPCLKDelaySCLLBOnly,
2663 v->DPPCLKDelayCNVCCursor,
2664 v->DISPCLKDelaySubtotal,
2665 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2667 v->MaxInterDCNTileRepeaters,
2668 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2669 v->MaxVStartupLines[k],
2670 v->GPUVMMaxPageTableLevels,
2673 v->HostVMMaxNonCachedPageTableLevels,
2674 v->HostVMMinPageSize,
2675 v->DynamicMetadataEnable[k],
2676 v->DynamicMetadataVMEnabled,
2677 v->DynamicMetadataLinesBeforeActiveRequired[k],
2678 v->DynamicMetadataTransmittedBytes[k],
2680 v->UrgentExtraLatency,
2682 v->PDEAndMetaPTEBytesFrame[k],
2684 v->PixelPTEBytesPerRow[k],
2685 v->PrefetchSourceLinesY[k],
2687 v->VInitPreFillY[k],
2689 v->PrefetchSourceLinesC[k],
2691 v->VInitPreFillC[k],
2693 v->swath_width_luma_ub[k],
2694 v->swath_width_chroma_ub[k],
2698 &v->DSTXAfterScaler[k],
2699 &v->DSTYAfterScaler[k],
2700 &v->DestinationLinesForPrefetch[k],
2701 &v->PrefetchBandwidth[k],
2702 &v->DestinationLinesToRequestVMInVBlank[k],
2703 &v->DestinationLinesToRequestRowInVBlank[k],
2704 &v->VRatioPrefetchY[k],
2705 &v->VRatioPrefetchC[k],
2706 &v->RequiredPrefetchPixDataBWLuma[k],
2707 &v->RequiredPrefetchPixDataBWChroma[k],
2708 &v->NotEnoughTimeForDynamicMetadata[k],
2710 &v->prefetch_vmrow_bw[k],
2714 &v->VUpdateOffsetPix[k],
2715 &v->VUpdateWidthPix[k],
2716 &v->VReadyOffsetPix[k]);
2718 #ifdef __DML_VBA_DEBUG__
2719 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2721 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2724 v->NoEnoughUrgentLatencyHiding = false;
2725 v->NoEnoughUrgentLatencyHidingPre = false;
2727 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2728 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2729 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2730 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2731 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2733 CalculateUrgentBurstFactor(
2734 v->swath_width_luma_ub[k],
2735 v->swath_width_chroma_ub[k],
2738 v->HTotal[k] / v->PixelClock[k],
2740 v->CursorBufferSize,
2741 v->CursorWidth[k][0],
2745 v->BytePerPixelDETY[k],
2746 v->BytePerPixelDETC[k],
2747 v->DETBufferSizeY[k],
2748 v->DETBufferSizeC[k],
2749 &v->UrgBurstFactorCursor[k],
2750 &v->UrgBurstFactorLuma[k],
2751 &v->UrgBurstFactorChroma[k],
2752 &v->NoUrgentLatencyHiding[k]);
2754 CalculateUrgentBurstFactor(
2755 v->swath_width_luma_ub[k],
2756 v->swath_width_chroma_ub[k],
2759 v->HTotal[k] / v->PixelClock[k],
2761 v->CursorBufferSize,
2762 v->CursorWidth[k][0],
2764 v->VRatioPrefetchY[k],
2765 v->VRatioPrefetchC[k],
2766 v->BytePerPixelDETY[k],
2767 v->BytePerPixelDETC[k],
2768 v->DETBufferSizeY[k],
2769 v->DETBufferSizeC[k],
2770 &v->UrgBurstFactorCursorPre[k],
2771 &v->UrgBurstFactorLumaPre[k],
2772 &v->UrgBurstFactorChromaPre[k],
2773 &v->NoUrgentLatencyHidingPre[k]);
2775 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2777 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2778 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2779 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2780 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2781 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2783 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2784 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2785 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2787 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2789 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2790 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2791 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2792 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2793 + v->cursor_bw_pre[k]);
2795 #ifdef __DML_VBA_DEBUG__
2796 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2797 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2798 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2799 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2800 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2802 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2803 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2805 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2806 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2807 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2808 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2809 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2810 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2811 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2812 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2813 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2814 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2817 if (v->DestinationLinesForPrefetch[k] < 2)
2818 DestinationLineTimesForPrefetchLessThan2 = true;
2820 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2821 VRatioPrefetchMoreThan4 = true;
2823 if (v->NoUrgentLatencyHiding[k] == true)
2824 v->NoEnoughUrgentLatencyHiding = true;
2826 if (v->NoUrgentLatencyHidingPre[k] == true)
2827 v->NoEnoughUrgentLatencyHidingPre = true;
2830 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2832 #ifdef __DML_VBA_DEBUG__
2833 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2834 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2835 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2838 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2839 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2840 v->PrefetchModeSupported = true;
2842 v->PrefetchModeSupported = false;
2843 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2844 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2845 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2846 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2850 // This error result check was done after the PrefetchModeSupported. So we will
2851 // still try to calculate flip schedule even prefetch mode not supported
2852 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2853 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2854 v->PrefetchModeSupported = false;
2855 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2859 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2860 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2861 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2862 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2864 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2865 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2866 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2868 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2869 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2870 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2873 v->TotImmediateFlipBytes = 0;
2874 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2875 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2876 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2878 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2879 CalculateFlipSchedule(
2881 HostVMInefficiencyFactor,
2882 v->UrgentExtraLatency,
2884 v->GPUVMMaxPageTableLevels,
2886 v->HostVMMaxNonCachedPageTableLevels,
2888 v->HostVMMinPageSize,
2889 v->PDEAndMetaPTEBytesFrame[k],
2891 v->PixelPTEBytesPerRow[k],
2892 v->BandwidthAvailableForImmediateFlip,
2893 v->TotImmediateFlipBytes,
2894 v->SourcePixelFormat[k],
2895 v->HTotal[k] / v->PixelClock[k],
2900 v->dpte_row_height[k],
2901 v->meta_row_height[k],
2902 v->dpte_row_height_chroma[k],
2903 v->meta_row_height_chroma[k],
2904 &v->DestinationLinesToRequestVMInImmediateFlip[k],
2905 &v->DestinationLinesToRequestRowInImmediateFlip[k],
2906 &v->final_flip_bw[k],
2907 &v->ImmediateFlipSupportedForPipe[k]);
2910 v->total_dcn_read_bw_with_flip = 0.0;
2911 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2912 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2913 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2915 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2916 v->DPPPerPlane[k] * v->final_flip_bw[k]
2917 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2918 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2919 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2921 * (v->final_flip_bw[k]
2922 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2923 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2924 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2925 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2927 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2928 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2929 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2931 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2932 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2934 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2936 v->ImmediateFlipSupported = true;
2937 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2938 #ifdef __DML_VBA_DEBUG__
2939 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2941 v->ImmediateFlipSupported = false;
2942 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2944 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2945 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2946 #ifdef __DML_VBA_DEBUG__
2947 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2950 v->ImmediateFlipSupported = false;
2954 v->ImmediateFlipSupported = false;
2957 v->PrefetchAndImmediateFlipSupported =
2958 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2959 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2960 v->ImmediateFlipSupported)) ? true : false;
2961 #ifdef __DML_VBA_DEBUG__
2962 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2963 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2964 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2965 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2966 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2967 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2969 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2971 v->VStartupLines = v->VStartupLines + 1;
2972 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2973 ASSERT(v->PrefetchAndImmediateFlipSupported);
2975 // Unbounded Request Enabled
2976 CalculateUnboundedRequestAndCompressedBufferSize(
2977 v->DETBufferSizeInKByte[0],
2978 v->ConfigReturnBufferSizeInKByte,
2979 v->UseUnboundedRequesting,
2983 v->CompressedBufferSegmentSizeInkByte,
2985 &v->UnboundedRequestEnabled,
2986 &v->CompressedBufferSizeInkByte);
2988 //Watermarks and NB P-State/DRAM Clock Change Support
2990 enum clock_change_support DRAMClockChangeSupport; // dummy
2991 CalculateWatermarksAndDRAMSpeedChangeSupport(
2997 v->UrgentExtraLatency,
3007 v->BytePerPixelDETY,
3008 v->BytePerPixelDETC,
3009 v->UnboundedRequestEnabled,
3010 v->CompressedBufferSizeInkByte,
3011 &DRAMClockChangeSupport,
3012 &v->StutterExitWatermark,
3013 &v->StutterEnterPlusExitWatermark,
3014 &v->Z8StutterExitWatermark,
3015 &v->Z8StutterEnterPlusExitWatermark);
3017 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3018 if (v->WritebackEnable[k] == true) {
3019 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
3021 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
3023 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
3028 //Display Pipeline Delivery Time in Prefetch, Groups
3029 CalculatePixelDeliveryTimes(
3030 v->NumberOfActivePlanes,
3035 v->swath_width_luma_ub,
3036 v->swath_width_chroma_ub,
3041 v->PSCL_THROUGHPUT_LUMA,
3042 v->PSCL_THROUGHPUT_CHROMA,
3049 v->BlockWidth256BytesY,
3050 v->BlockHeight256BytesY,
3051 v->BlockWidth256BytesC,
3052 v->BlockHeight256BytesC,
3053 v->DisplayPipeLineDeliveryTimeLuma,
3054 v->DisplayPipeLineDeliveryTimeChroma,
3055 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3056 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3057 v->DisplayPipeRequestDeliveryTimeLuma,
3058 v->DisplayPipeRequestDeliveryTimeChroma,
3059 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3060 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3061 v->CursorRequestDeliveryTime,
3062 v->CursorRequestDeliveryTimePrefetch);
3064 CalculateMetaAndPTETimes(
3065 v->NumberOfActivePlanes,
3068 v->MinMetaChunkSizeBytes,
3072 v->DestinationLinesToRequestRowInVBlank,
3073 v->DestinationLinesToRequestRowInImmediateFlip,
3080 v->dpte_row_height_chroma,
3082 v->meta_row_width_chroma,
3084 v->meta_row_height_chroma,
3086 v->meta_req_width_chroma,
3088 v->meta_req_height_chroma,
3089 v->dpte_group_bytes,
3092 v->PixelPTEReqWidthY,
3093 v->PixelPTEReqHeightY,
3094 v->PixelPTEReqWidthC,
3095 v->PixelPTEReqHeightC,
3096 v->dpte_row_width_luma_ub,
3097 v->dpte_row_width_chroma_ub,
3098 v->DST_Y_PER_PTE_ROW_NOM_L,
3099 v->DST_Y_PER_PTE_ROW_NOM_C,
3100 v->DST_Y_PER_META_ROW_NOM_L,
3101 v->DST_Y_PER_META_ROW_NOM_C,
3102 v->TimePerMetaChunkNominal,
3103 v->TimePerChromaMetaChunkNominal,
3104 v->TimePerMetaChunkVBlank,
3105 v->TimePerChromaMetaChunkVBlank,
3106 v->TimePerMetaChunkFlip,
3107 v->TimePerChromaMetaChunkFlip,
3108 v->time_per_pte_group_nom_luma,
3109 v->time_per_pte_group_vblank_luma,
3110 v->time_per_pte_group_flip_luma,
3111 v->time_per_pte_group_nom_chroma,
3112 v->time_per_pte_group_vblank_chroma,
3113 v->time_per_pte_group_flip_chroma);
3115 CalculateVMGroupAndRequestTimes(
3116 v->NumberOfActivePlanes,
3118 v->GPUVMMaxPageTableLevels,
3121 v->DestinationLinesToRequestVMInVBlank,
3122 v->DestinationLinesToRequestVMInImmediateFlip,
3125 v->dpte_row_width_luma_ub,
3126 v->dpte_row_width_chroma_ub,
3128 v->dpde0_bytes_per_frame_ub_l,
3129 v->dpde0_bytes_per_frame_ub_c,
3130 v->meta_pte_bytes_per_frame_ub_l,
3131 v->meta_pte_bytes_per_frame_ub_c,
3132 v->TimePerVMGroupVBlank,
3133 v->TimePerVMGroupFlip,
3134 v->TimePerVMRequestVBlank,
3135 v->TimePerVMRequestFlip);
3138 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3139 if (PrefetchMode == 0) {
3140 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3141 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3142 v->MinTTUVBlank[k] = dml_max(
3143 v->DRAMClockChangeWatermark,
3144 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3145 } else if (PrefetchMode == 1) {
3146 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3147 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3148 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3150 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3151 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3152 v->MinTTUVBlank[k] = v->UrgentWatermark;
3154 if (!v->DynamicMetadataEnable[k])
3155 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3158 // DCC Configuration
3160 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3161 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3162 v->SourcePixelFormat[k],
3163 v->SurfaceWidthY[k],
3164 v->SurfaceWidthC[k],
3165 v->SurfaceHeightY[k],
3166 v->SurfaceHeightC[k],
3167 v->DETBufferSizeInKByte[0] * 1024,
3168 v->BlockHeight256BytesY[k],
3169 v->BlockHeight256BytesC[k],
3170 v->SurfaceTiling[k],
3171 v->BytePerPixelY[k],
3172 v->BytePerPixelC[k],
3173 v->BytePerPixelDETY[k],
3174 v->BytePerPixelDETC[k],
3176 &v->DCCYMaxUncompressedBlock[k],
3177 &v->DCCCMaxUncompressedBlock[k],
3178 &v->DCCYMaxCompressedBlock[k],
3179 &v->DCCCMaxCompressedBlock[k],
3180 &v->DCCYIndependentBlock[k],
3181 &v->DCCCIndependentBlock[k]);
3184 // VStartup Adjustment
3185 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3186 bool isInterlaceTiming;
3187 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3188 #ifdef __DML_VBA_DEBUG__
3189 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3192 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3194 #ifdef __DML_VBA_DEBUG__
3195 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3196 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3197 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3198 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3201 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3202 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3203 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3206 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3208 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3209 - v->VFrontPorch[k])
3210 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3211 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3213 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3215 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3216 <= (isInterlaceTiming ?
3217 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3218 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3219 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3221 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3223 #ifdef __DML_VBA_DEBUG__
3224 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3225 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3226 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3227 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3228 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3229 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3230 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3231 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3232 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3233 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3234 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3239 //Maximum Bandwidth Used
3240 double TotalWRBandwidth = 0;
3241 double MaxPerPlaneVActiveWRBandwidth = 0;
3242 double WRBandwidth = 0;
3243 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3244 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3245 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3246 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3247 } else if (v->WritebackEnable[k] == true) {
3248 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3249 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3251 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3252 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3255 v->TotalDataReadBandwidth = 0;
3256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3257 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3260 // Stutter Efficiency
3261 CalculateStutterEfficiency(
3263 v->CompressedBufferSizeInkByte,
3264 v->UnboundedRequestEnabled,
3265 v->ConfigReturnBufferSizeInKByte,
3266 v->MetaFIFOSizeInKEntries,
3267 v->ZeroSizeBufferEntries,
3268 v->NumberOfActivePlanes,
3269 v->ROBBufferSizeInKByte,
3270 v->TotalDataReadBandwidth,
3273 v->COMPBUF_RESERVED_SPACE_64B,
3274 v->COMPBUF_RESERVED_SPACE_ZS,
3277 v->SynchronizedVBlank,
3278 v->StutterEnterPlusExitWatermark,
3279 v->Z8StutterEnterPlusExitWatermark,
3280 v->ProgressiveToInterlaceUnitInOPP,
3286 v->BytePerPixelDETY,
3292 v->DCCFractionOfZeroSizeRequestsLuma,
3293 v->DCCFractionOfZeroSizeRequestsChroma,
3299 v->BlockHeight256BytesY,
3300 v->BlockWidth256BytesY,
3301 v->BlockHeight256BytesC,
3302 v->BlockWidth256BytesC,
3303 v->DCCYMaxUncompressedBlock,
3304 v->DCCCMaxUncompressedBlock,
3308 v->ReadBandwidthPlaneLuma,
3309 v->ReadBandwidthPlaneChroma,
3312 &v->StutterEfficiencyNotIncludingVBlank,
3313 &v->StutterEfficiency,
3314 &v->NumberOfStutterBurstsPerFrame,
3315 &v->Z8StutterEfficiencyNotIncludingVBlank,
3316 &v->Z8StutterEfficiency,
3317 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - derive per-plane 256-byte block geometry and then
 * run the swath / DET configuration once for all active planes.
 *
 * @mode_lib: display mode library instance; this function works on
 *            mode_lib->vba through the local alias v.
 *
 * The per-plane helper results are kept in function-local arrays sized
 * DC__NUM_DPP__MAX and are then forwarded to
 * CalculateSwathAndDETConfiguration().
 */
3321 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3323 struct vba_vars_st *v = &mode_lib->vba;
3324 // Display Pipe Configuration
// Per-plane scratch storage, indexed by plane number k.
3325 double BytePerPixDETY[DC__NUM_DPP__MAX];
3326 double BytePerPixDETC[DC__NUM_DPP__MAX];
3327 int BytePerPixY[DC__NUM_DPP__MAX];
3328 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these are int arrays, while CalculateBytePerPixelAnd256BBlockSizes()
// declares unsigned int * parameters - confirm the signedness mismatch is intended.
3329 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3330 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3331 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3332 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
// dummy1..dummy7 and dummysinglestring: presumably throw-away output slots for
// CalculateSwathAndDETConfiguration() - TODO confirm against its prototype.
3333 double dummy1[DC__NUM_DPP__MAX];
3334 double dummy2[DC__NUM_DPP__MAX];
3335 double dummy3[DC__NUM_DPP__MAX];
3336 double dummy4[DC__NUM_DPP__MAX];
3337 int dummy5[DC__NUM_DPP__MAX];
3338 int dummy6[DC__NUM_DPP__MAX];
3339 bool dummy7[DC__NUM_DPP__MAX];
3340 bool dummysinglestring;
// One helper call per active plane: pixel format and tiling in, 256-byte
// block dimensions out (stored at index k of the local arrays).
3344 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3346 CalculateBytePerPixelAnd256BBlockSizes(
3347 v->SourcePixelFormat[k],
3348 v->SurfaceTiling[k],
3353 &Read256BytesBlockHeightY[k],
3354 &Read256BytesBlockHeightC[k],
3355 &Read256BytesBlockWidthY[k],
3356 &Read256BytesBlockWidthC[k]);
// Single call covering all planes; it receives the block geometry computed
// above together with state taken from *v.
3359 CalculateSwathAndDETConfiguration(
3361 v->NumberOfActivePlanes,
// Only element 0 of DETBufferSizeInKByte is passed here.
3362 v->DETBufferSizeInKByte[0],
3366 v->SourcePixelFormat,
3374 Read256BytesBlockHeightY,
3375 Read256BytesBlockHeightC,
3376 Read256BytesBlockWidthY,
3377 Read256BytesBlockWidthC,
3378 v->ODMCombineEnabled,
3379 v->BlendingAndTiming,
3397 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - map a source pixel format and
 * surface tiling mode to bytes-per-pixel values and 256-byte block dimensions.
 *
 * @SourcePixelFormat:    pixel format selecting the values below.
 * @SurfaceTiling:        tiling mode; dm_sw_linear forces a block height of 1.
 * @BytePerPixelY:        out/in: integer luma bytes per pixel; read back below
 *                        when the luma block width is derived.
 * @BytePerPixelC:        out/in: integer chroma bytes per pixel; read back
 *                        below when the chroma block width is derived.
 * @BytePerPixelDETY:     out: fractional luma bytes per pixel.
 * @BytePerPixelDETC:     out: fractional chroma bytes per pixel (0 for the
 *                        single-plane 444 / rgbe formats).
 * @BlockHeight256BytesY: out: luma 256-byte block height.
 * @BlockHeight256BytesC: out: chroma 256-byte block height (0 when the format
 *                        is handled by the single-plane branch).
 * @BlockWidth256BytesY:  out: luma 256-byte block width.
 * @BlockWidth256BytesC:  out: chroma 256-byte block width (0 when the format
 *                        is handled by the single-plane branch).
 *
 * Declared to return bool; the return statement is not part of the lines
 * documented here - TODO confirm the returned value.
 */
3400 static bool CalculateBytePerPixelAnd256BBlockSizes(
3401 enum source_format_class SourcePixelFormat,
3402 enum dm_swizzle_mode SurfaceTiling,
3403 unsigned int *BytePerPixelY,
3404 unsigned int *BytePerPixelC,
3405 double *BytePerPixelDETY,
3406 double *BytePerPixelDETC,
3407 unsigned int *BlockHeight256BytesY,
3408 unsigned int *BlockHeight256BytesC,
3409 unsigned int *BlockWidth256BytesY,
3410 unsigned int *BlockWidth256BytesC)
// Step 1: bytes per pixel, chosen per format.
3412 if (SourcePixelFormat == dm_444_64) {
3413 *BytePerPixelDETY = 8;
3414 *BytePerPixelDETC = 0;
3417 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3418 *BytePerPixelDETY = 4;
3419 *BytePerPixelDETC = 0;
3422 } else if (SourcePixelFormat == dm_444_16) {
3423 *BytePerPixelDETY = 2;
3424 *BytePerPixelDETC = 0;
3427 } else if (SourcePixelFormat == dm_444_8) {
3428 *BytePerPixelDETY = 1;
3429 *BytePerPixelDETC = 0;
3432 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3433 *BytePerPixelDETY = 4;
3434 *BytePerPixelDETC = 1;
3437 } else if (SourcePixelFormat == dm_420_8) {
3438 *BytePerPixelDETY = 1;
3439 *BytePerPixelDETC = 2;
3442 } else if (SourcePixelFormat == dm_420_12) {
3443 *BytePerPixelDETY = 2;
3444 *BytePerPixelDETC = 4;
// Remaining formats: fractional values 4/3 (luma) and 8/3 (chroma).
3448 *BytePerPixelDETY = 4.0 / 3;
3449 *BytePerPixelDETC = 8.0 / 3;
// Step 2: 256-byte block dimensions.
// First branch: formats treated as single-plane; chroma block dims are set to 0.
// NOTE(review): dm_mono_16 / dm_mono_8 are listed here but have no dedicated
// case in the bytes-per-pixel chain above - confirm this is intended.
3454 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3455 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3456 if (SurfaceTiling == dm_sw_linear) {
3457 *BlockHeight256BytesY = 1;
3458 } else if (SourcePixelFormat == dm_444_64) {
3459 *BlockHeight256BytesY = 4;
3460 } else if (SourcePixelFormat == dm_444_8) {
3461 *BlockHeight256BytesY = 16;
3463 *BlockHeight256BytesY = 8;
// Width follows from 256 bytes = width * height * bytes-per-pixel (integer division).
3465 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3466 *BlockHeight256BytesC = 0;
3467 *BlockWidth256BytesC = 0;
// Second branch: all other formats get separate luma and chroma block sizes.
3469 if (SurfaceTiling == dm_sw_linear) {
3470 *BlockHeight256BytesY = 1;
3471 *BlockHeight256BytesC = 1;
3472 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3473 *BlockHeight256BytesY = 8;
3474 *BlockHeight256BytesC = 16;
3475 } else if (SourcePixelFormat == dm_420_8) {
3476 *BlockHeight256BytesY = 16;
3477 *BlockHeight256BytesC = 8;
3479 *BlockHeight256BytesY = 8;
3480 *BlockHeight256BytesC = 8;
// Same width derivation as above, applied to luma and chroma independently.
3482 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3483 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - pick the wait time that corresponds to a prefetch mode.
 *
 * @PrefetchMode:           0, 1, or any other value (handled by the last branch).
 * @DRAMClockChangeLatency: added to UrgentLatency in mode 0 only.
 * @UrgentLatency:          lower bound of the result in every mode.
 * @SREnterPlusExitTime:    considered in modes 0 and 1.
 *
 * Return:
 *   mode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *               SREnterPlusExitTime, UrgentLatency)
 *   mode 1: max(SREnterPlusExitTime, UrgentLatency)
 *   other:  UrgentLatency
 */
3488 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
3490 if (PrefetchMode == 0) {
3491 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
3492 } else if (PrefetchMode == 1) {
3493 return dml_max(SREnterPlusExitTime, UrgentLatency);
// Any other prefetch mode: only the urgent latency has to be covered.
3495 return UrgentLatency;
/*
 * dml31_CalculateWriteBackDISPCLK - DISPCLK value required by writeback.
 *
 * Three candidate clocks are computed from the pixel clock and the writeback
 * scaler setup; the largest of them is returned.
 *
 * @WritebackPixelFormat:      not referenced by the computations below.
 * @WritebackHRatio:           divisor of the horizontal candidate.
 * @WritebackVRatio:           not referenced by the computations below.
 * @WritebackHTaps:            horizontal tap count (used in groups of 8).
 * @WritebackVTaps:            vertical tap count.
 * @WritebackSourceWidth:      divisor of the line-buffer candidate.
 * @WritebackDestinationWidth: destination width (used in groups of 6 for the
 *                             vertical candidate).
 * @HTotal:                    divisor of the vertical candidate.
 * @WritebackLineBufferSize:   scaled by 1/57 in the line-buffer candidate.
 *
 * Return: dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB).
 */
3499 double dml31_CalculateWriteBackDISPCLK(
3500 enum source_format_class WritebackPixelFormat,
3502 double WritebackHRatio,
3503 double WritebackVRatio,
3504 unsigned int WritebackHTaps,
3505 unsigned int WritebackVTaps,
3506 long WritebackSourceWidth,
3507 long WritebackDestinationWidth,
3508 unsigned int HTotal,
3509 unsigned int WritebackLineBufferSize)
3511 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
// Horizontal candidate.
3513 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
// Vertical candidate.
3514 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
// Line-buffer candidate.
3515 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3516 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
/*
 * CalculateWriteBackDelay - writeback delay derived from the scaler geometry.
 *
 * @WritebackPixelFormat:       not referenced by the computations below.
 * @WritebackHRatio:            not referenced by the computations below.
 * @WritebackVRatio:            vertical scale ratio.
 * @WritebackVTaps:             vertical tap count.
 * @WritebackDestinationWidth:  destination width.
 * @WritebackDestinationHeight: destination height.
 * @WritebackSourceHeight:      source height.
 * @HTotal:                     total horizontal pixels per line.
 *
 * Return: 0 when Output_lines_last_notclamped is negative, otherwise
 * Output_lines_last_notclamped * Line_length
 * + (HTotal - WritebackDestinationWidth) + 80.
 */
3519 static double CalculateWriteBackDelay(
3520 enum source_format_class WritebackPixelFormat,
3521 double WritebackHRatio,
3522 double WritebackVRatio,
3523 unsigned int WritebackVTaps,
3524 int WritebackDestinationWidth,
3525 int WritebackDestinationHeight,
3526 int WritebackSourceHeight,
3527 unsigned int HTotal)
3529 double CalculateWriteBackDelay;
3531 double Output_lines_last_notclamped;
3532 double WritebackVInit;
3534 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
// Line length: the larger of the destination width and
// ceil(width / 6) * vertical taps.
3535 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
// May come out negative; the branch below clamps the delay to 0 in that case.
3536 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3537 if (Output_lines_last_notclamped < 0) {
3538 CalculateWriteBackDelay = 0;
3540 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3542 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - compute the VUPDATE / VREADY
 * pixel offsets and the dynamic-metadata timing terms.
 *
 * @MaxInterDCNTileRepeaters:                  multiplier of the per-repeater delay.
 * @DCFClkDeepSleep:                           clock used in the VUpdate width and VReady offset terms.
 * @DynamicMetadataTransmittedBytes:           byte count used for *Tdmbf.
 * @DynamicMetadataLinesBeforeActiveRequired:  0 selects the half-VBlank rule for *Tdmsks.
 * @InterlaceEnable:                           1 together with !ProgressiveToInterlaceUnitInOPP halves *Tdmsks.
 * @ProgressiveToInterlaceUnitInOPP:           see @InterlaceEnable.
 * @VUpdateOffsetPix:                          out: ceil(HTotal / 4).
 * @VUpdateWidthPix:                           out (double): see formula below.
 * @VReadyOffsetPix:                           out (double): see formula below.
 *
 * The body also reads DPPCLK, DISPCLK, PixelClock, HTotal and VBlank and
 * writes *TSetup, *Tdmbf, *Tdmec and *Tdmsks.
 */
3545 static void CalculateVupdateAndDynamicMetadataParameters(
3546 int MaxInterDCNTileRepeaters,
3549 double DCFClkDeepSleep,
3553 int DynamicMetadataTransmittedBytes,
3554 int DynamicMetadataLinesBeforeActiveRequired,
3555 int InterlaceEnable,
3556 bool ProgressiveToInterlaceUnitInOPP,
3561 int *VUpdateOffsetPix,
3562 double *VUpdateWidthPix,
3563 double *VReadyOffsetPix)
3565 double TotalRepeaterDelayTime;
// Delay contributed by the inter-tile repeaters: 2 DPPCLK + 3 DISPCLK cycles each.
3567 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
// Times are converted to pixels by multiplying with PixelClock, then rounded up.
3568 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
3569 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
3570 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
// Setup time: the three pixel quantities above converted back to time.
3571 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
3572 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
// Duration of HTotal pixel clocks.
3573 *Tdmec = HTotal / PixelClock;
// No explicit line requirement: use half of the VBlank duration.
3574 if (DynamicMetadataLinesBeforeActiveRequired == 0) {
3575 *Tdmsks = VBlank * HTotal / PixelClock / 2.0;
3577 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
3579 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
3580 *Tdmsks = *Tdmsks / 2;
3582 #ifdef __DML_VBA_DEBUG__
// *VUpdateWidthPix and *VReadyOffsetPix are double, so they must be printed
// with %f; %d is only correct for the int *VUpdateOffsetPix.
3583 dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
3584 dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
3585 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
/*
 * CalculateRowBandwidth - bandwidth needed to fetch DCC meta rows and page
 * table (dpte) rows for one plane.
 *
 * @SourcePixelFormat:         decides whether a chroma term is added.
 * @VRatioChroma:              vertical ratio applied to the chroma terms.
 * @MetaRowByteLuma:           luma meta bytes per row.
 * @MetaRowByteChroma:         chroma meta bytes per row.
 * @meta_row_height_luma:      luma meta row height.
 * @meta_row_height_chroma:    chroma meta row height.
 * @PixelPTEBytesPerRowLuma:   luma PTE bytes per row.
 * @PixelPTEBytesPerRowChroma: chroma PTE bytes per row.
 * @dpte_row_height_luma:      luma dpte row height.
 * @dpte_row_height_chroma:    chroma dpte row height.
 * @meta_row_bw:               out: meta row bandwidth.
 * @dpte_row_bw:               out: dpte row bandwidth.
 *
 * Each term has the form ratio * bytes / (row_height * LineTime). The body
 * also reads DCCEnable, GPUVMEnable, VRatio and LineTime.
 */
3589 static void CalculateRowBandwidth(
3591 enum source_format_class SourcePixelFormat,
3593 double VRatioChroma,
3596 unsigned int MetaRowByteLuma,
3597 unsigned int MetaRowByteChroma,
3598 unsigned int meta_row_height_luma,
3599 unsigned int meta_row_height_chroma,
3600 unsigned int PixelPTEBytesPerRowLuma,
3601 unsigned int PixelPTEBytesPerRowChroma,
3602 unsigned int dpte_row_height_luma,
3603 unsigned int dpte_row_height_chroma,
3604 double *meta_row_bw,
3605 double *dpte_row_bw)
// Meta row bandwidth. DCC disabled: presumably *meta_row_bw is set to 0 - TODO confirm.
3607 if (DCCEnable != true) {
// Two-plane formats: luma term plus chroma term.
3609 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3610 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
// All other formats: luma term only.
3612 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
// dpte row bandwidth, same structure. GPUVM disabled: presumably
// *dpte_row_bw is set to 0 - TODO confirm.
3615 if (GPUVMEnable != true) {
3617 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3618 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3619 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3621 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - immediate-flip timing and bandwidth for one plane.
 *
 * @mode_lib:                           display mode library instance.
 * @HostVMInefficiencyFactor:           multiplier applied to PTE byte counts.
 * @UrgentExtraLatency:                 added to the page-table walk latency.
 * @UrgentLatency:                      latency per page-table trip.
 * @GPUVMMaxPageTableLevels:            GPUVM page table depth.
 * @HostVMMaxNonCachedPageTableLevels:  used as the HostVM trip count when both
 *                                      GPUVM and HostVM are enabled.
 * @HostVMMinPageSize:                  not referenced by the lines documented here.
 * @PDEAndMetaPTEBytesPerFrame:         per-frame PDE + meta PTE bytes.
 * @MetaRowBytes:                       meta bytes per row.
 * @DPTEBytesPerRow:                    dpte bytes per row.
 * @BandwidthAvailableForImmediateFlip: bandwidth shared across all planes.
 * @TotImmediateFlipBytes:              sum of flip bytes over all planes; used
 *                                      as a divisor.
 * @SourcePixelFormat:                  selects whether chroma rows are considered.
 * @VRatioChroma:                       chroma vertical ratio.
 * @dpte_row_height:                    luma dpte row height.
 * @meta_row_height:                    luma meta row height.
 * @dpte_row_height_chroma:             chroma dpte row height.
 * @meta_row_height_chroma:             chroma meta row height.
 * @DestinationLinesToRequestVMInImmediateFlip:  out, in quarter-line steps.
 * @DestinationLinesToRequestRowInImmediateFlip: out, in quarter-line steps.
 * @final_flip_bw:                      out: bandwidth required for the flip.
 * @ImmediateFlipSupportedForPipe:      out: result of the final limit check.
 *
 * The body also reads GPUVMEnable, HostVMEnable, DCCEnable, LineTime, VRatio
 * and Tno_bw.
 */
3625 static void CalculateFlipSchedule(
3626 struct display_mode_lib *mode_lib,
3627 double HostVMInefficiencyFactor,
3628 double UrgentExtraLatency,
3629 double UrgentLatency,
3630 unsigned int GPUVMMaxPageTableLevels,
3632 unsigned int HostVMMaxNonCachedPageTableLevels,
3634 double HostVMMinPageSize,
3635 double PDEAndMetaPTEBytesPerFrame,
3636 double MetaRowBytes,
3637 double DPTEBytesPerRow,
3638 double BandwidthAvailableForImmediateFlip,
3639 unsigned int TotImmediateFlipBytes,
3640 enum source_format_class SourcePixelFormat,
3643 double VRatioChroma,
3646 unsigned int dpte_row_height,
3647 unsigned int meta_row_height,
3648 unsigned int dpte_row_height_chroma,
3649 unsigned int meta_row_height_chroma,
3650 double *DestinationLinesToRequestVMInImmediateFlip,
3651 double *DestinationLinesToRequestRowInImmediateFlip,
3652 double *final_flip_bw,
3653 bool *ImmediateFlipSupportedForPipe)
3655 double min_row_time = 0.0;
3656 unsigned int HostVMDynamicLevelsTrips;
3657 double TimeForFetchingMetaPTEImmediateFlip;
3658 double TimeForFetchingRowInVBlankImmediateFlip;
3659 double ImmediateFlipBW;
// HostVM adds page-table trips only when GPUVM and HostVM are both on.
3661 if (GPUVMEnable == true && HostVMEnable == true) {
3662 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3664 HostVMDynamicLevelsTrips = 0;
// This plane's share of the available flip bandwidth, proportional to its
// byte count relative to TotImmediateFlipBytes.
3667 if (GPUVMEnable == true || DCCEnable == true) {
3668 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
// Time to fetch PDEs / meta PTEs: the largest of the bandwidth-limited time,
// the page-table walk latency and a further dml_max3() argument.
3671 if (GPUVMEnable == true) {
3672 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3673 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3674 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3677 TimeForFetchingMetaPTEImmediateFlip = 0;
// Convert to destination lines, rounded to quarter-line granularity.
3680 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
// Time to fetch one meta row plus one dpte row.
3681 if ((GPUVMEnable == true || DCCEnable == true)) {
3682 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3683 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3684 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3687 TimeForFetchingRowInVBlankImmediateFlip = 0;
3690 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Final flip bandwidth: bytes divided by the (rounded) time granted above.
3692 if (GPUVMEnable == true) {
3693 *final_flip_bw = dml_max(
3694 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
3695 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
// GPUVMEnable is already known to be false here, so this branch is reached
// only when DCCEnable is true.
3696 } else if ((GPUVMEnable == true || DCCEnable == true)) {
3697 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
// min_row_time: shortest time any relevant row (dpte and/or meta, luma and
// for two-plane formats chroma) lasts on screen.
// NOTE(review): dm_420_12 is not in this list although CalculateRowBandwidth()
// treats it as a two-plane format - confirm intent.
3702 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
3703 if (GPUVMEnable == true && DCCEnable != true) {
3704 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
3705 } else if (GPUVMEnable != true && DCCEnable == true) {
3706 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
3708 min_row_time = dml_min4(
3709 dpte_row_height * LineTime / VRatio,
3710 meta_row_height * LineTime / VRatio,
3711 dpte_row_height_chroma * LineTime / VRatioChroma,
3712 meta_row_height_chroma * LineTime / VRatioChroma);
// Remaining formats: luma rows only.
3715 if (GPUVMEnable == true && DCCEnable != true) {
3716 min_row_time = dpte_row_height * LineTime / VRatio;
3717 } else if (GPUVMEnable != true && DCCEnable == true) {
3718 min_row_time = meta_row_height * LineTime / VRatio;
3720 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
// Immediate flip is rejected when the VM request needs 32 or more lines, the
// row request needs 16 or more lines, or the fetches do not fit in min_row_time.
3724 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
3725 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3726 *ImmediateFlipSupportedForPipe = false;
3728 *ImmediateFlipSupportedForPipe = true;
3731 #ifdef __DML_VBA_DEBUG__
3732 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip);
3733 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip);
3734 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3735 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3736 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3737 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
/*
 * TruncToValidBPP - choose or validate an output bits-per-pixel value against
 * the link capacity and the DSC / non-DSC limits of the output format.
 *
 * @Output:                  encoder class; dm_dp with DSC enabled applies an
 *                           extra (1 - 2.4 / 100) factor to the link limit.
 * @Format:                  output format; selects the DSC / non-DSC limits.
 * @DSCInputBitPerComponent: scales the maximum DSC bpp.
 * @ODMCombine:              4to1 and 2to1 combine modes are checked against
 *                           link limits of 16 and 32 bpp respectively.
 *
 * The body also reads LinkBitRate, Lanes, PixelClock, DesiredBPP and
 * DSCEnable. With DesiredBPP == 0 a value is picked from MaxLinkBPP and the
 * per-format limits; otherwise DesiredBPP is checked against those limits.
 *
 * The maximum DSC bpp is always "N * DSCInputBitPerComponent - 1/16", with
 * N = 1.5 (dm_420), 2 (dm_n422) or 3 (other formats).
 */
3742 static double TruncToValidBPP(
3750 enum output_encoder_class Output,
3751 enum output_format_class Format,
3752 unsigned int DSCInputBitPerComponent,
3756 enum odm_combine_mode ODMCombine)
3765 if (Format == dm_420) {
// 1.0 / 16, not 1 / 16: integer division evaluates to 0 and would silently
// drop the 1/16 bpp subtraction that every other format applies.
3770 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3771 } else if (Format == dm_444) {
3776 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3783 if (Format == dm_n422) {
3785 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3788 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
// Link limit: LinkBitRate / 10 * 8 per lane, spread over the pixel clock.
3792 if (DSCEnable && Output == dm_dp) {
3793 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3795 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
// ODM combine thresholds on the link limit (presumably clamping MaxLinkBPP
// to 16 / 32 - TODO confirm).
3798 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3800 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
// DesiredBPP == 0: pick a bpp from the link limit.
3804 if (DesiredBPP == 0) {
// DSC limits: compare the link limit with MinDSCBPP / MaxDSCBPP; in between,
// the link limit is truncated down to a multiple of 1/16 bpp.
3806 if (MaxLinkBPP < MinDSCBPP) {
3808 } else if (MaxLinkBPP >= MaxDSCBPP) {
3811 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
// Non-DSC limits: test the link limit against NonDSCBPP2, NonDSCBPP1, NonDSCBPP0 in that order.
3814 if (MaxLinkBPP >= NonDSCBPP2) {
3816 } else if (MaxLinkBPP >= NonDSCBPP1) {
3818 } else if (MaxLinkBPP >= NonDSCBPP0) {
// Explicit DesiredBPP: tested against the non-DSC set (or <= NonDSCBPP0)
// without DSC, or against [MinDSCBPP, MaxDSCBPP] with DSC.
3825 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3826 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
/*
 * CalculatePrefetchSchedulePerPlane - run CalculatePrefetchSchedule() for one
 * plane and record whether prefetch fits.
 *
 * Copies per-plane and per-state values from mode_lib->vba into the myPipe
 * descriptor, calls CalculatePrefetchSchedule() with that descriptor plus
 * further vba fields, and stores the call's return value in
 * v->NoTimeForPrefetch[i][j][k].
 *
 * NOTE(review): the declarations of i, j, k and myPipe, and a few of the call
 * arguments, are not visible in this listing. From the indexing, i/j select
 * entries of the per-state arrays and k selects the plane - confirm against
 * the full parameter list.
 */
3835 static noinline void CalculatePrefetchSchedulePerPlane(
3836 struct display_mode_lib *mode_lib,
3837 double HostVMInefficiencyFactor,
3842 struct vba_vars_st *v = &mode_lib->vba;
/* Populate the pipe descriptor handed to CalculatePrefetchSchedule(). */
3845 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3846 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3847 myPipe.PixelClock = v->PixelClock[k];
3848 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3849 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3850 myPipe.ScalerEnabled = v->ScalerEnabled[k];
/* mode_lib->vba is the same object v points to; these two reads are equivalent to v->VRatio[k] / v->VRatioChroma[k]. */
3851 myPipe.VRatio = mode_lib->vba.VRatio[k];
3852 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3854 myPipe.SourceScan = v->SourceScan[k];
3855 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3856 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3857 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3858 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3859 myPipe.InterlaceEnable = v->Interlace[k];
3860 myPipe.NumberOfCursors = v->NumberOfCursors[k];
/* Vertical blanking is the vertical total minus the active portion. */
3861 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3862 myPipe.HTotal = v->HTotal[k];
3863 myPipe.DCCEnable = v->DCCEnable[k];
/* ODM combine counts as enabled for both the 4to1 and the 2to1 mode. */
3864 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3865 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3866 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3867 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3868 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3869 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
/* The return value of CalculatePrefetchSchedule() becomes the plane's NoTimeForPrefetch entry. */
3870 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3872 HostVMInefficiencyFactor,
3874 v->DSCDelayPerState[i][k],
3875 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3877 v->DPPCLKDelaySCLLBOnly,
3878 v->DPPCLKDelayCNVCCursor,
3879 v->DISPCLKDelaySubtotal,
/* Luma swath width divided by the horizontal ratio (presumably the post-scaler width - confirm). */
3880 v->SwathWidthYThisState[k] / v->HRatio[k],
3882 v->MaxInterDCNTileRepeaters,
/* The smaller of the global MaxVStartup and this plane's MaximumVStartup is passed, followed by the per-plane maximum itself. */
3883 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3884 v->MaximumVStartup[i][j][k],
3885 v->GPUVMMaxPageTableLevels,
3888 v->HostVMMaxNonCachedPageTableLevels,
3889 v->HostVMMinPageSize,
3890 v->DynamicMetadataEnable[k],
3891 v->DynamicMetadataVMEnabled,
3892 v->DynamicMetadataLinesBeforeActiveRequired[k],
3893 v->DynamicMetadataTransmittedBytes[k],
3897 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3898 v->MetaRowBytes[i][j][k],
3899 v->DPTEBytesPerRow[i][j][k],
3900 v->PrefetchLinesY[i][j][k],
3901 v->SwathWidthYThisState[k],
3904 v->PrefetchLinesC[i][j][k],
3905 v->SwathWidthCThisState[k],
3908 v->swath_width_luma_ub_this_state[k],
3909 v->swath_width_chroma_ub_this_state[k],
3910 v->SwathHeightYThisState[k],
3911 v->SwathHeightCThisState[k],
/* From here on the arguments are passed by address; presumably outputs filled in by CalculatePrefetchSchedule() - confirm against its prototype. */
3913 &v->DSTXAfterScaler[k],
3914 &v->DSTYAfterScaler[k],
3915 &v->LineTimesForPrefetch[k],
3917 &v->LinesForMetaPTE[k],
3918 &v->LinesForMetaAndDPTERow[k],
3919 &v->VRatioPreY[i][j][k],
3920 &v->VRatioPreC[i][j][k],
3921 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3922 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3923 &v->NoTimeForDynamicMetadata[i][j][k],
3925 &v->prefetch_vmrow_bw[k],
3929 &v->VUpdateOffsetPix[k],
3930 &v->VUpdateWidthPix[k],
3931 &v->VReadyOffsetPix[k]);
3934 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3936 struct vba_vars_st *v = &mode_lib->vba;
3940 int ReorderingBytes;
3941 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3942 bool NoChroma = true;
3943 bool EnoughWritebackUnits = true;
3944 bool P2IWith420 = false;
3945 bool DSCOnlyIfNecessaryWithBPP = false;
3946 bool DSC422NativeNotSupported = false;
3947 double MaxTotalVActiveRDBandwidth;
3948 bool ViewportExceedsSurface = false;
3949 bool FMTBufferExceeded = false;
3951 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3953 CalculateMinAndMaxPrefetchMode(
3954 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3955 &MinPrefetchMode, &MaxPrefetchMode);
3957 /*Scale Ratio, taps Support Check*/
3959 v->ScaleRatioAndTapsSupport = true;
3960 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3961 if (v->ScalerEnabled[k] == false
3962 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3963 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3964 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3965 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3966 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3967 v->ScaleRatioAndTapsSupport = false;
3968 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3969 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3970 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3971 || v->VRatio[k] > v->vtaps[k]
3972 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3973 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3974 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3975 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3976 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3977 || v->HRatioChroma[k] > v->MaxHSCLRatio
3978 || v->VRatioChroma[k] > v->MaxVSCLRatio
3979 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3980 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3981 v->ScaleRatioAndTapsSupport = false;
3984 /*Source Format, Pixel Format and Scan Support Check*/
3986 v->SourceFormatPixelAndScanSupport = true;
3987 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3988 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3989 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3990 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3991 v->SourceFormatPixelAndScanSupport = false;
3994 /*Bandwidth Support Check*/
3996 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3997 CalculateBytePerPixelAnd256BBlockSizes(
3998 v->SourcePixelFormat[k],
3999 v->SurfaceTiling[k],
4000 &v->BytePerPixelY[k],
4001 &v->BytePerPixelC[k],
4002 &v->BytePerPixelInDETY[k],
4003 &v->BytePerPixelInDETC[k],
4004 &v->Read256BlockHeightY[k],
4005 &v->Read256BlockHeightC[k],
4006 &v->Read256BlockWidthY[k],
4007 &v->Read256BlockWidthC[k]);
4009 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4010 if (v->SourceScan[k] != dm_vert) {
4011 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
4012 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
4014 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
4015 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
4018 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4019 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
4020 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4021 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
4022 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
4024 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4025 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
4026 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4027 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
4028 } else if (v->WritebackEnable[k] == true) {
4029 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4030 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
4032 v->WriteBandwidth[k] = 0.0;
4036 /*Writeback Latency support check*/
4038 v->WritebackLatencySupport = true;
4039 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4040 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
4041 v->WritebackLatencySupport = false;
4045 /*Writeback Mode Support Check*/
4047 v->TotalNumberOfActiveWriteback = 0;
4048 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4049 if (v->WritebackEnable[k] == true) {
4050 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
4054 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
4055 EnoughWritebackUnits = false;
4058 /*Writeback Scale Ratio and Taps Support Check*/
4060 v->WritebackScaleRatioAndTapsSupport = true;
4061 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4062 if (v->WritebackEnable[k] == true) {
4063 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4064 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4065 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4066 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4067 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4068 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4069 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4070 v->WritebackScaleRatioAndTapsSupport = false;
4072 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4073 v->WritebackScaleRatioAndTapsSupport = false;
4077 /*Maximum DISPCLK/DPPCLK Support check*/
4079 v->WritebackRequiredDISPCLK = 0.0;
4080 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4081 if (v->WritebackEnable[k] == true) {
4082 v->WritebackRequiredDISPCLK = dml_max(
4083 v->WritebackRequiredDISPCLK,
4084 dml31_CalculateWriteBackDISPCLK(
4085 v->WritebackPixelFormat[k],
4087 v->WritebackHRatio[k],
4088 v->WritebackVRatio[k],
4089 v->WritebackHTaps[k],
4090 v->WritebackVTaps[k],
4091 v->WritebackSourceWidth[k],
4092 v->WritebackDestinationWidth[k],
4094 v->WritebackLineBufferSize));
4097 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4098 if (v->HRatio[k] > 1.0) {
4099 v->PSCL_FACTOR[k] = dml_min(
4100 v->MaxDCHUBToPSCLThroughput,
4101 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4103 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4105 if (v->BytePerPixelC[k] == 0.0) {
4106 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4107 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4109 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4110 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4112 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4113 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4116 if (v->HRatioChroma[k] > 1.0) {
4117 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4118 v->MaxDCHUBToPSCLThroughput,
4119 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4121 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4123 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4125 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4126 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4127 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4128 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4130 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4131 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4132 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4136 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4137 int MaximumSwathWidthSupportLuma;
4138 int MaximumSwathWidthSupportChroma;
4140 if (v->SurfaceTiling[k] == dm_sw_linear) {
4141 MaximumSwathWidthSupportLuma = 8192.0;
4142 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4143 MaximumSwathWidthSupportLuma = 2880.0;
4144 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4145 MaximumSwathWidthSupportLuma = 3840.0;
4147 MaximumSwathWidthSupportLuma = 5760.0;
4150 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4151 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4153 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4155 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4156 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4157 if (v->BytePerPixelC[k] == 0.0) {
4158 v->MaximumSwathWidthInLineBufferChroma = 0;
4160 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4161 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4163 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4164 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4167 CalculateSwathAndDETConfiguration(
4169 v->NumberOfActivePlanes,
4170 v->DETBufferSizeInKByte[0],
4171 v->MaximumSwathWidthLuma,
4172 v->MaximumSwathWidthChroma,
4174 v->SourcePixelFormat,
4182 v->Read256BlockHeightY,
4183 v->Read256BlockHeightC,
4184 v->Read256BlockWidthY,
4185 v->Read256BlockWidthC,
4186 v->odm_combine_dummy,
4187 v->BlendingAndTiming,
4190 v->BytePerPixelInDETY,
4191 v->BytePerPixelInDETC,
4195 v->NoOfDPPThisState,
4196 v->swath_width_luma_ub_this_state,
4197 v->swath_width_chroma_ub_this_state,
4198 v->SwathWidthYThisState,
4199 v->SwathWidthCThisState,
4200 v->SwathHeightYThisState,
4201 v->SwathHeightCThisState,
4202 v->DETBufferSizeYThisState,
4203 v->DETBufferSizeCThisState,
4204 v->SingleDPPViewportSizeSupportPerPlane,
4205 &v->ViewportSizeSupport[0][0]);
4207 for (i = 0; i < v->soc.num_states; i++) {
4208 for (j = 0; j < 2; j++) {
4209 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4210 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4211 v->RequiredDISPCLK[i][j] = 0.0;
4212 v->DISPCLK_DPPCLK_Support[i][j] = true;
4213 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4214 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4215 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4216 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4217 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4218 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4219 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4220 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4222 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4223 * (1 + v->DISPCLKRampingMargin / 100.0);
4224 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4225 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4226 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4227 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4228 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4230 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4231 * (1 + v->DISPCLKRampingMargin / 100.0);
4232 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4233 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4234 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4235 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4236 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4239 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4240 || !(v->Output[k] == dm_dp ||
4241 v->Output[k] == dm_edp)) {
4242 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4243 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4245 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4246 FMTBufferExceeded = true;
4247 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4248 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4249 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4250 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4251 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4252 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4253 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4254 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4255 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4256 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4258 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4259 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4261 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4262 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4263 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4264 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4265 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4267 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4268 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4271 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4272 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4273 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4274 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4275 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4277 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4278 FMTBufferExceeded = true;
4280 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4281 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4284 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4285 v->MPCCombine[i][j][k] = false;
4286 v->NoOfDPP[i][j][k] = 4;
4287 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4288 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4289 v->MPCCombine[i][j][k] = false;
4290 v->NoOfDPP[i][j][k] = 2;
4291 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4292 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4293 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4294 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4295 v->MPCCombine[i][j][k] = false;
4296 v->NoOfDPP[i][j][k] = 1;
4297 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4299 v->MPCCombine[i][j][k] = true;
4300 v->NoOfDPP[i][j][k] = 2;
4301 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4303 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4304 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4305 > v->MaxDppclkRoundedDownToDFSGranularity)
4306 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4307 v->DISPCLK_DPPCLK_Support[i][j] = false;
4310 v->TotalNumberOfActiveDPP[i][j] = 0;
4311 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4312 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4313 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4314 if (v->NoOfDPP[i][j][k] == 1)
4315 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4316 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4317 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4322 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4323 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4324 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4325 double BWOfNonSplitPlaneOfMaximumBandwidth;
4326 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4327 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4328 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4329 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4330 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4331 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4332 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4333 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4336 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4337 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4338 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4339 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4340 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4341 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4342 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4345 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4346 v->RequiredDISPCLK[i][j] = 0.0;
4347 v->DISPCLK_DPPCLK_Support[i][j] = true;
4348 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4349 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4350 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4351 v->MPCCombine[i][j][k] = true;
4352 v->NoOfDPP[i][j][k] = 2;
4353 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4354 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4356 v->MPCCombine[i][j][k] = false;
4357 v->NoOfDPP[i][j][k] = 1;
4358 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4359 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4361 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4362 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4363 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4364 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4366 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4368 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4369 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4370 > v->MaxDppclkRoundedDownToDFSGranularity)
4371 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4372 v->DISPCLK_DPPCLK_Support[i][j] = false;
4375 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4376 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4377 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4380 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4381 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4382 v->DISPCLK_DPPCLK_Support[i][j] = false;
4387 /*Total Available Pipes Support Check*/
4389 for (i = 0; i < v->soc.num_states; i++) {
4390 for (j = 0; j < 2; j++) {
4391 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4392 v->TotalAvailablePipesSupport[i][j] = true;
4394 v->TotalAvailablePipesSupport[i][j] = false;
4398 /*Display IO and DSC Support Check*/
4400 v->NonsupportedDSCInputBPC = false;
4401 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4402 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4403 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4404 v->NonsupportedDSCInputBPC = true;
4408 /*Number Of DSC Slices*/
4409 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4410 if (v->BlendingAndTiming[k] == k) {
4411 if (v->PixelClockBackEnd[k] > 3200) {
4412 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4413 } else if (v->PixelClockBackEnd[k] > 1360) {
4414 v->NumberOfDSCSlices[k] = 8;
4415 } else if (v->PixelClockBackEnd[k] > 680) {
4416 v->NumberOfDSCSlices[k] = 4;
4417 } else if (v->PixelClockBackEnd[k] > 340) {
4418 v->NumberOfDSCSlices[k] = 2;
4420 v->NumberOfDSCSlices[k] = 1;
4423 v->NumberOfDSCSlices[k] = 0;
4427 for (i = 0; i < v->soc.num_states; i++) {
4428 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4429 v->RequiresDSC[i][k] = false;
4430 v->RequiresFEC[i][k] = false;
4431 if (v->BlendingAndTiming[k] == k) {
4432 if (v->Output[k] == dm_hdmi) {
4433 v->RequiresDSC[i][k] = false;
4434 v->RequiresFEC[i][k] = false;
4435 v->OutputBppPerState[i][k] = TruncToValidBPP(
4436 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4440 v->PixelClockBackEnd[k],
4441 v->ForcedOutputLinkBPP[k],
4445 v->DSCInputBitPerComponent[k],
4446 v->NumberOfDSCSlices[k],
4447 v->AudioSampleRate[k],
4448 v->AudioSampleLayout[k],
4449 v->ODMCombineEnablePerState[i][k]);
4450 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4451 if (v->DSCEnable[k] == true) {
4452 v->RequiresDSC[i][k] = true;
4453 v->LinkDSCEnable = true;
4454 if (v->Output[k] == dm_dp) {
4455 v->RequiresFEC[i][k] = true;
4457 v->RequiresFEC[i][k] = false;
4460 v->RequiresDSC[i][k] = false;
4461 v->LinkDSCEnable = false;
4462 v->RequiresFEC[i][k] = false;
4465 v->Outbpp = BPP_INVALID;
4466 if (v->PHYCLKPerState[i] >= 270.0) {
4467 v->Outbpp = TruncToValidBPP(
4468 (1.0 - v->Downspreading / 100.0) * 2700,
4469 v->OutputLinkDPLanes[k],
4472 v->PixelClockBackEnd[k],
4473 v->ForcedOutputLinkBPP[k],
4477 v->DSCInputBitPerComponent[k],
4478 v->NumberOfDSCSlices[k],
4479 v->AudioSampleRate[k],
4480 v->AudioSampleLayout[k],
4481 v->ODMCombineEnablePerState[i][k]);
4482 v->OutputBppPerState[i][k] = v->Outbpp;
4483 // TODO: Need some other way to handle this nonsense
4484 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4486 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4487 v->Outbpp = TruncToValidBPP(
4488 (1.0 - v->Downspreading / 100.0) * 5400,
4489 v->OutputLinkDPLanes[k],
4492 v->PixelClockBackEnd[k],
4493 v->ForcedOutputLinkBPP[k],
4497 v->DSCInputBitPerComponent[k],
4498 v->NumberOfDSCSlices[k],
4499 v->AudioSampleRate[k],
4500 v->AudioSampleLayout[k],
4501 v->ODMCombineEnablePerState[i][k]);
4502 v->OutputBppPerState[i][k] = v->Outbpp;
4503 // TODO: Need some other way to handle this nonsense
4504 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4506 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4507 v->Outbpp = TruncToValidBPP(
4508 (1.0 - v->Downspreading / 100.0) * 8100,
4509 v->OutputLinkDPLanes[k],
4512 v->PixelClockBackEnd[k],
4513 v->ForcedOutputLinkBPP[k],
4517 v->DSCInputBitPerComponent[k],
4518 v->NumberOfDSCSlices[k],
4519 v->AudioSampleRate[k],
4520 v->AudioSampleLayout[k],
4521 v->ODMCombineEnablePerState[i][k]);
4522 v->OutputBppPerState[i][k] = v->Outbpp;
4523 // TODO: Need some other way to handle this nonsense
4524 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4526 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4527 v->Outbpp = TruncToValidBPP(
4528 (1.0 - v->Downspreading / 100.0) * 10000,
4532 v->PixelClockBackEnd[k],
4533 v->ForcedOutputLinkBPP[k],
4537 v->DSCInputBitPerComponent[k],
4538 v->NumberOfDSCSlices[k],
4539 v->AudioSampleRate[k],
4540 v->AudioSampleLayout[k],
4541 v->ODMCombineEnablePerState[i][k]);
4542 v->OutputBppPerState[i][k] = v->Outbpp;
4543 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4545 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4546 v->Outbpp = TruncToValidBPP(
4551 v->PixelClockBackEnd[k],
4552 v->ForcedOutputLinkBPP[k],
4556 v->DSCInputBitPerComponent[k],
4557 v->NumberOfDSCSlices[k],
4558 v->AudioSampleRate[k],
4559 v->AudioSampleLayout[k],
4560 v->ODMCombineEnablePerState[i][k]);
4561 v->OutputBppPerState[i][k] = v->Outbpp;
4562 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4566 v->OutputBppPerState[i][k] = 0;
4571 for (i = 0; i < v->soc.num_states; i++) {
4572 v->LinkCapacitySupport[i] = true;
4573 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4574 if (v->BlendingAndTiming[k] == k
4575 && (v->Output[k] == dm_dp ||
4576 v->Output[k] == dm_edp ||
4577 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4578 v->LinkCapacitySupport[i] = false;
4584 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4585 if (v->BlendingAndTiming[k] == k
4586 && (v->Output[k] == dm_dp ||
4587 v->Output[k] == dm_edp ||
4588 v->Output[k] == dm_hdmi)) {
4589 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4592 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4593 && !v->DSC422NativeSupport) {
4594 DSC422NativeNotSupported = true;
4599 for (i = 0; i < v->soc.num_states; ++i) {
4600 v->ODMCombine4To1SupportCheckOK[i] = true;
4601 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4602 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4603 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4604 || v->Output[k] == dm_hdmi)) {
4605 v->ODMCombine4To1SupportCheckOK[i] = false;
4610 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4612 for (i = 0; i < v->soc.num_states; i++) {
4613 v->NotEnoughDSCUnits[i] = false;
4614 v->TotalDSCUnitsRequired = 0.0;
4615 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4616 if (v->RequiresDSC[i][k] == true) {
4617 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4618 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4619 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4620 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4622 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4626 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4627 v->NotEnoughDSCUnits[i] = true;
4630 /*DSC Delay per state*/
4632 for (i = 0; i < v->soc.num_states; i++) {
4633 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4634 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4637 v->BPP = v->OutputBppPerState[i][k];
4639 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4640 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4641 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4642 v->DSCInputBitPerComponent[k],
4644 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4645 v->NumberOfDSCSlices[k],
4647 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4648 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4649 v->DSCDelayPerState[i][k] = 2.0
4650 * (dscceComputeDelay(
4651 v->DSCInputBitPerComponent[k],
4653 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4654 v->NumberOfDSCSlices[k] / 2,
4656 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4658 v->DSCDelayPerState[i][k] = 4.0
4659 * (dscceComputeDelay(
4660 v->DSCInputBitPerComponent[k],
4662 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4663 v->NumberOfDSCSlices[k] / 4,
4665 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4667 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4669 v->DSCDelayPerState[i][k] = 0.0;
4672 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4673 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4674 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4675 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4681 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4683 for (i = 0; i < v->soc.num_states; ++i) {
4684 for (j = 0; j <= 1; ++j) {
4685 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4686 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4687 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4688 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4691 CalculateSwathAndDETConfiguration(
4693 v->NumberOfActivePlanes,
4694 v->DETBufferSizeInKByte[0],
4695 v->MaximumSwathWidthLuma,
4696 v->MaximumSwathWidthChroma,
4698 v->SourcePixelFormat,
4706 v->Read256BlockHeightY,
4707 v->Read256BlockHeightC,
4708 v->Read256BlockWidthY,
4709 v->Read256BlockWidthC,
4710 v->ODMCombineEnableThisState,
4711 v->BlendingAndTiming,
4714 v->BytePerPixelInDETY,
4715 v->BytePerPixelInDETC,
4719 v->NoOfDPPThisState,
4720 v->swath_width_luma_ub_this_state,
4721 v->swath_width_chroma_ub_this_state,
4722 v->SwathWidthYThisState,
4723 v->SwathWidthCThisState,
4724 v->SwathHeightYThisState,
4725 v->SwathHeightCThisState,
4726 v->DETBufferSizeYThisState,
4727 v->DETBufferSizeCThisState,
4729 &v->ViewportSizeSupport[i][j]);
4731 CalculateDCFCLKDeepSleep(
4733 v->NumberOfActivePlanes,
4738 v->SwathWidthYThisState,
4739 v->SwathWidthCThisState,
4740 v->NoOfDPPThisState,
4745 v->PSCL_FACTOR_CHROMA,
4746 v->RequiredDPPCLKThisState,
4747 v->ReadBandwidthLuma,
4748 v->ReadBandwidthChroma,
4750 &v->ProjectedDCFCLKDeepSleep[i][j]);
4752 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4753 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4754 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4755 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4756 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4757 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4758 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4759 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4760 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4765 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4766 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4767 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4770 for (i = 0; i < v->soc.num_states; i++) {
4771 for (j = 0; j < 2; j++) {
4772 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4774 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4775 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4776 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4777 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4778 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4779 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4780 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4781 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4782 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4785 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4786 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4787 if (v->DCCEnable[k] == true) {
4788 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4792 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4793 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4794 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4796 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4797 && v->SourceScan[k] != dm_vert) {
4798 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4800 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4802 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4803 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4806 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4809 v->Read256BlockHeightC[k],
4810 v->Read256BlockWidthC[k],
4811 v->SourcePixelFormat[k],
4812 v->SurfaceTiling[k],
4813 v->BytePerPixelC[k],
4815 v->SwathWidthCThisState[k],
4816 v->ViewportHeightChroma[k],
4819 v->HostVMMaxNonCachedPageTableLevels,
4820 v->GPUVMMinPageSize,
4821 v->HostVMMinPageSize,
4822 v->PTEBufferSizeInRequestsForChroma,
4825 &v->MacroTileWidthC[k],
4827 &v->DPTEBytesPerRowC,
4828 &v->PTEBufferSizeNotExceededC[i][j][k],
4830 &v->dpte_row_height_chroma[k],
4834 &v->meta_row_height_chroma[k],
4841 &v->dummyinteger11);
4843 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4848 v->ProgressiveToInterlaceUnitInOPP,
4849 v->SwathHeightCThisState[k],
4850 v->ViewportYStartC[k],
4854 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4855 v->PTEBufferSizeInRequestsForChroma = 0;
4856 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4857 v->MetaRowBytesC = 0.0;
4858 v->DPTEBytesPerRowC = 0.0;
4859 v->PrefetchLinesC[i][j][k] = 0.0;
4860 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4862 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4865 v->Read256BlockHeightY[k],
4866 v->Read256BlockWidthY[k],
4867 v->SourcePixelFormat[k],
4868 v->SurfaceTiling[k],
4869 v->BytePerPixelY[k],
4871 v->SwathWidthYThisState[k],
4872 v->ViewportHeight[k],
4875 v->HostVMMaxNonCachedPageTableLevels,
4876 v->GPUVMMinPageSize,
4877 v->HostVMMinPageSize,
4878 v->PTEBufferSizeInRequestsForLuma,
4880 v->DCCMetaPitchY[k],
4881 &v->MacroTileWidthY[k],
4883 &v->DPTEBytesPerRowY,
4884 &v->PTEBufferSizeNotExceededY[i][j][k],
4886 &v->dpte_row_height[k],
4890 &v->meta_row_height[k],
4892 &v->dpte_group_bytes[k],
4898 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4903 v->ProgressiveToInterlaceUnitInOPP,
4904 v->SwathHeightYThisState[k],
4905 v->ViewportYStartY[k],
4908 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4909 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4910 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4912 CalculateRowBandwidth(
4914 v->SourcePixelFormat[k],
4918 v->HTotal[k] / v->PixelClock[k],
4921 v->meta_row_height[k],
4922 v->meta_row_height_chroma[k],
4923 v->DPTEBytesPerRowY,
4924 v->DPTEBytesPerRowC,
4925 v->dpte_row_height[k],
4926 v->dpte_row_height_chroma[k],
4927 &v->meta_row_bandwidth[i][j][k],
4928 &v->dpte_row_bandwidth[i][j][k]);
4930 /*DCCMetaBufferSizeSupport(i, j) = True
4931 For k = 0 To NumberOfActivePlanes - 1
4932 If MetaRowBytes(i, j, k) > 24064 Then
4933 DCCMetaBufferSizeSupport(i, j) = False
4936 v->DCCMetaBufferSizeSupport[i][j] = true;
4937 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4938 if (v->MetaRowBytes[i][j][k] > 24064)
4939 v->DCCMetaBufferSizeSupport[i][j] = false;
4941 v->UrgLatency[i] = CalculateUrgentLatency(
4942 v->UrgentLatencyPixelDataOnly,
4943 v->UrgentLatencyPixelMixedWithVMData,
4944 v->UrgentLatencyVMDataOnly,
4945 v->DoUrgentLatencyAdjustment,
4946 v->UrgentLatencyAdjustmentFabricClockComponent,
4947 v->UrgentLatencyAdjustmentFabricClockReference,
4948 v->FabricClockPerState[i]);
4950 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4951 CalculateUrgentBurstFactor(
4952 v->swath_width_luma_ub_this_state[k],
4953 v->swath_width_chroma_ub_this_state[k],
4954 v->SwathHeightYThisState[k],
4955 v->SwathHeightCThisState[k],
4956 v->HTotal[k] / v->PixelClock[k],
4958 v->CursorBufferSize,
4959 v->CursorWidth[k][0],
4963 v->BytePerPixelInDETY[k],
4964 v->BytePerPixelInDETC[k],
4965 v->DETBufferSizeYThisState[k],
4966 v->DETBufferSizeCThisState[k],
4967 &v->UrgentBurstFactorCursor[k],
4968 &v->UrgentBurstFactorLuma[k],
4969 &v->UrgentBurstFactorChroma[k],
4970 &NotUrgentLatencyHiding[k]);
4973 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4974 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4975 if (NotUrgentLatencyHiding[k]) {
4976 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4980 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4981 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4982 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4983 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4986 v->TotalVActivePixelBandwidth[i][j] = 0;
4987 v->TotalVActiveCursorBandwidth[i][j] = 0;
4988 v->TotalMetaRowBandwidth[i][j] = 0;
4989 v->TotalDPTERowBandwidth[i][j] = 0;
4990 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4991 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4992 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4993 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4994 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4999 //Calculate Return BW
5000 for (i = 0; i < v->soc.num_states; ++i) {
5001 for (j = 0; j <= 1; ++j) {
5002 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5003 if (v->BlendingAndTiming[k] == k) {
5004 if (v->WritebackEnable[k] == true) {
5005 v->WritebackDelayTime[k] = v->WritebackLatency
5006 + CalculateWriteBackDelay(
5007 v->WritebackPixelFormat[k],
5008 v->WritebackHRatio[k],
5009 v->WritebackVRatio[k],
5010 v->WritebackVTaps[k],
5011 v->WritebackDestinationWidth[k],
5012 v->WritebackDestinationHeight[k],
5013 v->WritebackSourceHeight[k],
5014 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5016 v->WritebackDelayTime[k] = 0.0;
5018 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5019 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5020 v->WritebackDelayTime[k] = dml_max(
5021 v->WritebackDelayTime[k],
5023 + CalculateWriteBackDelay(
5024 v->WritebackPixelFormat[m],
5025 v->WritebackHRatio[m],
5026 v->WritebackVRatio[m],
5027 v->WritebackVTaps[m],
5028 v->WritebackDestinationWidth[m],
5029 v->WritebackDestinationHeight[m],
5030 v->WritebackSourceHeight[m],
5031 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5036 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5037 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5038 if (v->BlendingAndTiming[k] == m) {
5039 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5043 v->MaxMaxVStartup[i][j] = 0;
5044 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5045 v->MaximumVStartup[i][j][k] =
5046 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5047 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5048 v->VTotal[k] - v->VActive[k]
5052 1.0 * v->WritebackDelayTime[k]
5054 / v->PixelClock[k]),
5056 if (v->MaximumVStartup[i][j][k] > 1023)
5057 v->MaximumVStartup[i][j][k] = 1023;
5058 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5063 ReorderingBytes = v->NumberOfChannels
5065 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5066 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5067 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5069 for (i = 0; i < v->soc.num_states; ++i) {
5070 for (j = 0; j <= 1; ++j) {
5071 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5075 if (v->UseMinimumRequiredDCFCLK == true)
5076 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5078 for (i = 0; i < v->soc.num_states; ++i) {
5079 for (j = 0; j <= 1; ++j) {
5080 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5081 v->ReturnBusWidth * v->DCFCLKState[i][j],
5082 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5083 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5084 double PixelDataOnlyReturnBWPerState = dml_min(
5085 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5086 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5087 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5088 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5089 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5091 if (v->HostVMEnable != true) {
5092 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5094 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5099 //Re-ordering Buffer Support Check
5100 for (i = 0; i < v->soc.num_states; ++i) {
5101 for (j = 0; j <= 1; ++j) {
5102 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5103 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5104 v->ROBSupport[i][j] = true;
5106 v->ROBSupport[i][j] = false;
5111 //Vertical Active BW support check
5113 MaxTotalVActiveRDBandwidth = 0;
5114 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5115 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5118 for (i = 0; i < v->soc.num_states; ++i) {
5119 for (j = 0; j <= 1; ++j) {
5120 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5122 v->ReturnBusWidth * v->DCFCLKState[i][j],
5123 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5124 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5125 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5126 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5128 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5129 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5131 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5136 v->UrgentLatency = CalculateUrgentLatency(
5137 v->UrgentLatencyPixelDataOnly,
5138 v->UrgentLatencyPixelMixedWithVMData,
5139 v->UrgentLatencyVMDataOnly,
5140 v->DoUrgentLatencyAdjustment,
5141 v->UrgentLatencyAdjustmentFabricClockComponent,
5142 v->UrgentLatencyAdjustmentFabricClockReference,
5145 for (i = 0; i < v->soc.num_states; ++i) {
5146 for (j = 0; j <= 1; ++j) {
5147 double VMDataOnlyReturnBWPerState;
5148 double HostVMInefficiencyFactor = 1;
5149 int NextPrefetchModeState = MinPrefetchMode;
5150 bool UnboundedRequestEnabledThisState = false;
5151 int CompressedBufferSizeInkByteThisState = 0;
5154 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5156 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5157 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5158 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5159 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5162 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5163 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5164 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5165 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5166 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5167 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5168 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5169 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5170 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5171 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5174 VMDataOnlyReturnBWPerState = dml_min(
5176 v->ReturnBusWidth * v->DCFCLKState[i][j],
5177 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5178 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5179 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5180 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5181 if (v->GPUVMEnable && v->HostVMEnable)
5182 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5184 v->ExtraLatency = CalculateExtraLatency(
5185 v->RoundTripPingLatencyCycles,
5187 v->DCFCLKState[i][j],
5188 v->TotalNumberOfActiveDPP[i][j],
5189 v->PixelChunkSizeInKByte,
5190 v->TotalNumberOfDCCActiveDPP[i][j],
5192 v->ReturnBWPerState[i][j],
5195 v->NumberOfActivePlanes,
5196 v->NoOfDPPThisState,
5197 v->dpte_group_bytes,
5198 HostVMInefficiencyFactor,
5199 v->HostVMMinPageSize,
5200 v->HostVMMaxNonCachedPageTableLevels);
5202 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5204 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5205 v->MaxVStartup = v->NextMaxVStartup;
5207 v->TWait = CalculateTWait(
5208 v->PrefetchModePerState[i][j],
5209 v->DRAMClockChangeLatency,
5211 v->SREnterPlusExitTime);
5213 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5214 CalculatePrefetchSchedulePerPlane(mode_lib,
5215 HostVMInefficiencyFactor,
5219 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5220 CalculateUrgentBurstFactor(
5221 v->swath_width_luma_ub_this_state[k],
5222 v->swath_width_chroma_ub_this_state[k],
5223 v->SwathHeightYThisState[k],
5224 v->SwathHeightCThisState[k],
5225 v->HTotal[k] / v->PixelClock[k],
5227 v->CursorBufferSize,
5228 v->CursorWidth[k][0],
5230 v->VRatioPreY[i][j][k],
5231 v->VRatioPreC[i][j][k],
5232 v->BytePerPixelInDETY[k],
5233 v->BytePerPixelInDETC[k],
5234 v->DETBufferSizeYThisState[k],
5235 v->DETBufferSizeCThisState[k],
5236 &v->UrgentBurstFactorCursorPre[k],
5237 &v->UrgentBurstFactorLumaPre[k],
5238 &v->UrgentBurstFactorChroma[k],
5239 &v->NotUrgentLatencyHidingPre[k]);
5242 v->MaximumReadBandwidthWithPrefetch = 0.0;
5243 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5244 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5245 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5247 v->MaximumReadBandwidthWithPrefetch =
5248 v->MaximumReadBandwidthWithPrefetch
5250 v->VActivePixelBandwidth[i][j][k]
5251 + v->VActiveCursorBandwidth[i][j][k]
5252 + v->NoOfDPP[i][j][k]
5253 * (v->meta_row_bandwidth[i][j][k]
5254 + v->dpte_row_bandwidth[i][j][k]),
5255 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5257 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5258 * v->UrgentBurstFactorLumaPre[k]
5259 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5260 * v->UrgentBurstFactorChromaPre[k])
5261 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5264 v->NotEnoughUrgentLatencyHidingPre = false;
5265 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5266 if (v->NotUrgentLatencyHidingPre[k] == true) {
5267 v->NotEnoughUrgentLatencyHidingPre = true;
5271 v->PrefetchSupported[i][j] = true;
5272 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5273 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5274 v->PrefetchSupported[i][j] = false;
5276 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5277 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5278 || v->NoTimeForPrefetch[i][j][k] == true) {
5279 v->PrefetchSupported[i][j] = false;
5283 v->DynamicMetadataSupported[i][j] = true;
5284 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5285 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5286 v->DynamicMetadataSupported[i][j] = false;
5290 v->VRatioInPrefetchSupported[i][j] = true;
5291 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5292 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5293 v->VRatioInPrefetchSupported[i][j] = false;
5296 v->AnyLinesForVMOrRowTooLarge = false;
5297 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5298 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5299 v->AnyLinesForVMOrRowTooLarge = true;
5303 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5305 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5306 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5307 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5308 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5310 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5312 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5313 * v->UrgentBurstFactorLumaPre[k]
5314 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5315 * v->UrgentBurstFactorChromaPre[k])
5316 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5318 v->TotImmediateFlipBytes = 0.0;
5319 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5320 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5321 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5322 + v->DPTEBytesPerRow[i][j][k];
5325 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5326 CalculateFlipSchedule(
5328 HostVMInefficiencyFactor,
5331 v->GPUVMMaxPageTableLevels,
5333 v->HostVMMaxNonCachedPageTableLevels,
5335 v->HostVMMinPageSize,
5336 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5337 v->MetaRowBytes[i][j][k],
5338 v->DPTEBytesPerRow[i][j][k],
5339 v->BandwidthAvailableForImmediateFlip,
5340 v->TotImmediateFlipBytes,
5341 v->SourcePixelFormat[k],
5342 v->HTotal[k] / v->PixelClock[k],
5347 v->dpte_row_height[k],
5348 v->meta_row_height[k],
5349 v->dpte_row_height_chroma[k],
5350 v->meta_row_height_chroma[k],
5351 &v->DestinationLinesToRequestVMInImmediateFlip[k],
5352 &v->DestinationLinesToRequestRowInImmediateFlip[k],
5353 &v->final_flip_bw[k],
5354 &v->ImmediateFlipSupportedForPipe[k]);
5356 v->total_dcn_read_bw_with_flip = 0.0;
5357 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5358 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5360 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5361 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5362 + v->VActiveCursorBandwidth[i][j][k],
5364 * (v->final_flip_bw[k]
5365 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5366 * v->UrgentBurstFactorLumaPre[k]
5367 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5368 * v->UrgentBurstFactorChromaPre[k])
5369 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5371 v->ImmediateFlipSupportedForState[i][j] = true;
5372 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5373 v->ImmediateFlipSupportedForState[i][j] = false;
5375 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5376 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5377 v->ImmediateFlipSupportedForState[i][j] = false;
5381 v->ImmediateFlipSupportedForState[i][j] = false;
5384 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5385 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5386 NextPrefetchModeState = NextPrefetchModeState + 1;
5388 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5390 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5391 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5392 && ((v->HostVMEnable == false &&
5393 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5394 || v->ImmediateFlipSupportedForState[i][j] == true))
5395 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5397 CalculateUnboundedRequestAndCompressedBufferSize(
5398 v->DETBufferSizeInKByte[0],
5399 v->ConfigReturnBufferSizeInKByte,
5400 v->UseUnboundedRequesting,
5401 v->TotalNumberOfActiveDPP[i][j],
5404 v->CompressedBufferSegmentSizeInkByte,
5406 &UnboundedRequestEnabledThisState,
5407 &CompressedBufferSizeInkByteThisState);
5409 CalculateWatermarksAndDRAMSpeedChangeSupport(
5411 v->PrefetchModePerState[i][j],
5412 v->DCFCLKState[i][j],
5413 v->ReturnBWPerState[i][j],
5416 v->SOCCLKPerState[i],
5417 v->ProjectedDCFCLKDeepSleep[i][j],
5418 v->DETBufferSizeYThisState,
5419 v->DETBufferSizeCThisState,
5420 v->SwathHeightYThisState,
5421 v->SwathHeightCThisState,
5422 v->SwathWidthYThisState,
5423 v->SwathWidthCThisState,
5424 v->NoOfDPPThisState,
5425 v->BytePerPixelInDETY,
5426 v->BytePerPixelInDETC,
5427 UnboundedRequestEnabledThisState,
5428 CompressedBufferSizeInkByteThisState,
5429 &v->DRAMClockChangeSupport[i][j],
5437 /*PTE Buffer Size Check*/
5438 for (i = 0; i < v->soc.num_states; i++) {
5439 for (j = 0; j < 2; j++) {
5440 v->PTEBufferSizeNotExceeded[i][j] = true;
5441 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5442 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5443 v->PTEBufferSizeNotExceeded[i][j] = false;
5449 /*Cursor Support Check*/
5450 v->CursorSupport = true;
5451 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5452 if (v->CursorWidth[k][0] > 0.0) {
5453 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5454 v->CursorSupport = false;
5459 /*Valid Pitch Check*/
5460 v->PitchSupport = true;
5461 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5462 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5463 if (v->DCCEnable[k] == true) {
5464 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5466 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5468 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5469 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5470 && v->SourcePixelFormat[k] != dm_mono_8) {
5471 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5472 if (v->DCCEnable[k] == true) {
5473 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5474 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5475 64.0 * v->Read256BlockWidthC[k]);
5477 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5480 v->AlignedCPitch[k] = v->PitchC[k];
5481 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5483 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5484 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5485 v->PitchSupport = false;
5489 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5490 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5491 ViewportExceedsSurface = true;
5492 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5493 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5494 && v->SourcePixelFormat[k] != dm_rgbe) {
5495 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5496 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5497 ViewportExceedsSurface = true;
5503 /*Mode Support, Voltage State and SOC Configuration*/
5504 for (i = v->soc.num_states - 1; i >= 0; i--) {
5505 for (j = 0; j < 2; j++) {
5506 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5507 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5508 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5509 && v->DTBCLKRequiredMoreThanSupported[i] == false
5510 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5511 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5512 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5513 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5514 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5515 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5516 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5517 && ((v->HostVMEnable == false
5518 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5519 || v->ImmediateFlipSupportedForState[i][j] == true)
5520 && FMTBufferExceeded == false) {
5521 v->ModeSupport[i][j] = true;
5523 v->ModeSupport[i][j] = false;
5529 unsigned int MaximumMPCCombine = 0;
5530 for (i = v->soc.num_states; i >= 0; i--) {
5531 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5532 v->VoltageLevel = i;
5533 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5534 if (v->ModeSupport[i][0] == true) {
5535 MaximumMPCCombine = 0;
5537 MaximumMPCCombine = 1;
5541 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5542 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5543 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5544 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5546 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5547 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5548 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5549 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5550 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5551 v->maxMpcComb = MaximumMPCCombine;
5555 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5556 struct display_mode_lib *mode_lib,
5557 unsigned int PrefetchMode,
5560 double UrgentLatency,
5561 double ExtraLatency,
5563 double DCFCLKDeepSleep,
5564 unsigned int DETBufferSizeY[],
5565 unsigned int DETBufferSizeC[],
5566 unsigned int SwathHeightY[],
5567 unsigned int SwathHeightC[],
5568 double SwathWidthY[],
5569 double SwathWidthC[],
5570 unsigned int DPPPerPlane[],
5571 double BytePerPixelDETY[],
5572 double BytePerPixelDETC[],
5573 bool UnboundedRequestEnabled,
5574 int unsigned CompressedBufferSizeInkByte,
5575 enum clock_change_support *DRAMClockChangeSupport,
5576 double *StutterExitWatermark,
5577 double *StutterEnterPlusExitWatermark,
5578 double *Z8StutterExitWatermark,
5579 double *Z8StutterEnterPlusExitWatermark)
5581 struct vba_vars_st *v = &mode_lib->vba;
5582 double EffectiveLBLatencyHidingY;
5583 double EffectiveLBLatencyHidingC;
5584 double LinesInDETY[DC__NUM_DPP__MAX];
5586 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5587 unsigned int LinesInDETCRoundedDownToSwath;
5588 double FullDETBufferingTimeY;
5589 double FullDETBufferingTimeC;
5590 double ActiveDRAMClockChangeLatencyMarginY;
5591 double ActiveDRAMClockChangeLatencyMarginC;
5592 double WritebackDRAMClockChangeLatencyMargin;
5593 double PlaneWithMinActiveDRAMClockChangeMargin;
5594 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5595 double WritebackDRAMClockChangeLatencyHiding;
5596 double TotalPixelBW = 0.0;
5599 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5601 #ifdef __DML_VBA_DEBUG__
5602 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5603 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5604 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5607 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5609 #ifdef __DML_VBA_DEBUG__
5610 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5611 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5614 v->TotalActiveWriteback = 0;
5615 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5616 if (v->WritebackEnable[k] == true) {
5617 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5621 if (v->TotalActiveWriteback <= 1) {
5622 v->WritebackUrgentWatermark = v->WritebackLatency;
5624 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5627 if (v->TotalActiveWriteback <= 1) {
5628 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5630 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5633 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5634 TotalPixelBW = TotalPixelBW
5635 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5636 / (v->HTotal[k] / v->PixelClock[k]);
5639 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5640 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5642 v->LBLatencyHidingSourceLinesY = dml_min(
5643 (double) v->MaxLineBufferLines,
5644 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5646 v->LBLatencyHidingSourceLinesC = dml_min(
5647 (double) v->MaxLineBufferLines,
5648 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5650 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5652 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5654 if (UnboundedRequestEnabled) {
5655 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5656 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5659 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5660 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5661 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5662 if (BytePerPixelDETC[k] > 0) {
5663 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5664 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5665 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5668 FullDETBufferingTimeC = 999999;
5671 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5672 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5674 if (v->NumberOfActivePlanes > 1) {
5675 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5676 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5679 if (BytePerPixelDETC[k] > 0) {
5680 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5681 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5683 if (v->NumberOfActivePlanes > 1) {
5684 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5685 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5687 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5689 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5692 if (v->WritebackEnable[k] == true) {
5693 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5694 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5695 if (v->WritebackPixelFormat[k] == dm_444_64) {
5696 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5698 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5699 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5703 v->MinActiveDRAMClockChangeMargin = 999999;
5704 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5705 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5706 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5707 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5708 if (v->BlendingAndTiming[k] == k) {
5709 PlaneWithMinActiveDRAMClockChangeMargin = k;
5711 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5712 if (v->BlendingAndTiming[k] == j) {
5713 PlaneWithMinActiveDRAMClockChangeMargin = j;
5720 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5722 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5723 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5724 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5725 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5726 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5730 v->TotalNumberOfActiveOTG = 0;
5732 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5733 if (v->BlendingAndTiming[k] == k) {
5734 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5738 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5739 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5740 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5741 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5742 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5744 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5747 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5748 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5749 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5750 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5752 #ifdef __DML_VBA_DEBUG__
5753 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5754 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5755 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5756 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep - compute the deep-sleep DCFCLK value.
 *
 * For each plane k < NumberOfActivePlanes a per-plane value is stored in
 * mode_lib->vba.DCFCLKDeepSleepPerPlane[k]. The result written to
 * *DCFCLKDeepSleep is the largest of: the constant 8.0, the aggregate
 * read-bandwidth term, and every per-plane value.
 *
 * NOTE(review): this listing omits some short source lines (the embedded
 * numbering has gaps, e.g. 5764 -> 5766). VRatio, HRatio, DPPCLK,
 * ReturnBusWidth and k are used below but their declarations are not
 * visible, and the lines separating alternative assignments (braces/else)
 * are not shown either. Confirm against the complete file.
 */
5760 static void CalculateDCFCLKDeepSleep(
5761 struct display_mode_lib *mode_lib,
5762 unsigned int NumberOfActivePlanes,
5763 int BytePerPixelY[],
5764 int BytePerPixelC[],
5766 double VRatioChroma[],
5767 double SwathWidthY[],
5768 double SwathWidthC[],
5769 unsigned int DPPPerPlane[],
5771 double HRatioChroma[],
5772 double PixelClock[],
5773 double PSCL_THROUGHPUT[],
5774 double PSCL_THROUGHPUT_CHROMA[],
5776 double ReadBandwidthLuma[],
5777 double ReadBandwidthChroma[],
5779 double *DCFCLKDeepSleep)
5781 struct vba_vars_st *v = &mode_lib->vba;
5782 double DisplayPipeLineDeliveryTimeLuma;
5783 double DisplayPipeLineDeliveryTimeChroma;
5784 double ReadBandwidth = 0.0;
/* Pass 1: per-plane value derived from the line delivery times. */
5787 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Luma line delivery time. With VRatio[k] <= 1 it is based on the pixel
 * clock and HRatio; the second assignment (presumably the else branch,
 * separator line not shown) is based on PSCL_THROUGHPUT and DPPCLK.
 */
5789 if (VRatio[k] <= 1) {
5790 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5792 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma line delivery time: 0 when the plane has no chroma bytes, otherwise the same two forms as luma. */
5794 if (BytePerPixelC[k] == 0) {
5795 DisplayPipeLineDeliveryTimeChroma = 0;
5797 if (VRatioChroma[k] <= 1) {
5798 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5800 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * With chroma present: the larger of the luma and chroma terms, each
 * (swath bytes / 32) per line delivery time scaled by
 * __DML_MIN_DCFCLK_FACTOR__. The luma-only assignment that follows
 * divides by 64.0 instead of 32.0.
 */
5804 if (BytePerPixelC[k] > 0) {
5805 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5806 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5808 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
/* The per-plane value is never allowed below PixelClock[k] / 16. */
5810 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
/* Pass 2: total luma + chroma read bandwidth over all active planes. */
5814 for (k = 0; k < NumberOfActivePlanes; ++k) {
5815 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
/* Start from the bandwidth-derived value, with 8.0 as the lower bound. */
5818 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
/* Pass 3: raise the result to the largest per-plane value. */
5820 for (k = 0; k < NumberOfActivePlanes; ++k) {
5821 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor - compute the urgent burst factors for the
 * cursor, luma and chroma of one plane.
 *
 * For each of the three, a "buffer size in time" is computed and compared
 * with UrgentLatency:
 *   - if (buffer time - UrgentLatency) <= 0, *NotEnoughUrgentLatencyHiding
 *     is set to 1 and the factor is set to 0;
 *   - otherwise the factor is buffer time / (buffer time - UrgentLatency).
 * *NotEnoughUrgentLatencyHiding is cleared to 0 on entry.
 *
 * NOTE(review): this listing omits some short source lines (embedded
 * numbering gaps such as 5829 -> 5831 and 5834 -> 5837). LineTime and
 * VRatio are used below but their declarations are not visible, and the
 * conditions guarding the "= 1" fallback assignments are not shown.
 * Confirm against the complete file.
 */
5825 static void CalculateUrgentBurstFactor(
5826 int swath_width_luma_ub,
5827 int swath_width_chroma_ub,
5828 unsigned int SwathHeightY,
5829 unsigned int SwathHeightC,
5831 double UrgentLatency,
5832 double CursorBufferSize,
5833 unsigned int CursorWidth,
5834 unsigned int CursorBPP,
5837 double BytePerPixelInDETY,
5838 double BytePerPixelInDETC,
5839 double DETBufferSizeY,
5840 double DETBufferSizeC,
5841 double *UrgentBurstFactorCursor,
5842 double *UrgentBurstFactorLuma,
5843 double *UrgentBurstFactorChroma,
5844 bool *NotEnoughUrgentLatencyHiding)
5846 double LinesInDETLuma;
5847 double LinesInDETChroma;
5848 unsigned int LinesInCursorBuffer;
5849 double CursorBufferSizeInTime;
5850 double DETBufferSizeInTimeLuma;
5851 double DETBufferSizeInTimeChroma;
5853 *NotEnoughUrgentLatencyHiding = 0;
/* ---- Cursor ---- */
5855 if (CursorWidth > 0) {
/*
 * Cursor lines held by the buffer: (CursorBufferSize * 1024) divided by
 * the bytes per cursor line (CursorWidth * CursorBPP / 8), then
 * 1 << floor(log2(..)). Assuming dml_floor/dml_log2 behave as their names
 * suggest, this rounds the line count down to a power of two.
 */
5856 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5858 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5859 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5860 *NotEnoughUrgentLatencyHiding = 1;
5861 *UrgentBurstFactorCursor = 0;
5863 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
/* Fallback factor of 1; the condition selecting it is on a line not shown in this listing. */
5866 *UrgentBurstFactorCursor = 1;
/* ---- Luma ---- */
/* Lines that fit in the luma DET buffer for this swath width. */
5870 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
/* The line count is passed through dml_floor with SwathHeightY as granularity before conversion to time. */
5872 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5873 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5874 *NotEnoughUrgentLatencyHiding = 1;
5875 *UrgentBurstFactorLuma = 0;
5877 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
/* Fallback factor of 1; guarding condition not visible in this listing. */
5880 *UrgentBurstFactorLuma = 1;
/* ---- Chroma (only when the plane has chroma bytes in DET) ---- */
5883 if (BytePerPixelInDETC > 0) {
5884 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
/*
 * NOTE(review): the chroma buffering time divides by VRatio, the same
 * ratio used for luma above - confirm that a chroma-specific ratio is not
 * intended here.
 */
5886 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5887 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5888 *NotEnoughUrgentLatencyHiding = 1;
5889 *UrgentBurstFactorChroma = 0;
5891 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
/* Fallback factor of 1; guarding condition not visible in this listing. */
5894 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes - fill the per-plane delivery-time arrays.
 *
 * For every plane k < NumberOfActivePlanes this writes:
 *   - DisplayPipeLineDeliveryTime{Luma,Chroma}[k] and their *Prefetch
 *     counterparts (first loop);
 *   - DisplayPipeRequestDeliveryTime{Luma,Chroma}[k] and their *Prefetch
 *     counterparts, i.e. the line delivery time divided by the number of
 *     requests per swath (second loop);
 *   - CursorRequestDeliveryTime[k] and CursorRequestDeliveryTimePrefetch[k]
 *     (third loop).
 * All results are returned through the output arrays; nothing else is
 * modified by the visible code.
 *
 * NOTE(review): this listing omits some short source lines (embedded
 * numbering gaps such as 5900 -> 5902). VRatio, HRatio, DPPCLK and k are
 * used below but their declarations are not visible, and brace / else /
 * #endif lines are not shown. Confirm against the complete file.
 */
5899 static void CalculatePixelDeliveryTimes(
5900 unsigned int NumberOfActivePlanes,
5902 double VRatioChroma[],
5903 double VRatioPrefetchY[],
5904 double VRatioPrefetchC[],
5905 unsigned int swath_width_luma_ub[],
5906 unsigned int swath_width_chroma_ub[],
5907 unsigned int DPPPerPlane[],
5909 double HRatioChroma[],
5910 double PixelClock[],
5911 double PSCL_THROUGHPUT[],
5912 double PSCL_THROUGHPUT_CHROMA[],
5914 int BytePerPixelC[],
5915 enum scan_direction_class SourceScan[],
5916 unsigned int NumberOfCursors[],
5917 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5918 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5919 unsigned int BlockWidth256BytesY[],
5920 unsigned int BlockHeight256BytesY[],
5921 unsigned int BlockWidth256BytesC[],
5922 unsigned int BlockHeight256BytesC[],
5923 double DisplayPipeLineDeliveryTimeLuma[],
5924 double DisplayPipeLineDeliveryTimeChroma[],
5925 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5926 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5927 double DisplayPipeRequestDeliveryTimeLuma[],
5928 double DisplayPipeRequestDeliveryTimeChroma[],
5929 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5930 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5931 double CursorRequestDeliveryTime[],
5932 double CursorRequestDeliveryTimePrefetch[])
5934 double req_per_swath_ub;
/*
 * Loop 1: line delivery times. Each quantity has two forms: one based on
 * PixelClock and the horizontal ratio, selected when the relevant vertical
 * ratio is <= 1, and one based on the scaler throughput and DPPCLK
 * (presumably the else branch; separator lines not shown).
 */
5937 for (k = 0; k < NumberOfActivePlanes; ++k) {
5938 if (VRatio[k] <= 1) {
5939 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5941 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
/* Chroma is 0 for planes without chroma bytes. */
5944 if (BytePerPixelC[k] == 0) {
5945 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5947 if (VRatioChroma[k] <= 1) {
5948 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5950 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/* Prefetch variants: same formulas, selected on the prefetch vertical ratios instead. */
5954 if (VRatioPrefetchY[k] <= 1) {
5955 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5957 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5960 if (BytePerPixelC[k] == 0) {
5961 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5963 if (VRatioPrefetchC[k] <= 1) {
5964 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5966 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
/*
 * Loop 2: request delivery times. The request count per swath is the swath
 * width divided by the 256-byte block width when SourceScan[k] != dm_vert,
 * and by the block height in the alternative assignment. Both operands are
 * unsigned int, so the division truncates before being stored in the
 * double req_per_swath_ub.
 */
5971 for (k = 0; k < NumberOfActivePlanes; ++k) {
5972 if (SourceScan[k] != dm_vert) {
5973 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5975 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5977 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5978 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5979 if (BytePerPixelC[k] == 0) {
5980 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5981 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
/* req_per_swath_ub is reused for chroma from here on. */
5983 if (SourceScan[k] != dm_vert) {
5984 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5986 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5988 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5989 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
/* Debug-only dump of the inputs and results for plane k. */
5991 #ifdef __DML_VBA_DEBUG__
5992 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5993 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5994 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5995 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5996 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5997 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5998 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5999 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6000 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6001 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6002 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6003 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
/* Loop 3: cursor request delivery times; only cursor index 0 of each plane is read. */
6007 for (k = 0; k < NumberOfActivePlanes; ++k) {
6008 int cursor_req_per_width;
/*
 * NOTE(review): CursorWidth and CursorBPP are unsigned int and 256 / 8 are
 * integer constants, so the quotient is computed with truncating integer
 * division before dml_ceil sees it. For a width * bpp product below
 * 256 * 8 this looks like it yields 0, which is then used as a divisor
 * below - confirm whether a floating-point division was intended.
 */
6009 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6010 if (NumberOfCursors[k] > 0) {
6011 if (VRatio[k] <= 1) {
6012 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6014 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6016 if (VRatioPrefetchY[k] <= 1) {
6017 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6019 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
/* Zero results; presumably the branch for planes with no cursor (separator line not shown). */
6022 CursorRequestDeliveryTime[k] = 0;
6023 CursorRequestDeliveryTimePrefetch[k] = 0;
6025 #ifdef __DML_VBA_DEBUG__
6026 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6027 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6028 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes - fill the per-plane meta-row / PTE-row timing
 * arrays.
 *
 * For every plane k < NumberOfActivePlanes the visible code writes:
 *   - DST_Y_PER_PTE_ROW_NOM_{L,C}[k] and DST_Y_PER_META_ROW_NOM_{L,C}[k]:
 *     row height divided by the vertical ratio (first loop);
 *   - TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}[k]: time per meta chunk,
 *     computed when DCCEnable[k] is true, otherwise 0 (second loop);
 *   - time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k]: time per PTE
 *     group, computed when GPUVMEnable is true, otherwise 0 (third loop).
 * Chroma outputs are 0 whenever BytePerPixelC[k] == 0.
 *
 * NOTE(review): this listing omits some short source lines (embedded
 * numbering gaps such as 6034 -> 6037). GPUVMEnable, MetaChunkSize,
 * DCCEnable, VRatio, HTotal and k are used below but their declarations are
 * not visible, and brace / else lines are not shown. Confirm against the
 * complete file.
 */
6033 static void CalculateMetaAndPTETimes(
6034 int NumberOfActivePlanes,
6037 int MinMetaChunkSizeBytes,
6040 double VRatioChroma[],
6041 double DestinationLinesToRequestRowInVBlank[],
6042 double DestinationLinesToRequestRowInImmediateFlip[],
6044 double PixelClock[],
6045 int BytePerPixelY[],
6046 int BytePerPixelC[],
6047 enum scan_direction_class SourceScan[],
6048 int dpte_row_height[],
6049 int dpte_row_height_chroma[],
6050 int meta_row_width[],
6051 int meta_row_width_chroma[],
6052 int meta_row_height[],
6053 int meta_row_height_chroma[],
6054 int meta_req_width[],
6055 int meta_req_width_chroma[],
6056 int meta_req_height[],
6057 int meta_req_height_chroma[],
6058 int dpte_group_bytes[],
6059 int PTERequestSizeY[],
6060 int PTERequestSizeC[],
6061 int PixelPTEReqWidthY[],
6062 int PixelPTEReqHeightY[],
6063 int PixelPTEReqWidthC[],
6064 int PixelPTEReqHeightC[],
6065 int dpte_row_width_luma_ub[],
6066 int dpte_row_width_chroma_ub[],
6067 double DST_Y_PER_PTE_ROW_NOM_L[],
6068 double DST_Y_PER_PTE_ROW_NOM_C[],
6069 double DST_Y_PER_META_ROW_NOM_L[],
6070 double DST_Y_PER_META_ROW_NOM_C[],
6071 double TimePerMetaChunkNominal[],
6072 double TimePerChromaMetaChunkNominal[],
6073 double TimePerMetaChunkVBlank[],
6074 double TimePerChromaMetaChunkVBlank[],
6075 double TimePerMetaChunkFlip[],
6076 double TimePerChromaMetaChunkFlip[],
6077 double time_per_pte_group_nom_luma[],
6078 double time_per_pte_group_vblank_luma[],
6079 double time_per_pte_group_flip_luma[],
6080 double time_per_pte_group_nom_chroma[],
6081 double time_per_pte_group_vblank_chroma[],
6082 double time_per_pte_group_flip_chroma[])
6084 unsigned int meta_chunk_width;
6085 unsigned int min_meta_chunk_width;
6086 unsigned int meta_chunk_per_row_int;
6087 unsigned int meta_row_remainder;
6088 unsigned int meta_chunk_threshold;
6089 unsigned int meta_chunks_per_row_ub;
6090 unsigned int meta_chunk_width_chroma;
6091 unsigned int min_meta_chunk_width_chroma;
6092 unsigned int meta_chunk_per_row_int_chroma;
6093 unsigned int meta_row_remainder_chroma;
6094 unsigned int meta_chunk_threshold_chroma;
6095 unsigned int meta_chunks_per_row_ub_chroma;
6096 unsigned int dpte_group_width_luma;
6097 unsigned int dpte_groups_per_row_luma_ub;
6098 unsigned int dpte_group_width_chroma;
6099 unsigned int dpte_groups_per_row_chroma_ub;
/* Loop 1: nominal destination lines per PTE row and per meta row (row height / vertical ratio). */
6102 for (k = 0; k < NumberOfActivePlanes; ++k) {
6103 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6104 if (BytePerPixelC[k] == 0) {
6105 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6107 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6109 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6110 if (BytePerPixelC[k] == 0) {
6111 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6113 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
/* Loop 2: time per meta chunk; only computed for planes with DCC enabled. */
6117 for (k = 0; k < NumberOfActivePlanes; ++k) {
6118 if (DCCEnable[k] == true) {
/* Chunk widths are derived from the chunk size in bytes, the bytes per pixel and the meta row height. */
6119 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6120 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
/* Whole chunks per row, and the width left over after them. */
6121 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6122 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
/*
 * Threshold uses the request width when SourceScan[k] != dm_vert, and the
 * request height in the alternative assignment.
 * NOTE(review): the threshold is unsigned, so the subtraction wraps if the
 * request dimension exceeds 2 * min_meta_chunk_width - confirm the inputs
 * exclude that.
 */
6123 if (SourceScan[k] != dm_vert) {
6124 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6126 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
/* Chunks per row: whole chunks + 1 if the remainder is within the threshold, + 2 in the alternative assignment. */
6128 if (meta_row_remainder <= meta_chunk_threshold) {
6129 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6131 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
/* Row time (lines * HTotal / PixelClock) split evenly across the chunks, for nominal, vblank and flip. */
6133 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6134 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6135 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6136 if (BytePerPixelC[k] == 0) {
6137 TimePerChromaMetaChunkNominal[k] = 0;
6138 TimePerChromaMetaChunkVBlank[k] = 0;
6139 TimePerChromaMetaChunkFlip[k] = 0;
/* Chroma: same computation as luma with the chroma inputs. */
6141 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6142 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
/*
 * NOTE(review): unlike the luma line above, the dividend is cast to double
 * here; the result is stored in an unsigned int, so the fractional part is
 * discarded on assignment.
 */
6143 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6144 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6145 if (SourceScan[k] != dm_vert) {
6146 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6148 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6150 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6151 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6153 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6155 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6156 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6157 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
/* All meta chunk times are 0; presumably the DCC-disabled branch (separator line not shown). */
6160 TimePerMetaChunkNominal[k] = 0;
6161 TimePerMetaChunkVBlank[k] = 0;
6162 TimePerMetaChunkFlip[k] = 0;
6163 TimePerChromaMetaChunkNominal[k] = 0;
6164 TimePerChromaMetaChunkVBlank[k] = 0;
6165 TimePerChromaMetaChunkFlip[k] = 0;
/* Loop 3: time per PTE group; only computed when GPUVMEnable is true. */
6169 for (k = 0; k < NumberOfActivePlanes; ++k) {
6170 if (GPUVMEnable == true) {
/* Group width: requests per group (group bytes / request size) times the request width, or height in the alternative assignment. */
6171 if (SourceScan[k] != dm_vert) {
6172 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6174 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
/* Groups per row: row width / group width passed through dml_ceil; the 1.0 factor forces a floating-point division. */
6176 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6177 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6178 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6179 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6180 if (BytePerPixelC[k] == 0) {
6181 time_per_pte_group_nom_chroma[k] = 0;
6182 time_per_pte_group_vblank_chroma[k] = 0;
6183 time_per_pte_group_flip_chroma[k] = 0;
/* Chroma: same computation as luma with the chroma inputs. */
6185 if (SourceScan[k] != dm_vert) {
6186 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6188 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6190 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6191 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6192 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6193 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
/* All PTE group times are 0; presumably the GPUVM-disabled branch (separator line not shown). */
6196 time_per_pte_group_nom_luma[k] = 0;
6197 time_per_pte_group_vblank_luma[k] = 0;
6198 time_per_pte_group_flip_luma[k] = 0;
6199 time_per_pte_group_nom_chroma[k] = 0;
6200 time_per_pte_group_vblank_chroma[k] = 0;
6201 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes - fill the per-plane VM group and VM
 * request timing arrays.
 *
 * For every plane k < NumberOfActivePlanes:
 *   - when GPUVMEnable is true and (DCCEnable[k] is true or
 *     GPUVMMaxPageTableLevels > 1), the code counts the VM groups and the
 *     64-byte requests for the plane and divides the vblank / immediate-flip
 *     time budget (lines * HTotal / PixelClock) by those counts;
 *   - the final four assignments set all four outputs to 0 (presumably the
 *     else branch of that condition; separator line not shown).
 *
 * NOTE(review): this listing omits some short source lines (embedded
 * numbering gaps such as 6207 -> 6209). GPUVMEnable, DCCEnable and k are
 * used below but their declarations are not visible, and brace / else lines
 * are not shown. Confirm against the complete file.
 */
6206 static void CalculateVMGroupAndRequestTimes(
6207 unsigned int NumberOfActivePlanes,
6209 unsigned int GPUVMMaxPageTableLevels,
6210 unsigned int HTotal[],
6211 int BytePerPixelC[],
6212 double DestinationLinesToRequestVMInVBlank[],
6213 double DestinationLinesToRequestVMInImmediateFlip[],
6215 double PixelClock[],
6216 int dpte_row_width_luma_ub[],
6217 int dpte_row_width_chroma_ub[],
6218 int vm_group_bytes[],
6219 unsigned int dpde0_bytes_per_frame_ub_l[],
6220 unsigned int dpde0_bytes_per_frame_ub_c[],
6221 int meta_pte_bytes_per_frame_ub_l[],
6222 int meta_pte_bytes_per_frame_ub_c[],
6223 double TimePerVMGroupVBlank[],
6224 double TimePerVMGroupFlip[],
6225 double TimePerVMRequestVBlank[],
6226 double TimePerVMRequestFlip[])
6228 int num_group_per_lower_vm_stage;
6229 int num_req_per_lower_vm_stage;
6232 for (k = 0; k < NumberOfActivePlanes; ++k) {
6233 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
/*
 * Group count. Each term is bytes-per-frame / vm_group_bytes passed through
 * dml_ceil. Three cases are visible:
 *   - DCC disabled: dpde0 bytes only (luma, plus chroma when present);
 *   - GPUVMMaxPageTableLevels == 1: meta PTE bytes only;
 *   - remaining assignments: dpde0 and meta PTE bytes together, plus a
 *     constant of 2 with chroma or 1 without.
 */
6234 if (DCCEnable[k] == false) {
6235 if (BytePerPixelC[k] > 0) {
6236 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6237 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6239 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6242 if (GPUVMMaxPageTableLevels == 1) {
6243 if (BytePerPixelC[k] > 0) {
6244 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6245 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6247 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
6250 if (BytePerPixelC[k] > 0) {
6251 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6252 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6253 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6254 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6256 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6257 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
/*
 * Request count: the same three cases, using bytes-per-frame / 64 with
 * truncating integer division and no added constant.
 */
6262 if (DCCEnable[k] == false) {
6263 if (BytePerPixelC[k] > 0) {
6264 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6266 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6269 if (GPUVMMaxPageTableLevels == 1) {
6270 if (BytePerPixelC[k] > 0) {
6271 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6273 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6276 if (BytePerPixelC[k] > 0) {
6277 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6278 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6280 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
/*
 * Time budget (lines * HTotal / PixelClock) split across the groups and
 * across the requests.
 * NOTE(review): both counters are used as divisors; a zero count (for
 * example byte totals below 64) would not be caught here - confirm the
 * callers exclude that.
 */
6285 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6286 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6287 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6288 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
/* With more than two page-table levels all four times are halved. */
6290 if (GPUVMMaxPageTableLevels > 2) {
6291 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6292 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6293 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6294 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
/* All outputs are 0; presumably the branch where the top-level condition is false (separator line not shown). */
6298 TimePerVMGroupVBlank[k] = 0;
6299 TimePerVMGroupFlip[k] = 0;
6300 TimePerVMRequestVBlank[k] = 0;
6301 TimePerVMRequestFlip[k] = 0;
6306 static void CalculateStutterEfficiency(
6307 struct display_mode_lib *mode_lib,
6308 int CompressedBufferSizeInkByte,
6309 bool UnboundedRequestEnabled,
6310 int ConfigReturnBufferSizeInKByte,
6311 int MetaFIFOSizeInKEntries,
6312 int ZeroSizeBufferEntries,
6313 int NumberOfActivePlanes,
6314 int ROBBufferSizeInKByte,
6315 double TotalDataReadBandwidth,
6318 double COMPBUF_RESERVED_SPACE_64B,
6319 double COMPBUF_RESERVED_SPACE_ZS,
6321 double SRExitZ8Time,
6322 bool SynchronizedVBlank,
6323 double Z8StutterEnterPlusExitWatermark,
6324 double StutterEnterPlusExitWatermark,
6325 bool ProgressiveToInterlaceUnitInOPP,
6327 double MinTTUVBlank[],
6329 unsigned int DETBufferSizeY[],
6330 int BytePerPixelY[],
6331 double BytePerPixelDETY[],
6332 double SwathWidthY[],
6335 double NetDCCRateLuma[],
6336 double NetDCCRateChroma[],
6337 double DCCFractionOfZeroSizeRequestsLuma[],
6338 double DCCFractionOfZeroSizeRequestsChroma[],
6341 double PixelClock[],
6343 enum scan_direction_class SourceScan[],
6344 int BlockHeight256BytesY[],
6345 int BlockWidth256BytesY[],
6346 int BlockHeight256BytesC[],
6347 int BlockWidth256BytesC[],
6348 int DCCYMaxUncompressedBlock[],
6349 int DCCCMaxUncompressedBlock[],
6352 bool WritebackEnable[],
6353 double ReadBandwidthPlaneLuma[],
6354 double ReadBandwidthPlaneChroma[],
6355 double meta_row_bw[],
6356 double dpte_row_bw[],
6357 double *StutterEfficiencyNotIncludingVBlank,
6358 double *StutterEfficiency,
6359 int *NumberOfStutterBurstsPerFrame,
6360 double *Z8StutterEfficiencyNotIncludingVBlank,
6361 double *Z8StutterEfficiency,
6362 int *Z8NumberOfStutterBurstsPerFrame,
6363 double *StutterPeriod)
6365 struct vba_vars_st *v = &mode_lib->vba;
6367 double DETBufferingTimeY;
6368 double SwathWidthYCriticalPlane = 0;
6369 double VActiveTimeCriticalPlane = 0;
6370 double FrameTimeCriticalPlane = 0;
6371 int BytePerPixelYCriticalPlane = 0;
6372 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6373 double MinTTUVBlankCriticalPlane = 0;
6374 double TotalCompressedReadBandwidth;
6375 double TotalRowReadBandwidth;
6376 double AverageDCCCompressionRate;
6377 double EffectiveCompressedBufferSize;
6378 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6379 double StutterBurstTime;
6380 int TotalActiveWriteback;
6382 double LinesInDETYRoundedDownToSwath;
6383 double MaximumEffectiveCompressionLuma;
6384 double MaximumEffectiveCompressionChroma;
6385 double TotalZeroSizeRequestReadBandwidth;
6386 double TotalZeroSizeCompressedReadBandwidth;
6387 double AverageDCCZeroSizeFraction;
6388 double AverageZeroSizeCompressionRate;
6389 int TotalNumberOfActiveOTG = 0;
6390 double LastStutterPeriod = 0.0;
6391 double LastZ8StutterPeriod = 0.0;
6394 TotalZeroSizeRequestReadBandwidth = 0;
6395 TotalZeroSizeCompressedReadBandwidth = 0;
6396 TotalRowReadBandwidth = 0;
6397 TotalCompressedReadBandwidth = 0;
6399 for (k = 0; k < NumberOfActivePlanes; ++k) {
6400 if (DCCEnable[k] == true) {
6401 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6402 || DCCYMaxUncompressedBlock[k] < 256) {
6403 MaximumEffectiveCompressionLuma = 2;
6405 MaximumEffectiveCompressionLuma = 4;
6407 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6408 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6409 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6410 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6411 if (ReadBandwidthPlaneChroma[k] > 0) {
6412 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6413 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6414 MaximumEffectiveCompressionChroma = 2;
6416 MaximumEffectiveCompressionChroma = 4;
6418 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6419 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6420 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6421 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6422 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6425 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6427 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6430 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6431 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6433 #ifdef __DML_VBA_DEBUG__
6434 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6435 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6436 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6437 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6438 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6439 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6440 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6441 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6444 if (AverageDCCZeroSizeFraction == 1) {
6445 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6446 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6447 } else if (AverageDCCZeroSizeFraction > 0) {
6448 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6449 EffectiveCompressedBufferSize = dml_min(
6450 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6451 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6452 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6453 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6454 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6456 "DML::%s: min 2 = %f\n",
6458 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6459 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6460 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6462 EffectiveCompressedBufferSize = dml_min(
6463 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6464 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6465 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6466 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6469 #ifdef __DML_VBA_DEBUG__
6470 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6471 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6472 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6476 for (k = 0; k < NumberOfActivePlanes; ++k) {
6477 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6478 / BytePerPixelDETY[k] / SwathWidthY[k];
6479 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6480 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6481 #ifdef __DML_VBA_DEBUG__
6482 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6483 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6484 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6485 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6486 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6487 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6488 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6489 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6490 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6491 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6492 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6493 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6496 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6497 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6499 *StutterPeriod = DETBufferingTimeY;
6500 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6501 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6502 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6503 SwathWidthYCriticalPlane = SwathWidthY[k];
6504 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6505 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6507 #ifdef __DML_VBA_DEBUG__
6508 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6509 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6510 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6511 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6512 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6513 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6514 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6519 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6520 #ifdef __DML_VBA_DEBUG__
6521 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6522 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6523 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6524 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6525 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6526 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6527 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6528 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6529 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6530 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6533 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6534 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6535 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6536 #ifdef __DML_VBA_DEBUG__
6537 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6538 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6539 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6540 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6541 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6543 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6546 "DML::%s: Time to finish residue swath=%f\n",
6548 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6550 TotalActiveWriteback = 0;
6551 for (k = 0; k < NumberOfActivePlanes; ++k) {
6552 if (WritebackEnable[k]) {
6553 TotalActiveWriteback = TotalActiveWriteback + 1;
6557 if (TotalActiveWriteback == 0) {
6558 #ifdef __DML_VBA_DEBUG__
6559 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6560 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6561 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6562 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6564 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6565 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6566 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6567 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6569 *StutterEfficiencyNotIncludingVBlank = 0.;
6570 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6571 *NumberOfStutterBurstsPerFrame = 0;
6572 *Z8NumberOfStutterBurstsPerFrame = 0;
6574 #ifdef __DML_VBA_DEBUG__
6575 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6576 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6577 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6578 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6579 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6582 for (k = 0; k < NumberOfActivePlanes; ++k) {
6583 if (v->BlendingAndTiming[k] == k) {
6584 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6588 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6589 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6591 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6592 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6593 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6595 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6598 *StutterEfficiency = 0;
6601 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6602 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6603 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6604 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6605 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6607 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6610 *Z8StutterEfficiency = 0.;
6613 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6614 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6615 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6616 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6617 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6618 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6619 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6620 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/*
 * CalculateSwathAndDETConfiguration - pick per-plane swath heights, split the
 * DET buffer between luma and chroma, and report viewport-size support.
 *
 * Steps performed by the visible code:
 *  1. CalculateSwathWidth() is called to fill the swath widths, the maximum
 *     swath heights and the rounded-up (_ub) swath widths.
 *  2. For every active plane a minimum swath height is derived from the
 *     pixel format, the surface tiling and the scan direction.
 *  3. Maximum and minimum swath sizes are computed and the tallest
 *     combination that fits in DETBufferSizeInKByte * 1024 / 2 is selected.
 *  4. DETBufferSizeY[k] / DETBufferSizeC[k] are assigned from the selected
 *     swath sizes.
 *  5. ViewportSizeSupportPerPlane[k] and *ViewportSizeSupport are set.
 *
 * NOTE(review): SwathHeightY, SwathHeightC, HActive, HRatio, DPPPerPlane and
 * the loop index k are used below but their declarations are not visible in
 * this listing - presumably they are further parameters/locals; confirm.
 */
6623 static void CalculateSwathAndDETConfiguration(
6624 bool ForceSingleDPP,
6625 int NumberOfActivePlanes,
6626 unsigned int DETBufferSizeInKByte,
6627 double MaximumSwathWidthLuma[],
6628 double MaximumSwathWidthChroma[],
6629 enum scan_direction_class SourceScan[],
6630 enum source_format_class SourcePixelFormat[],
6631 enum dm_swizzle_mode SurfaceTiling[],
6632 int ViewportWidth[],
6633 int ViewportHeight[],
6634 int SurfaceWidthY[],
6635 int SurfaceWidthC[],
6636 int SurfaceHeightY[],
6637 int SurfaceHeightC[],
6638 int Read256BytesBlockHeightY[],
6639 int Read256BytesBlockHeightC[],
6640 int Read256BytesBlockWidthY[],
6641 int Read256BytesBlockWidthC[],
6642 enum odm_combine_mode ODMCombineEnabled[],
6643 int BlendingAndTiming[],
6646 double BytePerPixDETY[],
6647 double BytePerPixDETC[],
6650 double HRatioChroma[],
6652 int swath_width_luma_ub[],
6653 int swath_width_chroma_ub[],
6654 double SwathWidth[],
6655 double SwathWidthChroma[],
6658 unsigned int DETBufferSizeY[],
6659 unsigned int DETBufferSizeC[],
6660 bool ViewportSizeSupportPerPlane[],
6661 bool *ViewportSizeSupport)
/* Per-plane scratch arrays filled by CalculateSwathWidth(). */
6663 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6664 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6665 int MinimumSwathHeightY;
6666 int MinimumSwathHeightC;
6667 int RoundedUpMaxSwathSizeBytesY;
6668 int RoundedUpMaxSwathSizeBytesC;
6669 int RoundedUpMinSwathSizeBytesY;
6670 int RoundedUpMinSwathSizeBytesC;
6671 int RoundedUpSwathSizeBytesY;
6672 int RoundedUpSwathSizeBytesC;
6673 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6674 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6677 CalculateSwathWidth(
6679 NumberOfActivePlanes,
6691 Read256BytesBlockHeightY,
6692 Read256BytesBlockHeightC,
6693 Read256BytesBlockWidthY,
6694 Read256BytesBlockWidthC,
6699 SwathWidthSingleDPP,
6700 SwathWidthSingleDPPChroma,
6703 MaximumSwathHeightY,
6704 MaximumSwathHeightC,
6705 swath_width_luma_ub,
6706 swath_width_chroma_ub);
/* Start optimistic; cleared below as soon as one plane fails the check. */
6708 *ViewportSizeSupport = true;
6709 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Minimum swath height selection. The first group handles the formats listed
 * in the condition below; the chroma minimum is kept at its maximum there.
 */
6710 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6711 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6712 if (SurfaceTiling[k] == dm_sw_linear
6713 || (SourcePixelFormat[k] == dm_444_64
6714 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6715 && SourceScan[k] != dm_vert)) {
6716 MinimumSwathHeightY = MaximumSwathHeightY[k];
/*
 * NOTE(review): dm_444_8 is not among the formats tested by the enclosing
 * condition as visible here, so this branch looks unreachable - confirm
 * against the HW reference before changing anything.
 */
6717 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6718 MinimumSwathHeightY = MaximumSwathHeightY[k];
6720 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6722 MinimumSwathHeightC = MaximumSwathHeightC[k];
/* Remaining formats: linear tiling keeps both maxima, others halve Y and/or C. */
6724 if (SurfaceTiling[k] == dm_sw_linear) {
6725 MinimumSwathHeightY = MaximumSwathHeightY[k];
6726 MinimumSwathHeightC = MaximumSwathHeightC[k];
6727 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6728 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6729 MinimumSwathHeightC = MaximumSwathHeightC[k];
6730 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6731 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6732 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6733 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6734 MinimumSwathHeightY = MaximumSwathHeightY[k];
6735 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6737 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6738 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/*
 * Swath size = rounded-up swath width * DET bytes per pixel * swath height.
 * For dm_420_10 the sizes are additionally passed through dml_ceil(x, 256)
 * (presumably rounding up to a multiple of 256 - verify dml_ceil semantics).
 */
6742 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6743 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6744 if (SourcePixelFormat[k] == dm_420_10) {
6745 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6746 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6748 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6749 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6750 if (SourcePixelFormat[k] == dm_420_10) {
6751 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6752 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Swath height choice, in order of preference:
 *   max Y + max C, if the pair fits in DETBufferSizeInKByte * 1024 / 2;
 *   min Y + max C, if max Y >= 1.5 * max C and that pair fits;
 *   max Y + min C, if max Y <  1.5 * max C and that pair fits;
 *   min Y + min C otherwise (the final assignments below).
 */
6755 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6756 SwathHeightY[k] = MaximumSwathHeightY[k];
6757 SwathHeightC[k] = MaximumSwathHeightC[k];
6758 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6759 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6760 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6761 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6762 SwathHeightY[k] = MinimumSwathHeightY;
6763 SwathHeightC[k] = MaximumSwathHeightC[k];
6764 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6765 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6766 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6767 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6768 SwathHeightY[k] = MaximumSwathHeightY[k];
6769 SwathHeightC[k] = MinimumSwathHeightC;
6770 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6771 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6773 SwathHeightY[k] = MinimumSwathHeightY;
6774 SwathHeightC[k] = MinimumSwathHeightC;
6775 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6776 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * DET split uses DETBufferSizeInKByte passed through dml_ceil(x, 64):
 *   no chroma swath (SwathHeightC == 0)  -> everything to Y, 0 to C;
 *   Y swath bytes <= 1.5 * C swath bytes -> half each;
 *   otherwise                            -> 2/3 to Y (via dml_floor(x, 1024)), 1/3 to C.
 */
6779 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6780 if (SwathHeightC[k] == 0) {
6781 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6782 DETBufferSizeC[k] = 0;
6783 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6784 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6785 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6787 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6788 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
/*
 * The plane is unsupported when even the minimum swaths exceed half of the
 * (rounded) DET size, or when a swath width exceeds its maximum; the chroma
 * width is only checked when the plane has a chroma swath.
 */
6791 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6792 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6793 *ViewportSizeSupport = false;
6794 ViewportSizeSupportPerPlane[k] = false;
6796 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth - compute per-plane swath widths, maximum swath heights
 * and the rounded-up ("_ub") swath widths.
 *
 * Written per plane k by the visible code:
 *   SwathWidthSingleDPPY/C[k], SwathWidthY/C[k],
 *   MaximumSwathHeightY/C[k], swath_width_luma_ub[k], swath_width_chroma_ub[k].
 *
 * NOTE(review): HActive, HRatio, DPPPerPlane, BytePerPixC and the indices
 * j and k are used below but their declarations are not visible in this
 * listing - presumably further parameters/locals; confirm.
 */
6802 static void CalculateSwathWidth(
6803 bool ForceSingleDPP,
6804 int NumberOfActivePlanes,
6805 enum source_format_class SourcePixelFormat[],
6806 enum scan_direction_class SourceScan[],
6807 int ViewportWidth[],
6808 int ViewportHeight[],
6809 int SurfaceWidthY[],
6810 int SurfaceWidthC[],
6811 int SurfaceHeightY[],
6812 int SurfaceHeightC[],
6813 enum odm_combine_mode ODMCombineEnabled[],
6816 int Read256BytesBlockHeightY[],
6817 int Read256BytesBlockHeightC[],
6818 int Read256BytesBlockWidthY[],
6819 int Read256BytesBlockWidthC[],
6820 int BlendingAndTiming[],
6824 double SwathWidthSingleDPPY[],
6825 double SwathWidthSingleDPPC[],
6826 double SwathWidthY[],
6827 double SwathWidthC[],
6828 int MaximumSwathHeightY[],
6829 int MaximumSwathHeightC[],
6830 int swath_width_luma_ub[],
6831 int swath_width_chroma_ub[])
6833 enum odm_combine_mode MainPlaneODMCombine;
6836 #ifdef __DML_VBA_DEBUG__
6837 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6840 for (k = 0; k < NumberOfActivePlanes; ++k) {
/* Single-DPP luma swath width: viewport width unless scanning vertically, then viewport height. */
6841 if (SourceScan[k] != dm_vert) {
6842 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6844 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6847 #ifdef __DML_VBA_DEBUG__
6848 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6849 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
/*
 * Default to this plane's own ODM mode, then take the mode of plane j
 * when BlendingAndTiming[k] names j.
 */
6852 MainPlaneODMCombine = ODMCombineEnabled[k];
6853 for (j = 0; j < NumberOfActivePlanes; ++j) {
6854 if (BlendingAndTiming[k] == j) {
6855 MainPlaneODMCombine = ODMCombineEnabled[j];
/*
 * Luma swath width: with ODM combine it is capped at HActive / 4 (or / 2)
 * scaled by HRatio; with two DPPs and no ODM combine it is halved;
 * otherwise it equals the single-DPP width.
 */
6859 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6860 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6861 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6862 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6863 } else if (DPPPerPlane[k] == 2) {
6864 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6866 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6869 #ifdef __DML_VBA_DEBUG__
6870 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6871 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
/* Chroma widths are half the luma widths for the 4:2:0 formats, equal otherwise. */
6874 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6875 SwathWidthC[k] = SwathWidthY[k] / 2;
6876 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6878 SwathWidthC[k] = SwathWidthY[k];
6879 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP overrides the widths computed above with the single-DPP values. */
6882 if (ForceSingleDPP == true) {
6883 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6884 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/*
 * Surface dimensions passed through dml_ceil() with the 256-byte block
 * dimension (presumably rounding up to a block multiple - verify dml_ceil).
 */
6887 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6888 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6889 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6890 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6892 #ifdef __DML_VBA_DEBUG__
6893 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
/*
 * Non-vertical scan: the maximum swath height is the 256-byte block height
 * and the _ub width is dml_ceil(width - 1, block width) + block width,
 * capped by the rounded surface width. Chroma is 0 when BytePerPixC is 0.
 */
6896 if (SourceScan[k] != dm_vert) {
6897 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6898 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6899 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6900 if (BytePerPixC[k] > 0) {
6901 swath_width_chroma_ub[k] = dml_min(
6903 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6905 swath_width_chroma_ub[k] = 0;
/* Vertical scan: same computation with block width and height roles swapped. */
6908 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6909 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6910 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6911 if (BytePerPixC[k] > 0) {
6912 swath_width_chroma_ub[k] = dml_min(
6913 surface_height_ub_c,
6914 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6916 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency - return the extra latency computed as
 *
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *     + ExtraLatencyBytes / ReturnBW
 *
 * where ExtraLatencyBytes is obtained from CalculateExtraLatencyBytes().
 *
 * NOTE(review): DCFCLK and ReturnBW are used below but their declarations
 * are not visible in this listing - presumably further parameters; confirm.
 */
6923 static double CalculateExtraLatency(
6924 int RoundTripPingLatencyCycles,
6925 int ReorderingBytes,
6927 int TotalNumberOfActiveDPP,
6928 int PixelChunkSizeInKByte,
6929 int TotalNumberOfDCCActiveDPP,
6934 int NumberOfActivePlanes,
6936 int dpte_group_bytes[],
6937 double HostVMInefficiencyFactor,
6938 double HostVMMinPageSize,
6939 int HostVMMaxNonCachedPageTableLevels)
6941 double ExtraLatencyBytes;
6942 double ExtraLatency;
6944 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6946 TotalNumberOfActiveDPP,
6947 PixelChunkSizeInKByte,
6948 TotalNumberOfDCCActiveDPP,
6952 NumberOfActivePlanes,
6955 HostVMInefficiencyFactor,
6957 HostVMMaxNonCachedPageTableLevels);
/* Cycle-count term divided by DCFCLK plus byte-count term divided by ReturnBW. */
6959 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6961 #ifdef __DML_VBA_DEBUG__
6962 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6963 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6964 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6965 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6966 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6969 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes - accumulate a byte count in ret from:
 *   - ReorderingBytes,
 *   - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0,
 *   - when GPUVMEnable is true, per active plane:
 *       NumberOfDPP[k] * dpte_group_bytes[k]
 *         * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor.
 *
 * NOTE(review): ret, GPUVMEnable, HostVMEnable, MetaChunkSize and NumberOfDPP
 * are used below but their declarations (and the statement returning ret)
 * are not visible in this listing - confirm against the full definition.
 */
6972 static double CalculateExtraLatencyBytes(
6973 int ReorderingBytes,
6974 int TotalNumberOfActiveDPP,
6975 int PixelChunkSizeInKByte,
6976 int TotalNumberOfDCCActiveDPP,
6980 int NumberOfActivePlanes,
6982 int dpte_group_bytes[],
6983 double HostVMInefficiencyFactor,
6984 double HostVMMinPageSize,
6985 int HostVMMaxNonCachedPageTableLevels)
6988 int HostVMDynamicLevels = 0, k;
/*
 * With both GPUVM and HostVM enabled, the dynamic level count is the
 * non-cached level count reduced by 0, 1 or 2 (clamped at 0) as
 * HostVMMinPageSize crosses 2048 and 1048576; otherwise it is 0.
 */
6990 if (GPUVMEnable == true && HostVMEnable == true) {
6991 if (HostVMMinPageSize < 2048) {
6992 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
6993 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
6994 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
6996 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
6999 HostVMDynamicLevels = 0;
/* Chunk sizes are multiplied by 1024.0 before being added to ReorderingBytes. */
7002 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
7004 if (GPUVMEnable == true) {
7005 for (k = 0; k < NumberOfActivePlanes; ++k) {
7006 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - take the largest of the three urgent latency
 * inputs and, when DoUrgentLatencyAdjustment is true, add
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * NOTE(review): ret and FabricClock are used below but their declarations
 * (and the statement returning ret) are not visible in this listing -
 * confirm against the full definition.
 */
7012 static double CalculateUrgentLatency(
7013 double UrgentLatencyPixelDataOnly,
7014 double UrgentLatencyPixelMixedWithVMData,
7015 double UrgentLatencyVMDataOnly,
7016 bool DoUrgentLatencyAdjustment,
7017 double UrgentLatencyAdjustmentFabricClockComponent,
7018 double UrgentLatencyAdjustmentFabricClockReference,
7023 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
7024 if (DoUrgentLatencyAdjustment == true) {
/* The added term is zero when FabricClock equals the reference clock. */
7025 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
7030 static void UseMinimumDCFCLK(
7031 struct display_mode_lib *mode_lib,
7032 int MaxPrefetchMode,
7033 int ReorderingBytes)
7035 struct vba_vars_st *v = &mode_lib->vba;
7036 int dummy1, i, j, k;
7037 double NormalEfficiency, dummy2, dummy3;
7038 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7040 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7041 for (i = 0; i < v->soc.num_states; ++i) {
7042 for (j = 0; j <= 1; ++j) {
7043 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7044 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7045 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7046 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7047 double MinimumTWait;
7048 double NonDPTEBandwidth;
7049 double DPTEBandwidth;
7050 double DCFCLKRequiredForAverageBandwidth;
7051 double ExtraLatencyBytes;
7052 double ExtraLatencyCycles;
7053 double DCFCLKRequiredForPeakBandwidth;
7054 int NoOfDPPState[DC__NUM_DPP__MAX];
7055 double MinimumTvmPlus2Tr0;
7057 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7058 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7059 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7060 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7063 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7064 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7067 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7068 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7069 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7070 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7071 DCFCLKRequiredForAverageBandwidth = dml_max3(
7072 v->ProjectedDCFCLKDeepSleep[i][j],
7073 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7074 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7075 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7077 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7079 v->TotalNumberOfActiveDPP[i][j],
7080 v->PixelChunkSizeInKByte,
7081 v->TotalNumberOfDCCActiveDPP[i][j],
7085 v->NumberOfActivePlanes,
7087 v->dpte_group_bytes,
7089 v->HostVMMinPageSize,
7090 v->HostVMMaxNonCachedPageTableLevels);
7091 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7092 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7093 double DCFCLKCyclesRequiredInPrefetch;
7094 double ExpectedPrefetchBWAcceleration;
7095 double PrefetchTime;
7097 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7098 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7099 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7100 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7101 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7102 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7103 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7104 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7105 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7106 DynamicMetadataVMExtraLatency[k] =
7107 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7108 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7109 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7111 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7112 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7113 - DynamicMetadataVMExtraLatency[k];
7115 if (PrefetchTime > 0) {
7116 double ExpectedVRatioPrefetch;
7117 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7118 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7119 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7120 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7121 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7122 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7123 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7126 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7128 if (v->DynamicMetadataEnable[k] == true) {
7133 double AllowedTimeForUrgentExtraLatency;
7135 CalculateVupdateAndDynamicMetadataParameters(
7136 v->MaxInterDCNTileRepeaters,
7137 v->RequiredDPPCLK[i][j][k],
7138 v->RequiredDISPCLK[i][j],
7139 v->ProjectedDCFCLKDeepSleep[i][j],
7142 v->VTotal[k] - v->VActive[k],
7143 v->DynamicMetadataTransmittedBytes[k],
7144 v->DynamicMetadataLinesBeforeActiveRequired[k],
7146 v->ProgressiveToInterlaceUnitInOPP,
7154 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7155 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7156 if (AllowedTimeForUrgentExtraLatency > 0) {
7157 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7158 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7159 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7161 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7165 DCFCLKRequiredForPeakBandwidth = 0;
7166 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7167 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7169 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7170 * (v->GPUVMEnable == true ?
7171 (v->HostVMEnable == true ?
7172 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7174 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7175 double MaximumTvmPlus2Tr0PlusTsw;
7176 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7177 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7178 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7180 DCFCLKRequiredForPeakBandwidth = dml_max3(
7181 DCFCLKRequiredForPeakBandwidth,
7182 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7183 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7186 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize
 *
 * Decides whether unbounded requesting is enabled and derives the compressed
 * buffer size from the return buffer size and the DET buffer size.
 *
 * Values read by the body:
 *   DETBufferSizeInKByte                     - passed through dml_ceil(.., 64) before use.
 *   ConfigReturnBufferSizeInKByte            - the DET allocation is subtracted from this.
 *   UseUnboundedRequestingFinal              - policy forwarded to UnboundedRequest().
 *   TotalActiveDPP                           - forwarded to UnboundedRequest(); DET multiplier
 *                                              when unbounded requesting is enabled.
 *   NoChromaPlanes                           - forwarded to UnboundedRequest().
 *   MaxNumDPP                                - DET multiplier when unbounded requesting is
 *                                              not enabled.
 *   CompressedBufferSegmentSizeInkByteFinal  - scale factor applied as (value * this / 64).
 *   Output                                   - only element [0] is read.
 *
 * Values written:
 *   *UnboundedRequestEnabled     - result of UnboundedRequest().
 *   *CompressedBufferSizeInkByte - computed compressed buffer size.
 *
 * NOTE(review): TotalActiveDPP and MaxNumDPP are used in the body; confirm
 * their parameter declarations (type and position) in the signature.
 */
7191 static void CalculateUnboundedRequestAndCompressedBufferSize(
7192 unsigned int DETBufferSizeInKByte,
7193 int ConfigReturnBufferSizeInKByte,
7194 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7196 bool NoChromaPlanes,
7198 int CompressedBufferSegmentSizeInkByteFinal,
7199 enum output_encoder_class *Output,
7200 bool *UnboundedRequestEnabled,
7201 int *CompressedBufferSizeInkByte)
// DET size as adjusted by dml_ceil with a granularity argument of 64
// (presumably rounds up to the next multiple of 64 -- confirm against the dml_ceil definition).
7203 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
// The enable decision is delegated to UnboundedRequest(); only Output[0] is consulted.
7205 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// Return buffer minus the DET allocation: the DET size is multiplied by
// TotalActiveDPP when unbounded requesting is enabled, by MaxNumDPP otherwise.
// The arithmetic involves a double (actDETBufferSizeInKByte), and the result is
// converted to int when stored through the int pointer.
7206 *CompressedBufferSizeInkByte = (
7207 *UnboundedRequestEnabled == true ?
7208 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7209 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Rescale by CompressedBufferSegmentSizeInkByteFinal / 64. Both operands are int,
// so the multiplication happens first and the division by 64 is an integer division.
7210 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only trace of the inputs and results, compiled in when __DML_VBA_DEBUG__ is defined.
7212 #ifdef __DML_VBA_DEBUG__
7213 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
// NOTE(review): DETBufferSizeInKByte is declared unsigned int but printed with %d; %u would match the type.
7214 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7215 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7216 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7217 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7218 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7219 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7223 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7225 bool ret_val = false;
7227 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7228 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {