2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
28 #include "../display_mode_lib.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
42 #define BPP_BLENDED_PIPE 0xffffffff
43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184
44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
46 // For DML-C changes that haven't been propagated to VBA yet
47 //#define __DML_VBA_ALLOW_DELTA__
49 // Move these to IP parameters/constants
51 // The vstartup value at which DML starts checking whether the mode can be supported
52 #define __DML_VBA_MIN_VSTARTUP__ 9
54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95)
57 // fudge factor for min dcfclk calculation
58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
69 enum scan_direction_class SourceScan;
70 unsigned int BlockWidth256BytesY;
71 unsigned int BlockHeight256BytesY;
72 unsigned int BlockWidth256BytesC;
73 unsigned int BlockHeight256BytesC;
74 unsigned int InterlaceEnable;
75 unsigned int NumberOfCursors;
78 unsigned int DCCEnable;
79 bool ODMCombineIsEnabled;
80 enum source_format_class SourcePixelFormat;
83 bool ProgressiveToInterlaceUnitInOPP;
87 #define BPP_BLENDED_PIPE 0xffffffff
89 static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102 static unsigned int dscceComputeDelay(
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110 static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
165 double *prefetch_vmrow_bw,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174 static void CalculateDCCConfiguration(
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197 static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207 static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244 static void CalculateRowBandwidth(
246 enum source_format_class SourcePixelFormat,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
260 double *dpte_row_bw);
262 static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
265 double HostVMInefficiencyFactor,
266 double UrgentExtraLatency,
267 double UrgentLatency,
268 double PDEAndMetaPTEBytesPerFrame,
270 double DPTEBytesPerRow);
271 static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 int WritebackDestinationWidth,
277 int WritebackDestinationHeight,
278 int WritebackSourceHeight,
279 unsigned int HTotal);
281 static void CalculateVupdateAndDynamicMetadataParameters(
282 int MaxInterDCNTileRepeaters,
285 double DCFClkDeepSleep,
289 int DynamicMetadataTransmittedBytes,
290 int DynamicMetadataLinesBeforeActiveRequired,
292 bool ProgressiveToInterlaceUnitInOPP,
297 int *VUpdateOffsetPix,
298 double *VUpdateWidthPix,
299 double *VReadyOffsetPix);
301 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
302 struct display_mode_lib *mode_lib,
303 unsigned int PrefetchMode,
306 double UrgentLatency,
309 double DCFCLKDeepSleep,
310 unsigned int DETBufferSizeY[],
311 unsigned int DETBufferSizeC[],
312 unsigned int SwathHeightY[],
313 unsigned int SwathHeightC[],
314 double SwathWidthY[],
315 double SwathWidthC[],
316 unsigned int DPPPerPlane[],
317 double BytePerPixelDETY[],
318 double BytePerPixelDETC[],
319 bool UnboundedRequestEnabled,
320 int unsigned CompressedBufferSizeInkByte,
321 enum clock_change_support *DRAMClockChangeSupport,
322 double *StutterExitWatermark,
323 double *StutterEnterPlusExitWatermark,
324 double *Z8StutterExitWatermark,
325 double *Z8StutterEnterPlusExitWatermark);
327 static void CalculateDCFCLKDeepSleep(
328 struct display_mode_lib *mode_lib,
329 unsigned int NumberOfActivePlanes,
333 double VRatioChroma[],
334 double SwathWidthY[],
335 double SwathWidthC[],
336 unsigned int DPPPerPlane[],
338 double HRatioChroma[],
340 double PSCL_THROUGHPUT[],
341 double PSCL_THROUGHPUT_CHROMA[],
343 double ReadBandwidthLuma[],
344 double ReadBandwidthChroma[],
346 double *DCFCLKDeepSleep);
348 static void CalculateUrgentBurstFactor(
349 int swath_width_luma_ub,
350 int swath_width_chroma_ub,
351 unsigned int SwathHeightY,
352 unsigned int SwathHeightC,
354 double UrgentLatency,
355 double CursorBufferSize,
356 unsigned int CursorWidth,
357 unsigned int CursorBPP,
360 double BytePerPixelInDETY,
361 double BytePerPixelInDETC,
362 double DETBufferSizeY,
363 double DETBufferSizeC,
364 double *UrgentBurstFactorCursor,
365 double *UrgentBurstFactorLuma,
366 double *UrgentBurstFactorChroma,
367 bool *NotEnoughUrgentLatencyHiding);
369 static void UseMinimumDCFCLK(
370 struct display_mode_lib *mode_lib,
372 int ReorderingBytes);
374 static void CalculatePixelDeliveryTimes(
375 unsigned int NumberOfActivePlanes,
377 double VRatioChroma[],
378 double VRatioPrefetchY[],
379 double VRatioPrefetchC[],
380 unsigned int swath_width_luma_ub[],
381 unsigned int swath_width_chroma_ub[],
382 unsigned int DPPPerPlane[],
384 double HRatioChroma[],
386 double PSCL_THROUGHPUT[],
387 double PSCL_THROUGHPUT_CHROMA[],
390 enum scan_direction_class SourceScan[],
391 unsigned int NumberOfCursors[],
392 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
393 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
394 unsigned int BlockWidth256BytesY[],
395 unsigned int BlockHeight256BytesY[],
396 unsigned int BlockWidth256BytesC[],
397 unsigned int BlockHeight256BytesC[],
398 double DisplayPipeLineDeliveryTimeLuma[],
399 double DisplayPipeLineDeliveryTimeChroma[],
400 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
401 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
402 double DisplayPipeRequestDeliveryTimeLuma[],
403 double DisplayPipeRequestDeliveryTimeChroma[],
404 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
406 double CursorRequestDeliveryTime[],
407 double CursorRequestDeliveryTimePrefetch[]);
409 static void CalculateMetaAndPTETimes(
410 int NumberOfActivePlanes,
413 int MinMetaChunkSizeBytes,
416 double VRatioChroma[],
417 double DestinationLinesToRequestRowInVBlank[],
418 double DestinationLinesToRequestRowInImmediateFlip[],
423 enum scan_direction_class SourceScan[],
424 int dpte_row_height[],
425 int dpte_row_height_chroma[],
426 int meta_row_width[],
427 int meta_row_width_chroma[],
428 int meta_row_height[],
429 int meta_row_height_chroma[],
430 int meta_req_width[],
431 int meta_req_width_chroma[],
432 int meta_req_height[],
433 int meta_req_height_chroma[],
434 int dpte_group_bytes[],
435 int PTERequestSizeY[],
436 int PTERequestSizeC[],
437 int PixelPTEReqWidthY[],
438 int PixelPTEReqHeightY[],
439 int PixelPTEReqWidthC[],
440 int PixelPTEReqHeightC[],
441 int dpte_row_width_luma_ub[],
442 int dpte_row_width_chroma_ub[],
443 double DST_Y_PER_PTE_ROW_NOM_L[],
444 double DST_Y_PER_PTE_ROW_NOM_C[],
445 double DST_Y_PER_META_ROW_NOM_L[],
446 double DST_Y_PER_META_ROW_NOM_C[],
447 double TimePerMetaChunkNominal[],
448 double TimePerChromaMetaChunkNominal[],
449 double TimePerMetaChunkVBlank[],
450 double TimePerChromaMetaChunkVBlank[],
451 double TimePerMetaChunkFlip[],
452 double TimePerChromaMetaChunkFlip[],
453 double time_per_pte_group_nom_luma[],
454 double time_per_pte_group_vblank_luma[],
455 double time_per_pte_group_flip_luma[],
456 double time_per_pte_group_nom_chroma[],
457 double time_per_pte_group_vblank_chroma[],
458 double time_per_pte_group_flip_chroma[]);
460 static void CalculateVMGroupAndRequestTimes(
461 unsigned int NumberOfActivePlanes,
463 unsigned int GPUVMMaxPageTableLevels,
464 unsigned int HTotal[],
466 double DestinationLinesToRequestVMInVBlank[],
467 double DestinationLinesToRequestVMInImmediateFlip[],
470 int dpte_row_width_luma_ub[],
471 int dpte_row_width_chroma_ub[],
472 int vm_group_bytes[],
473 unsigned int dpde0_bytes_per_frame_ub_l[],
474 unsigned int dpde0_bytes_per_frame_ub_c[],
475 int meta_pte_bytes_per_frame_ub_l[],
476 int meta_pte_bytes_per_frame_ub_c[],
477 double TimePerVMGroupVBlank[],
478 double TimePerVMGroupFlip[],
479 double TimePerVMRequestVBlank[],
480 double TimePerVMRequestFlip[]);
482 static void CalculateStutterEfficiency(
483 struct display_mode_lib *mode_lib,
484 int CompressedBufferSizeInkByte,
485 bool UnboundedRequestEnabled,
486 int ConfigReturnBufferSizeInKByte,
487 int MetaFIFOSizeInKEntries,
488 int ZeroSizeBufferEntries,
489 int NumberOfActivePlanes,
490 int ROBBufferSizeInKByte,
491 double TotalDataReadBandwidth,
494 double COMPBUF_RESERVED_SPACE_64B,
495 double COMPBUF_RESERVED_SPACE_ZS,
498 bool SynchronizedVBlank,
499 double Z8StutterEnterPlusExitWatermark,
500 double StutterEnterPlusExitWatermark,
501 bool ProgressiveToInterlaceUnitInOPP,
503 double MinTTUVBlank[],
505 unsigned int DETBufferSizeY[],
507 double BytePerPixelDETY[],
508 double SwathWidthY[],
511 double NetDCCRateLuma[],
512 double NetDCCRateChroma[],
513 double DCCFractionOfZeroSizeRequestsLuma[],
514 double DCCFractionOfZeroSizeRequestsChroma[],
519 enum scan_direction_class SourceScan[],
520 int BlockHeight256BytesY[],
521 int BlockWidth256BytesY[],
522 int BlockHeight256BytesC[],
523 int BlockWidth256BytesC[],
524 int DCCYMaxUncompressedBlock[],
525 int DCCCMaxUncompressedBlock[],
528 bool WritebackEnable[],
529 double ReadBandwidthPlaneLuma[],
530 double ReadBandwidthPlaneChroma[],
531 double meta_row_bw[],
532 double dpte_row_bw[],
533 double *StutterEfficiencyNotIncludingVBlank,
534 double *StutterEfficiency,
535 int *NumberOfStutterBurstsPerFrame,
536 double *Z8StutterEfficiencyNotIncludingVBlank,
537 double *Z8StutterEfficiency,
538 int *Z8NumberOfStutterBurstsPerFrame,
539 double *StutterPeriod);
541 static void CalculateSwathAndDETConfiguration(
543 int NumberOfActivePlanes,
544 unsigned int DETBufferSizeInKByte,
545 double MaximumSwathWidthLuma[],
546 double MaximumSwathWidthChroma[],
547 enum scan_direction_class SourceScan[],
548 enum source_format_class SourcePixelFormat[],
549 enum dm_swizzle_mode SurfaceTiling[],
551 int ViewportHeight[],
554 int SurfaceHeightY[],
555 int SurfaceHeightC[],
556 int Read256BytesBlockHeightY[],
557 int Read256BytesBlockHeightC[],
558 int Read256BytesBlockWidthY[],
559 int Read256BytesBlockWidthC[],
560 enum odm_combine_mode ODMCombineEnabled[],
561 int BlendingAndTiming[],
564 double BytePerPixDETY[],
565 double BytePerPixDETC[],
568 double HRatioChroma[],
570 int swath_width_luma_ub[],
571 int swath_width_chroma_ub[],
573 double SwathWidthChroma[],
576 unsigned int DETBufferSizeY[],
577 unsigned int DETBufferSizeC[],
578 bool ViewportSizeSupportPerPlane[],
579 bool *ViewportSizeSupport);
580 static void CalculateSwathWidth(
582 int NumberOfActivePlanes,
583 enum source_format_class SourcePixelFormat[],
584 enum scan_direction_class SourceScan[],
586 int ViewportHeight[],
589 int SurfaceHeightY[],
590 int SurfaceHeightC[],
591 enum odm_combine_mode ODMCombineEnabled[],
594 int Read256BytesBlockHeightY[],
595 int Read256BytesBlockHeightC[],
596 int Read256BytesBlockWidthY[],
597 int Read256BytesBlockWidthC[],
598 int BlendingAndTiming[],
602 double SwathWidthSingleDPPY[],
603 double SwathWidthSingleDPPC[],
604 double SwathWidthY[],
605 double SwathWidthC[],
606 int MaximumSwathHeightY[],
607 int MaximumSwathHeightC[],
608 int swath_width_luma_ub[],
609 int swath_width_chroma_ub[]);
611 static double CalculateExtraLatency(
612 int RoundTripPingLatencyCycles,
615 int TotalNumberOfActiveDPP,
616 int PixelChunkSizeInKByte,
617 int TotalNumberOfDCCActiveDPP,
622 int NumberOfActivePlanes,
624 int dpte_group_bytes[],
625 double HostVMInefficiencyFactor,
626 double HostVMMinPageSize,
627 int HostVMMaxNonCachedPageTableLevels);
629 static double CalculateExtraLatencyBytes(
631 int TotalNumberOfActiveDPP,
632 int PixelChunkSizeInKByte,
633 int TotalNumberOfDCCActiveDPP,
637 int NumberOfActivePlanes,
639 int dpte_group_bytes[],
640 double HostVMInefficiencyFactor,
641 double HostVMMinPageSize,
642 int HostVMMaxNonCachedPageTableLevels);
644 static double CalculateUrgentLatency(
645 double UrgentLatencyPixelDataOnly,
646 double UrgentLatencyPixelMixedWithVMData,
647 double UrgentLatencyVMDataOnly,
648 bool DoUrgentLatencyAdjustment,
649 double UrgentLatencyAdjustmentFabricClockComponent,
650 double UrgentLatencyAdjustmentFabricClockReference,
651 double FabricClockSingle);
653 static void CalculateUnboundedRequestAndCompressedBufferSize(
654 unsigned int DETBufferSizeInKByte,
655 int ConfigReturnBufferSizeInKByte,
656 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
660 int CompressedBufferSegmentSizeInkByteFinal,
661 enum output_encoder_class *Output,
662 bool *UnboundedRequestEnabled,
663 int *CompressedBufferSizeInkByte);
665 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
// dml31_recalculate - top-level recalculation entry point for this file.
//
// Runs four stages in order, each on the same mode_lib:
//   1. ModeSupportAndSystemConfiguration
//   2. PixelClockAdjustmentForProgressiveToInterlaceUnit
//   3. DisplayPipeConfiguration
//   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation
// Returns nothing; any results are produced by the callees.
667 void dml31_recalculate(struct display_mode_lib *mode_lib)
669 ModeSupportAndSystemConfiguration(mode_lib);
670 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
671 DisplayPipeConfiguration(mode_lib);
// Only when __DML_VBA_DEBUG__ is defined: trace entry into the final stage.
672 #ifdef __DML_VBA_DEBUG__
673 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
675 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
// dscceComputeDelay - delay computation for the DSC encoder ("dscce").
//
// Builds an initial transmit delay from rcModelSize, BPP and pixelsPerClock,
// combines it with slice geometry (sliceWidth, numSlices) into Delay, and
// finally scales Delay into pixels (Delay * 3 * pixelsPerClock).
// NOTE(review): the declaration of BPP and the return statement are not
// visible in this listing; presumably BPP is a parameter and `pixels` is the
// returned value - confirm against the full file.
678 static unsigned int dscceComputeDelay(
681 unsigned int sliceWidth,
682 unsigned int numSlices,
683 enum output_format_class pixelFormat,
684 enum output_encoder_class Output)
686 // valid bpc = source bits per component in the set of {8, 10, 12}
687 // valid bpp = increments of 1/16 of a bit
688 // min = 6/7/8 in N420/N422/444, respectively
689 // max = such that compression is 1:1
690 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
691 //valid numSlices = number of slices in the horizontal direction per DSC engine in the set of {1, 2, 3, 4}
692 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
695 unsigned int rcModelSize = 8192;
697 // N422/N420 operate at 2 pixels per clock
698 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
// pixelsPerClock is selected by pixelFormat; the assignments in each arm are
// not visible here. It starts at 0 and is used as a divisor below, so every
// arm must set it to a non-zero value.
700 if (pixelFormat == dm_420)
702 else if (pixelFormat == dm_444)
704 else if (pixelFormat == dm_n422)
706 // #all other modes operate at 1 pixel per clock
710 //initial transmit delay as per PPS
// Half of rcModelSize, divided by BPP and by pixelsPerClock, passed through dml_round.
711 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
721 //divide by pixel per cycle to compute slice width as seen by DSC
// Unsigned integer division: any remainder is discarded.
722 w = sliceWidth / pixelsPerClock;
724 //422 mode has an additional cycle of delay
// NOTE(review): the comment above mentions 422, but this condition tests
// dm_420, dm_444 and dm_n422; the branch bodies are not visible here -
// confirm which formats receive the extra cycle.
725 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
730 //main calculation for the dscce
731 ix = initalXmitDelay + 45;
736 ax = (a + 2) / 3 + D + 6 + 1;
// (ax + wx - 1) / wx is ax / wx rounded up in unsigned integer arithmetic.
737 L = (ax + wx - 1) / wx;
738 if ((ix % w) == 0 && P != 0)
// The L * wx term is scaled by (numSlices - 1), so it vanishes when numSlices == 1.
742 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
744 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
745 pixels = Delay * 3 * pixelsPerClock;
// dscComputeDelay - delay computation for the DSC block ("dscc"), chosen by pixelFormat.
//
// Delay starts at 0. The stage comments in each branch name the components
// involved: input deserializer, input cdc fifo, cdc uncertainty, output cdc
// fifo, output serializer.
// NOTE(review): the statements that add each stage's contribution to Delay,
// the use of the Output parameter, and the return statement are not visible
// in this listing - confirm against the full file.
749 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
751 unsigned int Delay = 0;
// dm_420 path: several stages carry an extra "every other cycle" note.
753 if (pixelFormat == dm_420) {
758 // dscc - input deserializer
760 // dscc gets pixels every other cycle
762 // dscc - input cdc fifo
764 // dscc gets pixels every other cycle
766 // dscc - cdc uncertainty
768 // dscc - output cdc fifo
770 // dscc gets pixels every other cycle
772 // dscc - cdc uncertainty
774 // dscc - output serializer
// dm_n422 path: same stage list as above, without the "every other cycle" notes.
778 } else if (pixelFormat == dm_n422) {
783 // dscc - input deserializer
785 // dscc - input cdc fifo
787 // dscc - cdc uncertainty
789 // dscc - output cdc fifo
791 // dscc - cdc uncertainty
793 // dscc - output serializer
// Presumably the fall-through path for all remaining formats (the `else` line
// is not visible here). The last two stage comments are listed in the
// opposite order from the branches above.
802 // dscc - input deserializer
804 // dscc - input cdc fifo
806 // dscc - cdc uncertainty
808 // dscc - output cdc fifo
810 // dscc - output serializer
812 // dscc - cdc uncertainty
821 static bool CalculatePrefetchSchedule(
822 struct display_mode_lib *mode_lib,
823 double HostVMInefficiencyFactor,
825 unsigned int DSCDelay,
826 double DPPCLKDelaySubtotalPlusCNVCFormater,
827 double DPPCLKDelaySCL,
828 double DPPCLKDelaySCLLBOnly,
829 double DPPCLKDelayCNVCCursor,
830 double DISPCLKDelaySubtotal,
831 unsigned int DPP_RECOUT_WIDTH,
832 enum output_format_class OutputFormat,
833 unsigned int MaxInterDCNTileRepeaters,
834 unsigned int VStartup,
835 unsigned int MaxVStartup,
836 unsigned int GPUVMPageTableLevels,
839 unsigned int HostVMMaxNonCachedPageTableLevels,
840 double HostVMMinPageSize,
841 bool DynamicMetadataEnable,
842 bool DynamicMetadataVMEnabled,
843 int DynamicMetadataLinesBeforeActiveRequired,
844 unsigned int DynamicMetadataTransmittedBytes,
845 double UrgentLatency,
846 double UrgentExtraLatency,
848 unsigned int PDEAndMetaPTEBytesFrame,
849 unsigned int MetaRowByte,
850 unsigned int PixelPTEBytesPerRow,
851 double PrefetchSourceLinesY,
852 unsigned int SwathWidthY,
853 double VInitPreFillY,
854 unsigned int MaxNumSwathY,
855 double PrefetchSourceLinesC,
856 unsigned int SwathWidthC,
857 double VInitPreFillC,
858 unsigned int MaxNumSwathC,
859 int swath_width_luma_ub,
860 int swath_width_chroma_ub,
861 unsigned int SwathHeightY,
862 unsigned int SwathHeightC,
864 double *DSTXAfterScaler,
865 double *DSTYAfterScaler,
866 double *DestinationLinesForPrefetch,
867 double *PrefetchBandwidth,
868 double *DestinationLinesToRequestVMInVBlank,
869 double *DestinationLinesToRequestRowInVBlank,
870 double *VRatioPrefetchY,
871 double *VRatioPrefetchC,
872 double *RequiredPrefetchPixDataBWLuma,
873 double *RequiredPrefetchPixDataBWChroma,
874 bool *NotEnoughTimeForDynamicMetadata,
876 double *prefetch_vmrow_bw,
880 int *VUpdateOffsetPix,
881 double *VUpdateWidthPix,
882 double *VReadyOffsetPix)
884 bool MyError = false;
885 unsigned int DPPCycles, DISPCLKCycles;
886 double DSTTotalPixelsAfterScaler;
888 double dst_y_prefetch_equ;
890 double prefetch_bw_oto;
891 double prefetch_bw_pr;
894 double Tvm_oto_lines;
895 double Tr0_oto_lines;
896 double dst_y_prefetch_oto;
897 double TimeForFetchingMetaPTE = 0;
898 double TimeForFetchingRowInVBlank = 0;
899 double LinesToRequestPrefetchPixelData = 0;
900 unsigned int HostVMDynamicLevelsTrips;
904 double Tvm_trips_rounded;
905 double Tr0_trips_rounded;
908 double prefetch_bw_equ;
914 double prefetch_sw_bytes;
917 int max_vratio_pre = 4;
923 if (GPUVMEnable == true && HostVMEnable == true) {
924 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
926 HostVMDynamicLevelsTrips = 0;
928 #ifdef __DML_VBA_DEBUG__
929 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
931 CalculateVupdateAndDynamicMetadataParameters(
932 MaxInterDCNTileRepeaters,
935 myPipe->DCFCLKDeepSleep,
939 DynamicMetadataTransmittedBytes,
940 DynamicMetadataLinesBeforeActiveRequired,
941 myPipe->InterlaceEnable,
942 myPipe->ProgressiveToInterlaceUnitInOPP,
951 LineTime = myPipe->HTotal / myPipe->PixelClock;
952 trip_to_mem = UrgentLatency;
953 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
955 #ifdef __DML_VBA_ALLOW_DELTA__
956 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
958 if (DynamicMetadataVMEnabled == true) {
960 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
962 *Tdmdl = TWait + UrgentExtraLatency;
965 #ifdef __DML_VBA_ALLOW_DELTA__
966 if (DynamicMetadataEnable == false) {
971 if (DynamicMetadataEnable == true) {
972 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
973 *NotEnoughTimeForDynamicMetadata = true;
974 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
975 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
976 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
977 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
978 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
980 *NotEnoughTimeForDynamicMetadata = false;
983 *NotEnoughTimeForDynamicMetadata = false;
986 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
988 if (myPipe->ScalerEnabled)
989 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
991 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
993 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
995 DISPCLKCycles = DISPCLKDelaySubtotal;
997 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1000 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1002 #ifdef __DML_VBA_DEBUG__
1003 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1004 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1005 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1006 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1007 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1008 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1009 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1010 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1013 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1015 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1016 *DSTYAfterScaler = 1;
1018 *DSTYAfterScaler = 0;
1020 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1021 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1022 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1024 #ifdef __DML_VBA_DEBUG__
1025 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1030 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1031 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1032 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1034 #ifdef __DML_VBA_ALLOW_DELTA__
1035 if (!myPipe->DCCEnable) {
1037 Tr0_trips_rounded = 0.0;
1043 Tvm_trips_rounded = 0.0;
1047 if (GPUVMPageTableLevels >= 3) {
1048 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1052 } else if (!myPipe->DCCEnable) {
1055 *Tno_bw = LineTime / 4;
1058 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1059 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1061 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1063 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
1064 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1065 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1066 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
1067 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1069 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1070 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1071 Tsw_oto = Lsw_oto * LineTime;
1073 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
1075 #ifdef __DML_VBA_DEBUG__
1076 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1077 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1078 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1079 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1080 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1081 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1084 if (GPUVMEnable == true)
1085 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1087 Tvm_oto = LineTime / 4.0;
1089 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1090 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1094 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1097 #ifdef __DML_VBA_DEBUG__
1098 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1099 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1100 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1101 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1102 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1103 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1104 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1105 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1106 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1109 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1110 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1111 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1112 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1113 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1114 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1116 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1118 if (prefetch_sw_bytes < dep_bytes)
1119 prefetch_sw_bytes = 2 * dep_bytes;
1121 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1122 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1123 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1124 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1125 dml_print("DML: LineTime: %f\n", LineTime);
1126 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1128 dml_print("DML: LineTime: %f\n", LineTime);
1129 dml_print("DML: VStartup: %d\n", VStartup);
1130 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1131 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1132 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1133 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1134 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1135 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1136 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1137 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1138 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1139 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1140 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1142 *PrefetchBandwidth = 0;
1143 *DestinationLinesToRequestVMInVBlank = 0;
1144 *DestinationLinesToRequestRowInVBlank = 0;
1145 *VRatioPrefetchY = 0;
1146 *VRatioPrefetchC = 0;
1147 *RequiredPrefetchPixDataBWLuma = 0;
1148 if (dst_y_prefetch_equ > 1) {
1149 double PrefetchBandwidth1;
1150 double PrefetchBandwidth2;
1151 double PrefetchBandwidth3;
1152 double PrefetchBandwidth4;
1154 if (Tpre_rounded - *Tno_bw > 0) {
1155 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1156 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1157 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1159 PrefetchBandwidth1 = 0;
1162 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1163 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1164 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1167 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1168 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1170 PrefetchBandwidth2 = 0;
1172 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1173 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1174 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1175 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1177 PrefetchBandwidth3 = 0;
1180 #ifdef __DML_VBA_DEBUG__
1181 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1182 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1183 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1185 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1186 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1187 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1190 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1191 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1193 PrefetchBandwidth4 = 0;
1200 if (PrefetchBandwidth1 > 0) {
1201 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1202 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1211 if (PrefetchBandwidth2 > 0) {
1212 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1213 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1222 if (PrefetchBandwidth3 > 0) {
1223 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1224 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1234 prefetch_bw_equ = PrefetchBandwidth1;
1235 } else if (Case2OK) {
1236 prefetch_bw_equ = PrefetchBandwidth2;
1237 } else if (Case3OK) {
1238 prefetch_bw_equ = PrefetchBandwidth3;
1240 prefetch_bw_equ = PrefetchBandwidth4;
1243 #ifdef __DML_VBA_DEBUG__
1244 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1245 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1246 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1247 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1250 if (prefetch_bw_equ > 0) {
1251 if (GPUVMEnable == true) {
1252 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1254 Tvm_equ = LineTime / 4;
1257 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1259 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1261 (LineTime - Tvm_equ) / 2,
1264 Tr0_equ = (LineTime - Tvm_equ) / 2;
1269 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1273 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1274 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1275 TimeForFetchingMetaPTE = Tvm_oto;
1276 TimeForFetchingRowInVBlank = Tr0_oto;
1277 *PrefetchBandwidth = prefetch_bw_oto;
1279 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1280 TimeForFetchingMetaPTE = Tvm_equ;
1281 TimeForFetchingRowInVBlank = Tr0_equ;
1282 *PrefetchBandwidth = prefetch_bw_equ;
1285 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1287 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1289 #ifdef __DML_VBA_ALLOW_DELTA__
1290 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1291 // See note above dated 5/30/2018
1292 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1293 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1295 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1298 #ifdef __DML_VBA_DEBUG__
1299 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1300 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1301 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1302 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1303 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1304 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1305 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1308 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1310 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1311 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1312 #ifdef __DML_VBA_DEBUG__
1313 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1314 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1315 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1317 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1318 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1319 *VRatioPrefetchY = dml_max(
1320 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1321 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1322 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1325 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1326 *VRatioPrefetchY = 0;
1328 #ifdef __DML_VBA_DEBUG__
1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1330 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1331 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1335 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1336 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1338 #ifdef __DML_VBA_DEBUG__
1339 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1340 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1341 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1343 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1344 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1345 *VRatioPrefetchC = dml_max(
1347 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1348 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1351 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1352 *VRatioPrefetchC = 0;
1354 #ifdef __DML_VBA_DEBUG__
1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1356 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1357 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1361 #ifdef __DML_VBA_DEBUG__
1362 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1363 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1364 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1367 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1369 #ifdef __DML_VBA_DEBUG__
1370 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1373 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1377 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1378 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1379 *VRatioPrefetchY = 0;
1380 *VRatioPrefetchC = 0;
1381 *RequiredPrefetchPixDataBWLuma = 0;
1382 *RequiredPrefetchPixDataBWChroma = 0;
1386 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1387 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1388 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1389 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1391 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1392 (double) LinesToRequestPrefetchPixelData * LineTime);
1393 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1394 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1395 (double) myPipe->HTotal)) * LineTime);
1396 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1397 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1398 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1399 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1400 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1404 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1408 double prefetch_vm_bw;
1409 double prefetch_row_bw;
1411 if (PDEAndMetaPTEBytesFrame == 0) {
1413 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1414 #ifdef __DML_VBA_DEBUG__
1415 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1416 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1417 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1418 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1420 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1421 #ifdef __DML_VBA_DEBUG__
1422 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1427 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1430 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1431 prefetch_row_bw = 0;
1432 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1433 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1435 #ifdef __DML_VBA_DEBUG__
1436 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1437 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1438 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1439 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1442 prefetch_row_bw = 0;
1444 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1447 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1451 *PrefetchBandwidth = 0;
1452 TimeForFetchingMetaPTE = 0;
1453 TimeForFetchingRowInVBlank = 0;
1454 *DestinationLinesToRequestVMInVBlank = 0;
1455 *DestinationLinesToRequestRowInVBlank = 0;
1456 *DestinationLinesForPrefetch = 0;
1457 LinesToRequestPrefetchPixelData = 0;
1458 *VRatioPrefetchY = 0;
1459 *VRatioPrefetchC = 0;
1460 *RequiredPrefetchPixDataBWLuma = 0;
1461 *RequiredPrefetchPixDataBWChroma = 0;
/*
 * Return the value of the form (4 * VCOSpeed) / k that sits at or above
 * Clock, where k is (4 * VCOSpeed) / Clock passed through dml_floor(..., 1).
 *
 * Assumes dml_floor(x, 1) yields the whole number at or below x - confirm
 * against the helper's definition.  There is no guard for k == 0, i.e. for
 * Clock greater than 4 * VCOSpeed.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * Return the value of the form (4 * VCOSpeed) / k that sits at or below
 * Clock, where k is (4 * VCOSpeed) / Clock passed through dml_ceil(..., 1).
 *
 * Assumes dml_ceil(x, 1) yields the whole number at or above x - confirm
 * against the helper's definition.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4.0;
	const double divider = dml_ceil(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
/*
 * CalculateDCCConfiguration - pick a request type for the luma and chroma
 * planes and report the matching block sizes through the output pointers.
 *
 * Visible flow:
 *   1. Derive detile-buffer viewport limits from DETBufferSize, the 256-byte
 *      request heights and the bytes per pixel, then cap them with the fixed
 *      MAS_vp_* limits.
 *   2. Clamp the luma surface size to those limits and compute "full swath"
 *      byte counts for horizontal (horz) and vertical (vert) access.
 *   3. Compare the swath byte counts with DETBufferSize to set the req128_*
 *      flags, and derive the segment_order_*_contiguous_* flags from bytes
 *      per pixel and TilingFormat.
 *   4. Map the flags to REQ_256Bytes / REQ_128BytesContiguous /
 *      REQ_128BytesNonContiguous per plane and write
 *      MaxUncompressedBlock / MaxCompressedBlock / IndependentBlock as
 *      256/256/0, 256/128/128 or 256/64/64 respectively.
 *   5. Zero the chroma outputs when DCCEnabled != true or BytePerPixelC == 0,
 *      and the luma outputs when DCCEnabled != true.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced by any visible statement.
 *
 * NOTE(review): DCCEnabled, yuv420, horz_div_l/c, vert_div_l/c and
 * swath_buf_size are used below but their declarations are not visible in
 * this span - confirm against the complete definition.
 */
1477 static void CalculateDCCConfiguration(
1479 bool DCCProgrammingAssumesScanDirectionUnknown,
1480 enum source_format_class SourcePixelFormat,
1481 unsigned int SurfaceWidthLuma,
1482 unsigned int SurfaceWidthChroma,
1483 unsigned int SurfaceHeightLuma,
1484 unsigned int SurfaceHeightChroma,
1485 double DETBufferSize,
1486 unsigned int RequestHeight256ByteLuma,
1487 unsigned int RequestHeight256ByteChroma,
1488 enum dm_swizzle_mode TilingFormat,
1489 unsigned int BytePerPixelY,
1490 unsigned int BytePerPixelC,
1491 double BytePerPixelDETY,
1492 double BytePerPixelDETC,
1493 enum scan_direction_class ScanOrientation,
1494 unsigned int *MaxUncompressedBlockLuma,
1495 unsigned int *MaxUncompressedBlockChroma,
1496 unsigned int *MaxCompressedBlockLuma,
1497 unsigned int *MaxCompressedBlockChroma,
1498 unsigned int *IndependentBlockLuma,
1499 unsigned int *IndependentBlockChroma)
// Viewport limits derived from the detile buffer size (computed as doubles).
1508 double detile_buf_vp_horz_limit;
1509 double detile_buf_vp_vert_limit;
1511 int MAS_vp_horz_limit;
1512 int MAS_vp_vert_limit;
1513 int max_vp_horz_width;
1514 int max_vp_vert_height;
1515 int eff_surf_width_l;
1516 int eff_surf_width_c;
1517 int eff_surf_height_l;
1518 int eff_surf_height_c;
// Naming used below: horz/vert = access direction, _l/_c = luma/chroma.
1520 int full_swath_bytes_horz_wc_l;
1521 int full_swath_bytes_horz_wc_c;
1522 int full_swath_bytes_vert_wc_l;
1523 int full_swath_bytes_vert_wc_c;
1524 int req128_horz_wc_l;
1525 int req128_horz_wc_c;
1526 int req128_vert_wc_l;
1527 int req128_vert_wc_c;
1528 int segment_order_horz_contiguous_luma;
1529 int segment_order_horz_contiguous_chroma;
1530 int segment_order_vert_contiguous_luma;
1531 int segment_order_vert_contiguous_chroma;
// Enumerators of the local RequestType used for RequestLuma/RequestChroma.
1534 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1536 RequestType RequestLuma;
1537 RequestType RequestChroma;
// yuv420 is 1 for dm_420_8 / dm_420_10 / dm_420_12 and 0 for every other format.
1539 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
// Conditions on bytes per pixel and the 64KB "_s" tiling formats.
// NOTE(review): the statements these conditions guard are not visible here;
// presumably they set horz_div_* / vert_div_* used in the formulas below - TODO confirm.
1545 if (BytePerPixelY == 1)
1547 if (BytePerPixelC == 1)
1549 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1551 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
// No chroma plane: half of DETBufferSize minus 2 * 256 bytes, luma terms only.
1554 if (BytePerPixelC == 0) {
1555 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1556 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1557 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
// Two-plane form: 2 * 2 * 256 bytes are subtracted and the chroma term,
// divided by (1 + yuv420), is added to each denominator.
1559 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1560 detile_buf_vp_horz_limit = (double) swath_buf_size
1561 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1562 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1563 detile_buf_vp_vert_limit = (double) swath_buf_size
1564 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
// dm_420_10 scales both limits by 1.5.
1567 if (SourcePixelFormat == dm_420_10) {
1568 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1569 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
// Subtract 1, then dml_floor with a second argument of 16
// (presumably the rounding granularity - confirm against dml_floor).
1572 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1573 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
// Fixed caps: horizontal 3840 for dm_rgbe_alpha else 5760; vertical 2880 with chroma else 5760.
1575 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1576 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1577 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1578 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
// Effective luma size is the surface size clamped to the limits above;
// the chroma size is the luma size divided by (1 + yuv420).
1579 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1580 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1581 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1582 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
// Full-swath byte counts per direction; chroma counts are 0 when BytePerPixelC is 0.
1584 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1585 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1586 if (BytePerPixelC > 0) {
1587 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1588 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1590 full_swath_bytes_horz_wc_c = 0;
1591 full_swath_bytes_vert_wc_c = 0;
// dm_420_10: scale by 2 / 3 (integer arithmetic on int operands), then dml_ceil with 256.
1594 if (SourcePixelFormat == dm_420_10) {
1595 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1596 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1597 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1598 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
// Horizontal decision: both flags 0 when two swaths of each plane fit in
// DETBufferSize; otherwise flag only chroma (luma < 1.5 * chroma) or only
// luma (luma >= 1.5 * chroma) when that reduced sum fits; otherwise both.
1601 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1602 req128_horz_wc_l = 0;
1603 req128_horz_wc_c = 0;
1604 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1605 req128_horz_wc_l = 0;
1606 req128_horz_wc_c = 1;
1607 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1608 req128_horz_wc_l = 1;
1609 req128_horz_wc_c = 0;
1611 req128_horz_wc_l = 1;
1612 req128_horz_wc_c = 1;
// Vertical decision: same ladder using the vert swath byte counts.
1615 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1616 req128_vert_wc_l = 0;
1617 req128_vert_wc_c = 0;
1618 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1619 req128_vert_wc_l = 0;
1620 req128_vert_wc_c = 1;
1621 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1622 req128_vert_wc_l = 1;
1623 req128_vert_wc_c = 0;
1625 req128_vert_wc_l = 1;
1626 req128_vert_wc_c = 1;
// segment_order_*_contiguous_* flags: a 0/1 value chosen per plane and
// direction from bytes per pixel and TilingFormat.
1629 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1630 segment_order_horz_contiguous_luma = 0;
1632 segment_order_horz_contiguous_luma = 1;
1634 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1635 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1636 segment_order_vert_contiguous_luma = 0;
1638 segment_order_vert_contiguous_luma = 1;
1640 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1641 segment_order_horz_contiguous_chroma = 0;
1643 segment_order_horz_contiguous_chroma = 1;
1645 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1646 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1647 segment_order_vert_contiguous_chroma = 0;
1649 segment_order_vert_contiguous_chroma = 1;
// Scan direction unknown: 256-byte requests only if neither direction needs
// 128-byte requests; non-contiguous if any direction that needs them has a
// segment-order flag of 0.
1652 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1653 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1654 RequestLuma = REQ_256Bytes;
1655 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1656 RequestLuma = REQ_128BytesNonContiguous;
1658 RequestLuma = REQ_128BytesContiguous;
1660 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1661 RequestChroma = REQ_256Bytes;
1662 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1663 RequestChroma = REQ_128BytesNonContiguous;
1665 RequestChroma = REQ_128BytesContiguous;
// ScanOrientation other than dm_vert: decide from the horz flags only.
1667 } else if (ScanOrientation != dm_vert) {
1668 if (req128_horz_wc_l == 0) {
1669 RequestLuma = REQ_256Bytes;
1670 } else if (segment_order_horz_contiguous_luma == 0) {
1671 RequestLuma = REQ_128BytesNonContiguous;
1673 RequestLuma = REQ_128BytesContiguous;
1675 if (req128_horz_wc_c == 0) {
1676 RequestChroma = REQ_256Bytes;
1677 } else if (segment_order_horz_contiguous_chroma == 0) {
1678 RequestChroma = REQ_128BytesNonContiguous;
1680 RequestChroma = REQ_128BytesContiguous;
// Decide from the vert flags only (presumably the remaining dm_vert case;
// the branch header is not visible here - TODO confirm).
1683 if (req128_vert_wc_l == 0) {
1684 RequestLuma = REQ_256Bytes;
1685 } else if (segment_order_vert_contiguous_luma == 0) {
1686 RequestLuma = REQ_128BytesNonContiguous;
1688 RequestLuma = REQ_128BytesContiguous;
1690 if (req128_vert_wc_c == 0) {
1691 RequestChroma = REQ_256Bytes;
1692 } else if (segment_order_vert_contiguous_chroma == 0) {
1693 RequestChroma = REQ_128BytesNonContiguous;
1695 RequestChroma = REQ_128BytesContiguous;
// Luma outputs: 256/256/0 for REQ_256Bytes, 256/128/128 for
// REQ_128BytesContiguous, 256/64/64 in the remaining assignments.
1699 if (RequestLuma == REQ_256Bytes) {
1700 *MaxUncompressedBlockLuma = 256;
1701 *MaxCompressedBlockLuma = 256;
1702 *IndependentBlockLuma = 0;
1703 } else if (RequestLuma == REQ_128BytesContiguous) {
1704 *MaxUncompressedBlockLuma = 256;
1705 *MaxCompressedBlockLuma = 128;
1706 *IndependentBlockLuma = 128;
1708 *MaxUncompressedBlockLuma = 256;
1709 *MaxCompressedBlockLuma = 64;
1710 *IndependentBlockLuma = 64;
// Chroma outputs: same mapping as luma.
1713 if (RequestChroma == REQ_256Bytes) {
1714 *MaxUncompressedBlockChroma = 256;
1715 *MaxCompressedBlockChroma = 256;
1716 *IndependentBlockChroma = 0;
1717 } else if (RequestChroma == REQ_128BytesContiguous) {
1718 *MaxUncompressedBlockChroma = 256;
1719 *MaxCompressedBlockChroma = 128;
1720 *IndependentBlockChroma = 128;
1722 *MaxUncompressedBlockChroma = 256;
1723 *MaxCompressedBlockChroma = 64;
1724 *IndependentBlockChroma = 64;
// Without DCC, or without a chroma plane, the chroma outputs are all 0.
1727 if (DCCEnabled != true || BytePerPixelC == 0) {
1728 *MaxUncompressedBlockChroma = 0;
1729 *MaxCompressedBlockChroma = 0;
1730 *IndependentBlockChroma = 0;
// Without DCC the luma outputs are all 0 as well.
1733 if (DCCEnabled != true) {
1734 *MaxUncompressedBlockLuma = 0;
1735 *MaxCompressedBlockLuma = 0;
1736 *IndependentBlockLuma = 0;
/*
 * CalculatePrefetchSourceLines - compute the initial pre-fill value and the
 * swath counts, and return the source line count they imply.
 *
 * Outputs written through pointers:
 *   *VInitPreFill - (VRatio + vtaps + 1 [+ Interlace * 0.5 * VRatio]) / 2
 *                   passed through dml_floor(..., 1)
 *   *MaxNumSwath  - swath count from dml_ceil(), formula depends on
 *                   mode_lib->vba.IgnoreViewportPositioning
 * Return value: *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * NOTE(review): VRatio, vtaps and Interlace are used below but do not appear
 * in the visible parameter list - confirm against the complete signature.
 */
1740 static double CalculatePrefetchSourceLines(
1741 struct display_mode_lib *mode_lib,
1745 bool ProgressiveToInterlaceUnitInOPP,
1746 unsigned int SwathHeight,
1747 unsigned int ViewportYStart,
1748 double *VInitPreFill,
1749 unsigned int *MaxNumSwath)
1751 struct vba_vars_st *v = &mode_lib->vba;
1752 unsigned int MaxPartialSwath;
// Initial pre-fill; the second form adds Interlace * 0.5 * VRatio to the numerator.
1754 if (ProgressiveToInterlaceUnitInOPP)
1755 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1757 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
// Viewport positioning honoured: swath count from (VInitPreFill - 1) plus one,
// partial swath from (VInitPreFill - 2) modulo SwathHeight, kept at 1 or more
// by dml_max(1U, ...).
1759 if (!v->IgnoreViewportPositioning) {
1761 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1763 if (*VInitPreFill > 1.0)
1764 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
// SwathHeight is added before the modulo in this variant.
1766 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1767 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
// The statements below warn when ViewportYStart is non-zero and use
// VInitPreFill directly (offset of 1 instead of 2, no dml_max clamp).
// Presumably this is the IgnoreViewportPositioning branch; the branch header
// is not visible here - TODO confirm.
1771 if (ViewportYStart != 0)
1772 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1774 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1776 if (*VInitPreFill > 1.0)
1777 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1779 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
// Debug-only dump of the inputs and results (compiled when __DML_VBA_DEBUG__ is defined).
1782 #ifdef __DML_VBA_DEBUG__
1783 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1784 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1785 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1786 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1787 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1788 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1789 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1790 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1791 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
// Whole swaths plus the partial swath, returned as a double.
1793 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1796 static unsigned int CalculateVMAndRowBytes(
1797 struct display_mode_lib *mode_lib,
1799 unsigned int BlockHeight256Bytes,
1800 unsigned int BlockWidth256Bytes,
1801 enum source_format_class SourcePixelFormat,
1802 unsigned int SurfaceTiling,
1803 unsigned int BytePerPixel,
1804 enum scan_direction_class ScanDirection,
1805 unsigned int SwathWidth,
1806 unsigned int ViewportHeight,
1809 unsigned int HostVMMaxNonCachedPageTableLevels,
1810 unsigned int GPUVMMinPageSize,
1811 unsigned int HostVMMinPageSize,
1812 unsigned int PTEBufferSizeInRequests,
1814 unsigned int DCCMetaPitch,
1815 unsigned int *MacroTileWidth,
1816 unsigned int *MetaRowByte,
1817 unsigned int *PixelPTEBytesPerRow,
1818 bool *PTEBufferSizeNotExceeded,
1819 int *dpte_row_width_ub,
1820 unsigned int *dpte_row_height,
1821 unsigned int *MetaRequestWidth,
1822 unsigned int *MetaRequestHeight,
1823 unsigned int *meta_row_width,
1824 unsigned int *meta_row_height,
1825 int *vm_group_bytes,
1826 unsigned int *dpte_group_bytes,
1827 unsigned int *PixelPTEReqWidth,
1828 unsigned int *PixelPTEReqHeight,
1829 unsigned int *PTERequestSize,
1830 int *DPDE0BytesFrame,
1831 int *MetaPTEBytesFrame)
1833 struct vba_vars_st *v = &mode_lib->vba;
1834 unsigned int MPDEBytesFrame;
1835 unsigned int DCCMetaSurfaceBytes;
1836 unsigned int MacroTileSizeBytes;
1837 unsigned int MacroTileHeight;
1838 unsigned int ExtraDPDEBytesFrame;
1839 unsigned int PDEAndMetaPTEBytesFrame;
1840 unsigned int PixelPTEReqHeightPTEs = 0;
1841 unsigned int HostVMDynamicLevels = 0;
1842 double FractionOfPTEReturnDrop;
1844 if (GPUVMEnable == true && HostVMEnable == true) {
1845 if (HostVMMinPageSize < 2048) {
1846 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1847 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1848 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1850 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1854 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1855 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1856 if (ScanDirection != dm_vert) {
1857 *meta_row_height = *MetaRequestHeight;
1858 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1859 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1861 *meta_row_height = *MetaRequestWidth;
1862 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1863 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1865 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1866 if (GPUVMEnable == true) {
1867 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1868 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1870 *MetaPTEBytesFrame = 0;
1874 if (DCCEnable != true) {
1875 *MetaPTEBytesFrame = 0;
1880 if (SurfaceTiling == dm_sw_linear) {
1881 MacroTileSizeBytes = 256;
1882 MacroTileHeight = BlockHeight256Bytes;
1884 MacroTileSizeBytes = 65536;
1885 MacroTileHeight = 16 * BlockHeight256Bytes;
1887 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1889 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1890 if (ScanDirection != dm_vert) {
1891 *DPDE0BytesFrame = 64
1893 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1897 *DPDE0BytesFrame = 64
1899 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1903 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1905 *DPDE0BytesFrame = 0;
1906 ExtraDPDEBytesFrame = 0;
1909 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1911 #ifdef __DML_VBA_DEBUG__
1912 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1913 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1914 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1915 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1916 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1919 if (HostVMEnable == true) {
1920 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1922 #ifdef __DML_VBA_DEBUG__
1923 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1926 if (SurfaceTiling == dm_sw_linear) {
1927 PixelPTEReqHeightPTEs = 1;
1928 *PixelPTEReqHeight = 1;
1929 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1930 *PTERequestSize = 64;
1931 FractionOfPTEReturnDrop = 0;
1932 } else if (MacroTileSizeBytes == 4096) {
1933 PixelPTEReqHeightPTEs = 1;
1934 *PixelPTEReqHeight = MacroTileHeight;
1935 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1936 *PTERequestSize = 64;
1937 if (ScanDirection != dm_vert)
1938 FractionOfPTEReturnDrop = 0;
1940 FractionOfPTEReturnDrop = 7 / 8;
1941 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1942 PixelPTEReqHeightPTEs = 16;
1943 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1944 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1945 *PTERequestSize = 128;
1946 FractionOfPTEReturnDrop = 0;
1948 PixelPTEReqHeightPTEs = 1;
1949 *PixelPTEReqHeight = MacroTileHeight;
1950 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1951 *PTERequestSize = 64;
1952 FractionOfPTEReturnDrop = 0;
1955 if (SurfaceTiling == dm_sw_linear) {
1956 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1957 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1958 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1959 } else if (ScanDirection != dm_vert) {
1960 *dpte_row_height = *PixelPTEReqHeight;
1961 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1962 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1964 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1965 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1966 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1969 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1970 *PTEBufferSizeNotExceeded = true;
1972 *PTEBufferSizeNotExceeded = false;
1975 if (GPUVMEnable != true) {
1976 *PixelPTEBytesPerRow = 0;
1977 *PTEBufferSizeNotExceeded = true;
1980 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1982 if (HostVMEnable == true) {
1983 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1986 if (HostVMEnable == true) {
1987 *vm_group_bytes = 512;
1988 *dpte_group_bytes = 512;
1989 } else if (GPUVMEnable == true) {
1990 *vm_group_bytes = 2048;
1991 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1992 *dpte_group_bytes = 512;
1994 *dpte_group_bytes = 2048;
1997 *vm_group_bytes = 0;
1998 *dpte_group_bytes = 0;
2000 return PDEAndMetaPTEBytesFrame;
2003 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2005 struct vba_vars_st *v = &mode_lib->vba;
2007 double HostVMInefficiencyFactor = 1.0;
2008 bool NoChromaPlanes = true;
2010 double VMDataOnlyReturnBW;
2011 double MaxTotalRDBandwidth = 0;
2012 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2014 v->WritebackDISPCLK = 0.0;
2015 v->DISPCLKWithRamping = 0;
2016 v->DISPCLKWithoutRamping = 0;
2017 v->GlobalDPPCLK = 0.0;
2018 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2020 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2021 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2022 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2023 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2024 if (v->HostVMEnable != true) {
2025 v->ReturnBW = dml_min(
2026 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2027 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2029 v->ReturnBW = dml_min(
2030 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2031 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2034 /* End DAL custom code */
2036 // DISPCLK and DPPCLK Calculation
2038 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2039 if (v->WritebackEnable[k]) {
2040 v->WritebackDISPCLK = dml_max(
2041 v->WritebackDISPCLK,
2042 dml31_CalculateWriteBackDISPCLK(
2043 v->WritebackPixelFormat[k],
2045 v->WritebackHRatio[k],
2046 v->WritebackVRatio[k],
2047 v->WritebackHTaps[k],
2048 v->WritebackVTaps[k],
2049 v->WritebackSourceWidth[k],
2050 v->WritebackDestinationWidth[k],
2052 v->WritebackLineBufferSize));
2056 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2057 if (v->HRatio[k] > 1) {
2058 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2059 v->MaxDCHUBToPSCLThroughput,
2060 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2062 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2065 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2067 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2068 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2070 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2071 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2074 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2075 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2076 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2077 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2079 if (v->HRatioChroma[k] > 1) {
2080 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2081 v->MaxDCHUBToPSCLThroughput,
2082 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2084 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2086 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2088 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2089 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2092 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2093 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2096 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2100 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2101 if (v->BlendingAndTiming[k] != k)
2103 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2104 v->DISPCLKWithRamping = dml_max(
2105 v->DISPCLKWithRamping,
2106 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2107 * (1 + v->DISPCLKRampingMargin / 100));
2108 v->DISPCLKWithoutRamping = dml_max(
2109 v->DISPCLKWithoutRamping,
2110 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2111 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2112 v->DISPCLKWithRamping = dml_max(
2113 v->DISPCLKWithRamping,
2114 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2115 * (1 + v->DISPCLKRampingMargin / 100));
2116 v->DISPCLKWithoutRamping = dml_max(
2117 v->DISPCLKWithoutRamping,
2118 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2120 v->DISPCLKWithRamping = dml_max(
2121 v->DISPCLKWithRamping,
2122 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2123 v->DISPCLKWithoutRamping = dml_max(
2124 v->DISPCLKWithoutRamping,
2125 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2129 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2130 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2132 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2133 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2134 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2135 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2136 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2137 v->DISPCLKDPPCLKVCOSpeed);
2138 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2139 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2140 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2141 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2143 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2145 v->DISPCLK = v->DISPCLK_calculated;
2146 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2148 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2149 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2150 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2152 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2153 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2154 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2155 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2158 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2159 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2162 // Urgent and B P-State/DRAM Clock Change Watermark
2163 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2164 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2166 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2167 CalculateBytePerPixelAnd256BBlockSizes(
2168 v->SourcePixelFormat[k],
2169 v->SurfaceTiling[k],
2170 &v->BytePerPixelY[k],
2171 &v->BytePerPixelC[k],
2172 &v->BytePerPixelDETY[k],
2173 &v->BytePerPixelDETC[k],
2174 &v->BlockHeight256BytesY[k],
2175 &v->BlockHeight256BytesC[k],
2176 &v->BlockWidth256BytesY[k],
2177 &v->BlockWidth256BytesC[k]);
2180 CalculateSwathWidth(
2182 v->NumberOfActivePlanes,
2183 v->SourcePixelFormat,
2191 v->ODMCombineEnabled,
2194 v->BlockHeight256BytesY,
2195 v->BlockHeight256BytesC,
2196 v->BlockWidth256BytesY,
2197 v->BlockWidth256BytesC,
2198 v->BlendingAndTiming,
2202 v->SwathWidthSingleDPPY,
2203 v->SwathWidthSingleDPPC,
2208 v->swath_width_luma_ub,
2209 v->swath_width_chroma_ub);
2211 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2212 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2214 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2215 * v->VRatioChroma[k];
2216 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2219 // DCFCLK Deep Sleep
2220 CalculateDCFCLKDeepSleep(
2222 v->NumberOfActivePlanes,
2233 v->PSCL_THROUGHPUT_LUMA,
2234 v->PSCL_THROUGHPUT_CHROMA,
2236 v->ReadBandwidthPlaneLuma,
2237 v->ReadBandwidthPlaneChroma,
2239 &v->DCFCLKDeepSleep);
2242 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2243 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2244 v->DSCCLK_calculated[k] = 0.0;
2246 if (v->OutputFormat[k] == dm_420)
2247 v->DSCFormatFactor = 2;
2248 else if (v->OutputFormat[k] == dm_444)
2249 v->DSCFormatFactor = 1;
2250 else if (v->OutputFormat[k] == dm_n422)
2251 v->DSCFormatFactor = 2;
2253 v->DSCFormatFactor = 1;
2254 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2255 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2256 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2257 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2258 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2259 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2261 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2262 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2267 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2268 double BPP = v->OutputBpp[k];
2270 if (v->DSCEnabled[k] && BPP != 0) {
2271 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2272 v->DSCDelay[k] = dscceComputeDelay(
2273 v->DSCInputBitPerComponent[k],
2275 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2276 v->NumberOfDSCSlices[k],
2278 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2279 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2281 * (dscceComputeDelay(
2282 v->DSCInputBitPerComponent[k],
2284 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2285 v->NumberOfDSCSlices[k] / 2.0,
2287 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2290 * (dscceComputeDelay(
2291 v->DSCInputBitPerComponent[k],
2293 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2294 v->NumberOfDSCSlices[k] / 4.0,
2296 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2298 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2304 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2305 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2306 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2307 v->DSCDelay[k] = v->DSCDelay[j];
2310 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2311 unsigned int PDEAndMetaPTEBytesFrameY;
2312 unsigned int PixelPTEBytesPerRowY;
2313 unsigned int MetaRowByteY;
2314 unsigned int MetaRowByteC;
2315 unsigned int PDEAndMetaPTEBytesFrameC;
2316 unsigned int PixelPTEBytesPerRowC;
2317 bool PTEBufferSizeNotExceededY;
2318 bool PTEBufferSizeNotExceededC;
2320 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2321 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2322 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2323 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2324 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2326 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2327 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2330 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2333 v->BlockHeight256BytesC[k],
2334 v->BlockWidth256BytesC[k],
2335 v->SourcePixelFormat[k],
2336 v->SurfaceTiling[k],
2337 v->BytePerPixelC[k],
2340 v->ViewportHeightChroma[k],
2343 v->HostVMMaxNonCachedPageTableLevels,
2344 v->GPUVMMinPageSize,
2345 v->HostVMMinPageSize,
2346 v->PTEBufferSizeInRequestsForChroma,
2348 v->DCCMetaPitchC[k],
2349 &v->MacroTileWidthC[k],
2351 &PixelPTEBytesPerRowC,
2352 &PTEBufferSizeNotExceededC,
2353 &v->dpte_row_width_chroma_ub[k],
2354 &v->dpte_row_height_chroma[k],
2355 &v->meta_req_width_chroma[k],
2356 &v->meta_req_height_chroma[k],
2357 &v->meta_row_width_chroma[k],
2358 &v->meta_row_height_chroma[k],
2361 &v->PixelPTEReqWidthC[k],
2362 &v->PixelPTEReqHeightC[k],
2363 &v->PTERequestSizeC[k],
2364 &v->dpde0_bytes_per_frame_ub_c[k],
2365 &v->meta_pte_bytes_per_frame_ub_c[k]);
2367 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2372 v->ProgressiveToInterlaceUnitInOPP,
2374 v->ViewportYStartC[k],
2375 &v->VInitPreFillC[k],
2376 &v->MaxNumSwathC[k]);
2378 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2379 v->PTEBufferSizeInRequestsForChroma = 0;
2380 PixelPTEBytesPerRowC = 0;
2381 PDEAndMetaPTEBytesFrameC = 0;
2383 v->MaxNumSwathC[k] = 0;
2384 v->PrefetchSourceLinesC[k] = 0;
2387 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2390 v->BlockHeight256BytesY[k],
2391 v->BlockWidth256BytesY[k],
2392 v->SourcePixelFormat[k],
2393 v->SurfaceTiling[k],
2394 v->BytePerPixelY[k],
2397 v->ViewportHeight[k],
2400 v->HostVMMaxNonCachedPageTableLevels,
2401 v->GPUVMMinPageSize,
2402 v->HostVMMinPageSize,
2403 v->PTEBufferSizeInRequestsForLuma,
2405 v->DCCMetaPitchY[k],
2406 &v->MacroTileWidthY[k],
2408 &PixelPTEBytesPerRowY,
2409 &PTEBufferSizeNotExceededY,
2410 &v->dpte_row_width_luma_ub[k],
2411 &v->dpte_row_height[k],
2412 &v->meta_req_width[k],
2413 &v->meta_req_height[k],
2414 &v->meta_row_width[k],
2415 &v->meta_row_height[k],
2416 &v->vm_group_bytes[k],
2417 &v->dpte_group_bytes[k],
2418 &v->PixelPTEReqWidthY[k],
2419 &v->PixelPTEReqHeightY[k],
2420 &v->PTERequestSizeY[k],
2421 &v->dpde0_bytes_per_frame_ub_l[k],
2422 &v->meta_pte_bytes_per_frame_ub_l[k]);
2424 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2429 v->ProgressiveToInterlaceUnitInOPP,
2431 v->ViewportYStartY[k],
2432 &v->VInitPreFillY[k],
2433 &v->MaxNumSwathY[k]);
2434 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2435 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2436 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2438 CalculateRowBandwidth(
2440 v->SourcePixelFormat[k],
2444 v->HTotal[k] / v->PixelClock[k],
2447 v->meta_row_height[k],
2448 v->meta_row_height_chroma[k],
2449 PixelPTEBytesPerRowY,
2450 PixelPTEBytesPerRowC,
2451 v->dpte_row_height[k],
2452 v->dpte_row_height_chroma[k],
2454 &v->dpte_row_bw[k]);
2457 v->TotalDCCActiveDPP = 0;
2458 v->TotalActiveDPP = 0;
2459 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2460 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2461 if (v->DCCEnable[k])
2462 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2463 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2464 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2465 NoChromaPlanes = false;
2468 ReorderBytes = v->NumberOfChannels
2470 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2471 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2472 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2474 VMDataOnlyReturnBW = dml_min(
2475 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2476 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2477 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2478 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2480 #ifdef __DML_VBA_DEBUG__
2481 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2482 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2483 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2484 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2485 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2486 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2487 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2488 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2489 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2490 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2491 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2494 if (v->GPUVMEnable && v->HostVMEnable)
2495 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2497 v->UrgentExtraLatency = CalculateExtraLatency(
2498 v->RoundTripPingLatencyCycles,
2502 v->PixelChunkSizeInKByte,
2503 v->TotalDCCActiveDPP,
2508 v->NumberOfActivePlanes,
2510 v->dpte_group_bytes,
2511 HostVMInefficiencyFactor,
2512 v->HostVMMinPageSize,
2513 v->HostVMMaxNonCachedPageTableLevels);
2515 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2517 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2518 if (v->BlendingAndTiming[k] == k) {
2519 if (v->WritebackEnable[k] == true) {
2520 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2521 + CalculateWriteBackDelay(
2522 v->WritebackPixelFormat[k],
2523 v->WritebackHRatio[k],
2524 v->WritebackVRatio[k],
2525 v->WritebackVTaps[k],
2526 v->WritebackDestinationWidth[k],
2527 v->WritebackDestinationHeight[k],
2528 v->WritebackSourceHeight[k],
2529 v->HTotal[k]) / v->DISPCLK;
2531 v->WritebackDelay[v->VoltageLevel][k] = 0;
2532 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2533 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2534 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2535 v->WritebackDelay[v->VoltageLevel][k],
2537 + CalculateWriteBackDelay(
2538 v->WritebackPixelFormat[j],
2539 v->WritebackHRatio[j],
2540 v->WritebackVRatio[j],
2541 v->WritebackVTaps[j],
2542 v->WritebackDestinationWidth[j],
2543 v->WritebackDestinationHeight[j],
2544 v->WritebackSourceHeight[j],
2545 v->HTotal[k]) / v->DISPCLK);
2551 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2552 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2553 if (v->BlendingAndTiming[k] == j)
2554 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2556 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2557 v->MaxVStartupLines[k] =
2558 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2559 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2560 v->VTotal[k] - v->VActive[k]
2564 (double) v->WritebackDelay[v->VoltageLevel][k]
2565 / (v->HTotal[k] / v->PixelClock[k]),
2567 if (v->MaxVStartupLines[k] > 1023)
2568 v->MaxVStartupLines[k] = 1023;
2570 #ifdef __DML_VBA_DEBUG__
2571 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2572 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2573 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2577 v->MaximumMaxVStartupLines = 0;
2578 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2579 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2582 // We don't really care to iterate between the various prefetch modes
2583 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2585 v->UrgentLatency = CalculateUrgentLatency(
2586 v->UrgentLatencyPixelDataOnly,
2587 v->UrgentLatencyPixelMixedWithVMData,
2588 v->UrgentLatencyVMDataOnly,
2589 v->DoUrgentLatencyAdjustment,
2590 v->UrgentLatencyAdjustmentFabricClockComponent,
2591 v->UrgentLatencyAdjustmentFabricClockReference,
2594 v->FractionOfUrgentBandwidth = 0.0;
2595 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2597 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2600 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2601 bool DestinationLineTimesForPrefetchLessThan2 = false;
2602 bool VRatioPrefetchMoreThan4 = false;
2603 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2604 MaxTotalRDBandwidth = 0;
2606 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2608 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2611 myPipe.DPPCLK = v->DPPCLK[k];
2612 myPipe.DISPCLK = v->DISPCLK;
2613 myPipe.PixelClock = v->PixelClock[k];
2614 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2615 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2616 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2617 myPipe.VRatio = v->VRatio[k];
2618 myPipe.VRatioChroma = v->VRatioChroma[k];
2619 myPipe.SourceScan = v->SourceScan[k];
2620 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2621 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2622 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2623 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2624 myPipe.InterlaceEnable = v->Interlace[k];
2625 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2626 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2627 myPipe.HTotal = v->HTotal[k];
2628 myPipe.DCCEnable = v->DCCEnable[k];
2629 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2630 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2631 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2632 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2633 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2634 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2635 v->ErrorResult[k] = CalculatePrefetchSchedule(
2637 HostVMInefficiencyFactor,
2640 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2642 v->DPPCLKDelaySCLLBOnly,
2643 v->DPPCLKDelayCNVCCursor,
2644 v->DISPCLKDelaySubtotal,
2645 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2647 v->MaxInterDCNTileRepeaters,
2648 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2649 v->MaxVStartupLines[k],
2650 v->GPUVMMaxPageTableLevels,
2653 v->HostVMMaxNonCachedPageTableLevels,
2654 v->HostVMMinPageSize,
2655 v->DynamicMetadataEnable[k],
2656 v->DynamicMetadataVMEnabled,
2657 v->DynamicMetadataLinesBeforeActiveRequired[k],
2658 v->DynamicMetadataTransmittedBytes[k],
2660 v->UrgentExtraLatency,
2662 v->PDEAndMetaPTEBytesFrame[k],
2664 v->PixelPTEBytesPerRow[k],
2665 v->PrefetchSourceLinesY[k],
2667 v->VInitPreFillY[k],
2669 v->PrefetchSourceLinesC[k],
2671 v->VInitPreFillC[k],
2673 v->swath_width_luma_ub[k],
2674 v->swath_width_chroma_ub[k],
2678 &v->DSTXAfterScaler[k],
2679 &v->DSTYAfterScaler[k],
2680 &v->DestinationLinesForPrefetch[k],
2681 &v->PrefetchBandwidth[k],
2682 &v->DestinationLinesToRequestVMInVBlank[k],
2683 &v->DestinationLinesToRequestRowInVBlank[k],
2684 &v->VRatioPrefetchY[k],
2685 &v->VRatioPrefetchC[k],
2686 &v->RequiredPrefetchPixDataBWLuma[k],
2687 &v->RequiredPrefetchPixDataBWChroma[k],
2688 &v->NotEnoughTimeForDynamicMetadata[k],
2690 &v->prefetch_vmrow_bw[k],
2694 &v->VUpdateOffsetPix[k],
2695 &v->VUpdateWidthPix[k],
2696 &v->VReadyOffsetPix[k]);
2698 #ifdef __DML_VBA_DEBUG__
2699 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2701 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2704 v->NoEnoughUrgentLatencyHiding = false;
2705 v->NoEnoughUrgentLatencyHidingPre = false;
2707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2708 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2709 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2710 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2711 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2713 CalculateUrgentBurstFactor(
2714 v->swath_width_luma_ub[k],
2715 v->swath_width_chroma_ub[k],
2718 v->HTotal[k] / v->PixelClock[k],
2720 v->CursorBufferSize,
2721 v->CursorWidth[k][0],
2725 v->BytePerPixelDETY[k],
2726 v->BytePerPixelDETC[k],
2727 v->DETBufferSizeY[k],
2728 v->DETBufferSizeC[k],
2729 &v->UrgBurstFactorCursor[k],
2730 &v->UrgBurstFactorLuma[k],
2731 &v->UrgBurstFactorChroma[k],
2732 &v->NoUrgentLatencyHiding[k]);
2734 CalculateUrgentBurstFactor(
2735 v->swath_width_luma_ub[k],
2736 v->swath_width_chroma_ub[k],
2739 v->HTotal[k] / v->PixelClock[k],
2741 v->CursorBufferSize,
2742 v->CursorWidth[k][0],
2744 v->VRatioPrefetchY[k],
2745 v->VRatioPrefetchC[k],
2746 v->BytePerPixelDETY[k],
2747 v->BytePerPixelDETC[k],
2748 v->DETBufferSizeY[k],
2749 v->DETBufferSizeC[k],
2750 &v->UrgBurstFactorCursorPre[k],
2751 &v->UrgBurstFactorLumaPre[k],
2752 &v->UrgBurstFactorChromaPre[k],
2753 &v->NoUrgentLatencyHidingPre[k]);
2755 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2757 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2758 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2759 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2760 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2761 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2763 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2764 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2765 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2767 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2769 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2770 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2771 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2772 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2773 + v->cursor_bw_pre[k]);
2775 #ifdef __DML_VBA_DEBUG__
2776 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2777 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2778 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2779 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2780 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2782 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2783 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2785 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2786 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2787 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2788 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2789 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2790 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2791 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2792 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2793 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2794 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2797 if (v->DestinationLinesForPrefetch[k] < 2)
2798 DestinationLineTimesForPrefetchLessThan2 = true;
2800 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2801 VRatioPrefetchMoreThan4 = true;
2803 if (v->NoUrgentLatencyHiding[k] == true)
2804 v->NoEnoughUrgentLatencyHiding = true;
2806 if (v->NoUrgentLatencyHidingPre[k] == true)
2807 v->NoEnoughUrgentLatencyHidingPre = true;
2810 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2812 #ifdef __DML_VBA_DEBUG__
2813 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2814 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2815 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2818 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2819 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2820 v->PrefetchModeSupported = true;
2822 v->PrefetchModeSupported = false;
2823 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2824 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2825 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2826 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2830 // This error result check was done after the PrefetchModeSupported check, so we will
2831 // still try to calculate the flip schedule even if the prefetch mode is not supported
2832 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2833 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2834 v->PrefetchModeSupported = false;
2835 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2839 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2840 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2841 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2842 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2844 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2845 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2846 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2848 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2849 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2850 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2853 v->TotImmediateFlipBytes = 0;
2854 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2855 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2856 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2859 CalculateFlipSchedule(
2862 HostVMInefficiencyFactor,
2863 v->UrgentExtraLatency,
2865 v->PDEAndMetaPTEBytesFrame[k],
2867 v->PixelPTEBytesPerRow[k]);
2870 v->total_dcn_read_bw_with_flip = 0.0;
2871 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2872 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2873 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2875 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2876 v->DPPPerPlane[k] * v->final_flip_bw[k]
2877 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2878 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2879 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2881 * (v->final_flip_bw[k]
2882 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2883 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2884 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2885 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2887 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2888 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2889 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2891 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2892 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2894 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2896 v->ImmediateFlipSupported = true;
2897 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2898 #ifdef __DML_VBA_DEBUG__
2899 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2901 v->ImmediateFlipSupported = false;
2902 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2904 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2905 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2906 #ifdef __DML_VBA_DEBUG__
2907 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2910 v->ImmediateFlipSupported = false;
2914 v->ImmediateFlipSupported = false;
2917 v->PrefetchAndImmediateFlipSupported =
2918 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2919 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2920 v->ImmediateFlipSupported)) ? true : false;
2921 #ifdef __DML_VBA_DEBUG__
2922 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2923 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2924 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2925 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2926 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2927 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2929 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2931 v->VStartupLines = v->VStartupLines + 1;
2932 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2933 ASSERT(v->PrefetchAndImmediateFlipSupported);
2935 // Unbounded Request Enabled
2936 CalculateUnboundedRequestAndCompressedBufferSize(
2937 v->DETBufferSizeInKByte[0],
2938 v->ConfigReturnBufferSizeInKByte,
2939 v->UseUnboundedRequesting,
2943 v->CompressedBufferSegmentSizeInkByte,
2945 &v->UnboundedRequestEnabled,
2946 &v->CompressedBufferSizeInkByte);
2948 //Watermarks and NB P-State/DRAM Clock Change Support
2950 enum clock_change_support DRAMClockChangeSupport; // dummy
2951 CalculateWatermarksAndDRAMSpeedChangeSupport(
2957 v->UrgentExtraLatency,
2967 v->BytePerPixelDETY,
2968 v->BytePerPixelDETC,
2969 v->UnboundedRequestEnabled,
2970 v->CompressedBufferSizeInkByte,
2971 &DRAMClockChangeSupport,
2972 &v->StutterExitWatermark,
2973 &v->StutterEnterPlusExitWatermark,
2974 &v->Z8StutterExitWatermark,
2975 &v->Z8StutterEnterPlusExitWatermark);
2977 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2978 if (v->WritebackEnable[k] == true) {
2979 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2981 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2983 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2988 //Display Pipeline Delivery Time in Prefetch, Groups
2989 CalculatePixelDeliveryTimes(
2990 v->NumberOfActivePlanes,
2995 v->swath_width_luma_ub,
2996 v->swath_width_chroma_ub,
3001 v->PSCL_THROUGHPUT_LUMA,
3002 v->PSCL_THROUGHPUT_CHROMA,
3009 v->BlockWidth256BytesY,
3010 v->BlockHeight256BytesY,
3011 v->BlockWidth256BytesC,
3012 v->BlockHeight256BytesC,
3013 v->DisplayPipeLineDeliveryTimeLuma,
3014 v->DisplayPipeLineDeliveryTimeChroma,
3015 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3016 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3017 v->DisplayPipeRequestDeliveryTimeLuma,
3018 v->DisplayPipeRequestDeliveryTimeChroma,
3019 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3020 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3021 v->CursorRequestDeliveryTime,
3022 v->CursorRequestDeliveryTimePrefetch);
3024 CalculateMetaAndPTETimes(
3025 v->NumberOfActivePlanes,
3028 v->MinMetaChunkSizeBytes,
3032 v->DestinationLinesToRequestRowInVBlank,
3033 v->DestinationLinesToRequestRowInImmediateFlip,
3040 v->dpte_row_height_chroma,
3042 v->meta_row_width_chroma,
3044 v->meta_row_height_chroma,
3046 v->meta_req_width_chroma,
3048 v->meta_req_height_chroma,
3049 v->dpte_group_bytes,
3052 v->PixelPTEReqWidthY,
3053 v->PixelPTEReqHeightY,
3054 v->PixelPTEReqWidthC,
3055 v->PixelPTEReqHeightC,
3056 v->dpte_row_width_luma_ub,
3057 v->dpte_row_width_chroma_ub,
3058 v->DST_Y_PER_PTE_ROW_NOM_L,
3059 v->DST_Y_PER_PTE_ROW_NOM_C,
3060 v->DST_Y_PER_META_ROW_NOM_L,
3061 v->DST_Y_PER_META_ROW_NOM_C,
3062 v->TimePerMetaChunkNominal,
3063 v->TimePerChromaMetaChunkNominal,
3064 v->TimePerMetaChunkVBlank,
3065 v->TimePerChromaMetaChunkVBlank,
3066 v->TimePerMetaChunkFlip,
3067 v->TimePerChromaMetaChunkFlip,
3068 v->time_per_pte_group_nom_luma,
3069 v->time_per_pte_group_vblank_luma,
3070 v->time_per_pte_group_flip_luma,
3071 v->time_per_pte_group_nom_chroma,
3072 v->time_per_pte_group_vblank_chroma,
3073 v->time_per_pte_group_flip_chroma);
3075 CalculateVMGroupAndRequestTimes(
3076 v->NumberOfActivePlanes,
3078 v->GPUVMMaxPageTableLevels,
3081 v->DestinationLinesToRequestVMInVBlank,
3082 v->DestinationLinesToRequestVMInImmediateFlip,
3085 v->dpte_row_width_luma_ub,
3086 v->dpte_row_width_chroma_ub,
3088 v->dpde0_bytes_per_frame_ub_l,
3089 v->dpde0_bytes_per_frame_ub_c,
3090 v->meta_pte_bytes_per_frame_ub_l,
3091 v->meta_pte_bytes_per_frame_ub_c,
3092 v->TimePerVMGroupVBlank,
3093 v->TimePerVMGroupFlip,
3094 v->TimePerVMRequestVBlank,
3095 v->TimePerVMRequestFlip);
3098 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3099 if (PrefetchMode == 0) {
3100 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3101 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3102 v->MinTTUVBlank[k] = dml_max(
3103 v->DRAMClockChangeWatermark,
3104 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3105 } else if (PrefetchMode == 1) {
3106 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3107 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3108 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3110 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3111 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3112 v->MinTTUVBlank[k] = v->UrgentWatermark;
3114 if (!v->DynamicMetadataEnable[k])
3115 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3118 // DCC Configuration
3120 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3121 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3122 v->SourcePixelFormat[k],
3123 v->SurfaceWidthY[k],
3124 v->SurfaceWidthC[k],
3125 v->SurfaceHeightY[k],
3126 v->SurfaceHeightC[k],
3127 v->DETBufferSizeInKByte[0] * 1024,
3128 v->BlockHeight256BytesY[k],
3129 v->BlockHeight256BytesC[k],
3130 v->SurfaceTiling[k],
3131 v->BytePerPixelY[k],
3132 v->BytePerPixelC[k],
3133 v->BytePerPixelDETY[k],
3134 v->BytePerPixelDETC[k],
3136 &v->DCCYMaxUncompressedBlock[k],
3137 &v->DCCCMaxUncompressedBlock[k],
3138 &v->DCCYMaxCompressedBlock[k],
3139 &v->DCCCMaxCompressedBlock[k],
3140 &v->DCCYIndependentBlock[k],
3141 &v->DCCCIndependentBlock[k]);
3144 // VStartup Adjustment
3145 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3146 bool isInterlaceTiming;
3147 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3148 #ifdef __DML_VBA_DEBUG__
3149 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3152 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3154 #ifdef __DML_VBA_DEBUG__
3155 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3156 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3157 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3158 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3161 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3162 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3163 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3166 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3168 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3169 - v->VFrontPorch[k])
3170 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3171 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3173 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3175 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3176 <= (isInterlaceTiming ?
3177 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3178 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3179 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3181 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3183 #ifdef __DML_VBA_DEBUG__
3184 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3185 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3186 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3187 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3188 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3189 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3190 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3191 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3192 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3193 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3194 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3199 //Maximum Bandwidth Used
3200 double TotalWRBandwidth = 0;
3201 double MaxPerPlaneVActiveWRBandwidth = 0;
3202 double WRBandwidth = 0;
3203 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3204 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3205 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3206 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3207 } else if (v->WritebackEnable[k] == true) {
3208 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3209 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3211 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3212 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3215 v->TotalDataReadBandwidth = 0;
3216 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3217 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3220 // Stutter Efficiency
3221 CalculateStutterEfficiency(
3223 v->CompressedBufferSizeInkByte,
3224 v->UnboundedRequestEnabled,
3225 v->ConfigReturnBufferSizeInKByte,
3226 v->MetaFIFOSizeInKEntries,
3227 v->ZeroSizeBufferEntries,
3228 v->NumberOfActivePlanes,
3229 v->ROBBufferSizeInKByte,
3230 v->TotalDataReadBandwidth,
3233 v->COMPBUF_RESERVED_SPACE_64B,
3234 v->COMPBUF_RESERVED_SPACE_ZS,
3237 v->SynchronizedVBlank,
3238 v->StutterEnterPlusExitWatermark,
3239 v->Z8StutterEnterPlusExitWatermark,
3240 v->ProgressiveToInterlaceUnitInOPP,
3246 v->BytePerPixelDETY,
3252 v->DCCFractionOfZeroSizeRequestsLuma,
3253 v->DCCFractionOfZeroSizeRequestsChroma,
3259 v->BlockHeight256BytesY,
3260 v->BlockWidth256BytesY,
3261 v->BlockHeight256BytesC,
3262 v->BlockWidth256BytesC,
3263 v->DCCYMaxUncompressedBlock,
3264 v->DCCCMaxUncompressedBlock,
3268 v->ReadBandwidthPlaneLuma,
3269 v->ReadBandwidthPlaneChroma,
3272 &v->StutterEfficiencyNotIncludingVBlank,
3273 &v->StutterEfficiency,
3274 &v->NumberOfStutterBurstsPerFrame,
3275 &v->Z8StutterEfficiencyNotIncludingVBlank,
3276 &v->Z8StutterEfficiency,
3277 &v->Z8NumberOfStutterBurstsPerFrame,
/*
 * DisplayPipeConfiguration - collect per-plane pixel-size and 256-byte block
 * geometry and forward it to CalculateSwathAndDETConfiguration().
 *
 * For every active plane, the plane's source pixel format and surface tiling
 * are passed to CalculateBytePerPixelAnd256BBlockSizes(), which fills the
 * per-plane local arrays declared below. Those arrays are then passed,
 * together with fields of mode_lib->vba, to a single call of
 * CalculateSwathAndDETConfiguration().
 *
 * NOTE(review): several arguments of both calls are not visible in this
 * excerpt. The dummyN arrays are presumably scratch storage for callee
 * outputs that are not needed here - confirm against the callee prototype.
 */
3281 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3283 struct vba_vars_st *v = &mode_lib->vba;
3284 // Display Pipe Configuration
// Per-plane results of CalculateBytePerPixelAnd256BBlockSizes().
3285 double BytePerPixDETY[DC__NUM_DPP__MAX];
3286 double BytePerPixDETC[DC__NUM_DPP__MAX];
3287 int BytePerPixY[DC__NUM_DPP__MAX];
3288 int BytePerPixC[DC__NUM_DPP__MAX];
// NOTE(review): these block-dimension arrays are int, while the matching
// parameters of CalculateBytePerPixelAnd256BBlockSizes() are declared
// unsigned int * - confirm the signedness mismatch is intentional.
3289 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3290 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3291 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3292 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3293 double dummy1[DC__NUM_DPP__MAX];
3294 double dummy2[DC__NUM_DPP__MAX];
3295 double dummy3[DC__NUM_DPP__MAX];
3296 double dummy4[DC__NUM_DPP__MAX];
3297 int dummy5[DC__NUM_DPP__MAX];
3298 int dummy6[DC__NUM_DPP__MAX];
3299 bool dummy7[DC__NUM_DPP__MAX];
3300 bool dummysinglestring;
// Step 1: one call per active plane, indexed by k.
3304 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3306 CalculateBytePerPixelAnd256BBlockSizes(
3307 v->SourcePixelFormat[k],
3308 v->SurfaceTiling[k],
3313 &Read256BytesBlockHeightY[k],
3314 &Read256BytesBlockHeightC[k],
3315 &Read256BytesBlockWidthY[k],
3316 &Read256BytesBlockWidthC[k]);
// Step 2: a single call that receives the whole per-plane arrays.
3319 CalculateSwathAndDETConfiguration(
3321 v->NumberOfActivePlanes,
// Only element 0 of v->DETBufferSizeInKByte is passed.
3322 v->DETBufferSizeInKByte[0],
3326 v->SourcePixelFormat,
3334 Read256BytesBlockHeightY,
3335 Read256BytesBlockHeightC,
3336 Read256BytesBlockWidthY,
3337 Read256BytesBlockWidthC,
3338 v->ODMCombineEnabled,
3339 v->BlendingAndTiming,
// The value written through this last pointer is not read in the lines visible here.
3357 &dummysinglestring);
/*
 * CalculateBytePerPixelAnd256BBlockSizes - per-format pixel sizes and the
 * height/width of a 256-byte block for the luma and chroma surfaces.
 *
 * SourcePixelFormat selects the values stored in *BytePerPixelDETY and
 * *BytePerPixelDETC. SurfaceTiling together with SourcePixelFormat selects
 * the block heights; each block width is then computed as
 * 256 / bytes-per-pixel / block-height using unsigned integer division.
 *
 * NOTE(review): the statements that store *BytePerPixelY / *BytePerPixelC
 * and the return statement are not visible in this excerpt. The width
 * computations read those two outputs, so they are presumably assigned in
 * every branch of the first if/else chain - confirm.
 */
3360 static bool CalculateBytePerPixelAnd256BBlockSizes(
3361 enum source_format_class SourcePixelFormat,
3362 enum dm_swizzle_mode SurfaceTiling,
3363 unsigned int *BytePerPixelY,
3364 unsigned int *BytePerPixelC,
3365 double *BytePerPixelDETY,
3366 double *BytePerPixelDETC,
3367 unsigned int *BlockHeight256BytesY,
3368 unsigned int *BlockHeight256BytesC,
3369 unsigned int *BlockWidth256BytesY,
3370 unsigned int *BlockWidth256BytesC)
// Part 1: DET bytes-per-pixel values (luma, chroma) for each source format.
3372 if (SourcePixelFormat == dm_444_64) {
3373 *BytePerPixelDETY = 8;
3374 *BytePerPixelDETC = 0;
3377 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3378 *BytePerPixelDETY = 4;
3379 *BytePerPixelDETC = 0;
3382 } else if (SourcePixelFormat == dm_444_16) {
3383 *BytePerPixelDETY = 2;
3384 *BytePerPixelDETC = 0;
3387 } else if (SourcePixelFormat == dm_444_8) {
3388 *BytePerPixelDETY = 1;
3389 *BytePerPixelDETC = 0;
3392 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3393 *BytePerPixelDETY = 4;
3394 *BytePerPixelDETC = 1;
3397 } else if (SourcePixelFormat == dm_420_8) {
3398 *BytePerPixelDETY = 1;
3399 *BytePerPixelDETC = 2;
3402 } else if (SourcePixelFormat == dm_420_12) {
3403 *BytePerPixelDETY = 2;
3404 *BytePerPixelDETC = 4;
// Last branch of the chain: fractional sizes, 4/3 byte luma and 8/3 byte chroma.
// NOTE(review): dm_mono_16 / dm_mono_8 are named in the test below but have
// no branch of their own above, so they appear to land here - confirm.
3408 *BytePerPixelDETY = 4.0 / 3;
3409 *BytePerPixelDETC = 8.0 / 3;
// Part 2: block geometry. Formats in this list get a luma block only;
// the chroma block height and width are set to 0.
3414 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3415 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
// Linear surfaces use a block height of 1; tiled heights depend on the format.
3416 if (SurfaceTiling == dm_sw_linear) {
3417 *BlockHeight256BytesY = 1;
3418 } else if (SourcePixelFormat == dm_444_64) {
3419 *BlockHeight256BytesY = 4;
3420 } else if (SourcePixelFormat == dm_444_8) {
3421 *BlockHeight256BytesY = 16;
3423 *BlockHeight256BytesY = 8;
// Width = 256 / bytes per pixel / block height.
3425 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3426 *BlockHeight256BytesC = 0;
3427 *BlockWidth256BytesC = 0;
// All other formats: luma and chroma block heights are chosen separately.
3429 if (SurfaceTiling == dm_sw_linear) {
3430 *BlockHeight256BytesY = 1;
3431 *BlockHeight256BytesC = 1;
3432 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3433 *BlockHeight256BytesY = 8;
3434 *BlockHeight256BytesC = 16;
3435 } else if (SourcePixelFormat == dm_420_8) {
3436 *BlockHeight256BytesY = 16;
3437 *BlockHeight256BytesC = 8;
3439 *BlockHeight256BytesY = 8;
3440 *BlockHeight256BytesC = 8;
// Same width formula, applied to luma and chroma independently.
3442 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3443 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
/*
 * CalculateTWait - wait time to budget for the given prefetch mode.
 *
 * PrefetchMode 0: the larger of (DRAMClockChangeLatency + UrgentLatency),
 *                 SREnterPlusExitTime and UrgentLatency.
 * PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * Any other mode: UrgentLatency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	double wait_time;

	switch (PrefetchMode) {
	case 0:
		wait_time = dml_max(DRAMClockChangeLatency + UrgentLatency,
				dml_max(SREnterPlusExitTime, UrgentLatency));
		break;
	case 1:
		wait_time = dml_max(SREnterPlusExitTime, UrgentLatency);
		break;
	default:
		wait_time = UrgentLatency;
		break;
	}

	return wait_time;
}
3459 double dml31_CalculateWriteBackDISPCLK(
3460 enum source_format_class WritebackPixelFormat,
3462 double WritebackHRatio,
3463 double WritebackVRatio,
3464 unsigned int WritebackHTaps,
3465 unsigned int WritebackVTaps,
3466 long WritebackSourceWidth,
3467 long WritebackDestinationWidth,
3468 unsigned int HTotal,
3469 unsigned int WritebackLineBufferSize)
3471 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3473 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3474 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3475 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3476 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3479 static double CalculateWriteBackDelay(
3480 enum source_format_class WritebackPixelFormat,
3481 double WritebackHRatio,
3482 double WritebackVRatio,
3483 unsigned int WritebackVTaps,
3484 int WritebackDestinationWidth,
3485 int WritebackDestinationHeight,
3486 int WritebackSourceHeight,
3487 unsigned int HTotal)
3489 double CalculateWriteBackDelay;
3491 double Output_lines_last_notclamped;
3492 double WritebackVInit;
3494 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3495 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3496 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3497 if (Output_lines_last_notclamped < 0) {
3498 CalculateWriteBackDelay = 0;
3500 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3502 return CalculateWriteBackDelay;
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive the VUPDATE/VREADY
 * pixel offsets, the setup time and the dynamic-metadata timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix, *VReadyOffsetPix - delays built from DCFClkDeepSleep,
 *       DPPCLK and the inter-tile repeater delay, multiplied by PixelClock
 *       and passed through dml_ceil(..., 1.0)
 *   *VUpdateOffsetPix - dml_ceil(HTotal / 4.0, 1)
 *   *TSetup - sum of the three pixel counts above divided by PixelClock
 *   *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec  - HTotal / PixelClock (one line time)
 *   *Tdmsks - half of the vertical blank time when
 *       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise that many
 *       line times; halved once more when InterlaceEnable is 1 and
 *       ProgressiveToInterlaceUnitInOPP is false
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix are doubles, so they must be
	 * printed with %f; only VUpdateOffsetPix is an int.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3549 static void CalculateRowBandwidth(
3551 enum source_format_class SourcePixelFormat,
3553 double VRatioChroma,
3556 unsigned int MetaRowByteLuma,
3557 unsigned int MetaRowByteChroma,
3558 unsigned int meta_row_height_luma,
3559 unsigned int meta_row_height_chroma,
3560 unsigned int PixelPTEBytesPerRowLuma,
3561 unsigned int PixelPTEBytesPerRowChroma,
3562 unsigned int dpte_row_height_luma,
3563 unsigned int dpte_row_height_chroma,
3564 double *meta_row_bw,
3565 double *dpte_row_bw)
3567 if (DCCEnable != true) {
3569 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3570 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3572 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3575 if (GPUVMEnable != true) {
3577 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3578 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3579 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3581 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
/*
 * CalculateFlipSchedule - evaluate the immediate-flip schedule of plane k.
 *
 * Reads state from mode_lib->vba and writes, for plane k:
 *   v->DestinationLinesToRequestVMInImmediateFlip[k]
 *   v->DestinationLinesToRequestRowInImmediateFlip[k]
 *   v->final_flip_bw[k]
 *   v->ImmediateFlipSupportedForPipe[k]
 *
 * HostVMInefficiencyFactor scales the PDE/PTE byte counts.
 * UrgentExtraLatency and UrgentLatency set lower bounds on the fetch times.
 * PDEAndMetaPTEBytesPerFrame, MetaRowBytes and DPTEBytesPerRow are this
 * plane's byte counts; their sum relative to v->TotImmediateFlipBytes
 * decides the plane's share of v->BandwidthAvailableForImmediateFlip.
 *
 * NOTE(review): the declaration of k and the final argument of both
 * dml_max3() calls are not visible in this excerpt.
 */
3585 static void CalculateFlipSchedule(
3586 struct display_mode_lib *mode_lib,
3588 double HostVMInefficiencyFactor,
3589 double UrgentExtraLatency,
3590 double UrgentLatency,
3591 double PDEAndMetaPTEBytesPerFrame,
3592 double MetaRowBytes,
3593 double DPTEBytesPerRow)
3595 struct vba_vars_st *v = &mode_lib->vba;
3596 double min_row_time = 0.0;
3597 unsigned int HostVMDynamicLevelsTrips;
3598 double TimeForFetchingMetaPTEImmediateFlip;
3599 double TimeForFetchingRowInVBlankImmediateFlip;
3600 double ImmediateFlipBW;
3601 double LineTime = v->HTotal[k] / v->PixelClock[k];
// Page-table trips are counted only when both GPUVM and HostVM are enabled.
3603 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3604 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3606 HostVMDynamicLevelsTrips = 0;
// This plane's share of the immediate-flip bandwidth, proportional to its bytes.
// ImmediateFlipBW is read below only under conditions that imply this one.
3609 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3610 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3613 if (v->GPUVMEnable == true) {
3614 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3615 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3616 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3619 TimeForFetchingMetaPTEImmediateFlip = 0;
// Convert the time to line times in steps of 1/4 line
// (assuming dml_ceil(x, 1) rounds x up to an integer).
3622 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3623 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3624 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3625 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3626 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3629 TimeForFetchingRowInVBlankImmediateFlip = 0;
3632 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
// Bandwidth needed to fetch the bytes within the line counts chosen above.
3634 if (v->GPUVMEnable == true) {
3635 v->final_flip_bw[k] = dml_max(
3636 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3637 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
// NOTE(review): GPUVMEnable is already known to be false on this path, so
// the condition below reduces to the DCCEnable[k] test.
3638 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3639 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3641 v->final_flip_bw[k] = 0;
// min_row_time: shortest row duration among the DPTE/meta rows that apply,
// including the chroma rows for the formats listed here.
// NOTE(review): dm_420_12 is not in this list although CalculateRowBandwidth()
// treats it as a format with chroma rows - confirm this is intended.
3644 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3645 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3646 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3647 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3648 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3650 min_row_time = dml_min4(
3651 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3652 v->meta_row_height[k] * LineTime / v->VRatio[k],
3653 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3654 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3657 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3658 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3659 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3660 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3662 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
// Immediate flip is rejected when either line count reaches its limit
// (32 for VM, 16 for row) or the combined fetch time exceeds min_row_time.
3666 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3667 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3668 v->ImmediateFlipSupportedForPipe[k] = false;
3670 v->ImmediateFlipSupportedForPipe[k] = true;
3673 #ifdef __DML_VBA_DEBUG__
3674 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3675 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3676 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3677 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3678 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3679 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3684 static double TruncToValidBPP(
3692 enum output_encoder_class Output,
3693 enum output_format_class Format,
3694 unsigned int DSCInputBitPerComponent,
3698 enum odm_combine_mode ODMCombine)
3707 if (Format == dm_420) {
3712 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
3713 } else if (Format == dm_444) {
3718 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3725 if (Format == dm_n422) {
3727 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3730 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3734 if (DSCEnable && Output == dm_dp) {
3735 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3737 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3740 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3742 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3746 if (DesiredBPP == 0) {
3748 if (MaxLinkBPP < MinDSCBPP) {
3750 } else if (MaxLinkBPP >= MaxDSCBPP) {
3753 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3756 if (MaxLinkBPP >= NonDSCBPP2) {
3758 } else if (MaxLinkBPP >= NonDSCBPP1) {
3760 } else if (MaxLinkBPP >= NonDSCBPP0) {
3767 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3768 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
/*
 * CalculatePrefetchSchedulePerPlane - run CalculatePrefetchSchedule() for
 * one plane of one mode-support state.
 *
 * Fills the local pipe descriptor myPipe from the per-state ([i][j][k],
 * [i][j], [i][k]) and per-plane ([k]) entries of mode_lib->vba, calls
 * CalculatePrefetchSchedule() and stores its return value in
 * v->NoTimeForPrefetch[i][j][k].
 *
 * NOTE(review): the declarations of i, j, k and myPipe and several call
 * arguments are not visible in this excerpt. The function is marked
 * noinline, presumably to keep the caller's stack frame small - confirm.
 */
3777 static noinline void CalculatePrefetchSchedulePerPlane(
3778 struct display_mode_lib *mode_lib,
3779 double HostVMInefficiencyFactor,
3784 struct vba_vars_st *v = &mode_lib->vba;
// Pipe descriptor: clocks and DPP count come from the state being evaluated,
// the remaining fields from the plane's own configuration.
3787 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3788 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3789 myPipe.PixelClock = v->PixelClock[k];
3790 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3791 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3792 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3793 myPipe.VRatio = mode_lib->vba.VRatio[k];
3794 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3796 myPipe.SourceScan = v->SourceScan[k];
3797 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3798 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3799 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3800 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3801 myPipe.InterlaceEnable = v->Interlace[k];
3802 myPipe.NumberOfCursors = v->NumberOfCursors[k];
// Vertical blank is the total line count minus the active line count.
3803 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3804 myPipe.HTotal = v->HTotal[k];
3805 myPipe.DCCEnable = v->DCCEnable[k];
// True for either ODM combine mode (4:1 or 2:1).
3806 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3807 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3808 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3809 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3810 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3811 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
// The return value is recorded per state and plane.
3812 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3814 HostVMInefficiencyFactor,
3816 v->DSCDelayPerState[i][k],
3817 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3819 v->DPPCLKDelaySCLLBOnly,
3820 v->DPPCLKDelayCNVCCursor,
3821 v->DISPCLKDelaySubtotal,
3822 v->SwathWidthYThisState[k] / v->HRatio[k],
3824 v->MaxInterDCNTileRepeaters,
// The smaller of v->MaxVStartup and this state's MaximumVStartup is passed
// first, followed by the state's MaximumVStartup itself.
3825 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3826 v->MaximumVStartup[i][j][k],
3827 v->GPUVMMaxPageTableLevels,
3830 v->HostVMMaxNonCachedPageTableLevels,
3831 v->HostVMMinPageSize,
3832 v->DynamicMetadataEnable[k],
3833 v->DynamicMetadataVMEnabled,
3834 v->DynamicMetadataLinesBeforeActiveRequired[k],
3835 v->DynamicMetadataTransmittedBytes[k],
3839 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3840 v->MetaRowBytes[i][j][k],
3841 v->DPTEBytesPerRow[i][j][k],
3842 v->PrefetchLinesY[i][j][k],
3843 v->SwathWidthYThisState[k],
3846 v->PrefetchLinesC[i][j][k],
3847 v->SwathWidthCThisState[k],
3850 v->swath_width_luma_ub_this_state[k],
3851 v->swath_width_chroma_ub_this_state[k],
3852 v->SwathHeightYThisState[k],
3853 v->SwathHeightCThisState[k],
// From here on addresses are passed; presumably these are the callee's
// outputs - confirm against the CalculatePrefetchSchedule() prototype.
3855 &v->DSTXAfterScaler[k],
3856 &v->DSTYAfterScaler[k],
3857 &v->LineTimesForPrefetch[k],
3859 &v->LinesForMetaPTE[k],
3860 &v->LinesForMetaAndDPTERow[k],
3861 &v->VRatioPreY[i][j][k],
3862 &v->VRatioPreC[i][j][k],
3863 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3864 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3865 &v->NoTimeForDynamicMetadata[i][j][k],
3867 &v->prefetch_vmrow_bw[k],
3871 &v->VUpdateOffsetPix[k],
3872 &v->VUpdateWidthPix[k],
3873 &v->VReadyOffsetPix[k]);
3876 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3878 struct vba_vars_st *v = &mode_lib->vba;
3882 int ReorderingBytes;
3883 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3884 bool NoChroma = true;
3885 bool EnoughWritebackUnits = true;
3886 bool P2IWith420 = false;
3887 bool DSCOnlyIfNecessaryWithBPP = false;
3888 bool DSC422NativeNotSupported = false;
3889 double MaxTotalVActiveRDBandwidth;
3890 bool ViewportExceedsSurface = false;
3891 bool FMTBufferExceeded = false;
3893 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3895 CalculateMinAndMaxPrefetchMode(
3896 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3897 &MinPrefetchMode, &MaxPrefetchMode);
3899 /*Scale Ratio, taps Support Check*/
3901 v->ScaleRatioAndTapsSupport = true;
3902 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3903 if (v->ScalerEnabled[k] == false
3904 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3905 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3906 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3907 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3908 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3909 v->ScaleRatioAndTapsSupport = false;
3910 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3911 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3912 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3913 || v->VRatio[k] > v->vtaps[k]
3914 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3915 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3916 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3917 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3918 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3919 || v->HRatioChroma[k] > v->MaxHSCLRatio
3920 || v->VRatioChroma[k] > v->MaxVSCLRatio
3921 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3922 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3923 v->ScaleRatioAndTapsSupport = false;
3926 /*Source Format, Pixel Format and Scan Support Check*/
3928 v->SourceFormatPixelAndScanSupport = true;
3929 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3930 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3931 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3932 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3933 v->SourceFormatPixelAndScanSupport = false;
3936 /*Bandwidth Support Check*/
3938 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3939 CalculateBytePerPixelAnd256BBlockSizes(
3940 v->SourcePixelFormat[k],
3941 v->SurfaceTiling[k],
3942 &v->BytePerPixelY[k],
3943 &v->BytePerPixelC[k],
3944 &v->BytePerPixelInDETY[k],
3945 &v->BytePerPixelInDETC[k],
3946 &v->Read256BlockHeightY[k],
3947 &v->Read256BlockHeightC[k],
3948 &v->Read256BlockWidthY[k],
3949 &v->Read256BlockWidthC[k]);
3951 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3952 if (v->SourceScan[k] != dm_vert) {
3953 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3954 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3956 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3957 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3960 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3961 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3962 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3963 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3964 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3966 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3967 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3968 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3969 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3970 } else if (v->WritebackEnable[k] == true) {
3971 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3972 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3974 v->WriteBandwidth[k] = 0.0;
3978 /*Writeback Latency support check*/
3980 v->WritebackLatencySupport = true;
3981 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3982 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3983 v->WritebackLatencySupport = false;
3987 /*Writeback Mode Support Check*/
3989 v->TotalNumberOfActiveWriteback = 0;
3990 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3991 if (v->WritebackEnable[k] == true) {
3992 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3996 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3997 EnoughWritebackUnits = false;
4000 /*Writeback Scale Ratio and Taps Support Check*/
4002 v->WritebackScaleRatioAndTapsSupport = true;
4003 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4004 if (v->WritebackEnable[k] == true) {
4005 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4006 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4007 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4008 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4009 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4010 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4011 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4012 v->WritebackScaleRatioAndTapsSupport = false;
4014 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4015 v->WritebackScaleRatioAndTapsSupport = false;
4019 /*Maximum DISPCLK/DPPCLK Support check*/
4021 v->WritebackRequiredDISPCLK = 0.0;
4022 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4023 if (v->WritebackEnable[k] == true) {
4024 v->WritebackRequiredDISPCLK = dml_max(
4025 v->WritebackRequiredDISPCLK,
4026 dml31_CalculateWriteBackDISPCLK(
4027 v->WritebackPixelFormat[k],
4029 v->WritebackHRatio[k],
4030 v->WritebackVRatio[k],
4031 v->WritebackHTaps[k],
4032 v->WritebackVTaps[k],
4033 v->WritebackSourceWidth[k],
4034 v->WritebackDestinationWidth[k],
4036 v->WritebackLineBufferSize));
4039 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4040 if (v->HRatio[k] > 1.0) {
4041 v->PSCL_FACTOR[k] = dml_min(
4042 v->MaxDCHUBToPSCLThroughput,
4043 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
4045 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4047 if (v->BytePerPixelC[k] == 0.0) {
4048 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4049 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4051 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4052 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4054 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4055 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4058 if (v->HRatioChroma[k] > 1.0) {
4059 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4060 v->MaxDCHUBToPSCLThroughput,
4061 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
4063 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
4065 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4067 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
4068 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4069 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
4070 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4072 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4073 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4074 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4078 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4079 int MaximumSwathWidthSupportLuma;
4080 int MaximumSwathWidthSupportChroma;
4082 if (v->SurfaceTiling[k] == dm_sw_linear) {
4083 MaximumSwathWidthSupportLuma = 8192.0;
4084 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4085 MaximumSwathWidthSupportLuma = 2880.0;
4086 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4087 MaximumSwathWidthSupportLuma = 3840.0;
4089 MaximumSwathWidthSupportLuma = 5760.0;
4092 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4093 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4095 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4097 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4098 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4099 if (v->BytePerPixelC[k] == 0.0) {
4100 v->MaximumSwathWidthInLineBufferChroma = 0;
4102 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4103 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4105 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4106 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4109 CalculateSwathAndDETConfiguration(
4111 v->NumberOfActivePlanes,
4112 v->DETBufferSizeInKByte[0],
4113 v->MaximumSwathWidthLuma,
4114 v->MaximumSwathWidthChroma,
4116 v->SourcePixelFormat,
4124 v->Read256BlockHeightY,
4125 v->Read256BlockHeightC,
4126 v->Read256BlockWidthY,
4127 v->Read256BlockWidthC,
4128 v->odm_combine_dummy,
4129 v->BlendingAndTiming,
4132 v->BytePerPixelInDETY,
4133 v->BytePerPixelInDETC,
4137 v->NoOfDPPThisState,
4138 v->swath_width_luma_ub_this_state,
4139 v->swath_width_chroma_ub_this_state,
4140 v->SwathWidthYThisState,
4141 v->SwathWidthCThisState,
4142 v->SwathHeightYThisState,
4143 v->SwathHeightCThisState,
4144 v->DETBufferSizeYThisState,
4145 v->DETBufferSizeCThisState,
4146 v->SingleDPPViewportSizeSupportPerPlane,
4147 &v->ViewportSizeSupport[0][0]);
4149 for (i = 0; i < v->soc.num_states; i++) {
4150 for (j = 0; j < 2; j++) {
4151 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4152 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4153 v->RequiredDISPCLK[i][j] = 0.0;
4154 v->DISPCLK_DPPCLK_Support[i][j] = true;
4155 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4156 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4157 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4158 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4159 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4160 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4161 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4162 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4164 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4165 * (1 + v->DISPCLKRampingMargin / 100.0);
4166 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4167 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4168 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4169 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4170 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4172 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4173 * (1 + v->DISPCLKRampingMargin / 100.0);
4174 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4175 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4176 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4177 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4178 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4181 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4182 || !(v->Output[k] == dm_dp ||
4183 v->Output[k] == dm_edp)) {
4184 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4185 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4187 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4188 FMTBufferExceeded = true;
4189 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4190 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4191 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4192 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4193 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4194 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4195 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4196 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4197 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4198 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4200 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4201 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4203 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4204 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4205 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4206 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4207 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4213 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4214 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4215 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4216 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4217 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4219 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4220 FMTBufferExceeded = true;
4222 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4223 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4226 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4227 v->MPCCombine[i][j][k] = false;
4228 v->NoOfDPP[i][j][k] = 4;
4229 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4230 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4231 v->MPCCombine[i][j][k] = false;
4232 v->NoOfDPP[i][j][k] = 2;
4233 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4234 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4235 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4236 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4237 v->MPCCombine[i][j][k] = false;
4238 v->NoOfDPP[i][j][k] = 1;
4239 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4241 v->MPCCombine[i][j][k] = true;
4242 v->NoOfDPP[i][j][k] = 2;
4243 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4245 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4246 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4247 > v->MaxDppclkRoundedDownToDFSGranularity)
4248 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4249 v->DISPCLK_DPPCLK_Support[i][j] = false;
4252 v->TotalNumberOfActiveDPP[i][j] = 0;
4253 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4254 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4255 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4256 if (v->NoOfDPP[i][j][k] == 1)
4257 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4258 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4259 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4264 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4265 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4266 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4267 double BWOfNonSplitPlaneOfMaximumBandwidth;
4268 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4269 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4270 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4271 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4272 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4273 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4274 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4275 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4278 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4279 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4280 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4281 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4282 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4283 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4284 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4287 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4288 v->RequiredDISPCLK[i][j] = 0.0;
4289 v->DISPCLK_DPPCLK_Support[i][j] = true;
4290 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4291 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4292 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4293 v->MPCCombine[i][j][k] = true;
4294 v->NoOfDPP[i][j][k] = 2;
4295 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4296 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4298 v->MPCCombine[i][j][k] = false;
4299 v->NoOfDPP[i][j][k] = 1;
4300 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4301 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4303 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4304 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4305 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4306 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4308 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4310 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4311 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4312 > v->MaxDppclkRoundedDownToDFSGranularity)
4313 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4314 v->DISPCLK_DPPCLK_Support[i][j] = false;
4317 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4318 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4319 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4322 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4323 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4324 v->DISPCLK_DPPCLK_Support[i][j] = false;
4329 /*Total Available Pipes Support Check*/
4331 for (i = 0; i < v->soc.num_states; i++) {
4332 for (j = 0; j < 2; j++) {
4333 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4334 v->TotalAvailablePipesSupport[i][j] = true;
4336 v->TotalAvailablePipesSupport[i][j] = false;
4340 /*Display IO and DSC Support Check*/
4342 v->NonsupportedDSCInputBPC = false;
4343 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4344 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4345 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4346 v->NonsupportedDSCInputBPC = true;
4350 /*Number Of DSC Slices*/
4351 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4352 if (v->BlendingAndTiming[k] == k) {
4353 if (v->PixelClockBackEnd[k] > 3200) {
4354 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4355 } else if (v->PixelClockBackEnd[k] > 1360) {
4356 v->NumberOfDSCSlices[k] = 8;
4357 } else if (v->PixelClockBackEnd[k] > 680) {
4358 v->NumberOfDSCSlices[k] = 4;
4359 } else if (v->PixelClockBackEnd[k] > 340) {
4360 v->NumberOfDSCSlices[k] = 2;
4362 v->NumberOfDSCSlices[k] = 1;
4365 v->NumberOfDSCSlices[k] = 0;
4369 for (i = 0; i < v->soc.num_states; i++) {
4370 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4371 v->RequiresDSC[i][k] = false;
4372 v->RequiresFEC[i][k] = false;
4373 if (v->BlendingAndTiming[k] == k) {
4374 if (v->Output[k] == dm_hdmi) {
4375 v->RequiresDSC[i][k] = false;
4376 v->RequiresFEC[i][k] = false;
4377 v->OutputBppPerState[i][k] = TruncToValidBPP(
4378 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4382 v->PixelClockBackEnd[k],
4383 v->ForcedOutputLinkBPP[k],
4387 v->DSCInputBitPerComponent[k],
4388 v->NumberOfDSCSlices[k],
4389 v->AudioSampleRate[k],
4390 v->AudioSampleLayout[k],
4391 v->ODMCombineEnablePerState[i][k]);
4392 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) {
4393 if (v->DSCEnable[k] == true) {
4394 v->RequiresDSC[i][k] = true;
4395 v->LinkDSCEnable = true;
4396 if (v->Output[k] == dm_dp) {
4397 v->RequiresFEC[i][k] = true;
4399 v->RequiresFEC[i][k] = false;
4402 v->RequiresDSC[i][k] = false;
4403 v->LinkDSCEnable = false;
4404 v->RequiresFEC[i][k] = false;
4407 v->Outbpp = BPP_INVALID;
4408 if (v->PHYCLKPerState[i] >= 270.0) {
4409 v->Outbpp = TruncToValidBPP(
4410 (1.0 - v->Downspreading / 100.0) * 2700,
4411 v->OutputLinkDPLanes[k],
4414 v->PixelClockBackEnd[k],
4415 v->ForcedOutputLinkBPP[k],
4419 v->DSCInputBitPerComponent[k],
4420 v->NumberOfDSCSlices[k],
4421 v->AudioSampleRate[k],
4422 v->AudioSampleLayout[k],
4423 v->ODMCombineEnablePerState[i][k]);
4424 v->OutputBppPerState[i][k] = v->Outbpp;
4425 // TODO: Need some other way to handle this nonsense
4426 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4428 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4429 v->Outbpp = TruncToValidBPP(
4430 (1.0 - v->Downspreading / 100.0) * 5400,
4431 v->OutputLinkDPLanes[k],
4434 v->PixelClockBackEnd[k],
4435 v->ForcedOutputLinkBPP[k],
4439 v->DSCInputBitPerComponent[k],
4440 v->NumberOfDSCSlices[k],
4441 v->AudioSampleRate[k],
4442 v->AudioSampleLayout[k],
4443 v->ODMCombineEnablePerState[i][k]);
4444 v->OutputBppPerState[i][k] = v->Outbpp;
4445 // TODO: Need some other way to handle this nonsense
4446 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4448 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4449 v->Outbpp = TruncToValidBPP(
4450 (1.0 - v->Downspreading / 100.0) * 8100,
4451 v->OutputLinkDPLanes[k],
4454 v->PixelClockBackEnd[k],
4455 v->ForcedOutputLinkBPP[k],
4459 v->DSCInputBitPerComponent[k],
4460 v->NumberOfDSCSlices[k],
4461 v->AudioSampleRate[k],
4462 v->AudioSampleLayout[k],
4463 v->ODMCombineEnablePerState[i][k]);
4464 v->OutputBppPerState[i][k] = v->Outbpp;
4465 // TODO: Need some other way to handle this nonsense
4466 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4468 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) {
4469 v->Outbpp = TruncToValidBPP(
4470 (1.0 - v->Downspreading / 100.0) * 10000,
4474 v->PixelClockBackEnd[k],
4475 v->ForcedOutputLinkBPP[k],
4479 v->DSCInputBitPerComponent[k],
4480 v->NumberOfDSCSlices[k],
4481 v->AudioSampleRate[k],
4482 v->AudioSampleLayout[k],
4483 v->ODMCombineEnablePerState[i][k]);
4484 v->OutputBppPerState[i][k] = v->Outbpp;
4485 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4";
4487 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) {
4488 v->Outbpp = TruncToValidBPP(
4493 v->PixelClockBackEnd[k],
4494 v->ForcedOutputLinkBPP[k],
4498 v->DSCInputBitPerComponent[k],
4499 v->NumberOfDSCSlices[k],
4500 v->AudioSampleRate[k],
4501 v->AudioSampleLayout[k],
4502 v->ODMCombineEnablePerState[i][k]);
4503 v->OutputBppPerState[i][k] = v->Outbpp;
4504 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4";
4508 v->OutputBppPerState[i][k] = 0;
4513 for (i = 0; i < v->soc.num_states; i++) {
4514 v->LinkCapacitySupport[i] = true;
4515 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4516 if (v->BlendingAndTiming[k] == k
4517 && (v->Output[k] == dm_dp ||
4518 v->Output[k] == dm_edp ||
4519 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4520 v->LinkCapacitySupport[i] = false;
4526 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4527 if (v->BlendingAndTiming[k] == k
4528 && (v->Output[k] == dm_dp ||
4529 v->Output[k] == dm_edp ||
4530 v->Output[k] == dm_hdmi)) {
4531 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4534 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4535 && !v->DSC422NativeSupport) {
4536 DSC422NativeNotSupported = true;
4541 for (i = 0; i < v->soc.num_states; ++i) {
4542 v->ODMCombine4To1SupportCheckOK[i] = true;
4543 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4544 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4545 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4546 || v->Output[k] == dm_hdmi)) {
4547 v->ODMCombine4To1SupportCheckOK[i] = false;
4552 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4554 for (i = 0; i < v->soc.num_states; i++) {
4555 v->NotEnoughDSCUnits[i] = false;
4556 v->TotalDSCUnitsRequired = 0.0;
4557 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4558 if (v->RequiresDSC[i][k] == true) {
4559 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4560 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4561 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4562 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4564 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4568 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4569 v->NotEnoughDSCUnits[i] = true;
4572 /*DSC Delay per state*/
4574 for (i = 0; i < v->soc.num_states; i++) {
4575 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4576 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4579 v->BPP = v->OutputBppPerState[i][k];
4581 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4582 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4583 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4584 v->DSCInputBitPerComponent[k],
4586 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4587 v->NumberOfDSCSlices[k],
4589 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4590 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4591 v->DSCDelayPerState[i][k] = 2.0
4592 * (dscceComputeDelay(
4593 v->DSCInputBitPerComponent[k],
4595 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4596 v->NumberOfDSCSlices[k] / 2,
4598 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4600 v->DSCDelayPerState[i][k] = 4.0
4601 * (dscceComputeDelay(
4602 v->DSCInputBitPerComponent[k],
4604 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4605 v->NumberOfDSCSlices[k] / 4,
4607 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4609 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4611 v->DSCDelayPerState[i][k] = 0.0;
4614 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4615 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4616 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4617 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4623 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4625 for (i = 0; i < v->soc.num_states; ++i) {
4626 for (j = 0; j <= 1; ++j) {
4627 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4628 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4629 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4630 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4633 CalculateSwathAndDETConfiguration(
4635 v->NumberOfActivePlanes,
4636 v->DETBufferSizeInKByte[0],
4637 v->MaximumSwathWidthLuma,
4638 v->MaximumSwathWidthChroma,
4640 v->SourcePixelFormat,
4648 v->Read256BlockHeightY,
4649 v->Read256BlockHeightC,
4650 v->Read256BlockWidthY,
4651 v->Read256BlockWidthC,
4652 v->ODMCombineEnableThisState,
4653 v->BlendingAndTiming,
4656 v->BytePerPixelInDETY,
4657 v->BytePerPixelInDETC,
4661 v->NoOfDPPThisState,
4662 v->swath_width_luma_ub_this_state,
4663 v->swath_width_chroma_ub_this_state,
4664 v->SwathWidthYThisState,
4665 v->SwathWidthCThisState,
4666 v->SwathHeightYThisState,
4667 v->SwathHeightCThisState,
4668 v->DETBufferSizeYThisState,
4669 v->DETBufferSizeCThisState,
4671 &v->ViewportSizeSupport[i][j]);
4673 CalculateDCFCLKDeepSleep(
4675 v->NumberOfActivePlanes,
4680 v->SwathWidthYThisState,
4681 v->SwathWidthCThisState,
4682 v->NoOfDPPThisState,
4687 v->PSCL_FACTOR_CHROMA,
4688 v->RequiredDPPCLKThisState,
4689 v->ReadBandwidthLuma,
4690 v->ReadBandwidthChroma,
4692 &v->ProjectedDCFCLKDeepSleep[i][j]);
4694 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4695 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4696 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4697 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4698 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4699 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4700 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4701 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4702 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4707 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4708 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4709 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4712 for (i = 0; i < v->soc.num_states; i++) {
4713 for (j = 0; j < 2; j++) {
4714 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4716 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4717 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4718 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4719 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4720 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4721 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4722 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4723 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4724 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4727 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4728 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4729 if (v->DCCEnable[k] == true) {
4730 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4734 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4735 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4736 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4738 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4739 && v->SourceScan[k] != dm_vert) {
4740 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4742 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4744 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4745 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4748 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4751 v->Read256BlockHeightC[k],
4752 v->Read256BlockWidthC[k],
4753 v->SourcePixelFormat[k],
4754 v->SurfaceTiling[k],
4755 v->BytePerPixelC[k],
4757 v->SwathWidthCThisState[k],
4758 v->ViewportHeightChroma[k],
4761 v->HostVMMaxNonCachedPageTableLevels,
4762 v->GPUVMMinPageSize,
4763 v->HostVMMinPageSize,
4764 v->PTEBufferSizeInRequestsForChroma,
4767 &v->MacroTileWidthC[k],
4769 &v->DPTEBytesPerRowC,
4770 &v->PTEBufferSizeNotExceededC[i][j][k],
4772 &v->dpte_row_height_chroma[k],
4776 &v->meta_row_height_chroma[k],
4783 &v->dummyinteger11);
4785 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4790 v->ProgressiveToInterlaceUnitInOPP,
4791 v->SwathHeightCThisState[k],
4792 v->ViewportYStartC[k],
4796 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4797 v->PTEBufferSizeInRequestsForChroma = 0;
4798 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4799 v->MetaRowBytesC = 0.0;
4800 v->DPTEBytesPerRowC = 0.0;
4801 v->PrefetchLinesC[i][j][k] = 0.0;
4802 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4804 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4807 v->Read256BlockHeightY[k],
4808 v->Read256BlockWidthY[k],
4809 v->SourcePixelFormat[k],
4810 v->SurfaceTiling[k],
4811 v->BytePerPixelY[k],
4813 v->SwathWidthYThisState[k],
4814 v->ViewportHeight[k],
4817 v->HostVMMaxNonCachedPageTableLevels,
4818 v->GPUVMMinPageSize,
4819 v->HostVMMinPageSize,
4820 v->PTEBufferSizeInRequestsForLuma,
4822 v->DCCMetaPitchY[k],
4823 &v->MacroTileWidthY[k],
4825 &v->DPTEBytesPerRowY,
4826 &v->PTEBufferSizeNotExceededY[i][j][k],
4828 &v->dpte_row_height[k],
4832 &v->meta_row_height[k],
4834 &v->dpte_group_bytes[k],
4840 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4845 v->ProgressiveToInterlaceUnitInOPP,
4846 v->SwathHeightYThisState[k],
4847 v->ViewportYStartY[k],
4850 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4851 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4852 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4854 CalculateRowBandwidth(
4856 v->SourcePixelFormat[k],
4860 v->HTotal[k] / v->PixelClock[k],
4863 v->meta_row_height[k],
4864 v->meta_row_height_chroma[k],
4865 v->DPTEBytesPerRowY,
4866 v->DPTEBytesPerRowC,
4867 v->dpte_row_height[k],
4868 v->dpte_row_height_chroma[k],
4869 &v->meta_row_bandwidth[i][j][k],
4870 &v->dpte_row_bandwidth[i][j][k]);
4872 /*DCCMetaBufferSizeSupport(i, j) = True
4873 For k = 0 To NumberOfActivePlanes - 1
4874 If MetaRowBytes(i, j, k) > 24064 Then
4875 DCCMetaBufferSizeSupport(i, j) = False
4878 v->DCCMetaBufferSizeSupport[i][j] = true;
4879 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4880 if (v->MetaRowBytes[i][j][k] > 24064)
4881 v->DCCMetaBufferSizeSupport[i][j] = false;
4883 v->UrgLatency[i] = CalculateUrgentLatency(
4884 v->UrgentLatencyPixelDataOnly,
4885 v->UrgentLatencyPixelMixedWithVMData,
4886 v->UrgentLatencyVMDataOnly,
4887 v->DoUrgentLatencyAdjustment,
4888 v->UrgentLatencyAdjustmentFabricClockComponent,
4889 v->UrgentLatencyAdjustmentFabricClockReference,
4890 v->FabricClockPerState[i]);
4892 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4893 CalculateUrgentBurstFactor(
4894 v->swath_width_luma_ub_this_state[k],
4895 v->swath_width_chroma_ub_this_state[k],
4896 v->SwathHeightYThisState[k],
4897 v->SwathHeightCThisState[k],
4898 v->HTotal[k] / v->PixelClock[k],
4900 v->CursorBufferSize,
4901 v->CursorWidth[k][0],
4905 v->BytePerPixelInDETY[k],
4906 v->BytePerPixelInDETC[k],
4907 v->DETBufferSizeYThisState[k],
4908 v->DETBufferSizeCThisState[k],
4909 &v->UrgentBurstFactorCursor[k],
4910 &v->UrgentBurstFactorLuma[k],
4911 &v->UrgentBurstFactorChroma[k],
4912 &NotUrgentLatencyHiding[k]);
4915 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4916 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4917 if (NotUrgentLatencyHiding[k]) {
4918 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4922 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4923 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4924 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4925 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4928 v->TotalVActivePixelBandwidth[i][j] = 0;
4929 v->TotalVActiveCursorBandwidth[i][j] = 0;
4930 v->TotalMetaRowBandwidth[i][j] = 0;
4931 v->TotalDPTERowBandwidth[i][j] = 0;
4932 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4933 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4934 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4935 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4936 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4941 //Calculate Return BW
4942 for (i = 0; i < v->soc.num_states; ++i) {
4943 for (j = 0; j <= 1; ++j) {
4944 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4945 if (v->BlendingAndTiming[k] == k) {
4946 if (v->WritebackEnable[k] == true) {
4947 v->WritebackDelayTime[k] = v->WritebackLatency
4948 + CalculateWriteBackDelay(
4949 v->WritebackPixelFormat[k],
4950 v->WritebackHRatio[k],
4951 v->WritebackVRatio[k],
4952 v->WritebackVTaps[k],
4953 v->WritebackDestinationWidth[k],
4954 v->WritebackDestinationHeight[k],
4955 v->WritebackSourceHeight[k],
4956 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4958 v->WritebackDelayTime[k] = 0.0;
4960 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4961 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4962 v->WritebackDelayTime[k] = dml_max(
4963 v->WritebackDelayTime[k],
4965 + CalculateWriteBackDelay(
4966 v->WritebackPixelFormat[m],
4967 v->WritebackHRatio[m],
4968 v->WritebackVRatio[m],
4969 v->WritebackVTaps[m],
4970 v->WritebackDestinationWidth[m],
4971 v->WritebackDestinationHeight[m],
4972 v->WritebackSourceHeight[m],
4973 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4978 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4979 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4980 if (v->BlendingAndTiming[k] == m) {
4981 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4985 v->MaxMaxVStartup[i][j] = 0;
4986 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4987 v->MaximumVStartup[i][j][k] =
4988 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
4989 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
4990 v->VTotal[k] - v->VActive[k]
4994 1.0 * v->WritebackDelayTime[k]
4996 / v->PixelClock[k]),
4998 if (v->MaximumVStartup[i][j][k] > 1023)
4999 v->MaximumVStartup[i][j][k] = 1023;
5000 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5005 ReorderingBytes = v->NumberOfChannels
5007 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5008 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5009 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5011 for (i = 0; i < v->soc.num_states; ++i) {
5012 for (j = 0; j <= 1; ++j) {
5013 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5017 if (v->UseMinimumRequiredDCFCLK == true)
5018 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5020 for (i = 0; i < v->soc.num_states; ++i) {
5021 for (j = 0; j <= 1; ++j) {
5022 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5023 v->ReturnBusWidth * v->DCFCLKState[i][j],
5024 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5025 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5026 double PixelDataOnlyReturnBWPerState = dml_min(
5027 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5028 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5029 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5030 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5031 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5033 if (v->HostVMEnable != true) {
5034 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5036 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5041 //Re-ordering Buffer Support Check
5042 for (i = 0; i < v->soc.num_states; ++i) {
5043 for (j = 0; j <= 1; ++j) {
5044 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5045 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5046 v->ROBSupport[i][j] = true;
5048 v->ROBSupport[i][j] = false;
5053 //Vertical Active BW support check
5055 MaxTotalVActiveRDBandwidth = 0;
5056 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5057 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5060 for (i = 0; i < v->soc.num_states; ++i) {
5061 for (j = 0; j <= 1; ++j) {
5062 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5064 v->ReturnBusWidth * v->DCFCLKState[i][j],
5065 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5066 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5067 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5068 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5070 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5071 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5073 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5078 v->UrgentLatency = CalculateUrgentLatency(
5079 v->UrgentLatencyPixelDataOnly,
5080 v->UrgentLatencyPixelMixedWithVMData,
5081 v->UrgentLatencyVMDataOnly,
5082 v->DoUrgentLatencyAdjustment,
5083 v->UrgentLatencyAdjustmentFabricClockComponent,
5084 v->UrgentLatencyAdjustmentFabricClockReference,
5087 for (i = 0; i < v->soc.num_states; ++i) {
5088 for (j = 0; j <= 1; ++j) {
5089 double VMDataOnlyReturnBWPerState;
5090 double HostVMInefficiencyFactor = 1;
5091 int NextPrefetchModeState = MinPrefetchMode;
5092 bool UnboundedRequestEnabledThisState = false;
5093 int CompressedBufferSizeInkByteThisState = 0;
5096 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5098 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5099 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5100 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5101 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5104 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5105 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5106 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5107 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5108 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5109 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5110 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5111 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5112 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5113 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5116 VMDataOnlyReturnBWPerState = dml_min(
5118 v->ReturnBusWidth * v->DCFCLKState[i][j],
5119 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5120 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5121 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5122 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5123 if (v->GPUVMEnable && v->HostVMEnable)
5124 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5126 v->ExtraLatency = CalculateExtraLatency(
5127 v->RoundTripPingLatencyCycles,
5129 v->DCFCLKState[i][j],
5130 v->TotalNumberOfActiveDPP[i][j],
5131 v->PixelChunkSizeInKByte,
5132 v->TotalNumberOfDCCActiveDPP[i][j],
5134 v->ReturnBWPerState[i][j],
5137 v->NumberOfActivePlanes,
5138 v->NoOfDPPThisState,
5139 v->dpte_group_bytes,
5140 HostVMInefficiencyFactor,
5141 v->HostVMMinPageSize,
5142 v->HostVMMaxNonCachedPageTableLevels);
5144 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5146 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5147 v->MaxVStartup = v->NextMaxVStartup;
5149 v->TWait = CalculateTWait(
5150 v->PrefetchModePerState[i][j],
5151 v->DRAMClockChangeLatency,
5153 v->SREnterPlusExitTime);
5155 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5156 CalculatePrefetchSchedulePerPlane(mode_lib,
5157 HostVMInefficiencyFactor,
5161 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5162 CalculateUrgentBurstFactor(
5163 v->swath_width_luma_ub_this_state[k],
5164 v->swath_width_chroma_ub_this_state[k],
5165 v->SwathHeightYThisState[k],
5166 v->SwathHeightCThisState[k],
5167 v->HTotal[k] / v->PixelClock[k],
5169 v->CursorBufferSize,
5170 v->CursorWidth[k][0],
5172 v->VRatioPreY[i][j][k],
5173 v->VRatioPreC[i][j][k],
5174 v->BytePerPixelInDETY[k],
5175 v->BytePerPixelInDETC[k],
5176 v->DETBufferSizeYThisState[k],
5177 v->DETBufferSizeCThisState[k],
5178 &v->UrgentBurstFactorCursorPre[k],
5179 &v->UrgentBurstFactorLumaPre[k],
5180 &v->UrgentBurstFactorChroma[k],
5181 &v->NotUrgentLatencyHidingPre[k]);
5184 v->MaximumReadBandwidthWithPrefetch = 0.0;
5185 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5186 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5187 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5189 v->MaximumReadBandwidthWithPrefetch =
5190 v->MaximumReadBandwidthWithPrefetch
5192 v->VActivePixelBandwidth[i][j][k]
5193 + v->VActiveCursorBandwidth[i][j][k]
5194 + v->NoOfDPP[i][j][k]
5195 * (v->meta_row_bandwidth[i][j][k]
5196 + v->dpte_row_bandwidth[i][j][k]),
5197 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5199 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5200 * v->UrgentBurstFactorLumaPre[k]
5201 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5202 * v->UrgentBurstFactorChromaPre[k])
5203 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5206 v->NotEnoughUrgentLatencyHidingPre = false;
5207 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5208 if (v->NotUrgentLatencyHidingPre[k] == true) {
5209 v->NotEnoughUrgentLatencyHidingPre = true;
5213 v->PrefetchSupported[i][j] = true;
5214 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5215 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5216 v->PrefetchSupported[i][j] = false;
5218 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5219 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5220 || v->NoTimeForPrefetch[i][j][k] == true) {
5221 v->PrefetchSupported[i][j] = false;
5225 v->DynamicMetadataSupported[i][j] = true;
5226 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5227 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5228 v->DynamicMetadataSupported[i][j] = false;
5232 v->VRatioInPrefetchSupported[i][j] = true;
5233 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5234 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5235 v->VRatioInPrefetchSupported[i][j] = false;
5238 v->AnyLinesForVMOrRowTooLarge = false;
5239 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5240 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5241 v->AnyLinesForVMOrRowTooLarge = true;
5245 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5247 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5248 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5249 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5250 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5252 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5254 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5255 * v->UrgentBurstFactorLumaPre[k]
5256 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5257 * v->UrgentBurstFactorChromaPre[k])
5258 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5260 v->TotImmediateFlipBytes = 0.0;
5261 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5262 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5263 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5264 + v->DPTEBytesPerRow[i][j][k];
5267 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5268 CalculateFlipSchedule(
5271 HostVMInefficiencyFactor,
5274 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5275 v->MetaRowBytes[i][j][k],
5276 v->DPTEBytesPerRow[i][j][k]);
5278 v->total_dcn_read_bw_with_flip = 0.0;
5279 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5280 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5282 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5283 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5284 + v->VActiveCursorBandwidth[i][j][k],
5286 * (v->final_flip_bw[k]
5287 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5288 * v->UrgentBurstFactorLumaPre[k]
5289 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5290 * v->UrgentBurstFactorChromaPre[k])
5291 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5293 v->ImmediateFlipSupportedForState[i][j] = true;
5294 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5295 v->ImmediateFlipSupportedForState[i][j] = false;
5297 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5298 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5299 v->ImmediateFlipSupportedForState[i][j] = false;
5303 v->ImmediateFlipSupportedForState[i][j] = false;
5306 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5307 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5308 NextPrefetchModeState = NextPrefetchModeState + 1;
5310 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5312 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5313 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5314 && ((v->HostVMEnable == false &&
5315 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5316 || v->ImmediateFlipSupportedForState[i][j] == true))
5317 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5319 CalculateUnboundedRequestAndCompressedBufferSize(
5320 v->DETBufferSizeInKByte[0],
5321 v->ConfigReturnBufferSizeInKByte,
5322 v->UseUnboundedRequesting,
5323 v->TotalNumberOfActiveDPP[i][j],
5326 v->CompressedBufferSegmentSizeInkByte,
5328 &UnboundedRequestEnabledThisState,
5329 &CompressedBufferSizeInkByteThisState);
5331 CalculateWatermarksAndDRAMSpeedChangeSupport(
5333 v->PrefetchModePerState[i][j],
5334 v->DCFCLKState[i][j],
5335 v->ReturnBWPerState[i][j],
5338 v->SOCCLKPerState[i],
5339 v->ProjectedDCFCLKDeepSleep[i][j],
5340 v->DETBufferSizeYThisState,
5341 v->DETBufferSizeCThisState,
5342 v->SwathHeightYThisState,
5343 v->SwathHeightCThisState,
5344 v->SwathWidthYThisState,
5345 v->SwathWidthCThisState,
5346 v->NoOfDPPThisState,
5347 v->BytePerPixelInDETY,
5348 v->BytePerPixelInDETC,
5349 UnboundedRequestEnabledThisState,
5350 CompressedBufferSizeInkByteThisState,
5351 &v->DRAMClockChangeSupport[i][j],
5359 /*PTE Buffer Size Check*/
5360 for (i = 0; i < v->soc.num_states; i++) {
5361 for (j = 0; j < 2; j++) {
5362 v->PTEBufferSizeNotExceeded[i][j] = true;
5363 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5364 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5365 v->PTEBufferSizeNotExceeded[i][j] = false;
5371 /*Cursor Support Check*/
5372 v->CursorSupport = true;
5373 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5374 if (v->CursorWidth[k][0] > 0.0) {
5375 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5376 v->CursorSupport = false;
5381 /*Valid Pitch Check*/
5382 v->PitchSupport = true;
5383 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5384 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5385 if (v->DCCEnable[k] == true) {
5386 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5388 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5390 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5391 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5392 && v->SourcePixelFormat[k] != dm_mono_8) {
5393 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5394 if (v->DCCEnable[k] == true) {
5395 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5396 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5397 64.0 * v->Read256BlockWidthC[k]);
5399 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5402 v->AlignedCPitch[k] = v->PitchC[k];
5403 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5405 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5406 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5407 v->PitchSupport = false;
5411 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5412 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5413 ViewportExceedsSurface = true;
5414 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5415 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5416 && v->SourcePixelFormat[k] != dm_rgbe) {
5417 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5418 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5419 ViewportExceedsSurface = true;
5425 /*Mode Support, Voltage State and SOC Configuration*/
5426 for (i = v->soc.num_states - 1; i >= 0; i--) {
5427 for (j = 0; j < 2; j++) {
5428 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5429 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5430 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5431 && v->DTBCLKRequiredMoreThanSupported[i] == false
5432 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5433 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5434 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5435 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5436 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5437 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5438 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5439 && ((v->HostVMEnable == false
5440 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5441 || v->ImmediateFlipSupportedForState[i][j] == true)
5442 && FMTBufferExceeded == false) {
5443 v->ModeSupport[i][j] = true;
5445 v->ModeSupport[i][j] = false;
5451 unsigned int MaximumMPCCombine = 0;
5452 for (i = v->soc.num_states; i >= 0; i--) {
5453 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5454 v->VoltageLevel = i;
5455 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5456 if (v->ModeSupport[i][0] == true) {
5457 MaximumMPCCombine = 0;
5459 MaximumMPCCombine = 1;
5463 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5464 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5465 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5466 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5468 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5469 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5470 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5471 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5472 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5473 v->maxMpcComb = MaximumMPCCombine;
5477 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5478 struct display_mode_lib *mode_lib,
5479 unsigned int PrefetchMode,
5482 double UrgentLatency,
5483 double ExtraLatency,
5485 double DCFCLKDeepSleep,
5486 unsigned int DETBufferSizeY[],
5487 unsigned int DETBufferSizeC[],
5488 unsigned int SwathHeightY[],
5489 unsigned int SwathHeightC[],
5490 double SwathWidthY[],
5491 double SwathWidthC[],
5492 unsigned int DPPPerPlane[],
5493 double BytePerPixelDETY[],
5494 double BytePerPixelDETC[],
5495 bool UnboundedRequestEnabled,
5496 int unsigned CompressedBufferSizeInkByte,
5497 enum clock_change_support *DRAMClockChangeSupport,
5498 double *StutterExitWatermark,
5499 double *StutterEnterPlusExitWatermark,
5500 double *Z8StutterExitWatermark,
5501 double *Z8StutterEnterPlusExitWatermark)
5503 struct vba_vars_st *v = &mode_lib->vba;
5504 double EffectiveLBLatencyHidingY;
5505 double EffectiveLBLatencyHidingC;
5506 double LinesInDETY[DC__NUM_DPP__MAX];
5508 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5509 unsigned int LinesInDETCRoundedDownToSwath;
5510 double FullDETBufferingTimeY;
5511 double FullDETBufferingTimeC;
5512 double ActiveDRAMClockChangeLatencyMarginY;
5513 double ActiveDRAMClockChangeLatencyMarginC;
5514 double WritebackDRAMClockChangeLatencyMargin;
5515 double PlaneWithMinActiveDRAMClockChangeMargin;
5516 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5517 double WritebackDRAMClockChangeLatencyHiding;
5518 double TotalPixelBW = 0.0;
5521 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5523 #ifdef __DML_VBA_DEBUG__
5524 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5525 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5526 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5529 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5531 #ifdef __DML_VBA_DEBUG__
5532 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5533 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5536 v->TotalActiveWriteback = 0;
5537 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5538 if (v->WritebackEnable[k] == true) {
5539 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5543 if (v->TotalActiveWriteback <= 1) {
5544 v->WritebackUrgentWatermark = v->WritebackLatency;
5546 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5549 if (v->TotalActiveWriteback <= 1) {
5550 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5552 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5555 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5556 TotalPixelBW = TotalPixelBW
5557 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5558 / (v->HTotal[k] / v->PixelClock[k]);
5561 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5562 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5564 v->LBLatencyHidingSourceLinesY = dml_min(
5565 (double) v->MaxLineBufferLines,
5566 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5568 v->LBLatencyHidingSourceLinesC = dml_min(
5569 (double) v->MaxLineBufferLines,
5570 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5572 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5574 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5576 if (UnboundedRequestEnabled) {
5577 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5578 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5581 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5582 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5583 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5584 if (BytePerPixelDETC[k] > 0) {
5585 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5586 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5587 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5590 FullDETBufferingTimeC = 999999;
5593 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5594 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5596 if (v->NumberOfActivePlanes > 1) {
5597 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5598 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5601 if (BytePerPixelDETC[k] > 0) {
5602 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5603 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5605 if (v->NumberOfActivePlanes > 1) {
5606 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5607 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5609 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5611 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5614 if (v->WritebackEnable[k] == true) {
5615 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5616 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5617 if (v->WritebackPixelFormat[k] == dm_444_64) {
5618 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5620 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5621 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5625 v->MinActiveDRAMClockChangeMargin = 999999;
5626 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5627 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5628 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5629 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5630 if (v->BlendingAndTiming[k] == k) {
5631 PlaneWithMinActiveDRAMClockChangeMargin = k;
5633 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5634 if (v->BlendingAndTiming[k] == j) {
5635 PlaneWithMinActiveDRAMClockChangeMargin = j;
5642 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5644 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5645 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5646 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5647 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5648 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5652 v->TotalNumberOfActiveOTG = 0;
5654 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5655 if (v->BlendingAndTiming[k] == k) {
5656 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5660 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5661 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5662 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5663 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5664 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5666 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5669 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5670 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5671 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5672 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5674 #ifdef __DML_VBA_DEBUG__
5675 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5676 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5677 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5678 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
/*
 * CalculateDCFCLKDeepSleep - compute the deep-sleep DCFCLK value.
 *
 * For every plane k in [0, NumberOfActivePlanes) a per-plane value is stored
 * in v->DCFCLKDeepSleepPerPlane[k] (v is &mode_lib->vba). The value written
 * to *DCFCLKDeepSleep is the maximum of:
 *   - 8.0,
 *   - __DML_MIN_DCFCLK_FACTOR__ * (sum over planes of ReadBandwidthLuma[k] +
 *     ReadBandwidthChroma[k]) / ReturnBusWidth,
 *   - every v->DCFCLKDeepSleepPerPlane[k].
 *
 * NOTE(review): VRatio, HRatio, DPPCLK, ReturnBusWidth and the index k are
 * used below but their declarations are not among the lines visible here
 * (the embedded line numbers have gaps where they would sit); presumably
 * they are parameters/locals on the omitted lines -- confirm against the
 * full file. The same gaps appear where braces and else lines would sit,
 * so branch structure described below is inferred where marked.
 */
5682 static void CalculateDCFCLKDeepSleep(
5683 struct display_mode_lib *mode_lib,
5684 unsigned int NumberOfActivePlanes,
5685 int BytePerPixelY[],
5686 int BytePerPixelC[],
5688 double VRatioChroma[],
5689 double SwathWidthY[],
5690 double SwathWidthC[],
5691 unsigned int DPPPerPlane[],
5693 double HRatioChroma[],
5694 double PixelClock[],
5695 double PSCL_THROUGHPUT[],
5696 double PSCL_THROUGHPUT_CHROMA[],
5698 double ReadBandwidthLuma[],
5699 double ReadBandwidthChroma[],
5701 double *DCFCLKDeepSleep)
5703 struct vba_vars_st *v = &mode_lib->vba;
5704 double DisplayPipeLineDeliveryTimeLuma;
5705 double DisplayPipeLineDeliveryTimeChroma;
5706 double ReadBandwidth = 0.0;
// Pass 1: per-plane line delivery times and the per-plane deep-sleep value.
5709 for (k = 0; k < NumberOfActivePlanes; ++k) {
// Luma delivery time. The first formula is guarded by VRatio[k] <= 1; the
// second (PSCL throughput and DPPCLK based) is presumably the else branch.
5711 if (VRatio[k] <= 1) {
5712 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5714 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
// Chroma delivery time: 0 when the plane has no chroma bytes, otherwise the
// same two formulas as luma using the chroma ratios/throughput.
5716 if (BytePerPixelC[k] == 0) {
5717 DisplayPipeLineDeliveryTimeChroma = 0;
5719 if (VRatioChroma[k] <= 1) {
5720 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5722 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Per-plane value: with chroma, the larger of the luma and chroma terms
// (each swath bytes / 32.0 / delivery time); the luma-only formula below
// divides by 64.0 instead (presumably the else branch).
5726 if (BytePerPixelC[k] > 0) {
5727 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5728 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5730 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
// Floor the per-plane value at PixelClock / 16.
5732 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
// Pass 2: total read bandwidth over all planes (luma + chroma).
5736 for (k = 0; k < NumberOfActivePlanes; ++k) {
5737 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
// Bandwidth-based lower bound, never below 8.0.
5740 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
// Pass 3: raise the result to the largest per-plane requirement.
5742 for (k = 0; k < NumberOfActivePlanes; ++k) {
5743 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
/*
 * CalculateUrgentBurstFactor - compute the cursor, luma and chroma urgent
 * burst factors for one plane.
 *
 * For each of the three buffers a "buffer size in time" T is derived from
 * the number of lines it holds, LineTime and VRatio. When
 * T - UrgentLatency <= 0 the factor is set to 0 and
 * *NotEnoughUrgentLatencyHiding is set to 1; otherwise the factor is
 * T / (T - UrgentLatency). Each factor also has an assignment to 1 below.
 *
 * Outputs: *UrgentBurstFactorCursor, *UrgentBurstFactorLuma,
 * *UrgentBurstFactorChroma, *NotEnoughUrgentLatencyHiding (cleared to 0 on
 * entry).
 *
 * NOTE(review): LineTime and VRatio are used below but are not declared on
 * any line visible here; the embedded line numbers skip the positions where
 * they, and the braces / else / guarding conditions, would sit. Presumably
 * the "= 1" assignments are else branches of omitted or visible guards --
 * confirm against the full file.
 */
5747 static void CalculateUrgentBurstFactor(
5748 int swath_width_luma_ub,
5749 int swath_width_chroma_ub,
5750 unsigned int SwathHeightY,
5751 unsigned int SwathHeightC,
5753 double UrgentLatency,
5754 double CursorBufferSize,
5755 unsigned int CursorWidth,
5756 unsigned int CursorBPP,
5759 double BytePerPixelInDETY,
5760 double BytePerPixelInDETC,
5761 double DETBufferSizeY,
5762 double DETBufferSizeC,
5763 double *UrgentBurstFactorCursor,
5764 double *UrgentBurstFactorLuma,
5765 double *UrgentBurstFactorChroma,
5766 bool *NotEnoughUrgentLatencyHiding)
5768 double LinesInDETLuma;
5769 double LinesInDETChroma;
5770 unsigned int LinesInCursorBuffer;
5771 double CursorBufferSizeInTime;
5772 double DETBufferSizeInTimeLuma;
5773 double DETBufferSizeInTimeChroma;
5775 *NotEnoughUrgentLatencyHiding = 0;
// Cursor: only evaluated when a cursor is present (CursorWidth > 0).
5777 if (CursorWidth > 0) {
// Lines in the cursor buffer, rounded down to a power of two:
// 1 << floor(log2(CursorBufferSize * 1024 / (CursorWidth * CursorBPP / 8))).
5778 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
5780 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
5781 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
5782 *NotEnoughUrgentLatencyHiding = 1;
5783 *UrgentBurstFactorCursor = 0;
5785 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
5788 *UrgentBurstFactorCursor = 1;
// Luma: lines held in the luma DET buffer, rounded down to a multiple of
// SwathHeightY before being converted to time.
5792 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
5794 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
5795 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
5796 *NotEnoughUrgentLatencyHiding = 1;
5797 *UrgentBurstFactorLuma = 0;
5799 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
5802 *UrgentBurstFactorLuma = 1;
// Chroma: only evaluated when the plane has chroma bytes in DET. Note the
// time conversion divides by VRatio, the same divisor used for luma.
5805 if (BytePerPixelInDETC > 0) {
5806 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
5808 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
5809 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
5810 *NotEnoughUrgentLatencyHiding = 1;
5811 *UrgentBurstFactorChroma = 0;
5813 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
5816 *UrgentBurstFactorChroma = 1;
/*
 * CalculatePixelDeliveryTimes - fill the per-plane line, request and cursor
 * delivery-time arrays.
 *
 * Three passes over k in [0, NumberOfActivePlanes):
 *   1. DisplayPipeLineDeliveryTime{Luma,Chroma}[k] and their *Prefetch
 *      variants, selected by VRatio / VRatioChroma / VRatioPrefetchY /
 *      VRatioPrefetchC being <= 1 (chroma entries are 0 when
 *      BytePerPixelC[k] == 0).
 *   2. DisplayPipeRequestDeliveryTime*[k] = line delivery time divided by
 *      the number of requests per swath (swath width / 256-byte block width,
 *      or block height when SourceScan[k] is dm_vert).
 *   3. CursorRequestDeliveryTime[k] and CursorRequestDeliveryTimePrefetch[k]
 *      from cursor 0 of the plane; both are 0 when NumberOfCursors[k] is 0.
 *
 * NOTE(review): VRatio, HRatio, DPPCLK and the index k are used below but
 * are not declared on any line visible here; the embedded line numbers skip
 * the positions where they, and the braces / else / #endif lines, would sit.
 * Branch structure described here is inferred from the visible assignments
 * -- confirm against the full file.
 */
5821 static void CalculatePixelDeliveryTimes(
5822 unsigned int NumberOfActivePlanes,
5824 double VRatioChroma[],
5825 double VRatioPrefetchY[],
5826 double VRatioPrefetchC[],
5827 unsigned int swath_width_luma_ub[],
5828 unsigned int swath_width_chroma_ub[],
5829 unsigned int DPPPerPlane[],
5831 double HRatioChroma[],
5832 double PixelClock[],
5833 double PSCL_THROUGHPUT[],
5834 double PSCL_THROUGHPUT_CHROMA[],
5836 int BytePerPixelC[],
5837 enum scan_direction_class SourceScan[],
5838 unsigned int NumberOfCursors[],
5839 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5840 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5841 unsigned int BlockWidth256BytesY[],
5842 unsigned int BlockHeight256BytesY[],
5843 unsigned int BlockWidth256BytesC[],
5844 unsigned int BlockHeight256BytesC[],
5845 double DisplayPipeLineDeliveryTimeLuma[],
5846 double DisplayPipeLineDeliveryTimeChroma[],
5847 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5848 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5849 double DisplayPipeRequestDeliveryTimeLuma[],
5850 double DisplayPipeRequestDeliveryTimeChroma[],
5851 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5852 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5853 double CursorRequestDeliveryTime[],
5854 double CursorRequestDeliveryTimePrefetch[])
5856 double req_per_swath_ub;
// Pass 1: line delivery times (active and prefetch), luma then chroma.
5859 for (k = 0; k < NumberOfActivePlanes; ++k) {
5860 if (VRatio[k] <= 1) {
5861 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5863 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5866 if (BytePerPixelC[k] == 0) {
5867 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5869 if (VRatioChroma[k] <= 1) {
5870 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5872 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Prefetch variants use the same formulas, selected by the prefetch ratios.
5876 if (VRatioPrefetchY[k] <= 1) {
5877 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5879 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5882 if (BytePerPixelC[k] == 0) {
5883 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5885 if (VRatioPrefetchC[k] <= 1) {
5886 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5888 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
// Pass 2: per-request delivery times. req_per_swath_ub is computed with
// unsigned integer division (both operands are unsigned int arrays).
5893 for (k = 0; k < NumberOfActivePlanes; ++k) {
5894 if (SourceScan[k] != dm_vert) {
5895 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5897 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5899 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5900 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5901 if (BytePerPixelC[k] == 0) {
5902 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5903 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5905 if (SourceScan[k] != dm_vert) {
5906 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5908 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5910 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5911 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5913 #ifdef __DML_VBA_DEBUG__
5914 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5915 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5916 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5917 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5918 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5919 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5920 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5921 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5922 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5923 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5924 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5925 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
// Pass 3: cursor request delivery times, based on cursor 0 of each plane.
5929 for (k = 0; k < NumberOfActivePlanes; ++k) {
5930 int cursor_req_per_width;
// Requests per cursor line = ceil(width * bpp / 256 / 8). The division must
// be done in floating point: with unsigned integer operands the quotient is
// truncated before dml_ceil sees it, so the ceiling never rounds up and any
// cursor with width * bpp < 2048 yields 0, which is then used as a divisor
// in the delivery-time formulas below.
5931 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 256.0 / 8.0, 1.0);
5932 if (NumberOfCursors[k] > 0) {
5933 if (VRatio[k] <= 1) {
5934 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5936 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
5938 if (VRatioPrefetchY[k] <= 1) {
5939 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
5941 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
// No cursor on this plane: both delivery times are reported as 0.
5944 CursorRequestDeliveryTime[k] = 0;
5945 CursorRequestDeliveryTimePrefetch[k] = 0;
5947 #ifdef __DML_VBA_DEBUG__
5948 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
5949 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
5950 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
/*
 * CalculateMetaAndPTETimes - fill the per-plane meta-row / PTE-row timing
 * arrays.
 *
 * Three passes over k in [0, NumberOfActivePlanes):
 *   1. DST_Y_PER_PTE_ROW_NOM_{L,C}[k] and DST_Y_PER_META_ROW_NOM_{L,C}[k] =
 *      row height / vertical ratio (chroma entries are 0 when
 *      BytePerPixelC[k] == 0).
 *   2. When DCCEnable[k] is true: the number of meta chunks per row (upper
 *      bound) and from it TimePerMetaChunk{Nominal,VBlank,Flip}[k] and the
 *      TimePerChromaMetaChunk* equivalents. All six are also assigned 0
 *      below (presumably the DCC-disabled branch).
 *   3. When GPUVMEnable is true: the number of dpte groups per row (upper
 *      bound) and from it time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k].
 *      All six are also assigned 0 below (presumably the GPUVM-disabled
 *      branch).
 *
 * NOTE(review): GPUVMEnable, MetaChunkSize, DCCEnable, VRatio, HTotal and
 * the index k are used below but are not declared on any line visible here;
 * the embedded line numbers skip the positions where they, and the braces /
 * else lines, would sit. Branch structure is inferred from the visible
 * assignments -- confirm against the full file.
 */
5955 static void CalculateMetaAndPTETimes(
5956 int NumberOfActivePlanes,
5959 int MinMetaChunkSizeBytes,
5962 double VRatioChroma[],
5963 double DestinationLinesToRequestRowInVBlank[],
5964 double DestinationLinesToRequestRowInImmediateFlip[],
5966 double PixelClock[],
5967 int BytePerPixelY[],
5968 int BytePerPixelC[],
5969 enum scan_direction_class SourceScan[],
5970 int dpte_row_height[],
5971 int dpte_row_height_chroma[],
5972 int meta_row_width[],
5973 int meta_row_width_chroma[],
5974 int meta_row_height[],
5975 int meta_row_height_chroma[],
5976 int meta_req_width[],
5977 int meta_req_width_chroma[],
5978 int meta_req_height[],
5979 int meta_req_height_chroma[],
5980 int dpte_group_bytes[],
5981 int PTERequestSizeY[],
5982 int PTERequestSizeC[],
5983 int PixelPTEReqWidthY[],
5984 int PixelPTEReqHeightY[],
5985 int PixelPTEReqWidthC[],
5986 int PixelPTEReqHeightC[],
5987 int dpte_row_width_luma_ub[],
5988 int dpte_row_width_chroma_ub[],
5989 double DST_Y_PER_PTE_ROW_NOM_L[],
5990 double DST_Y_PER_PTE_ROW_NOM_C[],
5991 double DST_Y_PER_META_ROW_NOM_L[],
5992 double DST_Y_PER_META_ROW_NOM_C[],
5993 double TimePerMetaChunkNominal[],
5994 double TimePerChromaMetaChunkNominal[],
5995 double TimePerMetaChunkVBlank[],
5996 double TimePerChromaMetaChunkVBlank[],
5997 double TimePerMetaChunkFlip[],
5998 double TimePerChromaMetaChunkFlip[],
5999 double time_per_pte_group_nom_luma[],
6000 double time_per_pte_group_vblank_luma[],
6001 double time_per_pte_group_flip_luma[],
6002 double time_per_pte_group_nom_chroma[],
6003 double time_per_pte_group_vblank_chroma[],
6004 double time_per_pte_group_flip_chroma[])
6006 unsigned int meta_chunk_width;
6007 unsigned int min_meta_chunk_width;
6008 unsigned int meta_chunk_per_row_int;
6009 unsigned int meta_row_remainder;
6010 unsigned int meta_chunk_threshold;
6011 unsigned int meta_chunks_per_row_ub;
6012 unsigned int meta_chunk_width_chroma;
6013 unsigned int min_meta_chunk_width_chroma;
6014 unsigned int meta_chunk_per_row_int_chroma;
6015 unsigned int meta_row_remainder_chroma;
6016 unsigned int meta_chunk_threshold_chroma;
6017 unsigned int meta_chunks_per_row_ub_chroma;
6018 unsigned int dpte_group_width_luma;
6019 unsigned int dpte_groups_per_row_luma_ub;
6020 unsigned int dpte_group_width_chroma;
6021 unsigned int dpte_groups_per_row_chroma_ub;
// Pass 1: destination lines per PTE row and per meta row (row height scaled
// by the vertical ratio); chroma is 0 for planes without chroma bytes.
6024 for (k = 0; k < NumberOfActivePlanes; ++k) {
6025 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6026 if (BytePerPixelC[k] == 0) {
6027 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6029 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6031 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6032 if (BytePerPixelC[k] == 0) {
6033 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6035 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
// Pass 2: time per meta chunk, only meaningful with DCC enabled.
6039 for (k = 0; k < NumberOfActivePlanes; ++k) {
6040 if (DCCEnable[k] == true) {
// Chunk widths derived from the chunk size, bytes per pixel and meta row
// height; the row is split into whole chunks plus a remainder.
6041 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6042 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6043 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6044 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
// Threshold uses the meta request width, or the request height on the
// following assignment (presumably the dm_vert branch).
6045 if (SourceScan[k] != dm_vert) {
6046 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6048 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
// Upper bound on chunks per row: whole chunks + 1 when the remainder is
// within the threshold, otherwise + 2.
6050 if (meta_row_remainder <= meta_chunk_threshold) {
6051 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6053 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
// Time per chunk = lines available * line time (HTotal / PixelClock) / chunks.
6055 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6056 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6057 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6058 if (BytePerPixelC[k] == 0) {
6059 TimePerChromaMetaChunkNominal[k] = 0;
6060 TimePerChromaMetaChunkVBlank[k] = 0;
6061 TimePerChromaMetaChunkFlip[k] = 0;
// Chroma: same computation with the chroma inputs.
6063 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6064 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
// The double quotient is truncated on assignment to the unsigned int.
6065 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6066 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6067 if (SourceScan[k] != dm_vert) {
6068 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6070 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6072 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6073 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6075 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6077 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6078 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6079 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
// All meta chunk times zeroed (presumably when DCC is disabled for the plane).
6082 TimePerMetaChunkNominal[k] = 0;
6083 TimePerMetaChunkVBlank[k] = 0;
6084 TimePerMetaChunkFlip[k] = 0;
6085 TimePerChromaMetaChunkNominal[k] = 0;
6086 TimePerChromaMetaChunkVBlank[k] = 0;
6087 TimePerChromaMetaChunkFlip[k] = 0;
// Pass 3: time per PTE group, only meaningful with GPUVM enabled.
6091 for (k = 0; k < NumberOfActivePlanes; ++k) {
6092 if (GPUVMEnable == true) {
// Group width = (group bytes / PTE request size) * PTE request width, or
// request height on the following assignment (presumably the dm_vert branch).
6093 if (SourceScan[k] != dm_vert) {
6094 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6096 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
// Groups per row rounded up; the 1.0 factor forces floating-point division.
6098 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6099 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6100 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6101 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6102 if (BytePerPixelC[k] == 0) {
6103 time_per_pte_group_nom_chroma[k] = 0;
6104 time_per_pte_group_vblank_chroma[k] = 0;
6105 time_per_pte_group_flip_chroma[k] = 0;
6107 if (SourceScan[k] != dm_vert) {
6108 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6110 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6112 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6113 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6114 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6115 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
// All PTE group times zeroed (presumably when GPUVM is disabled).
6118 time_per_pte_group_nom_luma[k] = 0;
6119 time_per_pte_group_vblank_luma[k] = 0;
6120 time_per_pte_group_flip_luma[k] = 0;
6121 time_per_pte_group_nom_chroma[k] = 0;
6122 time_per_pte_group_vblank_chroma[k] = 0;
6123 time_per_pte_group_flip_chroma[k] = 0;
/*
 * CalculateVMGroupAndRequestTimes - fill the per-plane VM group / VM request
 * timing arrays for vblank and immediate flip.
 *
 * For each plane k in [0, NumberOfActivePlanes), when GPUVMEnable is true
 * and (DCCEnable[k] is true or GPUVMMaxPageTableLevels > 1):
 *   - num_group_per_lower_vm_stage is built from ceil(bytes / vm_group_bytes)
 *     terms over the dpde0 and/or meta PTE byte counts;
 *   - num_req_per_lower_vm_stage is built from the same byte counts / 64;
 *   - each output is DestinationLinesToRequestVM* x line time
 *     (HTotal / PixelClock) divided by the matching count, and halved when
 *     GPUVMMaxPageTableLevels > 2.
 * All four outputs are also assigned 0 below (presumably the branch taken
 * when the condition above is false).
 *
 * Which byte counts contribute is selected by DCCEnable[k],
 * GPUVMMaxPageTableLevels == 1 and BytePerPixelC[k] > 0.
 *
 * NOTE(review): GPUVMEnable, DCCEnable and the index k are used below but
 * are not declared on any line visible here; the embedded line numbers skip
 * the positions where they, and the braces / else lines, would sit. Branch
 * nesting is inferred from the visible conditions and assignments --
 * confirm against the full file.
 */
6128 static void CalculateVMGroupAndRequestTimes(
6129 unsigned int NumberOfActivePlanes,
6131 unsigned int GPUVMMaxPageTableLevels,
6132 unsigned int HTotal[],
6133 int BytePerPixelC[],
6134 double DestinationLinesToRequestVMInVBlank[],
6135 double DestinationLinesToRequestVMInImmediateFlip[],
6137 double PixelClock[],
6138 int dpte_row_width_luma_ub[],
6139 int dpte_row_width_chroma_ub[],
6140 int vm_group_bytes[],
6141 unsigned int dpde0_bytes_per_frame_ub_l[],
6142 unsigned int dpde0_bytes_per_frame_ub_c[],
6143 int meta_pte_bytes_per_frame_ub_l[],
6144 int meta_pte_bytes_per_frame_ub_c[],
6145 double TimePerVMGroupVBlank[],
6146 double TimePerVMGroupFlip[],
6147 double TimePerVMRequestVBlank[],
6148 double TimePerVMRequestFlip[])
6150 int num_group_per_lower_vm_stage;
6151 int num_req_per_lower_vm_stage;
6154 for (k = 0; k < NumberOfActivePlanes; ++k) {
6155 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
// --- Group count ---
// DCC off: only the dpde0 byte counts contribute (luma, plus chroma when
// the plane has chroma bytes).
6156 if (DCCEnable[k] == false) {
6157 if (BytePerPixelC[k] > 0) {
6158 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6159 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6161 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Single page-table level: only the meta PTE byte counts contribute.
6164 if (GPUVMMaxPageTableLevels == 1) {
6165 if (BytePerPixelC[k] > 0) {
6166 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6167 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6169 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// Remaining case: dpde0 and meta PTE counts both contribute, plus a
// constant 2 (with chroma) or 1 (luma only).
6172 if (BytePerPixelC[k] > 0) {
6173 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6174 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
6175 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6176 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
6178 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
6179 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
// --- Request count ---
// Same case split as above, using byte count / 64 (integer division).
6184 if (DCCEnable[k] == false) {
6185 if (BytePerPixelC[k] > 0) {
6186 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
6188 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
6191 if (GPUVMMaxPageTableLevels == 1) {
6192 if (BytePerPixelC[k] > 0) {
6193 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6195 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
6198 if (BytePerPixelC[k] > 0) {
6199 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
6200 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
6202 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
// Time per group / per request = destination lines * line time / count.
6207 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6208 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
6209 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
6210 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
// More than two page-table levels: all four times are halved.
6212 if (GPUVMMaxPageTableLevels > 2) {
6213 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
6214 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
6215 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
6216 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
// All four times zeroed (presumably when the outer condition is false).
6220 TimePerVMGroupVBlank[k] = 0;
6221 TimePerVMGroupFlip[k] = 0;
6222 TimePerVMRequestVBlank[k] = 0;
6223 TimePerVMRequestFlip[k] = 0;
6228 static void CalculateStutterEfficiency(
6229 struct display_mode_lib *mode_lib,
6230 int CompressedBufferSizeInkByte,
6231 bool UnboundedRequestEnabled,
6232 int ConfigReturnBufferSizeInKByte,
6233 int MetaFIFOSizeInKEntries,
6234 int ZeroSizeBufferEntries,
6235 int NumberOfActivePlanes,
6236 int ROBBufferSizeInKByte,
6237 double TotalDataReadBandwidth,
6240 double COMPBUF_RESERVED_SPACE_64B,
6241 double COMPBUF_RESERVED_SPACE_ZS,
6243 double SRExitZ8Time,
6244 bool SynchronizedVBlank,
6245 double Z8StutterEnterPlusExitWatermark,
6246 double StutterEnterPlusExitWatermark,
6247 bool ProgressiveToInterlaceUnitInOPP,
6249 double MinTTUVBlank[],
6251 unsigned int DETBufferSizeY[],
6252 int BytePerPixelY[],
6253 double BytePerPixelDETY[],
6254 double SwathWidthY[],
6257 double NetDCCRateLuma[],
6258 double NetDCCRateChroma[],
6259 double DCCFractionOfZeroSizeRequestsLuma[],
6260 double DCCFractionOfZeroSizeRequestsChroma[],
6263 double PixelClock[],
6265 enum scan_direction_class SourceScan[],
6266 int BlockHeight256BytesY[],
6267 int BlockWidth256BytesY[],
6268 int BlockHeight256BytesC[],
6269 int BlockWidth256BytesC[],
6270 int DCCYMaxUncompressedBlock[],
6271 int DCCCMaxUncompressedBlock[],
6274 bool WritebackEnable[],
6275 double ReadBandwidthPlaneLuma[],
6276 double ReadBandwidthPlaneChroma[],
6277 double meta_row_bw[],
6278 double dpte_row_bw[],
6279 double *StutterEfficiencyNotIncludingVBlank,
6280 double *StutterEfficiency,
6281 int *NumberOfStutterBurstsPerFrame,
6282 double *Z8StutterEfficiencyNotIncludingVBlank,
6283 double *Z8StutterEfficiency,
6284 int *Z8NumberOfStutterBurstsPerFrame,
6285 double *StutterPeriod)
6287 struct vba_vars_st *v = &mode_lib->vba;
6289 double DETBufferingTimeY;
6290 double SwathWidthYCriticalPlane = 0;
6291 double VActiveTimeCriticalPlane = 0;
6292 double FrameTimeCriticalPlane = 0;
6293 int BytePerPixelYCriticalPlane = 0;
6294 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6295 double MinTTUVBlankCriticalPlane = 0;
6296 double TotalCompressedReadBandwidth;
6297 double TotalRowReadBandwidth;
6298 double AverageDCCCompressionRate;
6299 double EffectiveCompressedBufferSize;
6300 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6301 double StutterBurstTime;
6302 int TotalActiveWriteback;
6304 double LinesInDETYRoundedDownToSwath;
6305 double MaximumEffectiveCompressionLuma;
6306 double MaximumEffectiveCompressionChroma;
6307 double TotalZeroSizeRequestReadBandwidth;
6308 double TotalZeroSizeCompressedReadBandwidth;
6309 double AverageDCCZeroSizeFraction;
6310 double AverageZeroSizeCompressionRate;
6311 int TotalNumberOfActiveOTG = 0;
6312 double LastStutterPeriod = 0.0;
6313 double LastZ8StutterPeriod = 0.0;
6316 TotalZeroSizeRequestReadBandwidth = 0;
6317 TotalZeroSizeCompressedReadBandwidth = 0;
6318 TotalRowReadBandwidth = 0;
6319 TotalCompressedReadBandwidth = 0;
6321 for (k = 0; k < NumberOfActivePlanes; ++k) {
6322 if (DCCEnable[k] == true) {
6323 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6324 || DCCYMaxUncompressedBlock[k] < 256) {
6325 MaximumEffectiveCompressionLuma = 2;
6327 MaximumEffectiveCompressionLuma = 4;
6329 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6330 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6331 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6332 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6333 if (ReadBandwidthPlaneChroma[k] > 0) {
6334 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6335 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6336 MaximumEffectiveCompressionChroma = 2;
6338 MaximumEffectiveCompressionChroma = 4;
6340 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6341 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6342 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6343 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6344 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6347 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6349 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6352 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6353 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6355 #ifdef __DML_VBA_DEBUG__
6356 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6357 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6358 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6359 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6360 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6361 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6362 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6363 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6366 if (AverageDCCZeroSizeFraction == 1) {
6367 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6368 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6369 } else if (AverageDCCZeroSizeFraction > 0) {
6370 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6371 EffectiveCompressedBufferSize = dml_min(
6372 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6373 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6374 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6375 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6376 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6378 "DML::%s: min 2 = %f\n",
6380 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6381 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6382 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6384 EffectiveCompressedBufferSize = dml_min(
6385 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6386 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6387 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6388 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6391 #ifdef __DML_VBA_DEBUG__
6392 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6393 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6394 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6398 for (k = 0; k < NumberOfActivePlanes; ++k) {
6399 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6400 / BytePerPixelDETY[k] / SwathWidthY[k];
6401 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6402 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6403 #ifdef __DML_VBA_DEBUG__
6404 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6405 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6406 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6407 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6408 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6409 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6410 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6411 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6412 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6413 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6414 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6415 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6418 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6419 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6421 *StutterPeriod = DETBufferingTimeY;
6422 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6423 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6424 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6425 SwathWidthYCriticalPlane = SwathWidthY[k];
6426 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6427 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6429 #ifdef __DML_VBA_DEBUG__
6430 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6431 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6432 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6433 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6434 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6435 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6436 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6441 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6442 #ifdef __DML_VBA_DEBUG__
6443 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6444 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6445 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6446 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6447 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6448 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6449 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6450 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6451 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6452 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6455 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6456 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6457 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6458 #ifdef __DML_VBA_DEBUG__
6459 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6460 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6461 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6462 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6463 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6465 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6468 "DML::%s: Time to finish residue swath=%f\n",
6470 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6472 TotalActiveWriteback = 0;
6473 for (k = 0; k < NumberOfActivePlanes; ++k) {
6474 if (WritebackEnable[k]) {
6475 TotalActiveWriteback = TotalActiveWriteback + 1;
6479 if (TotalActiveWriteback == 0) {
6480 #ifdef __DML_VBA_DEBUG__
6481 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6482 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6483 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6484 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6486 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6487 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6488 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6489 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6491 *StutterEfficiencyNotIncludingVBlank = 0.;
6492 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6493 *NumberOfStutterBurstsPerFrame = 0;
6494 *Z8NumberOfStutterBurstsPerFrame = 0;
6496 #ifdef __DML_VBA_DEBUG__
6497 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6498 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6499 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6500 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6501 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6504 for (k = 0; k < NumberOfActivePlanes; ++k) {
6505 if (v->BlendingAndTiming[k] == k) {
6506 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6510 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6511 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6513 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6514 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6515 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6517 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6520 *StutterEfficiency = 0;
6523 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6524 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6525 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6526 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6527 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6529 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6532 *Z8StutterEfficiency = 0.;
6535 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6536 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6537 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6538 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6539 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6540 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6541 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6542 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
/*
 * CalculateSwathAndDETConfiguration - pick per-plane swath heights, split the
 * DET buffer between luma and chroma, and report viewport support.
 *
 * Calls CalculateSwathWidth() first, which fills the local
 * SwathWidthSingleDPP*, MaximumSwathHeightY/C and the swath_width_*_ub
 * arrays. Then, for every plane k in [0, NumberOfActivePlanes):
 *  - derives MinimumSwathHeightY/C as either the full or half of
 *    MaximumSwathHeightY/C[k], depending on SourcePixelFormat[k],
 *    SurfaceTiling[k] and SourceScan[k];
 *  - computes swath sizes in bytes (swath_width_*_ub * BytePerPixDET* *
 *    swath height) for the maximum and minimum heights;
 *  - writes SwathHeightY[k] / SwathHeightC[k] from the first max/min
 *    combination whose luma + chroma bytes are <= DETBufferSizeInKByte * 1024 / 2;
 *  - writes DETBufferSizeY[k] / DETBufferSizeC[k];
 *  - writes ViewportSizeSupportPerPlane[k] and clears *ViewportSizeSupport
 *    when a plane fails the size/width checks.
 *
 * *ViewportSizeSupport is set to true before the plane loop.
 */
6545 static void CalculateSwathAndDETConfiguration(
6546 bool ForceSingleDPP,
6547 int NumberOfActivePlanes,
6548 unsigned int DETBufferSizeInKByte,
6549 double MaximumSwathWidthLuma[],
6550 double MaximumSwathWidthChroma[],
6551 enum scan_direction_class SourceScan[],
6552 enum source_format_class SourcePixelFormat[],
6553 enum dm_swizzle_mode SurfaceTiling[],
6554 int ViewportWidth[],
6555 int ViewportHeight[],
6556 int SurfaceWidthY[],
6557 int SurfaceWidthC[],
6558 int SurfaceHeightY[],
6559 int SurfaceHeightC[],
6560 int Read256BytesBlockHeightY[],
6561 int Read256BytesBlockHeightC[],
6562 int Read256BytesBlockWidthY[],
6563 int Read256BytesBlockWidthC[],
6564 enum odm_combine_mode ODMCombineEnabled[],
6565 int BlendingAndTiming[],
6568 double BytePerPixDETY[],
6569 double BytePerPixDETC[],
6572 double HRatioChroma[],
6574 int swath_width_luma_ub[],
6575 int swath_width_chroma_ub[],
6576 double SwathWidth[],
6577 double SwathWidthChroma[],
6580 unsigned int DETBufferSizeY[],
6581 unsigned int DETBufferSizeC[],
6582 bool ViewportSizeSupportPerPlane[],
6583 bool *ViewportSizeSupport)
6585 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6586 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6587 int MinimumSwathHeightY;
6588 int MinimumSwathHeightC;
6589 int RoundedUpMaxSwathSizeBytesY;
6590 int RoundedUpMaxSwathSizeBytesC;
6591 int RoundedUpMinSwathSizeBytesY;
6592 int RoundedUpMinSwathSizeBytesC;
6593 int RoundedUpSwathSizeBytesY;
6594 int RoundedUpSwathSizeBytesC;
6595 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6596 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
/* Fills the swath widths, maximum swath heights and swath_width_*_ub arrays used below. */
6599 CalculateSwathWidth(
6601 NumberOfActivePlanes,
6613 Read256BytesBlockHeightY,
6614 Read256BytesBlockHeightC,
6615 Read256BytesBlockWidthY,
6616 Read256BytesBlockWidthC,
6621 SwathWidthSingleDPP,
6622 SwathWidthSingleDPPChroma,
6625 MaximumSwathHeightY,
6626 MaximumSwathHeightC,
6627 swath_width_luma_ub,
6628 swath_width_chroma_ub);
6630 *ViewportSizeSupport = true;
6631 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Minimum swath heights. For the formats tested in the first condition the
 * luma minimum is the full or half maximum height and the chroma minimum is
 * the maximum height; the second group of assignments handles linear tiling,
 * dm_rgbe_alpha and dm_420_8-with-vertical-scan separately.
 * NOTE(review): presumably the second group is the else arm of the format
 * test - confirm.
 */
6632 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6633 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6634 if (SurfaceTiling[k] == dm_sw_linear
6635 || (SourcePixelFormat[k] == dm_444_64
6636 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6637 && SourceScan[k] != dm_vert)) {
6638 MinimumSwathHeightY = MaximumSwathHeightY[k];
6639 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6640 MinimumSwathHeightY = MaximumSwathHeightY[k];
6642 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6644 MinimumSwathHeightC = MaximumSwathHeightC[k];
6646 if (SurfaceTiling[k] == dm_sw_linear) {
6647 MinimumSwathHeightY = MaximumSwathHeightY[k];
6648 MinimumSwathHeightC = MaximumSwathHeightC[k];
6649 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6650 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6651 MinimumSwathHeightC = MaximumSwathHeightC[k];
6652 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6653 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6654 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6655 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6656 MinimumSwathHeightY = MaximumSwathHeightY[k];
6657 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6659 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6660 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
/*
 * Swath sizes in bytes at the maximum and minimum swath heights. The double
 * products are stored into int variables. For dm_420_10 each size is passed
 * through dml_ceil(..., 256).
 */
6664 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6665 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6666 if (SourcePixelFormat[k] == dm_420_10) {
6667 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6668 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6670 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6671 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6672 if (SourcePixelFormat[k] == dm_420_10) {
6673 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6674 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
/*
 * Swath height selection against half of DETBufferSizeInKByte * 1024:
 *  1. max luma + max chroma if that fits;
 *  2. min luma + max chroma if luma >= 1.5x chroma and that fits;
 *  3. max luma + min chroma if luma < 1.5x chroma and that fits;
 *  4. the last assignment group uses the minimum for both
 *     (NOTE(review): presumably the final else of this chain - confirm).
 */
6677 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6678 SwathHeightY[k] = MaximumSwathHeightY[k];
6679 SwathHeightC[k] = MaximumSwathHeightC[k];
6680 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6681 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6682 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6683 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6684 SwathHeightY[k] = MinimumSwathHeightY;
6685 SwathHeightC[k] = MaximumSwathHeightC[k];
6686 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6687 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6688 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6689 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6690 SwathHeightY[k] = MaximumSwathHeightY[k];
6691 SwathHeightC[k] = MinimumSwathHeightC;
6692 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6693 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6695 SwathHeightY[k] = MinimumSwathHeightY;
6696 SwathHeightC[k] = MinimumSwathHeightC;
6697 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6698 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
/*
 * DET split, based on dml_ceil(DETBufferSizeInKByte, 64) (presumably a
 * round-up to a multiple of 64 - confirm against dml_ceil): everything to
 * luma when SwathHeightC[k] == 0, half/half when the chosen luma swath is
 * <= 1.5x the chroma swath, and a 2/3 (floored to 1024) : 1/3 split in the
 * remaining assignments.
 */
6701 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6702 if (SwathHeightC[k] == 0) {
6703 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6704 DETBufferSizeC[k] = 0;
6705 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6706 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6707 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6709 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6710 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
/*
 * The plane is flagged unsupported when the minimum swaths exceed half of
 * actDETBufferSizeInKByte * 1024, or SwathWidth[k] / SwathWidthChroma[k]
 * exceed their maximums (chroma only checked when SwathHeightC[k] > 0).
 */
6713 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6714 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6715 *ViewportSizeSupport = false;
6716 ViewportSizeSupportPerPlane[k] = false;
6718 ViewportSizeSupportPerPlane[k] = true;
/*
 * CalculateSwathWidth - compute per-plane swath widths, maximum swath heights
 * and block-aligned swath width upper bounds.
 *
 * For every plane k in [0, NumberOfActivePlanes) this writes:
 *  - SwathWidthSingleDPPY/C[k]: taken from ViewportWidth[k] or
 *    ViewportHeight[k] depending on SourceScan[k]; chroma is half of luma for
 *    the dm_420_8 / dm_420_10 / dm_420_12 formats;
 *  - SwathWidthY/C[k]: the single-DPP width reduced according to the ODM
 *    combine mode of the plane referenced by BlendingAndTiming[k], or halved
 *    when DPPPerPlane[k] == 2; reset to the single-DPP width when
 *    ForceSingleDPP is true;
 *  - MaximumSwathHeightY/C[k]: the 256-byte block height, or the block width
 *    in the second assignment group;
 *  - swath_width_luma_ub[k] / swath_width_chroma_ub[k]: the smaller of the
 *    dml_ceil'ed surface dimension and
 *    dml_ceil(SwathWidth - 1, block dimension) + block dimension;
 *    the chroma value is also assigned 0 in a branch tied to the
 *    BytePerPixC[k] > 0 test.
 */
6724 static void CalculateSwathWidth(
6725 bool ForceSingleDPP,
6726 int NumberOfActivePlanes,
6727 enum source_format_class SourcePixelFormat[],
6728 enum scan_direction_class SourceScan[],
6729 int ViewportWidth[],
6730 int ViewportHeight[],
6731 int SurfaceWidthY[],
6732 int SurfaceWidthC[],
6733 int SurfaceHeightY[],
6734 int SurfaceHeightC[],
6735 enum odm_combine_mode ODMCombineEnabled[],
6738 int Read256BytesBlockHeightY[],
6739 int Read256BytesBlockHeightC[],
6740 int Read256BytesBlockWidthY[],
6741 int Read256BytesBlockWidthC[],
6742 int BlendingAndTiming[],
6746 double SwathWidthSingleDPPY[],
6747 double SwathWidthSingleDPPC[],
6748 double SwathWidthY[],
6749 double SwathWidthC[],
6750 int MaximumSwathHeightY[],
6751 int MaximumSwathHeightC[],
6752 int swath_width_luma_ub[],
6753 int swath_width_chroma_ub[])
6755 enum odm_combine_mode MainPlaneODMCombine;
6758 #ifdef __DML_VBA_DEBUG__
6759 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6762 for (k = 0; k < NumberOfActivePlanes; ++k) {
/*
 * Single-DPP luma swath width comes from the viewport width when the scan is
 * not vertical. NOTE(review): the ViewportHeight assignment is presumably the
 * else arm (vertical scan) - confirm.
 */
6763 if (SourceScan[k] != dm_vert) {
6764 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6766 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6769 #ifdef __DML_VBA_DEBUG__
6770 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6771 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
/* Use the ODM combine mode of the plane j that BlendingAndTiming[k] refers to; defaults to plane k's own mode. */
6774 MainPlaneODMCombine = ODMCombineEnabled[k];
6775 for (j = 0; j < NumberOfActivePlanes; ++j) {
6776 if (BlendingAndTiming[k] == j) {
6777 MainPlaneODMCombine = ODMCombineEnabled[j];
/* 4:1 and 2:1 ODM cap the width at HActive/4 or HActive/2 scaled by HRatio; two DPPs per plane halve it. */
6781 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6782 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6783 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6784 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6785 } else if (DPPPerPlane[k] == 2) {
6786 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6788 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6791 #ifdef __DML_VBA_DEBUG__
6792 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6793 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
/* Chroma widths are half the luma widths for the 4:2:0 formats, otherwise equal to them. */
6796 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6797 SwathWidthC[k] = SwathWidthY[k] / 2;
6798 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6800 SwathWidthC[k] = SwathWidthY[k];
6801 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
/* ForceSingleDPP discards the ODM / multi-DPP reduction computed above. */
6804 if (ForceSingleDPP == true) {
6805 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6806 SwathWidthC[k] = SwathWidthSingleDPPC[k];
/* Surface dimensions passed through dml_ceil with the 256-byte block dimensions; used as caps below. */
6809 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6810 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6811 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6812 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6814 #ifdef __DML_VBA_DEBUG__
6815 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
/*
 * Non-vertical scan: swath height follows the block height and the width
 * bound is aligned with the block width. The second group swaps the roles of
 * block width and height (NOTE(review): presumably the vertical-scan else
 * arm - confirm).
 */
6818 if (SourceScan[k] != dm_vert) {
6819 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6820 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6821 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6822 if (BytePerPixC[k] > 0) {
6823 swath_width_chroma_ub[k] = dml_min(
6825 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6827 swath_width_chroma_ub[k] = 0;
6830 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6831 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6832 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6833 if (BytePerPixC[k] > 0) {
6834 swath_width_chroma_ub[k] = dml_min(
6835 surface_height_ub_c,
6836 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6838 swath_width_chroma_ub[k] = 0;
/*
 * CalculateExtraLatency - convert the extra-latency byte count into a latency.
 *
 * Obtains ExtraLatencyBytes from CalculateExtraLatencyBytes() and returns
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *     + ExtraLatencyBytes / ReturnBW
 * i.e. a cycle count divided by the DCFCLK value plus a byte count divided by
 * the return bandwidth value. With __DML_VBA_DEBUG__ defined the inputs and
 * the result are printed through dml_print().
 */
6845 static double CalculateExtraLatency(
6846 int RoundTripPingLatencyCycles,
6847 int ReorderingBytes,
6849 int TotalNumberOfActiveDPP,
6850 int PixelChunkSizeInKByte,
6851 int TotalNumberOfDCCActiveDPP,
6856 int NumberOfActivePlanes,
6858 int dpte_group_bytes[],
6859 double HostVMInefficiencyFactor,
6860 double HostVMMinPageSize,
6861 int HostVMMaxNonCachedPageTableLevels)
6863 double ExtraLatencyBytes;
6864 double ExtraLatency;
6866 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6868 TotalNumberOfActiveDPP,
6869 PixelChunkSizeInKByte,
6870 TotalNumberOfDCCActiveDPP,
6874 NumberOfActivePlanes,
6877 HostVMInefficiencyFactor,
6879 HostVMMaxNonCachedPageTableLevels);
/* Fixed round-trip + arbiter delay in DCFCLK cycles, plus the time to return ExtraLatencyBytes at ReturnBW. */
6881 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6883 #ifdef __DML_VBA_DEBUG__
6884 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6885 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6886 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6887 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6888 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6891 return ExtraLatency;
/*
 * CalculateExtraLatencyBytes - accumulate the byte count used for the
 * extra-latency term.
 *
 * The running value `ret` starts as
 *   ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *                      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0
 * and, when GPUVMEnable is true, gains for every active plane k
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *     * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is derived from HostVMMaxNonCachedPageTableLevels and
 * HostVMMinPageSize when both GPUVMEnable and HostVMEnable are true, and is 0
 * otherwise.
 * NOTE(review): `ret` is presumably the function's return value - confirm.
 */
6894 static double CalculateExtraLatencyBytes(
6895 int ReorderingBytes,
6896 int TotalNumberOfActiveDPP,
6897 int PixelChunkSizeInKByte,
6898 int TotalNumberOfDCCActiveDPP,
6902 int NumberOfActivePlanes,
6904 int dpte_group_bytes[],
6905 double HostVMInefficiencyFactor,
6906 double HostVMMinPageSize,
6907 int HostVMMaxNonCachedPageTableLevels)
6910 int HostVMDynamicLevels = 0, k;
/*
 * Page sizes below 2048 use the full level count; sizes in [2048, 1048576)
 * use one level fewer; the remaining assignment uses two fewer. Both reduced
 * values are clamped at 0 by dml_max.
 */
6912 if (GPUVMEnable == true && HostVMEnable == true) {
6913 if (HostVMMinPageSize < 2048) {
6914 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
6915 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
6916 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
6918 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
6921 HostVMDynamicLevels = 0;
/* Chunk sizes are multiplied by 1024.0; ReorderingBytes is added as-is. */
6924 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;
/* With GPUVM enabled, add the per-plane DPTE group bytes scaled by the dynamic level count and inefficiency factor. */
6926 if (GPUVMEnable == true) {
6927 for (k = 0; k < NumberOfActivePlanes; ++k) {
6928 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
/*
 * CalculateUrgentLatency - combine the three urgent-latency inputs into one
 * value.
 *
 * `ret` is the maximum (dml_max3) of UrgentLatencyPixelDataOnly,
 * UrgentLatencyPixelMixedWithVMData and UrgentLatencyVMDataOnly. When
 * DoUrgentLatencyAdjustment is true it is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *     * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1),
 * which is negative whenever FabricClock exceeds the reference clock.
 * NOTE(review): `ret` is presumably the function's return value - confirm.
 */
6934 static double CalculateUrgentLatency(
6935 double UrgentLatencyPixelDataOnly,
6936 double UrgentLatencyPixelMixedWithVMData,
6937 double UrgentLatencyVMDataOnly,
6938 bool DoUrgentLatencyAdjustment,
6939 double UrgentLatencyAdjustmentFabricClockComponent,
6940 double UrgentLatencyAdjustmentFabricClockReference,
6945 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
6946 if (DoUrgentLatencyAdjustment == true) {
6947 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
/*
 * UseMinimumDCFCLK - compute v->DCFCLKState[i][j] for every voltage state i
 * (0 .. v->soc.num_states - 1) and every j in {0, 1}.
 *
 * For each (i, j) the function derives two DCFCLK requirements from the
 * values stored in mode_lib->vba:
 *  - DCFCLKRequiredForAverageBandwidth: dml_max3 of the projected deep-sleep
 *    DCFCLK and two bandwidth / ReturnBusWidth expressions;
 *  - DCFCLKRequiredForPeakBandwidth: the sum of the per-plane
 *    DCFCLKRequiredForPeakBandwidthPerPlane[k] values, then adjusted per
 *    plane against the time budget left after MinimumTvmPlus2Tr0.
 * The result written is
 *   dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(average, peak)).
 *
 * Wherever a time budget (PrefetchTime, AllowedTimeForUrgentExtraLatency, or
 * the MaximumTvmPlus2Tr0PlusTsw comparison) is not satisfied, the code
 * assigns v->DCFCLKPerState[i] as the requirement.
 *
 * MaxPrefetchMode is forwarded to CalculateTWait().
 */
6952 static void UseMinimumDCFCLK(
6953 struct display_mode_lib *mode_lib,
6954 int MaxPrefetchMode,
6955 int ReorderingBytes)
6957 struct vba_vars_st *v = &mode_lib->vba;
6958 int dummy1, i, j, k;
6959 double NormalEfficiency, dummy2, dummy3;
6960 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
/* Percentage converted to a fraction; used as a divisor throughout. */
6962 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
6963 for (i = 0; i < v->soc.num_states; ++i) {
6964 for (j = 0; j <= 1; ++j) {
6965 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
6966 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
6967 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
6968 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
6969 double MinimumTWait;
6970 double NonDPTEBandwidth;
6971 double DPTEBandwidth;
6972 double DCFCLKRequiredForAverageBandwidth;
6973 double ExtraLatencyBytes;
6974 double ExtraLatencyCycles;
6975 double DCFCLKRequiredForPeakBandwidth;
6976 int NoOfDPPState[DC__NUM_DPP__MAX];
6977 double MinimumTvmPlus2Tr0;
/* Sum over planes of DPP count * DPTE bytes per row, divided by 15.75 line times (HTotal / PixelClock). */
6979 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
6980 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
6981 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
6982 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
/* Local copy of the DPP count per plane for this (i, j). */
6985 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
6986 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
6989 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
6990 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
/* With HostVM enabled or immediate flip required, the prefetch/flip DPTE bandwidth computed above replaces the regular DPTE row bandwidth. */
6991 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
6992 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
6993 DCFCLKRequiredForAverageBandwidth = dml_max3(
6994 v->ProjectedDCFCLKDeepSleep[i][j],
6995 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
6996 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
6997 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
6999 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7001 v->TotalNumberOfActiveDPP[i][j],
7002 v->PixelChunkSizeInKByte,
7003 v->TotalNumberOfDCCActiveDPP[i][j],
7007 v->NumberOfActivePlanes,
7009 v->dpte_group_bytes,
7011 v->HostVMMinPageSize,
7012 v->HostVMMaxNonCachedPageTableLevels);
/* Extra latency expressed in cycles: fixed cycle counts plus the byte count divided by efficiency and bus width. */
7013 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
/* Per-plane peak requirement. */
7014 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7015 double DCFCLKCyclesRequiredInPrefetch;
7016 double ExpectedPrefetchBWAcceleration;
7017 double PrefetchTime;
7019 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7020 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
/* The PDE/meta-PTE term only contributes when GPUVMMaxPageTableLevels > 2. */
7021 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7022 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7023 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7024 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7025 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7026 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7027 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
/* Non-zero only when GPUVM, the plane's dynamic metadata and DynamicMetadataVMEnabled are all on. */
7028 DynamicMetadataVMExtraLatency[k] =
7029 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7030 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7031 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7033 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7034 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7035 - DynamicMetadataVMExtraLatency[k];
/*
 * With a positive prefetch window the requirement scales with the expected
 * prefetch VRatio. The later assignment of v->DCFCLKPerState[i] is the
 * fallback (NOTE(review): presumably the else arm for PrefetchTime <= 0 -
 * confirm).
 */
7037 if (PrefetchTime > 0) {
7038 double ExpectedVRatioPrefetch;
7039 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7040 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7041 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7042 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7043 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7044 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7045 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7048 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
/* Planes with dynamic metadata must also cover ExtraLatencyCycles within the time left after setup and metadata timing terms. */
7050 if (v->DynamicMetadataEnable[k] == true) {
7055 double AllowedTimeForUrgentExtraLatency;
7057 CalculateVupdateAndDynamicMetadataParameters(
7058 v->MaxInterDCNTileRepeaters,
7059 v->RequiredDPPCLK[i][j][k],
7060 v->RequiredDISPCLK[i][j],
7061 v->ProjectedDCFCLKDeepSleep[i][j],
7064 v->VTotal[k] - v->VActive[k],
7065 v->DynamicMetadataTransmittedBytes[k],
7066 v->DynamicMetadataLinesBeforeActiveRequired[k],
7068 v->ProgressiveToInterlaceUnitInOPP,
7076 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7077 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7078 if (AllowedTimeForUrgentExtraLatency > 0) {
7079 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7080 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7081 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7083 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
/* Peak requirement starts as the sum of the per-plane requirements. */
7087 DCFCLKRequiredForPeakBandwidth = 0;
7088 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7089 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
/* UrgLatency scaled by a page-table-level factor that depends on GPUVMEnable and HostVMEnable. */
7091 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7092 * (v->GPUVMEnable == true ?
7093 (v->HostVMEnable == true ?
7094 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
/*
 * Per plane: if the available window does not exceed MinimumTvmPlus2Tr0 plus
 * a quarter of the prefetch line time, fall back to v->DCFCLKPerState[i];
 * the dml_max3 below raises the requirement instead (NOTE(review):
 * presumably the else arm - confirm).
 */
7096 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7097 double MaximumTvmPlus2Tr0PlusTsw;
7098 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7099 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7100 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7102 DCFCLKRequiredForPeakBandwidth = dml_max3(
7103 DCFCLKRequiredForPeakBandwidth,
7104 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7105 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
/* Final value: the larger requirement with a 1.05 factor, capped at the state's DCFCLK. */
7108 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
/*
 * CalculateUnboundedRequestAndCompressedBufferSize - decide whether unbounded
 * requesting is enabled and derive the compressed buffer size.
 *
 * Results are written through the two output pointers:
 *   *UnboundedRequestEnabled     - value returned by UnboundedRequest() for
 *                                  UseUnboundedRequestingFinal, TotalActiveDPP,
 *                                  NoChromaPlanes and Output[0] (only the first
 *                                  element of Output is read here).
 *   *CompressedBufferSizeInkByte - ConfigReturnBufferSizeInKByte minus the DET
 *                                  allocation, then multiplied by
 *                                  CompressedBufferSegmentSizeInkByteFinal and
 *                                  divided by 64.
 *
 * The DET allocation is dml_ceil(DETBufferSizeInKByte, 64) per DPP, counted
 * over TotalActiveDPP when unbounded requesting is enabled and over MaxNumDPP
 * otherwise.
 *
 * NOTE(review): dml_ceil() is defined elsewhere; presumably it rounds its first
 * argument up to a multiple of the second - confirm.
 */
7113 static void CalculateUnboundedRequestAndCompressedBufferSize(
7114 unsigned int DETBufferSizeInKByte,
7115 int ConfigReturnBufferSizeInKByte,
7116 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7118 bool NoChromaPlanes,
7120 int CompressedBufferSegmentSizeInkByteFinal,
7121 enum output_encoder_class *Output,
7122 bool *UnboundedRequestEnabled,
7123 int *CompressedBufferSizeInkByte)
// DET size used in the subtraction below, held as a double.
7125 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
// Only Output[0] is consulted for the unbounded-request decision.
7127 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
// The subtraction is evaluated in floating point (actDETBufferSizeInKByte is
// a double) and converted to int when stored through the pointer.
7128 *CompressedBufferSizeInkByte = (
7129 *UnboundedRequestEnabled == true ?
7130 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7131 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
// Integer multiply followed by integer divide by 64 (all operands are int).
7132 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
// Debug-only trace of the inputs and the computed results.
7134 #ifdef __DML_VBA_DEBUG__
7135 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7136 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7137 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7138 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7139 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7140 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7141 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7145 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7147 bool ret_val = false;
7149 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7150 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {