1
2 /*
3 * ATI Mach64 CT/VT/GT/LT Support
4 */
5
6 #include <linux/fb.h>
7
8 #include <asm/io.h>
9
10 #include <video/fbcon.h>
11
12 #include "mach64.h"
13 #include "atyfb.h"
14
15
/*
 * FAIL(x): print the string literal x followed by a newline with printk()
 * and make the *calling* function return -EINVAL.  Only usable inside
 * functions that return an int error code.
 *
 * FIXME: remove the FAIL definition
 */
#define FAIL(x)				\
	do {				\
		printk(x "\n");		\
		return -EINVAL;		\
	} while (0)
18
19 static void aty_st_pll(int offset, u8 val, const struct fb_info_aty *info) stdcall;
20
21 static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
22 struct pll_ct *pll);
23 static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp, u32 stretch,
24 struct pll_ct *pll);
25 static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
26 u8 bpp, u32 stretch, union aty_pll *pll);
27 static u32 aty_pll_ct_to_var(const struct fb_info_aty *info,
28 const union aty_pll *pll);
29
30 /*
31 * ATI Mach64 CT clock synthesis description.
32 *
33 * All clocks on the Mach64 can be calculated using the same principle:
34 *
35 *               XTALIN * x * FB_DIV
36 *       CLK = ------------------------
37 *              PLL_REF_DIV * POST_DIV
38 *
39 * XTALIN is a fixed speed clock. Common speeds are 14.31 MHz and 29.50 MHz.
40 * PLL_REF_DIV can be set by the user, but is the same for all clocks.
41 * FB_DIV can be set by the user for each clock individually, it should be set
42 * between 128 and 255, the chip will generate a bad clock signal for too low
43 * values.
44 * x depends on the type of clock; usually it is 2, but for the MCLK it can also
45 * be set to 4.
46 * POST_DIV can be set by the user for each clock individually. Possible values
47 * are 1,2,4,8 and for some clocks other values are available too.
48 * CLK is of course the clock speed that is generated.
49 *
50 * The Mach64 has these clocks:
51 *
52 * MCLK The clock rate of the chip
53 * XCLK The clock rate of the on-chip memory
54 * VCLK0 First pixel clock of first CRT controller
55 * VCLK1 Second pixel clock of first CRT controller
56 * VCLK2 Third pixel clock of first CRT controller
57 * VCLK3 Fourth pixel clock of first CRT controller
58 * VCLK Selected pixel clock, one of VCLK0, VCLK1, VCLK2, VCLK3
59 * V2CLK Pixel clock of the second CRT controller.
60 * SCLK Multi-purpose clock
61 *
62 * - MCLK and XCLK use the same FB_DIV
63 * - VCLK0 .. VCLK3 use the same FB_DIV
64 * - V2CLK is needed when the second CRTC is used (can be used for dualhead);
65 * i.e. CRT monitor connected to laptop has different resolution than built
66 * in LCD monitor.
67 * - SCLK is not available on all cards; it is known to exist on the Rage LT-PRO,
68 * Rage XL and Rage Mobility. It is known not to exist on the Mach64 VT.
69 * - V2CLK is not available on all cards, most likely only the Rage LT-PRO,
70 * the Rage XL and the Rage Mobility
71 *
72 * SCLK can be used to:
73 * - Clock the chip instead of MCLK
74 * - Replace XTALIN with a user defined frequency
75 * - Generate the pixel clock for the LCD monitor (instead of VCLK)
76 */
77
78 /*
79 * It can be quite hard to calculate XCLK and MCLK if they don't run at the
80 * same frequency. Luckily, until now all cards that need asynchronous clock
81 * speeds seem to have SCLK.
82 * So this driver uses SCLK to clock the chip and XCLK to clock the memory.
83 */
84
85 static u8 postdividers[] = {1,2,4,8,3};
86
aty_ld_pll(int offset,const struct fb_info_aty * info)87 u8 stdcall aty_ld_pll(int offset, const struct fb_info_aty *info)
88 {
89 unsigned long addr;
90
91 addr = info->ati_regbase + CLOCK_CNTL + 1;
92 /* write addr byte */
93 writeb((offset << 2) | PLL_WR_EN,addr);
94 addr++;
95 /* read the register value */
96 return readb(addr);
97 }
98
aty_st_pll(int offset,u8 val,const struct fb_info_aty * info)99 static void stdcall aty_st_pll(int offset, u8 val, const struct fb_info_aty *info)
100 {
101 unsigned long addr;
102 addr = info->ati_regbase + CLOCK_CNTL + 1;
103 /* write addr byte */
104 writeb((offset << 2) | PLL_WR_EN,addr);
105 addr++;
106 /* write the register value */
107 writeb(val,addr);
108 addr--;
109 /* Disable write access */
110 writeb(offset << 2,addr);
111 }
112
113 /* ------------------------------------------------------------------------- */
114
115 /*
116 * PLL programming (Mach64 CT family)
117 */
118
119 /*
120 * This procedure sets the display fifo. The display fifo is a buffer that
121 * contains data read from the video memory that waits to be processed by
122 * the CRT controller.
123 *
124 * On the more modern Mach64 variants, the chip doesn't calculate the
125 * interval after which the display fifo has to be reloaded from memory
126 * automatically, the driver has to do it instead.
127 */
128
129
130 #define Maximum_DSP_PRECISION 7
131
aty_dsp_gt(const struct fb_info_aty * info,u8 bpp,u32 width,struct pll_ct * pll)132 static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp,
133 u32 width, struct pll_ct *pll) {
134
135 u32 multiplier,divider,ras_multiplier,ras_divider,tmp;
136 u32 dsp_on,dsp_off,dsp_xclks;
137 u8 vshift,xshift;
138 s8 dsp_precision;
139
140 multiplier = ((u32)info->mclk_fb_div)*pll->vclk_post_div_real;
141 divider = ((u32)pll->vclk_fb_div)*info->xclk_post_div_real;
142
143 ras_multiplier = info->xclkmaxrasdelay;
144 ras_divider = 1;
145
146 if (bpp>=8)
147 divider = divider * (bpp >> 2);
148
149 vshift = (6 - 2); /* FIFO is 64 bits wide in accelerator mode ... */
150
151 if (bpp == 0)
152 vshift--; /* ... but only 32 bits in VGA mode. */
153
154 #ifdef CONFIG_FB_ATY_GENERIC_LCD
155 if (width != 0) {
156 multiplier = multiplier * info->lcd_width;
157 divider = divider * width;
158
159 ras_multiplier = ras_multiplier * info->lcd_width;
160 ras_divider = ras_divider * width;
161 };
162 #endif
163
164 /* If we don't do this, 32 bits for multiplier & divider won't be
165 enough in certain situations! */
166 while (((multiplier | divider) & 1) == 0) {
167 multiplier = multiplier >> 1;
168 divider = divider >> 1;
169 };
170
171 /* Determine DSP precision first */
172 tmp = ((multiplier * info->fifo_size) << vshift) / divider;
173 for (dsp_precision = -5; tmp; dsp_precision++)
174 tmp >>= 1;
175 if (dsp_precision < 0)
176 dsp_precision = 0;
177 else if (dsp_precision > Maximum_DSP_PRECISION)
178 dsp_precision = Maximum_DSP_PRECISION;
179
180 xshift = 6 - dsp_precision;
181 vshift += xshift;
182
183 /* Move on to dsp_off */
184 dsp_off = ((multiplier * (info->fifo_size - 1)) << vshift) / divider -
185 (1 << (vshift - xshift));
186
187 /* Next is dsp_on */
188 // if (bpp == 0)
189 // dsp_on = ((multiplier * 20 << vshift) + divider) / divider;
190 // else {
191 dsp_on = ((multiplier << vshift) + divider) / divider;
192 tmp = ((ras_multiplier << xshift) + ras_divider) / ras_divider;
193 if (dsp_on < tmp)
194 dsp_on = tmp;
195 dsp_on = dsp_on + (tmp * 2) + (info->xclkpagefaultdelay << xshift);
196 // };
197
198 /* Calculate rounding factor and apply it to dsp_on */
199 tmp = ((1 << (Maximum_DSP_PRECISION - dsp_precision)) - 1) >> 1;
200 dsp_on = ((dsp_on + tmp) / (tmp + 1)) * (tmp + 1);
201
202 if (dsp_on >= ((dsp_off / (tmp + 1)) * (tmp + 1)))
203 {
204 dsp_on = dsp_off - (multiplier << vshift) / divider;
205 dsp_on = (dsp_on / (tmp + 1)) * (tmp + 1);
206 }
207
208 /* Last but not least: dsp_xclks */
209 dsp_xclks = ((multiplier << (vshift + 5)) + divider) / divider;
210
211 /* Get register values. */
212 pll->dsp_on_off = (dsp_on << 16) + dsp_off;
213 pll->dsp_config = (dsp_precision << 20) | (info->dsp_loop_latency << 16) |
214 dsp_xclks;
215 return 0;
216 };
217
/*
 * Derive the VCLK feedback and post divider for a requested pixel clock
 * period.
 *
 * info:     device state; supplies ref_clk_per and pll_ref_div.
 * vclk_per: requested pixel clock period (same units as ref_clk_per).
 * pll:      out: vclk_post_div (selector 0..3), vclk_post_div_real (the
 *           divider the selector stands for), vclk_fb_div, pll_vclk_cntl.
 *
 * Returns 0 on success.  Returns -EINVAL (after logging "vclk out of
 * range") when vclk_per is zero -- which would otherwise divide by zero --
 * or when the resulting ratio lies outside 16..255.
 */
static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
			    struct pll_ct *pll)
{
	u32 q;

	/* A zero period cannot be generated and must not reach the division. */
	if (vclk_per == 0)
		FAIL("vclk out of range");

	/* FIXME: use the VTB/GTB /{3,6,12} post dividers if they're better suited */
	q = info->ref_clk_per * info->pll_ref_div * 4 / vclk_per;	/* actually 8*q */
	if (q < 16*8 || q > 255*8)
		FAIL("vclk out of range");

	/*
	 * Pick the post divider selector: each threshold q falls below
	 * doubles the post divider (/1, /2, /4, /8), which keeps the
	 * feedback divider computed below in the 128..255 range.
	 */
	pll->vclk_post_div = (q < 128*8);
	pll->vclk_post_div += (q < 64*8);
	pll->vclk_post_div += (q < 32*8);

	pll->vclk_post_div_real = postdividers[pll->vclk_post_div];
	pll->vclk_fb_div = q * pll->vclk_post_div_real / 8;
	pll->pll_vclk_cntl = 0x03;	/* VCLK = PLL_VCLK/VCLKx_POST */
	return 0;
}
238
/*
 * Translate mode parameters into PLL settings.
 *
 * First computes the VCLK dividers with aty_valid_pll_ct(); on chips with
 * the GTB_DSP feature it then computes the display FIFO settings with
 * aty_dsp_gt().  Both results land in pll->ct.
 *
 * Returns 0 on success or the first non-zero error code from either step.
 */
static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
			     u8 bpp, u32 width, union aty_pll *pll)
{
	int rc;

	rc = aty_valid_pll_ct(info, vclk_per, &pll->ct);
	if (rc)
		return rc;

	/* Nothing more to do on chips without the DSP registers. */
	if (!M64_HAS(GTB_DSP))
		return 0;

	return aty_dsp_gt(info, bpp, width, &pll->ct);
}
250
aty_pll_ct_to_var(const struct fb_info_aty * info,const union aty_pll * pll)251 static u32 aty_pll_ct_to_var(const struct fb_info_aty *info,
252 const union aty_pll *pll)
253 {
254 u32 ref_clk_per = info->ref_clk_per;
255 u8 pll_ref_div = info->pll_ref_div;
256 u8 vclk_fb_div = pll->ct.vclk_fb_div;
257 u8 vclk_post_div = pll->ct.vclk_post_div_real;
258
259 return ref_clk_per*pll_ref_div*vclk_post_div/vclk_fb_div/2;
260 }
261
aty_set_pll_ct(const struct fb_info_aty * info,const union aty_pll * pll)262 void aty_set_pll_ct(const struct fb_info_aty *info, const union aty_pll *pll)
263 {
264 u8 a;
265 aty_st_pll(PLL_VCLK_CNTL, pll->ct.pll_vclk_cntl, info);
266 a = aty_ld_pll(VCLK_POST_DIV, info) & ~3;
267 aty_st_pll(VCLK_POST_DIV, a | pll->ct.vclk_post_div, info);
268 aty_st_pll(VCLK0_FB_DIV, pll->ct.vclk_fb_div, info);
269
270 if (M64_HAS(GTB_DSP)) {
271 aty_st_le32(DSP_CONFIG, pll->ct.dsp_config, info);
272 aty_st_le32(DSP_ON_OFF, pll->ct.dsp_on_off, info);
273 }
274 }
275
276
aty_init_pll_ct(struct fb_info_aty * info)277 static void __init aty_init_pll_ct(struct fb_info_aty *info) {
278 u8 pll_ref_div,pll_gen_cntl,pll_ext_cntl;
279 u8 mpost_div,xpost_div;
280 u8 sclk_post_div_real,sclk_fb_div,spll_cntl2;
281 u32 q,i;
282 u32 mc,trp,trcd,tcrd,tras;
283
284 mc = aty_ld_le32(MEM_CNTL, info);
285 trp = (mc & 0x300) >> 8;
286 trcd = (mc & 0xc00) >> 10;
287 tcrd = (mc & 0x1000) >> 12; tras = (mc & 0x70000) >> 16;
288 info->xclkpagefaultdelay = trcd + tcrd + trp + 2;
289 info->xclkmaxrasdelay = tras + trp + 2;
290
291 if (M64_HAS(FIFO_24)) {
292 info->fifo_size = 24;
293 info->xclkpagefaultdelay += 2;
294 info->xclkmaxrasdelay += 3;
295 } else {
296 info->fifo_size = 32;
297 };
298
299 switch (info->ram_type) {
300 case DRAM:
301 if (info->total_vram<=1*1024*1024) {
302 info->dsp_loop_latency = 10;
303 } else {
304 info->dsp_loop_latency = 8;
305 info->xclkpagefaultdelay += 2;
306 };
307 break;
308 case EDO:
309 case PSEUDO_EDO:
310 if (info->total_vram<=1*1024*1024) {
311 info->dsp_loop_latency = 9;
312 } else {
313 info->dsp_loop_latency = 8;
314 info->xclkpagefaultdelay += 1;
315 };
316 break;
317 case SDRAM:
318 if (info->total_vram<=1*1024*1024) {
319 info->dsp_loop_latency = 11;
320 } else {
321 info->dsp_loop_latency = 10;
322 info->xclkpagefaultdelay += 1;
323 };
324 break;
325 case SGRAM:
326 info->dsp_loop_latency = 8;
327 info->xclkpagefaultdelay += 3;
328 break;
329 default:
330 info->dsp_loop_latency = 11;
331 info->xclkpagefaultdelay += 3;
332 break;
333 };
334
335 if (info->xclkmaxrasdelay <= info->xclkpagefaultdelay)
336 info->xclkmaxrasdelay = info->xclkpagefaultdelay + 1;
337
338 /* Exit if the user does not want us to tamper with the clock
339 rates of her chip. */
340 if (info->mclk_per == 0) {
341 u16 mclk_fb_div;
342 u8 pll_ext_cntl;
343
344 info->pll_ref_div = aty_ld_pll(PLL_REF_DIV, info);
345 pll_ext_cntl = aty_ld_pll(PLL_EXT_CNTL, info);
346 info->xclk_post_div_real = postdividers[pll_ext_cntl & 7];
347 mclk_fb_div = aty_ld_pll(MCLK_FB_DIV, info);
348 if (pll_ext_cntl & 8)
349 mclk_fb_div <<= 1;
350 info->mclk_fb_div = mclk_fb_div;
351 return;
352 };
353
354 pll_ref_div = info->pll_per*2*255/info->ref_clk_per;
355 info->pll_ref_div = pll_ref_div;
356
357 /* FIXME: use the VTB/GTB /3 post divider if it's better suited */
358 q = info->ref_clk_per*pll_ref_div*4/info->xclk_per; /* actually 8*q */
359 if (q < 16*8 || q > 255*8) {
360 printk(KERN_CRIT "xclk out of range\n");
361 return;
362 } else {
363 xpost_div = (q < 128*8);
364 xpost_div += (q < 64*8);
365 xpost_div += (q < 32*8);
366 };
367 info->xclk_post_div_real = postdividers[xpost_div];
368 info->mclk_fb_div = q*info->xclk_post_div_real/8;
369
370 if (M64_HAS(SDRAM_MAGIC_PLL) && (info->ram_type >= SDRAM))
371 pll_gen_cntl = 0x04;
372 else
373 /* The Rage Mobility M1 needs bit 3 set...*/
374 /* original: pll_gen_cntl = 0x84 */
375 pll_gen_cntl = 0x8C;
376
377 if (M64_HAS(MAGIC_POSTDIV))
378 pll_ext_cntl = 0;
379 else
380 pll_ext_cntl = xpost_div;
381
382 if (info->mclk_per == info->xclk_per)
383 pll_gen_cntl |= xpost_div<<4; /* mclk == xclk */
384 else {
385 /*
386 * The chip clock is not equal to the memory clock.
387 * Therefore we will use sclk to clock the chip.
388 */
389 pll_gen_cntl |= 6<<4; /* mclk == sclk*/
390
391 q = info->ref_clk_per*pll_ref_div*4/info->mclk_per; /* actually 8*q */
392 if (q < 16*8 || q > 255*8) {
393 printk(KERN_CRIT "mclk out of range\n");
394 return;
395 } else {
396 mpost_div = (q < 128*8);
397 mpost_div += (q < 64*8);
398 mpost_div += (q < 32*8);
399 };
400 sclk_post_div_real = postdividers[mpost_div];
401 sclk_fb_div = q*sclk_post_div_real/8;
402 spll_cntl2 = mpost_div << 4;
403 /*
404 * This disables the sclk, crashes the computer as reported:
405 * aty_st_pll(SPLL_CNTL2, 3, info);
406 *
407 * So it seems the sclk must be enabled before it is used;
408 * so PLL_GEN_CNTL must be programmed *after* the sclk.
409 */
410 aty_st_pll(SCLK_FB_DIV, sclk_fb_div, info);
411 aty_st_pll(SPLL_CNTL2, spll_cntl2, info);
412 /*
413 * The sclk has been started. However, I believe the first clock
414 * ticks it generates are not very stable. Hope this primitive loop
415 * helps for Rage Mobilities that sometimes crash when
416 * we switch to sclk. (Daniel Mantione, 13-05-2003)
417 */
418 for (i=0;i<=0x1ffff;i++);
419 };
420
421 aty_st_pll(PLL_REF_DIV, pll_ref_div, info);
422 aty_st_pll(PLL_GEN_CNTL, pll_gen_cntl, info);
423 aty_st_pll(MCLK_FB_DIV, info->mclk_fb_div, info);
424 aty_st_pll(PLL_EXT_CNTL, pll_ext_cntl, info);
425 /* Disable the extra precision pixel clock controls since we do not
426 use them. */
427 aty_st_pll(EXT_VPLL_CNTL, aty_ld_pll(EXT_VPLL_CNTL, info) &
428 ~(EXT_VPLL_EN | EXT_VPLL_VGA_EN |
429 EXT_VPLL_INSYNC), info);
430 #if 0
431 /* This code causes problems on the Rage Mobility M1
432 and seems unnecessary. Comments wanted! */
433 if (M64_HAS(GTB_DSP)) {
434 if (M64_HAS(XL_DLL))
435 aty_st_pll(DLL_CNTL, 0x80, info);
436 else if (info->ram_type >= SDRAM)
437 aty_st_pll(DLL_CNTL, 0xa6, info);
438 else
439 aty_st_pll(DLL_CNTL, 0xa0, info);
440 aty_st_pll(VFC_CNTL, 0x1b, info);
441 };
442 #endif
443 };
444
/*
 * Do-nothing callback: takes no arguments and always reports success (0).
 * Used below as a placeholder entry in the DAC operations table.
 */
static int dummy(void)
{
	const int success = 0;

	return success;
}
449
450 const struct aty_dac_ops aty_dac_ct = {
451 set_dac: (void *)dummy,
452 };
453
454 const struct aty_pll_ops aty_pll_ct = {
455 var_to_pll: aty_var_to_pll_ct,
456 pll_to_var: aty_pll_ct_to_var,
457 set_pll: aty_set_pll_ct,
458 init_pll: aty_init_pll_ct
459 };
460
461