1 
2 /*
3  *  ATI Mach64 CT/VT/GT/LT Support
4  */
5 
6 #include <linux/fb.h>
7 
8 #include <asm/io.h>
9 
10 #include <video/fbcon.h>
11 
12 #include "mach64.h"
13 #include "atyfb.h"
14 
15 
16 /* FIXME: remove the FAIL definition */
17 #define FAIL(x) do { printk(x "\n"); return -EINVAL; } while (0)
18 
19 static void aty_st_pll(int offset, u8 val, const struct fb_info_aty *info) stdcall;
20 
21 static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
22 			    struct pll_ct *pll);
23 static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp, u32 stretch,
24 		      struct pll_ct *pll);
25 static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
26 			     u8 bpp, u32 stretch, union aty_pll *pll);
27 static u32 aty_pll_ct_to_var(const struct fb_info_aty *info,
28 			     const union aty_pll *pll);
29 
30 /*
31  * ATI Mach64 CT clock synthesis description.
32  *
33  * All clocks on the Mach64 can be calculated using the same principle:
34  *
35  *       XTALIN * x * FB_DIV
36  * CLK = ----------------------
37  *       PLL_REF_DIV * POST_DIV
38  *
39  * XTALIN is a fixed speed clock. Common speeds are 14.31 MHz and 29.50 MHz.
40  * PLL_REF_DIV can be set by the user, but is the same for all clocks.
41  * FB_DIV can be set by the user for each clock individually, it should be set
42  * between 128 and 255, the chip will generate a bad clock signal for too low
43  * values.
44  * x depends on the type of clock; usually it is 2, but for the MCLK it can also
45  * be set to 4.
 * POST_DIV can be set by the user for each clock individually. Possible values
47  * are 1,2,4,8 and for some clocks other values are available too.
48  * CLK is of course the clock speed that is generated.
49  *
50  * The Mach64 has these clocks:
51  *
52  * MCLK			The clock rate of the chip
53  * XCLK			The clock rate of the on-chip memory
54  * VCLK0		First pixel clock of first CRT controller
55  * VCLK1    Second pixel clock of first CRT controller
56  * VCLK2		Third pixel clock of first CRT controller
57  * VCLK3    Fourth pixel clock of first CRT controller
58  * VCLK			Selected pixel clock, one of VCLK0, VCLK1, VCLK2, VCLK3
59  * V2CLK		Pixel clock of the second CRT controller.
60  * SCLK			Multi-purpose clock
61  *
62  * - MCLK and XCLK use the same FB_DIV
63  * - VCLK0 .. VCLK3 use the same FB_DIV
64  * - V2CLK is needed when the second CRTC is used (can be used for dualhead);
65  *   i.e. CRT monitor connected to laptop has different resolution than built
66  *   in LCD monitor.
67  * - SCLK is not available on all cards; it is known to exist on the Rage LT-PRO,
68  *   Rage XL and Rage Mobility. It is known not to exist on the Mach64 VT.
69  * - V2CLK is not available on all cards, most likely only the Rage LT-PRO,
70  *   the Rage XL and the Rage Mobility
71  *
72  * SCLK can be used to:
73  * - Clock the chip instead of MCLK
74  * - Replace XTALIN with a user defined frequency
75  * - Generate the pixel clock for the LCD monitor (instead of VCLK)
76  */
77 
78  /*
79   * It can be quite hard to calculate XCLK and MCLK if they don't run at the
  * same frequency. Luckily, until now all cards that need asynchronous clock
81   * speeds seem to have SCLK.
82   * So this driver uses SCLK to clock the chip and XCLK to clock the memory.
83   */
84 
85 static u8 postdividers[] = {1,2,4,8,3};
86 
aty_ld_pll(int offset,const struct fb_info_aty * info)87 u8 stdcall aty_ld_pll(int offset, const struct fb_info_aty *info)
88 {
89     unsigned long addr;
90 
91     addr = info->ati_regbase + CLOCK_CNTL + 1;
92     /* write addr byte */
93     writeb((offset << 2) | PLL_WR_EN,addr);
94     addr++;
95     /* read the register value */
96     return readb(addr);
97 }
98 
aty_st_pll(int offset,u8 val,const struct fb_info_aty * info)99 static void stdcall aty_st_pll(int offset, u8 val, const struct fb_info_aty *info)
100 {
101     unsigned long addr;
102     addr = info->ati_regbase + CLOCK_CNTL + 1;
103     /* write addr byte */
104     writeb((offset << 2) | PLL_WR_EN,addr);
105     addr++;
106     /* write the register value */
107     writeb(val,addr);
108     addr--;
109     /* Disable write access */
110     writeb(offset << 2,addr);
111 }
112 
113 /* ------------------------------------------------------------------------- */
114 
115     /*
116      *  PLL programming (Mach64 CT family)
117      */
118 
119 /*
120  * This procedure sets the display fifo. The display fifo is a buffer that
121  * contains data read from the video memory that waits to be processed by
122  * the CRT controller.
123  *
124  * On the more modern Mach64 variants, the chip doesn't calculate the
125  * interval after which the display fifo has to be reloaded from memory
126  * automatically, the driver has to do it instead.
127  */
128 
129 
130 #define Maximum_DSP_PRECISION 7
131 
/*
 * Compute the display FIFO (DSP) register values for a video mode.
 *
 * info:  adapter state; reads mclk_fb_div, xclk_post_div_real,
 *        xclkmaxrasdelay, xclkpagefaultdelay, fifo_size, dsp_loop_latency
 *        and (with CONFIG_FB_ATY_GENERIC_LCD) lcd_width
 * bpp:   bits per pixel; 0 is treated as VGA mode
 * width: when non-zero and CONFIG_FB_ATY_GENERIC_LCD is set, the ratios are
 *        scaled by lcd_width / width
 * pll:   reads vclk_post_div_real and vclk_fb_div; receives dsp_on_off and
 *        dsp_config
 *
 * Returns 0 on success, or -EINVAL when the clock divider works out to zero
 * (which would otherwise divide by zero or spin forever in the reduction
 * loop below).
 */
static int aty_dsp_gt(const struct fb_info_aty *info, u8 bpp,
		      u32 width, struct pll_ct *pll)
{
    u32 multiplier, divider, ras_multiplier, ras_divider, tmp;
    u32 dsp_on, dsp_off, dsp_xclks;
    u8 vshift, xshift;
    s8 dsp_precision;

    multiplier = ((u32)info->mclk_fb_div) * pll->vclk_post_div_real;
    divider = ((u32)pll->vclk_fb_div) * info->xclk_post_div_real;

    ras_multiplier = info->xclkmaxrasdelay;
    ras_divider = 1;

    if (bpp >= 8)
        divider = divider * (bpp >> 2);

    vshift = (6 - 2);	/* FIFO is 64 bits wide in accelerator mode ... */

    if (bpp == 0)
        vshift--;	/* ... but only 32 bits in VGA mode. */

#ifdef CONFIG_FB_ATY_GENERIC_LCD
    if (width != 0) {
        multiplier = multiplier * info->lcd_width;
        divider = divider * width;

        ras_multiplier = ras_multiplier * info->lcd_width;
        ras_divider = ras_divider * width;
    }
#endif

    /*
     * A zero divider means the clock parameters were never set up
     * properly.  Refuse to go on: every formula below divides by it, and
     * with multiplier also zero the reduction loop would never terminate.
     */
    if (divider == 0)
        return -EINVAL;

    /* If we don't do this, 32 bits for multiplier & divider won't be
       enough in certain situations! */
    while (((multiplier | divider) & 1) == 0) {
        multiplier = multiplier >> 1;
        divider = divider >> 1;
    }

    /* Determine DSP precision first */
    tmp = ((multiplier * info->fifo_size) << vshift) / divider;
    for (dsp_precision = -5;  tmp;  dsp_precision++)
        tmp >>= 1;
    if (dsp_precision < 0)
        dsp_precision = 0;
    else if (dsp_precision > Maximum_DSP_PRECISION)
        dsp_precision = Maximum_DSP_PRECISION;

    xshift = 6 - dsp_precision;
    vshift += xshift;

    /* Move on to dsp_off */
    dsp_off = ((multiplier * (info->fifo_size - 1)) << vshift) / divider -
	      (1 << (vshift - xshift));

    /* Next is dsp_on; the same formula is used for every bpp, VGA included */
    dsp_on = ((multiplier << vshift) + divider) / divider;
    tmp = ((ras_multiplier << xshift) + ras_divider) / ras_divider;
    if (dsp_on < tmp)
        dsp_on = tmp;
    dsp_on = dsp_on + (tmp * 2) + (info->xclkpagefaultdelay << xshift);

    /* Calculate rounding factor and apply it to dsp_on */
    tmp = ((1 << (Maximum_DSP_PRECISION - dsp_precision)) - 1) >> 1;
    dsp_on = ((dsp_on + tmp) / (tmp + 1)) * (tmp + 1);

    /* Keep dsp_on below the (rounded-down) dsp_off */
    if (dsp_on >= ((dsp_off / (tmp + 1)) * (tmp + 1))) {
        dsp_on = dsp_off - (multiplier << vshift) / divider;
        dsp_on = (dsp_on / (tmp + 1)) * (tmp + 1);
    }

    /* Last but not least:  dsp_xclks */
    dsp_xclks = ((multiplier << (vshift + 5)) + divider) / divider;

    /* Get register values. */
    pll->dsp_on_off = (dsp_on << 16) + dsp_off;
    pll->dsp_config = (dsp_precision << 20) | (info->dsp_loop_latency << 16) |
                      dsp_xclks;
    return 0;
}
217 
/*
 * Derive the VCLK post divider and feedback divider for a pixel clock.
 *
 * info:     adapter state; reads ref_clk_per and pll_ref_div
 * vclk_per: requested pixel clock period (same unit as info->ref_clk_per)
 * pll:      receives vclk_post_div, vclk_post_div_real, vclk_fb_div and
 *           pll_vclk_cntl
 *
 * Returns 0 on success, or -EINVAL (after logging) when the period is zero
 * or the resulting ratio cannot be produced by the PLL.
 */
static int aty_valid_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
			    struct pll_ct *pll)
{
    u32 q;

    /* A zero period would divide by zero below; reject it up front. */
    if (vclk_per == 0)
        FAIL("vclk out of range");

    /* FIXME: use the VTB/GTB /{3,6,12} post dividers if they're better suited */
    q = info->ref_clk_per*info->pll_ref_div*4/vclk_per;	/* actually 8*q */
    if (q < 16*8 || q > 255*8)
        FAIL("vclk out of range");

    /*
     * Pick the post divider code: each time q drops below another power of
     * two threshold the code goes up by one (0 -> /1, 1 -> /2, 2 -> /4,
     * 3 -> /8).
     */
    pll->vclk_post_div  = (q < 128*8);
    pll->vclk_post_div += (q <  64*8);
    pll->vclk_post_div += (q <  32*8);

    pll->vclk_post_div_real = postdividers[pll->vclk_post_div];
    /* q carries a factor of 8, remove it again */
    pll->vclk_fb_div = q*pll->vclk_post_div_real/8;
    pll->pll_vclk_cntl = 0x03;	/* VCLK = PLL_VCLK/VCLKx_POST */
    return 0;
}
238 
/*
 * Translate mode parameters into PLL (and, where present, DSP) settings.
 *
 * The pixel clock dividers are always computed; the display FIFO values are
 * only computed on chips that report the GTB_DSP feature.
 *
 * Returns 0 on success or the error code of the first step that failed.
 */
static int aty_var_to_pll_ct(const struct fb_info_aty *info, u32 vclk_per,
			     u8 bpp, u32 width, union aty_pll *pll)
{
    int err;

    err = aty_valid_pll_ct(info, vclk_per, &pll->ct);
    if (err)
        return err;

    if (!M64_HAS(GTB_DSP))
        return 0;

    return aty_dsp_gt(info, bpp, width, &pll->ct);
}
250 
aty_pll_ct_to_var(const struct fb_info_aty * info,const union aty_pll * pll)251 static u32 aty_pll_ct_to_var(const struct fb_info_aty *info,
252 			     const union aty_pll *pll)
253 {
254     u32 ref_clk_per = info->ref_clk_per;
255     u8 pll_ref_div = info->pll_ref_div;
256     u8 vclk_fb_div = pll->ct.vclk_fb_div;
257     u8 vclk_post_div = pll->ct.vclk_post_div_real;
258 
259     return ref_clk_per*pll_ref_div*vclk_post_div/vclk_fb_div/2;
260 }
261 
aty_set_pll_ct(const struct fb_info_aty * info,const union aty_pll * pll)262 void aty_set_pll_ct(const struct fb_info_aty *info, const union aty_pll *pll)
263 {
264     u8 a;
265     aty_st_pll(PLL_VCLK_CNTL, pll->ct.pll_vclk_cntl, info);
266     a = aty_ld_pll(VCLK_POST_DIV, info) & ~3;
267     aty_st_pll(VCLK_POST_DIV, a | pll->ct.vclk_post_div, info);
268     aty_st_pll(VCLK0_FB_DIV, pll->ct.vclk_fb_div, info);
269 
270     if (M64_HAS(GTB_DSP)) {
271         aty_st_le32(DSP_CONFIG, pll->ct.dsp_config, info);
272         aty_st_le32(DSP_ON_OFF, pll->ct.dsp_on_off, info);
273     }
274 }
275 
276 
aty_init_pll_ct(struct fb_info_aty * info)277 static void __init aty_init_pll_ct(struct fb_info_aty *info) {
278     u8 pll_ref_div,pll_gen_cntl,pll_ext_cntl;
279     u8 mpost_div,xpost_div;
280     u8 sclk_post_div_real,sclk_fb_div,spll_cntl2;
281     u32 q,i;
282     u32 mc,trp,trcd,tcrd,tras;
283 
284     mc = aty_ld_le32(MEM_CNTL, info);
285     trp = (mc & 0x300) >> 8;
286     trcd = (mc & 0xc00) >> 10;
287     tcrd = (mc & 0x1000) >> 12;    tras = (mc & 0x70000) >> 16;
288     info->xclkpagefaultdelay = trcd + tcrd + trp + 2;
289     info->xclkmaxrasdelay = tras + trp + 2;
290 
291     if (M64_HAS(FIFO_24)) {
292         info->fifo_size = 24;
293         info->xclkpagefaultdelay += 2;
294         info->xclkmaxrasdelay += 3;
295     } else {
296         info->fifo_size = 32;
297     };
298 
299     switch (info->ram_type) {
300         case DRAM:
301             if (info->total_vram<=1*1024*1024) {
302                 info->dsp_loop_latency = 10;
303             } else {
304                 info->dsp_loop_latency = 8;
305                 info->xclkpagefaultdelay += 2;
306             };
307             break;
308         case EDO:
309         case PSEUDO_EDO:
310             if (info->total_vram<=1*1024*1024) {
311                 info->dsp_loop_latency = 9;
312             } else {
313                 info->dsp_loop_latency = 8;
314                 info->xclkpagefaultdelay += 1;
315             };
316             break;
317         case SDRAM:
318             if (info->total_vram<=1*1024*1024) {
319                 info->dsp_loop_latency = 11;
320             } else {
321                 info->dsp_loop_latency = 10;
322                 info->xclkpagefaultdelay += 1;
323             };
324             break;
325         case SGRAM:
326             info->dsp_loop_latency = 8;
327             info->xclkpagefaultdelay += 3;
328             break;
329         default:
330             info->dsp_loop_latency = 11;
331             info->xclkpagefaultdelay += 3;
332             break;
333     };
334 
335     if (info->xclkmaxrasdelay <= info->xclkpagefaultdelay)
336         info->xclkmaxrasdelay = info->xclkpagefaultdelay + 1;
337 
338     /* Exit if the user does not want us to tamper with the clock
339        rates of her chip. */
340     if (info->mclk_per == 0) {
341         u16 mclk_fb_div;
342         u8 pll_ext_cntl;
343 
344         info->pll_ref_div = aty_ld_pll(PLL_REF_DIV, info);
345         pll_ext_cntl = aty_ld_pll(PLL_EXT_CNTL, info);
346         info->xclk_post_div_real = postdividers[pll_ext_cntl & 7];
347         mclk_fb_div = aty_ld_pll(MCLK_FB_DIV, info);
348         if (pll_ext_cntl & 8)
349             mclk_fb_div <<= 1;
350         info->mclk_fb_div = mclk_fb_div;
351         return;
352     };
353 
354     pll_ref_div = info->pll_per*2*255/info->ref_clk_per;
355     info->pll_ref_div = pll_ref_div;
356 
357     /* FIXME: use the VTB/GTB /3 post divider if it's better suited */
358     q = info->ref_clk_per*pll_ref_div*4/info->xclk_per;	/* actually 8*q */
359     if (q < 16*8 || q > 255*8) {
360         printk(KERN_CRIT "xclk out of range\n");
361         return;
362     } else {
363         xpost_div  = (q < 128*8);
364         xpost_div += (q <  64*8);
365         xpost_div += (q <  32*8);
366     };
367     info->xclk_post_div_real = postdividers[xpost_div];
368     info->mclk_fb_div = q*info->xclk_post_div_real/8;
369 
370     if (M64_HAS(SDRAM_MAGIC_PLL) && (info->ram_type >= SDRAM))
371         pll_gen_cntl = 0x04;
372     else
373 	/* The Rage Mobility M1 needs bit 3 set...*/
374 	/* original: pll_gen_cntl = 0x84 */
375         pll_gen_cntl = 0x8C;
376 
377     if (M64_HAS(MAGIC_POSTDIV))
378         pll_ext_cntl = 0;
379     else
380        	pll_ext_cntl = xpost_div;
381 
382     if (info->mclk_per == info->xclk_per)
383         pll_gen_cntl |= xpost_div<<4; /* mclk == xclk */
384     else {
385 	/*
386 	 * The chip clock is not equal to the memory clock.
387 	 * Therefore we will use sclk to clock the chip.
388 	 */
389         pll_gen_cntl |= 6<<4;	/* mclk == sclk*/
390 
391         q = info->ref_clk_per*pll_ref_div*4/info->mclk_per;	/* actually 8*q */
392         if (q < 16*8 || q > 255*8) {
393 	    printk(KERN_CRIT "mclk out of range\n");
394             return;
395         } else {
396             mpost_div  = (q < 128*8);
397             mpost_div += (q <  64*8);
398             mpost_div += (q <  32*8);
399         };
400         sclk_post_div_real = postdividers[mpost_div];
401         sclk_fb_div = q*sclk_post_div_real/8;
402         spll_cntl2 = mpost_div << 4;
403 	/*
404          * This disables the sclk, crashes the computer as reported:
405          * aty_st_pll(SPLL_CNTL2, 3, info);
406 	 *
407          * So it seems the sclk must be enabled before it is used;
408          * so PLL_GEN_CNTL must be programmed *after* the sclk.
409 	 */
410         aty_st_pll(SCLK_FB_DIV, sclk_fb_div, info);
411         aty_st_pll(SPLL_CNTL2, spll_cntl2, info);
412 	/*
413 	 * The sclk has been started. However, I believe the first clock
414 	 * ticks it generates are not very stable. Hope this primitive loop
415 	 * helps for Rage Mobilities that sometimes crash when
416 	 * we switch to sclk. (Daniel Mantione, 13-05-2003)
417 	 */
418         for (i=0;i<=0x1ffff;i++);
419     };
420 
421     aty_st_pll(PLL_REF_DIV, pll_ref_div, info);
422     aty_st_pll(PLL_GEN_CNTL, pll_gen_cntl, info);
423     aty_st_pll(MCLK_FB_DIV, info->mclk_fb_div, info);
424     aty_st_pll(PLL_EXT_CNTL, pll_ext_cntl, info);
425     /* Disable the extra precision pixel clock controls since we do not
426        use them. */
427     aty_st_pll(EXT_VPLL_CNTL, aty_ld_pll(EXT_VPLL_CNTL, info) &
428 	                      ~(EXT_VPLL_EN | EXT_VPLL_VGA_EN |
429 			        EXT_VPLL_INSYNC), info);
430 #if 0
431     /* This code causes problems on the Rage Mobility M1
432        and seems unnecessary. Comments wanted! */
433     if (M64_HAS(GTB_DSP)) {
434         if (M64_HAS(XL_DLL))
435             aty_st_pll(DLL_CNTL, 0x80, info);
436         else if (info->ram_type >= SDRAM)
437             aty_st_pll(DLL_CNTL, 0xa6, info);
438         else
439             aty_st_pll(DLL_CNTL, 0xa0, info);
440         aty_st_pll(VFC_CNTL, 0x1b, info);
441     };
442 #endif
443 };
444 
/*
 * Placeholder callback that does nothing and reports success.
 * It is installed as the set_dac hook of aty_dac_ct.
 */
static int dummy(void)
{
    const int success = 0;

    return success;
}
449 
450 const struct aty_dac_ops aty_dac_ct = {
451     set_dac:	(void *)dummy,
452 };
453 
454 const struct aty_pll_ops aty_pll_ct = {
455     var_to_pll:	aty_var_to_pll_ct,
456     pll_to_var:	aty_pll_ct_to_var,
457     set_pll:	aty_set_pll_ct,
458     init_pll:	aty_init_pll_ct
459 };
460 
461