/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 * http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct channel_info *chan)
{
        debugf4("\tchannel = %p\n", chan);
        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
        debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
        debugf4("\tchannel->label = '%s'\n", chan->label);
        debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        debugf4("\tcsrow = %p\n", csrow);
        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
        debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
        debugf4("\tcsrow->channels = %p\n", csrow->channels);
        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        debugf3("\tmci = %p\n", mci);
        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
                mci->nr_csrows, mci->csrows);
        debugf3("\tdev = %p\n", mci->dev);
        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
        unsigned align, r;

        /* Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        r = size % align;

        if (r == 0)
                return (char *)ptr;

        return (void *)(((unsigned long)ptr) + align - r);
}

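/*
 * Usage sketch (illustrative, not part of the original file): to pack
 * several variable-sized arrays into one allocation, compute each item's
 * offset relative to a NULL base, allocate the total, then rebase the
 * offsets into real pointers.  'T', 'prev', 'nr', 'last' and 'last_size'
 * are hypothetical names:
 *
 *      T *t = edac_align_ptr(&prev[nr], sizeof(*t));   // aligned offset
 *      size = ((unsigned long)last) + last_size;       // total bytes
 *      mem = kzalloc(size, GFP_KERNEL);
 *      t = (T *)((char *)mem + (unsigned long)t);      // rebase into 'mem'
 *
 * edac_mc_alloc() below uses exactly this pattern for the csrow array,
 * the channel array and the driver's private data.
 */
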
/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt: size of private storage needed
 * @nr_csrows: Number of csrows needed for this MC
 * @nr_chans: Number of channels for the MC
 * @edac_index: unique index used as this instance's mc_idx
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *      NULL    allocation failed
 *      struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
                                unsigned nr_chans, int edac_index)
{
        struct mem_ctl_info *mci;
        struct csrow_info *csi, *csrow;
        struct channel_info *chi, *chp, *chan;
        void *pvt;
        unsigned size;
        int row, chn;
        int err;

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = (struct mem_ctl_info *)0;
        csi = edac_align_ptr(&mci[1], sizeof(*csi));
        chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
        pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
        size = ((unsigned long)pvt) + sz_pvt;

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
        chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = edac_index;
        mci->csrows = csi;
        mci->pvt_info = pvt;
        mci->nr_csrows = nr_csrows;

        for (row = 0; row < nr_csrows; row++) {
                csrow = &csi[row];
                csrow->csrow_idx = row;
                csrow->mci = mci;
                csrow->nr_channels = nr_chans;
                chp = &chi[row * nr_chans];
                csrow->channels = chp;

                for (chn = 0; chn < nr_chans; chn++) {
                        chan = &chp[chn];
                        chan->chan_idx = chn;
                        chan->csrow = csrow;
                }
        }

        mci->op_state = OP_ALLOC;
        INIT_LIST_HEAD(&mci->grp_kobj_list);

        /*
         * Initialize the 'root' kobj for the edac_mc controller
         */
        err = edac_mc_register_sysfs_main_kobj(mci);
        if (err) {
                kfree(mci);
                return NULL;
        }

        /* at this point, the root kobj is valid, and in order to
         * 'free' the object, then the function:
         *      edac_mc_unregister_sysfs_main_kobj() must be called
         * which will perform kobj unregistration and the actual free
         * will occur during the kobject callback operation
         */
        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

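/*
 * Allocation sketch for a hypothetical driver ('struct my_pvt',
 * 'my_check', MY_NR_CSROWS and MY_NR_CHANS are illustrative names, not
 * part of this API):
 *
 *      struct mem_ctl_info *mci;
 *
 *      mci = edac_mc_alloc(sizeof(struct my_pvt), MY_NR_CSROWS,
 *                          MY_NR_CHANS, 0);
 *      if (mci == NULL)
 *              return -ENOMEM;
 *      mci->dev = &pdev->dev;          // device this MC instance manages
 *      mci->edac_check = my_check;     // non-NULL selects polled operation
 *      // ... fill in csrow geometry and capability fields ...
 */
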
/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        debugf1("%s()\n", __func__);

        edac_mc_unregister_sysfs_main_kobj(mci);

        /* free the mci instance memory here */
        kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *      scan list of controllers looking for the one that manages
 *      the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        debugf3("%s()\n", __func__);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->dev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        debugf0("%s()\n", __func__);

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                debugf0("%s() not canceled, flush the queue\n",
                        __func__);

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        /* scan the list and turn off all workq timers, doing so under lock
         */
        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->op_state == OP_RUNNING_POLL)
                        cancel_delayed_work(&mci->work);
        }

        mutex_unlock(&mem_ctls_mutex);

        /* re-walk the list, and reset the poll delay */
        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->dev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static void complete_mc_list_del(struct rcu_head *head)
{
        struct mem_ctl_info *mci;

        mci = container_of(head, struct mem_ctl_info, rcu);
        INIT_LIST_HEAD(&mci->link);
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);
        call_rcu(&mci->rcu, complete_mc_list_del);
        rcu_barrier();
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

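/*
 * Illustrative use, from code that already satisfies the locking rule
 * stated above:
 *
 *      struct mem_ctl_info *mci = edac_mc_find(0);
 *
 *      if (mci == NULL)
 *              // no controller is registered with index 0
 */
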
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *      create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; the caller
 *      must already have assigned a unique value to mci->mc_idx
 *
 * Return:
 *      0       Success
 *      !0      Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        int j;

                        edac_mc_dump_csrow(&mci->csrows[i]);
                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
                                edac_mc_dump_channel(&mci->csrows[i].
                                                channels[j]);
                }
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

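/*
 * Registration sketch for a hypothetical driver's probe routine (error
 * handling abbreviated; see the edac_mc_alloc() example above for how
 * 'mci' is obtained).  mci->mc_idx is already set by edac_mc_alloc()
 * from its edac_index argument, so after the capability and csrow
 * fields are filled in:
 *
 *      if (edac_mc_add_mc(mci)) {
 *              edac_mc_free(mci);
 *              return -ENODEV;
 *      }
 */
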
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *      remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        debugf0("%s()\n", __func__);

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

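/*
 * Matching teardown sketch for a hypothetical driver's remove routine:
 *
 *      struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *      if (mci)
 *              edac_mc_free(mci);
 */
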
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        debugf3("%s()\n", __func__);

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr, KM_BOUNCE_READ);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info *csrows = mci->csrows;
        int row, i;

        debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = &csrows[i];

                if (csrow->nr_pages == 0)
                        continue;

                debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
                        "mask(0x%lx)\n", mci->mc_idx, __func__,
                        csrow->first_page, page, csrow->last_page,
                        csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

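/*
 * Illustrative use ('err_addr' is a hypothetical physical error address
 * reported by the hardware):
 *
 *      int row = edac_mc_find_csrow_by_page(mci, err_addr >> PAGE_SHIFT);
 *
 *      if (row < 0)
 *              // the address did not match any populated csrow
 */
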
/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
                unsigned long page_frame_number,
                unsigned long offset_in_page, unsigned long syndrome,
                int row, int channel, const char *msg)
{
        unsigned long remapped_page;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /* FIXME - maybe make panic on INTERNAL ERROR an option */
        if (row >= mci->nr_csrows || row < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range "
                        "(%d >= %d)\n", row, mci->nr_csrows);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channel >= mci->csrows[row].nr_channels || channel < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel out of range "
                        "(%d >= %d)\n", channel,
                        mci->csrows[row].nr_channels);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (edac_mc_get_log_ce())
                /* FIXME - put in DIMM location */
                edac_mc_printk(mci, KERN_WARNING,
                        "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
                        "0x%lx, row %d, channel %d, label \"%s\": %s\n",
                        page_frame_number, offset_in_page,
                        mci->csrows[row].grain, syndrome, row, channel,
                        mci->csrows[row].channels[channel].label, msg);

        mci->ce_count++;
        mci->csrows[row].ce_count++;
        mci->csrows[row].channels[channel].ce_count++;

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some MC's can remap memory so that it is still available
                 * at a different address when PCI devices map into memory.
                 * MC's that can't do this lose the memory where PCI devices
                 * are mapped.  This mapping is MC dependent and so we call
                 * back into the MC driver for it to map the MC page to
                 * a physical (CPU) page which can then be mapped to a virtual
                 * page - which can then be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page, offset_in_page,
                                mci->csrows[row].grain);
        }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);

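/*
 * Reporting sketch for a hypothetical driver: when the hardware supplies
 * an address, decode it to a csrow and log with full detail; otherwise
 * fall back to the no-info variant below ('pfn', 'offset', 'syndrome'
 * and 'channel' are illustrative values decoded by the driver):
 *
 *      row = edac_mc_find_csrow_by_page(mci, pfn);
 *      if (row >= 0)
 *              edac_mc_handle_ce(mci, pfn, offset, syndrome,
 *                                row, channel, "my-ctl CE");
 *      else
 *              edac_mc_handle_ce_no_info(mci, "my-ctl CE");
 */
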
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_log_ce())
                edac_mc_printk(mci, KERN_WARNING,
                        "CE - no information available: %s\n", msg);

        mci->ce_noinfo_count++;
        mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
                unsigned long page_frame_number,
                unsigned long offset_in_page, int row, const char *msg)
{
        int len = EDAC_MC_LABEL_LEN * 4;
        char labels[len + 1];
        char *pos = labels;
        int chan;
        int chars;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /* FIXME - maybe make panic on INTERNAL ERROR an option */
        if (row >= mci->nr_csrows || row < 0) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range "
                        "(%d >= %d)\n", row, mci->nr_csrows);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        chars = snprintf(pos, len + 1, "%s",
                         mci->csrows[row].channels[0].label);
        len -= chars;
        pos += chars;

        for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
             chan++) {
                chars = snprintf(pos, len + 1, ":%s",
                                 mci->csrows[row].channels[chan].label);
                len -= chars;
                pos += chars;
        }

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_EMERG,
                        "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
                        "labels \"%s\": %s\n", page_frame_number,
                        offset_in_page, mci->csrows[row].grain, row,
                        labels, msg);

        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
                        "row %d, labels \"%s\": %s\n", mci->mc_idx,
                        page_frame_number, offset_in_page,
                        mci->csrows[row].grain, row, labels, msg);

        mci->ue_count++;
        mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
        if (edac_mc_get_panic_on_ue())
                panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_WARNING,
                        "UE - no information available: %s\n", msg);
        mci->ue_noinfo_count++;
        mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
                        unsigned int csrow,
                        unsigned int channela,
                        unsigned int channelb, char *msg)
{
        int len = EDAC_MC_LABEL_LEN * 4;
        char labels[len + 1];
        char *pos = labels;
        int chars;

        if (csrow >= mci->nr_csrows) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
                        csrow, mci->nr_csrows);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channela >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel-a out of range "
                        "(%d >= %d)\n",
                        channela, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (channelb >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel-b out of range "
                        "(%d >= %d)\n",
                        channelb, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
                return;
        }

        mci->ue_count++;
        mci->csrows[csrow].ue_count++;

        /* Generate the DIMM labels from the specified channels */
        chars = snprintf(pos, len + 1, "%s",
                         mci->csrows[csrow].channels[channela].label);
        len -= chars;
        pos += chars;
        chars = snprintf(pos, len + 1, "-%s",
                         mci->csrows[csrow].channels[channelb].label);

        if (edac_mc_get_log_ue())
                edac_mc_printk(mci, KERN_EMERG,
                        "UE row %d, channel-a= %d channel-b= %d "
                        "labels \"%s\": %s\n", csrow, channela, channelb,
                        labels, msg);

        if (edac_mc_get_panic_on_ue())
                panic("UE row %d, channel-a= %d channel-b= %d "
                        "labels \"%s\": %s\n", csrow, channela,
                        channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
                        unsigned int csrow, unsigned int channel, char *msg)
{
        /* Ensure boundary values */
        if (csrow >= mci->nr_csrows) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: row out of range (%d >= %d)\n",
                        csrow, mci->nr_csrows);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }
        if (channel >= mci->csrows[csrow].nr_channels) {
                /* something is wrong */
                edac_mc_printk(mci, KERN_ERR,
                        "INTERNAL ERROR: channel out of range (%d >= %d)\n",
                        channel, mci->csrows[csrow].nr_channels);
                edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
                return;
        }

        if (edac_mc_get_log_ce())
                /* FIXME - put in DIMM location */
                edac_mc_printk(mci, KERN_WARNING,
                        "CE row %d, channel %d, label \"%s\": %s\n",
                        csrow, channel,
                        mci->csrows[csrow].channels[channel].label, msg);

        mci->ce_count++;
        mci->csrows[csrow].ce_count++;
        mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);