/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = size % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}
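
/* Worked example (a sketch): for size == 10 on a typical LP64 target,
 * size > sizeof(long), so align == sizeof(long long) == 8 and
 * r == 10 % 8 == 2; ptr is then advanced by align - r == 6 bytes.
 * For size == 4, align == sizeof(int) == 4 and r == 0, so ptr is
 * returned unchanged.
 */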

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt: size of private storage needed
 * @nr_csrows: number of CSROWS needed for this MC
 * @nr_chans: number of channels for the MC
 * @edac_index: unique index to identify this MC instance
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct rank_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
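	/* Resulting layout of the single allocation (a sketch; each section
	 * is padded by edac_align_ptr() as needed):
	 *
	 *   mci | csrow_info[nr_csrows] | rank_info[nr_csrows * nr_chans]
	 *       | pvt (sz_pvt bytes)
	 */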
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* At this point the root kobj is valid; to 'free' the object,
	 * edac_mc_unregister_sysfs_main_kobj() must be called, which
	 * unregisters the kobject.  The actual free then happens in the
	 * kobject release callback.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
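
/* Typical driver lifecycle (a sketch; 'mydev_pvt', the csrow setup and the
 * probe/remove context are hypothetical driver code, not part of this file):
 *
 *	mci = edac_mc_alloc(sizeof(struct mydev_pvt), nr_csrows, nr_chans, 0);
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->dev = &pdev->dev;
 *	...fill in mod_name, ctl_name, edac_check, csrow geometry...
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 *
 * and on removal:
 *
 *	mci = edac_mc_del_mc(&pdev->dev);
 *	if (mci)
 *		edac_mc_free(mci);
 */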

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if an NMI-type handler has asserted an error;
 * in poll mode this always reports "asserted" so the check runs every time
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value; we need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

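	/* the list is kept sorted by mc_idx; find the insertion point */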
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
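	/* re-init the list node so the removed mci holds no stale
	 * pointers into mc_devices
	 */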
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list;
 *	the caller must have assigned a unique mci->mc_idx beforehand
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		if (csrow->nr_pages == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

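		/* besides the address range, the mask check below lets a
		 * csrow claim only the pages it actually owns when rows
		 * are interleaved within the same range
		 */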
		if ((page >= csrow->first_page) &&
			(page <= csrow->last_page) &&
			((page & csrow->page_mask) ==
				(csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page,
					mci->csrows[row].grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
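
/* Example (a sketch of a driver's ->edac_check callback reporting a
 * corrected error; the register-read helper and 'info' fields are
 * hypothetical, not part of this file):
 *
 *	static void mydev_check(struct mem_ctl_info *mci)
 *	{
 *		struct mydev_error_info info;
 *
 *		mydev_read_error_regs(mci, &info);
 *		if (info.ce_detected)
 *			edac_mc_handle_ce(mci, info.pfn, info.offset,
 *					info.syndrome, info.row,
 *					info.channel, "mydev CE");
 *	}
 */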

void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_log_ce())
		edac_mc_printk(mci, KERN_WARNING,
			"CE - no information available: %s\n", msg);

	mci->ce_noinfo_count++;
	mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[row].channels[0].label);
	len -= chars;
	pos += chars;

	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
	     chan++) {
		chars = snprintf(pos, len + 1, ":%s",
				 mci->csrows[row].channels[chan].label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_WARNING,
			"UE - no information available: %s\n", msg);
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
	len -= chars;
	pos += chars;
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this helper function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{
	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);