1 /*
2  * edac_mc kernel module
3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Thayne Harbaugh
8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
9  *	http://www.anime.net/~goemon/linux-ecc/
10  *
11  * Modified by Dave Peterson and Doug Thompson
12  *
13  */
14 
15 #include <linux/module.h>
16 #include <linux/proc_fs.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
19 #include <linux/smp.h>
20 #include <linux/init.h>
21 #include <linux/sysctl.h>
22 #include <linux/highmem.h>
23 #include <linux/timer.h>
24 #include <linux/slab.h>
25 #include <linux/jiffies.h>
26 #include <linux/spinlock.h>
27 #include <linux/list.h>
28 #include <linux/sysdev.h>
29 #include <linux/ctype.h>
30 #include <linux/edac.h>
31 #include <asm/uaccess.h>
32 #include <asm/page.h>
33 #include <asm/edac.h>
34 #include "edac_core.h"
35 #include "edac_module.h"
36 
/*
 * mem_ctls_mutex is taken in this file around walks and updates of the
 * mc_devices list and around the polled edac_check() call.
 * mc_devices is the list of registered memory controllers, linked through
 * the 'link' member of struct mem_ctl_info.
 */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);
40 
41 #ifdef CONFIG_EDAC_DEBUG
42 
/*
 * edac_mc_dump_channel
 *	debug helper: emit the fields of one channel_info through debugf4()
 */
static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}
51 
/*
 * edac_mc_dump_csrow
 *	debug helper: emit the fields of one csrow_info through debugf4()
 */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}
64 
/*
 * edac_mc_dump_mci
 *	debug helper: emit the top-level fields of a mem_ctl_info.
 *	Everything goes through debugf3() except edac_check, which is
 *	emitted through debugf4().
 */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
78 
79 #endif				/* CONFIG_EDAC_DEBUG */
80 
/*
 * Human-readable names for the memory types.
 *
 * Keep the entries in sync with enum mem_type (presumably the enum value
 * is used as the index into this table - confirm against the enum
 * definition before adding or reordering entries).
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
104 
/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * The required alignment is picked from 'size'; the amount of padding is
 * derived from the address held in 'ptr'.  An already aligned 'ptr' is
 * returned unchanged, otherwise it is rounded up to the next multiple of
 * the chosen alignment.
 *
 * Returns 'ptr' itself when 'size' is no larger than a char, since such an
 * item has no alignment requirement.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;

	/* Misalignment is a property of the address, not of the item size:
	 * using 'size' here would leave unaligned pointers untouched whenever
	 * size is a multiple of 'align', and would push aligned pointers off
	 * their alignment whenever it is not.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}
138 
/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt:	size of private storage needed
 * @nr_csrows:	Number of CSROWS needed for this MC
 * @nr_chans:	Number of channels for the MC
 * @edac_index:	value stored in the mc_idx field of the new structure
 *
 * Everything is allocated (zeroed) as one big chunk - more efficient.
 * The chunk holds, in order: the mem_ctl_info, @nr_csrows csrow_info
 * entries, @nr_csrows * @nr_chans channel_info entries and @sz_pvt bytes
 * of private storage.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL allocation failed, or the main sysfs kobject could not be
 *	     registered
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 *
	 * With mci set to address 0, each pointer computed below is really
	 * the byte offset of that item from the start of the chunk.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	/* total size: offset of the private area plus its length */
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	/* no private area requested: pvt_info is left NULL */
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	/* each csrow owns a contiguous run of nr_chans channel entries */
	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
230 
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Unregisters the main sysfs kobject of @mci first, then releases the
 * memory with kfree().  @mci must not be used after this call.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
246 
247 
248 /**
249  * find_mci_by_dev
250  *
251  *	scan list of controllers looking for the one that manages
252  *	the 'dev' device
253  * @dev: pointer to a struct device related with the MCI
254  */
find_mci_by_dev(struct device * dev)255 struct mem_ctl_info *find_mci_by_dev(struct device *dev)
256 {
257 	struct mem_ctl_info *mci;
258 	struct list_head *item;
259 
260 	debugf3("%s()\n", __func__);
261 
262 	list_for_each(item, &mc_devices) {
263 		mci = list_entry(item, struct mem_ctl_info, link);
264 
265 		if (mci->dev == dev)
266 			return mci;
267 	}
268 
269 	return NULL;
270 }
271 EXPORT_SYMBOL_GPL(find_mci_by_dev);
272 
273 /*
274  * handler for EDAC to check if NMI type handler has asserted interrupt
275  */
edac_mc_assert_error_check_and_clear(void)276 static int edac_mc_assert_error_check_and_clear(void)
277 {
278 	int old_state;
279 
280 	if (edac_op_state == EDAC_OPSTATE_POLL)
281 		return 1;
282 
283 	old_state = edac_err_assert;
284 	edac_err_assert = 0;
285 
286 	return old_state;
287 }
288 
289 /*
290  * edac_mc_workq_function
291  *	performs the operation scheduled by a workq request
292  */
edac_mc_workq_function(struct work_struct * work_req)293 static void edac_mc_workq_function(struct work_struct *work_req)
294 {
295 	struct delayed_work *d_work = to_delayed_work(work_req);
296 	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
297 
298 	mutex_lock(&mem_ctls_mutex);
299 
300 	/* if this control struct has movd to offline state, we are done */
301 	if (mci->op_state == OP_OFFLINE) {
302 		mutex_unlock(&mem_ctls_mutex);
303 		return;
304 	}
305 
306 	/* Only poll controllers that are running polled and have a check */
307 	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
308 		mci->edac_check(mci);
309 
310 	mutex_unlock(&mem_ctls_mutex);
311 
312 	/* Reschedule */
313 	queue_delayed_work(edac_workqueue, &mci->work,
314 			msecs_to_jiffies(edac_mc_get_poll_msec()));
315 }
316 
317 /*
318  * edac_mc_workq_setup
319  *	initialize a workq item for this mci
320  *	passing in the new delay period in msec
321  *
322  *	locking model:
323  *
324  *		called with the mem_ctls_mutex held
325  */
edac_mc_workq_setup(struct mem_ctl_info * mci,unsigned msec)326 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
327 {
328 	debugf0("%s()\n", __func__);
329 
330 	/* if this instance is not in the POLL state, then simply return */
331 	if (mci->op_state != OP_RUNNING_POLL)
332 		return;
333 
334 	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
335 	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
336 }
337 
338 /*
339  * edac_mc_workq_teardown
340  *	stop the workq processing on this mci
341  *
342  *	locking model:
343  *
344  *		called WITHOUT lock held
345  */
edac_mc_workq_teardown(struct mem_ctl_info * mci)346 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
347 {
348 	int status;
349 
350 	if (mci->op_state != OP_RUNNING_POLL)
351 		return;
352 
353 	status = cancel_delayed_work(&mci->work);
354 	if (status == 0) {
355 		debugf0("%s() not canceled, flush the queue\n",
356 			__func__);
357 
358 		/* workq instance might be running, wait for it */
359 		flush_workqueue(edac_workqueue);
360 	}
361 }
362 
363 /*
364  * edac_mc_reset_delay_period(unsigned long value)
365  *
366  *	user space has updated our poll period value, need to
367  *	reset our workq delays
368  */
edac_mc_reset_delay_period(int value)369 void edac_mc_reset_delay_period(int value)
370 {
371 	struct mem_ctl_info *mci;
372 	struct list_head *item;
373 
374 	mutex_lock(&mem_ctls_mutex);
375 
376 	/* scan the list and turn off all workq timers, doing so under lock
377 	 */
378 	list_for_each(item, &mc_devices) {
379 		mci = list_entry(item, struct mem_ctl_info, link);
380 
381 		if (mci->op_state == OP_RUNNING_POLL)
382 			cancel_delayed_work(&mci->work);
383 	}
384 
385 	mutex_unlock(&mem_ctls_mutex);
386 
387 
388 	/* re-walk the list, and reset the poll delay */
389 	mutex_lock(&mem_ctls_mutex);
390 
391 	list_for_each(item, &mc_devices) {
392 		mci = list_entry(item, struct mem_ctl_info, link);
393 
394 		edac_mc_workq_setup(mci, (unsigned long) value);
395 	}
396 
397 	mutex_unlock(&mem_ctls_mutex);
398 }
399 
400 
401 
402 /* Return 0 on success, 1 on failure.
403  * Before calling this function, caller must
404  * assign a unique value to mci->mc_idx.
405  *
406  *	locking model:
407  *
408  *		called with the mem_ctls_mutex lock held
409  */
add_mc_to_global_list(struct mem_ctl_info * mci)410 static int add_mc_to_global_list(struct mem_ctl_info *mci)
411 {
412 	struct list_head *item, *insert_before;
413 	struct mem_ctl_info *p;
414 
415 	insert_before = &mc_devices;
416 
417 	p = find_mci_by_dev(mci->dev);
418 	if (unlikely(p != NULL))
419 		goto fail0;
420 
421 	list_for_each(item, &mc_devices) {
422 		p = list_entry(item, struct mem_ctl_info, link);
423 
424 		if (p->mc_idx >= mci->mc_idx) {
425 			if (unlikely(p->mc_idx == mci->mc_idx))
426 				goto fail1;
427 
428 			insert_before = item;
429 			break;
430 		}
431 	}
432 
433 	list_add_tail_rcu(&mci->link, insert_before);
434 	atomic_inc(&edac_handlers);
435 	return 0;
436 
437 fail0:
438 	edac_printk(KERN_WARNING, EDAC_MC,
439 		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
440 		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
441 	return 1;
442 
443 fail1:
444 	edac_printk(KERN_WARNING, EDAC_MC,
445 		"bug in low-level driver: attempt to assign\n"
446 		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
447 	return 1;
448 }
449 
/*
 * RCU callback registered by del_mc_from_global_list(): recover the mci
 * from its embedded rcu_head and reinitialize its list linkage.
 */
static void complete_mc_list_del(struct rcu_head *head)
{
	struct mem_ctl_info *mci;

	mci = container_of(head, struct mem_ctl_info, rcu);
	INIT_LIST_HEAD(&mci->link);
}
457 
/*
 * Take 'mci' off the mc_devices list and drop the edac_handlers count.
 *
 * The entry is unlinked with list_del_rcu() and complete_mc_list_del() is
 * registered through call_rcu(); the rcu_barrier() that follows is
 * presumably there so the callback has run before this function returns
 * (confirm against the RCU documentation).
 *
 * Both callers in this file hold mem_ctls_mutex.
 */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);
	call_rcu(&mci->rcu, complete_mc_list_del);
	rcu_barrier();
}
465 
466 /**
467  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
468  *
469  * If found, return a pointer to the structure.
470  * Else return NULL.
471  *
472  * Caller must hold mem_ctls_mutex.
473  */
edac_mc_find(int idx)474 struct mem_ctl_info *edac_mc_find(int idx)
475 {
476 	struct list_head *item;
477 	struct mem_ctl_info *mci;
478 
479 	list_for_each(item, &mc_devices) {
480 		mci = list_entry(item, struct mem_ctl_info, link);
481 
482 		if (mci->mc_idx >= idx) {
483 			if (mci->mc_idx == idx)
484 				return mci;
485 
486 			break;
487 		}
488 	}
489 
490 	return NULL;
491 }
492 EXPORT_SYMBOL(edac_mc_find);
493 
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; its mc_idx
 *       is used to position it in the list
 *
 * On success the controller is put into OP_RUNNING_POLL (and its polling
 * work is queued) when it provides an edac_check routine, or into
 * OP_RUNNING_INTERRUPT otherwise.
 *
 * Return:
 *	0	Success
 *	!0	Failure (this function returns 1)
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	/* rejects a device or an mc_idx that is already registered */
	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

	/* undo the list insertion when the sysfs setup failed */
fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
566 
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * The structure itself is not freed here; it is handed back to the caller.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes; must happen with the mutex released and
	 * while op_state still says OP_RUNNING_POLL, since the teardown
	 * helper returns early for any other state
	 */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline
	 * NOTE(review): the work function only stops requeueing itself once
	 * it sees OP_OFFLINE, which is set after the teardown above - verify
	 * the teardown cannot leave a freshly requeued work item behind.
	 */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
608 
/*
 * edac_mc_scrub_block
 *	map the page with frame number 'page' and run the architecture's
 *	atomic_scrub() over 'size' bytes starting 'offset' bytes into it.
 *	Frame numbers rejected by pfn_valid() are silently ignored.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	unsigned long irq_flags = 0;
	struct page *target;
	void *mapped;
	int highmem;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	target = pfn_to_page(page);
	highmem = PageHighMem(target);

	/* interrupts stay disabled while a highmem page is mapped */
	if (highmem)
		local_irq_save(irq_flags);

	mapped = kmap_atomic(target, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(mapped + offset, size);

	/* Unmap and complete */
	kunmap_atomic(mapped, KM_BOUNCE_READ);

	if (highmem)
		local_irq_restore(irq_flags);
}
639 
640 /* FIXME - should return -1 */
edac_mc_find_csrow_by_page(struct mem_ctl_info * mci,unsigned long page)641 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
642 {
643 	struct csrow_info *csrows = mci->csrows;
644 	int row, i;
645 
646 	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
647 	row = -1;
648 
649 	for (i = 0; i < mci->nr_csrows; i++) {
650 		struct csrow_info *csrow = &csrows[i];
651 
652 		if (csrow->nr_pages == 0)
653 			continue;
654 
655 		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
656 			"mask(0x%lx)\n", mci->mc_idx, __func__,
657 			csrow->first_page, page, csrow->last_page,
658 			csrow->page_mask);
659 
660 		if ((page >= csrow->first_page) &&
661 		    (page <= csrow->last_page) &&
662 		    ((page & csrow->page_mask) ==
663 		     (csrow->first_page & csrow->page_mask))) {
664 			row = i;
665 			break;
666 		}
667 	}
668 
669 	if (row == -1)
670 		edac_mc_printk(mci, KERN_ERR,
671 			"could not look up page error address %lx\n",
672 			(unsigned long)page);
673 
674 	return row;
675 }
676 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
677 
678 /* FIXME - setable log (warning/emerg) levels */
679 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
edac_mc_handle_ce(struct mem_ctl_info * mci,unsigned long page_frame_number,unsigned long offset_in_page,unsigned long syndrome,int row,int channel,const char * msg)680 void edac_mc_handle_ce(struct mem_ctl_info *mci,
681 		unsigned long page_frame_number,
682 		unsigned long offset_in_page, unsigned long syndrome,
683 		int row, int channel, const char *msg)
684 {
685 	unsigned long remapped_page;
686 
687 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
688 
689 	/* FIXME - maybe make panic on INTERNAL ERROR an option */
690 	if (row >= mci->nr_csrows || row < 0) {
691 		/* something is wrong */
692 		edac_mc_printk(mci, KERN_ERR,
693 			"INTERNAL ERROR: row out of range "
694 			"(%d >= %d)\n", row, mci->nr_csrows);
695 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
696 		return;
697 	}
698 
699 	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
700 		/* something is wrong */
701 		edac_mc_printk(mci, KERN_ERR,
702 			"INTERNAL ERROR: channel out of range "
703 			"(%d >= %d)\n", channel,
704 			mci->csrows[row].nr_channels);
705 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
706 		return;
707 	}
708 
709 	if (edac_mc_get_log_ce())
710 		/* FIXME - put in DIMM location */
711 		edac_mc_printk(mci, KERN_WARNING,
712 			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
713 			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
714 			page_frame_number, offset_in_page,
715 			mci->csrows[row].grain, syndrome, row, channel,
716 			mci->csrows[row].channels[channel].label, msg);
717 
718 	mci->ce_count++;
719 	mci->csrows[row].ce_count++;
720 	mci->csrows[row].channels[channel].ce_count++;
721 
722 	if (mci->scrub_mode & SCRUB_SW_SRC) {
723 		/*
724 		 * Some MC's can remap memory so that it is still available
725 		 * at a different address when PCI devices map into memory.
726 		 * MC's that can't do this lose the memory where PCI devices
727 		 * are mapped.  This mapping is MC dependent and so we call
728 		 * back into the MC driver for it to map the MC page to
729 		 * a physical (CPU) page which can then be mapped to a virtual
730 		 * page - which can then be scrubbed.
731 		 */
732 		remapped_page = mci->ctl_page_to_phys ?
733 			mci->ctl_page_to_phys(mci, page_frame_number) :
734 			page_frame_number;
735 
736 		edac_mc_scrub_block(remapped_page, offset_in_page,
737 				mci->csrows[row].grain);
738 	}
739 }
740 EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
741 
edac_mc_handle_ce_no_info(struct mem_ctl_info * mci,const char * msg)742 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
743 {
744 	if (edac_mc_get_log_ce())
745 		edac_mc_printk(mci, KERN_WARNING,
746 			"CE - no information available: %s\n", msg);
747 
748 	mci->ce_noinfo_count++;
749 	mci->ce_count++;
750 }
751 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
752 
edac_mc_handle_ue(struct mem_ctl_info * mci,unsigned long page_frame_number,unsigned long offset_in_page,int row,const char * msg)753 void edac_mc_handle_ue(struct mem_ctl_info *mci,
754 		unsigned long page_frame_number,
755 		unsigned long offset_in_page, int row, const char *msg)
756 {
757 	int len = EDAC_MC_LABEL_LEN * 4;
758 	char labels[len + 1];
759 	char *pos = labels;
760 	int chan;
761 	int chars;
762 
763 	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
764 
765 	/* FIXME - maybe make panic on INTERNAL ERROR an option */
766 	if (row >= mci->nr_csrows || row < 0) {
767 		/* something is wrong */
768 		edac_mc_printk(mci, KERN_ERR,
769 			"INTERNAL ERROR: row out of range "
770 			"(%d >= %d)\n", row, mci->nr_csrows);
771 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
772 		return;
773 	}
774 
775 	chars = snprintf(pos, len + 1, "%s",
776 			 mci->csrows[row].channels[0].label);
777 	len -= chars;
778 	pos += chars;
779 
780 	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
781 		chan++) {
782 		chars = snprintf(pos, len + 1, ":%s",
783 				 mci->csrows[row].channels[chan].label);
784 		len -= chars;
785 		pos += chars;
786 	}
787 
788 	if (edac_mc_get_log_ue())
789 		edac_mc_printk(mci, KERN_EMERG,
790 			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
791 			"labels \"%s\": %s\n", page_frame_number,
792 			offset_in_page, mci->csrows[row].grain, row,
793 			labels, msg);
794 
795 	if (edac_mc_get_panic_on_ue())
796 		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
797 			"row %d, labels \"%s\": %s\n", mci->mc_idx,
798 			page_frame_number, offset_in_page,
799 			mci->csrows[row].grain, row, labels, msg);
800 
801 	mci->ue_count++;
802 	mci->csrows[row].ue_count++;
803 }
804 EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
805 
edac_mc_handle_ue_no_info(struct mem_ctl_info * mci,const char * msg)806 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
807 {
808 	if (edac_mc_get_panic_on_ue())
809 		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
810 
811 	if (edac_mc_get_log_ue())
812 		edac_mc_printk(mci, KERN_WARNING,
813 			"UE - no information available: %s\n", msg);
814 	mci->ue_noinfo_count++;
815 	mci->ue_count++;
816 }
817 EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
818 
819 /*************************************************************
820  * On Fully Buffered DIMM modules, this help function is
821  * called to process UE events
822  */
edac_mc_handle_fbd_ue(struct mem_ctl_info * mci,unsigned int csrow,unsigned int channela,unsigned int channelb,char * msg)823 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
824 			unsigned int csrow,
825 			unsigned int channela,
826 			unsigned int channelb, char *msg)
827 {
828 	int len = EDAC_MC_LABEL_LEN * 4;
829 	char labels[len + 1];
830 	char *pos = labels;
831 	int chars;
832 
833 	if (csrow >= mci->nr_csrows) {
834 		/* something is wrong */
835 		edac_mc_printk(mci, KERN_ERR,
836 			"INTERNAL ERROR: row out of range (%d >= %d)\n",
837 			csrow, mci->nr_csrows);
838 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
839 		return;
840 	}
841 
842 	if (channela >= mci->csrows[csrow].nr_channels) {
843 		/* something is wrong */
844 		edac_mc_printk(mci, KERN_ERR,
845 			"INTERNAL ERROR: channel-a out of range "
846 			"(%d >= %d)\n",
847 			channela, mci->csrows[csrow].nr_channels);
848 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
849 		return;
850 	}
851 
852 	if (channelb >= mci->csrows[csrow].nr_channels) {
853 		/* something is wrong */
854 		edac_mc_printk(mci, KERN_ERR,
855 			"INTERNAL ERROR: channel-b out of range "
856 			"(%d >= %d)\n",
857 			channelb, mci->csrows[csrow].nr_channels);
858 		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
859 		return;
860 	}
861 
862 	mci->ue_count++;
863 	mci->csrows[csrow].ue_count++;
864 
865 	/* Generate the DIMM labels from the specified channels */
866 	chars = snprintf(pos, len + 1, "%s",
867 			 mci->csrows[csrow].channels[channela].label);
868 	len -= chars;
869 	pos += chars;
870 	chars = snprintf(pos, len + 1, "-%s",
871 			 mci->csrows[csrow].channels[channelb].label);
872 
873 	if (edac_mc_get_log_ue())
874 		edac_mc_printk(mci, KERN_EMERG,
875 			"UE row %d, channel-a= %d channel-b= %d "
876 			"labels \"%s\": %s\n", csrow, channela, channelb,
877 			labels, msg);
878 
879 	if (edac_mc_get_panic_on_ue())
880 		panic("UE row %d, channel-a= %d channel-b= %d "
881 			"labels \"%s\": %s\n", csrow, channela,
882 			channelb, labels, msg);
883 }
884 EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
885 
886 /*************************************************************
887  * On Fully Buffered DIMM modules, this help function is
888  * called to process CE events
889  */
edac_mc_handle_fbd_ce(struct mem_ctl_info * mci,unsigned int csrow,unsigned int channel,char * msg)890 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
891 			unsigned int csrow, unsigned int channel, char *msg)
892 {
893 
894 	/* Ensure boundary values */
895 	if (csrow >= mci->nr_csrows) {
896 		/* something is wrong */
897 		edac_mc_printk(mci, KERN_ERR,
898 			"INTERNAL ERROR: row out of range (%d >= %d)\n",
899 			csrow, mci->nr_csrows);
900 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
901 		return;
902 	}
903 	if (channel >= mci->csrows[csrow].nr_channels) {
904 		/* something is wrong */
905 		edac_mc_printk(mci, KERN_ERR,
906 			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
907 			channel, mci->csrows[csrow].nr_channels);
908 		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
909 		return;
910 	}
911 
912 	if (edac_mc_get_log_ce())
913 		/* FIXME - put in DIMM location */
914 		edac_mc_printk(mci, KERN_WARNING,
915 			"CE row %d, channel %d, label \"%s\": %s\n",
916 			csrow, channel,
917 			mci->csrows[csrow].channels[channel].label, msg);
918 
919 	mci->ce_count++;
920 	mci->csrows[csrow].ce_count++;
921 	mci->csrows[csrow].channels[channel].ce_count++;
922 }
923 EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
924