View Incident: http://co-op.engr.sgi.com/BugWorks/query.cgi/904077

Status: open                          Priority: 3                           
Assigned Engineer: djh                Submitter: steiner                    
Assigned Group: linux-kernel          Project: communitylinux               
Opened Date: 11/07/03                 Description:

I got the following errors late in boot using a 2.6 kernel. Interrupt 0x1f
is supposed to be the CMC (corrected machine check) error interrupt, so it
should *not* have been reported as an unexpected interrupt.

	Processor 507 has spun up...
	CPU 507 IS NOW UP!
	Starting migration thread for cpu 507
	CPUS done 512
	Total of 508 processors activated (772087.76 BogoMIPS).

.....

==========================
ADDITIONAL INFORMATION (ADD)
From: keith owens <kaos@sgi.com>
Date: Nov 10 2003 05:37:06PM
[BugWorks mailnews processor v1.4.4]
==========================
>Looks like we're missing a lot of Keith's MCA stuff from 2.6.  Keith, I can't
>find the thread on linux-ia64 about this, but I assume that you or Bjorn were
>going to sync this code up with David's tree at some point?

When a change has been accepted in 2.4/2.6, it is up to Bjorn and David
to reconcile the two trees; it is not our problem.  Alas there are a
lot of changes that have been accepted in 2.4 and not propagated to
2.6, so SGI have to do the work a second time.  This patch adds one of
the many missing 2.4 fixes to 2.6-sn.

Jesse, does this fix 904077?


===========================================================================
linux/arch/ia64/kernel/mca.c
===========================================================================

--- /usr/tmp/TmpDir.25937-0/linux/arch/ia64/kernel/mca.c_1.25	Tue Nov 11 12:30:45 2003
+++ linux/arch/ia64/kernel/mca.c	Tue Nov 11 12:29:56 2003
@@ -450,7 +450,10 @@
 ia64_mca_register_cpev (int cpev)
 {
 	/* Register the CPE interrupt vector with SAL */
-	if (ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0)) {
+	struct ia64_sal_retval isrv;
+
+	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
+	if (isrv.status) {
 		printk(KERN_ERR "ia64_mca_platform_init: failed to register Corrected "
 		       "Platform Error interrupt vector with SAL.\n");
 		return;
@@ -629,6 +632,8 @@
 	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
 	int i;
 	s64 rc;
+	struct ia64_sal_retval isrv;
+	u64 timeout = IA64_MCA_RENDEZ_TIMEOUT;	/* platform specific */
 
 	IA64_MCA_DEBUG("ia64_mca_init: begin\n");
 
@@ -644,23 +649,33 @@
 	 */
 
 	/* Register the rendezvous interrupt vector with SAL */
-	if ((rc = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
-					 SAL_MC_PARAM_MECHANISM_INT,
-					 IA64_MCA_RENDEZ_VECTOR,
-					 IA64_MCA_RENDEZ_TIMEOUT,
-					 SAL_MC_PARAM_RZ_ALWAYS)))
-	{
+	while (1) {
+		isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
+					      SAL_MC_PARAM_MECHANISM_INT,
+					      IA64_MCA_RENDEZ_VECTOR,
+					      timeout,
+					      SAL_MC_PARAM_RZ_ALWAYS);
+		rc = isrv.status;
+		if (rc == 0)
+			break;
+		if (rc == -2) {
+			printk(KERN_INFO "ia64_mca_init: increasing MCA rendezvous timeout from "
+				"%ld to %ld\n", timeout, isrv.v0);
+			timeout = isrv.v0;
+			continue;
+		}
 		printk(KERN_ERR "ia64_mca_init: Failed to register rendezvous interrupt "
 		       "with SAL.  rc = %ld\n", rc);
 		return;
 	}
 
 	/* Register the wakeup interrupt vector with SAL */
-	if ((rc = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
-					 SAL_MC_PARAM_MECHANISM_INT,
-					 IA64_MCA_WAKEUP_VECTOR,
-					 0, 0)))
-	{
+	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
+				      SAL_MC_PARAM_MECHANISM_INT,
+				      IA64_MCA_WAKEUP_VECTOR,
+				      0, 0);
+	rc = isrv.status;
+	if (rc) {
 		printk(KERN_ERR "ia64_mca_init: Failed to register wakeup interrupt with SAL.  "
 		       "rc = %ld\n", rc);
 		return;

===========================================================================
linux/include/asm-ia64/sal.h
===========================================================================

--- /usr/tmp/TmpDir.25937-0/linux/include/asm-ia64/sal.h_1.16	Tue Nov 11 12:30:45 2003
+++ linux/include/asm-ia64/sal.h	Tue Nov 11 12:29:56 2003
@@ -725,14 +725,16 @@
  * Allow the OS to specify the interrupt number to be used by SAL to interrupt OS during
  * the machine check rendezvous sequence as well as the mechanism to wake up the
  * non-monarch processor at the end of machine check processing.
+ * Returns the complete ia64_sal_retval because some calls return more than just a status
+ * value.
  */
-static inline s64
+static inline struct ia64_sal_retval
 ia64_sal_mc_set_params (u64 param_type, u64 i_or_m, u64 i_or_m_val, u64 timeout, u64 rz_always)
 {
 	struct ia64_sal_retval isrv;
 	SAL_CALL(isrv, SAL_MC_SET_PARAMS, param_type, i_or_m, i_or_m_val,
 		 timeout, rz_always, 0, 0);
-	return isrv.status;
+	return isrv;
 }
 
 /* Read from PCI configuration space */
