From patchwork Wed Oct 27 05:28:29 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Huang, Ying" X-Patchwork-Id: 284772 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id o9R5UFOj023465 for ; Wed, 27 Oct 2010 05:30:16 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753350Ab0J0F2s (ORCPT ); Wed, 27 Oct 2010 01:28:48 -0400 Received: from mga09.intel.com ([134.134.136.24]:19326 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753191Ab0J0F2r (ORCPT ); Wed, 27 Oct 2010 01:28:47 -0400 Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga102.jf.intel.com with ESMTP; 26 Oct 2010 22:28:47 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.58,244,1286175600"; d="scan'208";a="671269861" Received: from yhuang-dev.sh.intel.com ([10.239.13.2]) by orsmga001.jf.intel.com with ESMTP; 26 Oct 2010 22:28:45 -0700 From: Huang Ying To: Len Brown Cc: linux-kernel@vger.kernel.org, Andi Kleen , ying.huang@intel.com, linux-acpi@vger.kernel.org Subject: [PATCH -v3 5/8] Hardware error record persistent support Date: Wed, 27 Oct 2010 13:28:29 +0800 Message-Id: <1288157312-10441-6-git-send-email-ying.huang@intel.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1288157312-10441-1-git-send-email-ying.huang@intel.com> References: <1288157312-10441-1-git-send-email-ying.huang@intel.com> Sender: linux-acpi-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-acpi@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Wed, 27 Oct 2010 05:30:16 +0000 (UTC) --- a/drivers/char/herror/Makefile +++ b/drivers/char/herror/Makefile @@ -1 +1 @@ -obj-y += herr-core.o +obj-y += herr-core.o herr-persist.o --- a/drivers/char/herror/herr-core.c +++ b/drivers/char/herror/herr-core.c @@ -43,9 
+43,9 @@
 #include
 #include
 
-#define HERR_NOTIFY_BIT 0
+#include "herr-internal.h"
 
-static unsigned long herr_flags;
+unsigned long herr_flags;	/* no longer static: declared in herr-internal.h */
 
 /*
  * Record list management and error reporting
@@ -545,6 +545,7 @@ static ssize_t herr_mix_read(struct file
 {
 	int rc;
 	static DEFINE_MUTEX(read_mutex);
+	u64 record_id;	/* id of the persisted record handed to user space */
 
 	if (*off != 0)
 		return -EINVAL;
@@ -552,7 +553,14 @@ static ssize_t herr_mix_read(struct file
 	rc = mutex_lock_interruptible(&read_mutex);
 	if (rc)
 		return rc;
+	rc = herr_persist_peek_user(&record_id, ubuf, usize);	/* persisted records are tried first */
+	if (rc > 0) {
+		herr_persist_clear(record_id);	/* delivered: clear it from storage, result ignored */
+		goto out;
+	}
+
 	rc = herr_rcd_lists_read(ubuf, usize, &read_mutex);
+out:
 	mutex_unlock(&read_mutex);
 
 	return rc;
@@ -561,15 +569,40 @@ static ssize_t herr_mix_read(struct file
 static unsigned int herr_mix_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &herr_mix_wait, wait);
-	if (!herr_rcd_lists_is_empty())
+	if (!herr_rcd_lists_is_empty() || !herr_persist_read_done())
 		return POLLIN | POLLRDNORM;
 	return 0;
 }
 
+static long herr_mix_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+	void __user *p = (void __user *)arg;
+	int rc;
+	u64 record_id;	/* PEEK fills it in, but it is not passed back to user space */
+	struct herr_persist_buffer buf;
+
+	switch (cmd) {
+	case HERR_PERSIST_PEEK:	/* arg points to a struct herr_persist_buffer */
+		rc = copy_from_user(&buf, p, sizeof(buf));
+		if (rc)
+			return -EFAULT;
+		return herr_persist_peek_user(&record_id, buf.buf,
+					      buf.buf_size);
+	case HERR_PERSIST_CLEAR:	/* arg points to the u64 id of the record to clear */
+		rc = copy_from_user(&record_id, p, sizeof(record_id));
+		if (rc)
+			return -EFAULT;
+		return herr_persist_clear(record_id);
+	default:
+		return -ENOTTY;
+	}
+}
+
 static const struct file_operations herr_mix_dev_fops = {
 	.owner = THIS_MODULE,
 	.read = herr_mix_read,
 	.poll = herr_mix_poll,
+	.unlocked_ioctl = herr_mix_ioctl,
 };
 
 static int __init herr_mix_dev_init(void)
--- /dev/null
+++ b/drivers/char/herror/herr-internal.h
@@ -0,0 +1,12 @@
+#ifndef HERR_INTERNAL_H
+#define HERR_INTERNAL_H
+
+#define HERR_NOTIFY_BIT 0	/* bit index used with herr_flags */
+
+extern unsigned long herr_flags;	/* defined in herr-core.c */
+
+int herr_persist_read_done(void);	/* 1 when every provider is read_done, else 0 */
+ssize_t herr_persist_peek_user(u64 *record_id, char __user *ercd,
+			       size_t bufsiz);	/* > 0: a record was read out to ercd */
+int herr_persist_clear(u64 record_id);	/* 0 once a provider cleared the record */
+#endif /* HERR_INTERNAL_H */
--- /dev/null
+++ b/drivers/char/herror/herr-persist.c
@@ -0,0 +1,174 @@
+/*
+ * Hardware error record persistent support
+ *
+ * Normally, corrected hardware error records go through kernel
+ * processing and are finally logged to disk or over the network.  For
+ * uncorrected errors, however, the system may panic directly for
+ * better error containment, and neither disk nor network is usable in
+ * such a half-working system.  To avoid losing these valuable hardware
+ * error records, they are saved into some kind of simple persistent
+ * storage, such as flash, before the panic, so that they can be read
+ * out after the system has rebooted successfully.
+ *
+ * Copyright 2010 Intel Corp.
+ * Author: Huang Ying
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include "herr-internal.h"
+
+/*
+ * Simple persistent storage provider list.  herr_persists_mutex
+ * provides mutual exclusion on the writer side; RCU is used to
+ * implement the lock-less reader side.
+ */
+static LIST_HEAD(herr_persists);
+static DEFINE_MUTEX(herr_persists_mutex);
+
+int herr_persist_register(struct herr_persist *persist)
+{
+	if (!persist->peek_user)	/* peek_user is mandatory; in and clear may be NULL */
+		return -EINVAL;
+	persist->read_done = 0;
+	if (mutex_lock_interruptible(&herr_persists_mutex))
+		return -EINTR;
+	list_add_rcu(&persist->list, &herr_persists);
+	mutex_unlock(&herr_persists_mutex);
+	/*
+	 * Persistent storage may still hold hardware error records
+	 * from the previous boot; notify the user space error daemon
+	 * so that it checks for them.
+	 */
+	set_bit(HERR_NOTIFY_BIT, &herr_flags);
+	herr_notify();
+	return 0;
+}
+EXPORT_SYMBOL_GPL(herr_persist_register);
+
+void herr_persist_unregister(struct herr_persist *persist)
+{
+	mutex_lock(&herr_persists_mutex);
+	list_del_rcu(&persist->list);
+	mutex_unlock(&herr_persists_mutex);
+	synchronize_rcu();	/* wait for RCU readers that may still see the entry */
+}
+EXPORT_SYMBOL_GPL(herr_persist_unregister);
+
+/* Can be used in atomic context including NMI */
+int herr_persist_in(const struct herr_record *ercd)
+{
+	struct herr_persist *persist;
+	int rc = -ENODEV;	/* returned when no provider implements ->in */
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(persist, &herr_persists, list) {
+		if (!persist->in)
+			continue;
+		rc = persist->in(ercd);
+		if (!rc)	/* stored by this provider: do not try the others */
+			break;
+	}
+	rcu_read_unlock();
+	return rc;
+}
+EXPORT_SYMBOL_GPL(herr_persist_in);
+
+int herr_persist_read_done(void)
+{
+	struct herr_persist *persist;
+	int rc = 1;	/* an empty provider list counts as done */
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(persist, &herr_persists, list) {
+		if (!persist->read_done) {
+			rc = 0;
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return rc;
+}
+
+/* Read the next error record from persistent storage without removing it */
+ssize_t herr_persist_peek_user(u64 *record_id, char __user *ercd,
+			       size_t bufsiz)
+{
+	struct herr_persist *persist;
+	ssize_t rc = 0;
+
+	if (mutex_lock_interruptible(&herr_persists_mutex))
+		return -EINTR;
+	list_for_each_entry(persist, &herr_persists, list) {
+		if (persist->read_done)
+			continue;
+		rc = persist->peek_user(record_id, ercd, bufsiz);
+		if (rc > 0)	/* got a record: stop at the first provider that has one */
+			break;
+		else if (rc != -EINTR && rc != -EAGAIN && rc != -EINVAL)
+			persist->read_done = 1;	/* 0 or any other error: stop polling this provider */
+	}
+	mutex_unlock(&herr_persists_mutex);
+	return rc;
+}
+
+/* Clear the specified error record from persistent storage */
+int herr_persist_clear(u64 record_id)
+{
+	struct herr_persist *persist;
+	int rc = -ENOENT;	/* returned when no provider implements ->clear */
+
+	if (mutex_lock_interruptible(&herr_persists_mutex))
+		return -EINTR;
+	list_for_each_entry(persist, &herr_persists, list) {
+		if (!persist->clear)
+			continue;
+		rc = persist->clear(record_id);
+		if (!rc)
+			break;
+		/*
+		 * Failed to clear: mark the provider read_done, because
+		 * this record cannot be skipped
+		 */
+		else if (rc != -EINTR && rc != -EAGAIN && rc != -ENOENT)
+			persist->read_done = 1;
+	}
+	mutex_unlock(&herr_persists_mutex);
+	return rc;
+}
+
+static int herr_persist_record(struct herr_record *ercd, void *data)
+{
+	int *severity = data;	/* severity level selected by the caller */
+
+	if (ercd->severity == *severity)
+		return herr_persist_in(ercd);
+	return 0;
+}
+
+void herr_persist_all_records(void)
+{
+	int severity;	/* one pass per level, HERR_SEV_FATAL down to HERR_SEV_NONE */
+
+	for (severity = HERR_SEV_FATAL; severity >= HERR_SEV_NONE; severity--)
+		herr_for_each_record(herr_persist_record, &severity);
+}
+EXPORT_SYMBOL_GPL(herr_persist_all_records);
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -141,6 +141,7 @@ header-y += hdlc.h
 header-y += hdlcdrv.h
 header-y += hdreg.h
 header-y += herror_record.h
+header-y += herror.h
 header-y += hid.h
 header-y += hiddev.h
 header-y += hidraw.h
--- a/include/linux/herror.h
+++ b/include/linux/herror.h
@@ -1,10 +1,22 @@
 #ifndef LINUX_HERROR_H
 #define LINUX_HERROR_H
 
+#include
+#include
+
+struct herr_persist_buffer {
+	void __user *buf;	/* user buffer that receives the record */
+	unsigned int buf_size;	/* size of buf */
+};
+
+#define HERR_PERSIST_PEEK	_IOW('H', 1, struct herr_persist_buffer)
+#define HERR_PERSIST_CLEAR	_IOW('H', 2, __u64)	/* __u64: plain u64 is kernel-only, this header is exported */
+
+#ifdef __KERNEL__
+
 #include
 #include
 #include
-#include
 
 /*
  * Hardware error reporting
@@ -66,4 +78,38 @@ static inline void herr_dev_put(struct h
 
 int herr_dev_register(struct herr_dev *dev);
 void herr_dev_unregister(struct herr_dev *dev);
+
+
+/*
+ * Simple Persistent
Storage
+ */
+
+struct herr_persist;
+/* Put an error record into simple persistent storage */
+int herr_persist_in(const struct herr_record *ercd);
+/* Save all not-yet-consumed error records into persistent storage */
+void herr_persist_all_records(void);
+
+/*
+ * Simple Persistent Storage Provider Management
+ */
+struct herr_persist {
+	struct list_head list;	/* link in the registered provider list */
+	char *name;
+	unsigned int read_done:1;	/* set: peek skips this provider; reset on register */
+	/* Optional: put an error record into storage, must be NMI-safe */
+	int (*in)(const struct herr_record *ercd);
+	/*
+	 * Mandatory: read out an error record from storage to user space
+	 * without removing it; HERR_RCD_PERSIST must be set in record flags
+	 */
+	ssize_t (*peek_user)(u64 *record_id, char __user *ubuf, size_t usize);
+	/* Optional: clear the error record with the given id */
+	int (*clear)(u64 record_id);
+};
+
+/* Register (un-register) simple persistent storage provider */
+int herr_persist_register(struct herr_persist *persist);
+void herr_persist_unregister(struct herr_persist *persist);
+#endif /* __KERNEL__ */
 #endif