From 01854e697a77a434104b2f7e6d7fd463a978af32 Mon Sep 17 00:00:00 2001 From: Luming Yu Date: Sat, 26 May 2007 22:49:58 +0800 Subject: ACPI: add ACPI 3.0 _TPC _TSS _PTC throttling support adds _TPC _TSS _PTC -- Throttling Present Capabilities Signed-off-by: Luming Yu Signed-off-by: Len Brown --- include/acpi/processor.h | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index b4b0ffdab09..01d2f24c224 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -21,6 +21,8 @@ #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */ #define ACPI_PSD_REV0_ENTRIES 5 +#define ACPI_TSD_REV0_REVISION 0 /* Support for _TSD as in ACPI 3.0 */ +#define ACPI_TSD_REV0_ENTRIES 5 /* * Types of coordination defined in ACPI 3.0. Same macros can be used across * P, C and T states @@ -125,17 +127,52 @@ struct acpi_processor_performance { /* Throttling Control */ +struct acpi_tsd_package { + acpi_integer num_entries; + acpi_integer revision; + acpi_integer domain; + acpi_integer coord_type; + acpi_integer num_processors; +} __attribute__ ((packed)); + +struct acpi_ptc_register { + u8 descriptor; + u16 length; + u8 space_id; + u8 bit_width; + u8 bit_offset; + u8 reserved; + u64 address; +} __attribute__ ((packed)); + +struct acpi_processor_tx_tss { + acpi_integer freqpercentage; /* */ + acpi_integer power; /* milliWatts */ + acpi_integer transition_latency; /* microseconds */ + acpi_integer control; /* control value */ + acpi_integer status; /* success indicator */ +}; struct acpi_processor_tx { u16 power; u16 performance; }; +struct acpi_processor; struct acpi_processor_throttling { - int state; + unsigned int state; + unsigned int platform_limit; + struct acpi_pct_register control_register; + struct acpi_pct_register status_register; + unsigned int state_count; + struct acpi_processor_tx_tss *states_tss; + struct 
acpi_tsd_package domain_info; + cpumask_t shared_cpu_map; + int (*acpi_processor_get_throttling) (struct acpi_processor *pr); + int (*acpi_processor_set_throttling) (struct acpi_processor *pr, int state); + u32 address; u8 duty_offset; u8 duty_width; - int state_count; struct acpi_processor_tx states[ACPI_PROCESSOR_MAX_THROTTLING]; }; @@ -169,6 +206,9 @@ struct acpi_processor { u32 id; u32 pblk; int performance_platform_limit; + int throttling_platform_limit; + /*0 - states 0..n-th satte available*/ + struct acpi_processor_flags flags; struct acpi_processor_power power; struct acpi_processor_performance *performance; @@ -270,7 +310,7 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) /* in processor_throttling.c */ int acpi_processor_get_throttling_info(struct acpi_processor *pr); -int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state); extern struct file_operations acpi_processor_throttling_fops; /* in processor_idle.c */ -- cgit v1.2.3-70-g09d2 From ff55a9cebab02403f942121e2f898bb06ecfffbb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 2 Jun 2007 00:15:25 -0400 Subject: ACPI: Lindent processor throttling code Signed-off-by: Len Brown --- drivers/acpi/processor_throttling.c | 140 +++++++++++++++++++----------------- include/acpi/processor.h | 7 +- 2 files changed, 79 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 1bae2e42a7c..3a2e9a60187 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -44,18 +44,18 @@ #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_throttling"); -static int acpi_processor_get_throttling (struct acpi_processor *pr); -int acpi_processor_set_throttling (struct acpi_processor *pr, int state); +static int acpi_processor_get_throttling(struct acpi_processor 
*pr); +int acpi_processor_set_throttling(struct acpi_processor *pr, int state); static int acpi_processor_get_platform_limit(struct acpi_processor *pr) { acpi_status status = 0; unsigned long tpc = 0; - if(!pr) + if (!pr) return -EINVAL; status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc); - if(ACPI_FAILURE(status) && status != AE_NOT_FOUND){ + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC")); return -ENODEV; } @@ -102,7 +102,8 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr) if ((obj.type != ACPI_TYPE_BUFFER) || (obj.buffer.length < sizeof(struct acpi_ptc_register)) || (obj.buffer.pointer == NULL)) { - printk(KERN_ERR PREFIX "Invalid _PTC data (control_register)\n"); + printk(KERN_ERR PREFIX + "Invalid _PTC data (control_register)\n"); result = -EFAULT; goto end; } @@ -124,9 +125,9 @@ static int acpi_processor_get_throttling_control(struct acpi_processor *pr) } memcpy(&pr->throttling.status_register, obj.buffer.pointer, - sizeof(struct acpi_ptc_register)); + sizeof(struct acpi_ptc_register)); - end: + end: kfree(buffer.pointer); return result; @@ -168,7 +169,9 @@ static int acpi_processor_get_throttling_states(struct acpi_processor *pr) for (i = 0; i < pr->throttling.state_count; i++) { - struct acpi_processor_tx_tss *tx = (struct acpi_processor_tx_tss *) &(pr->throttling.states_tss[i]); + struct acpi_processor_tx_tss *tx = + (struct acpi_processor_tx_tss *)&(pr->throttling. 
+ states_tss[i]); state.length = sizeof(struct acpi_processor_tx_tss); state.pointer = tx; @@ -186,7 +189,7 @@ static int acpi_processor_get_throttling_states(struct acpi_processor *pr) if (!tx->freqpercentage) { printk(KERN_ERR PREFIX - "Invalid _TSS data: freq is zero\n"); + "Invalid _TSS data: freq is zero\n"); result = -EFAULT; kfree(pr->throttling.states_tss); goto end; @@ -198,14 +201,14 @@ static int acpi_processor_get_throttling_states(struct acpi_processor *pr) return result; } -static int acpi_processor_get_tsd(struct acpi_processor *pr) +static int acpi_processor_get_tsd(struct acpi_processor *pr) { int result = 0; acpi_status status = AE_OK; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"}; - struct acpi_buffer state = {0, NULL}; - union acpi_object *tsd = NULL; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" }; + struct acpi_buffer state = { 0, NULL }; + union acpi_object *tsd = NULL; struct acpi_tsd_package *pdomain; status = acpi_evaluate_object(pr->handle, "_TSD", NULL, &buffer); @@ -232,7 +235,7 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr) state.pointer = pdomain; status = acpi_extract_package(&(tsd->package.elements[0]), - &format, &state); + &format, &state); if (ACPI_FAILURE(status)) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); result = -EFAULT; @@ -251,7 +254,7 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr) goto end; } -end: + end: kfree(buffer.pointer); return result; } @@ -266,7 +269,6 @@ static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr) u32 duty_mask = 0; u32 duty_value = 0; - if (!pr) return -EINVAL; @@ -306,65 +308,75 @@ static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr) return 0; } -static int acpi_read_throttling_status(struct acpi_processor_throttling *throttling) +static int 
acpi_read_throttling_status(struct acpi_processor_throttling + *throttling) { int value = -1; switch (throttling->status_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: - acpi_os_read_port((acpi_io_address)throttling->status_register.address, - &value, - (u32)throttling->status_register.bit_width*8); + acpi_os_read_port((acpi_io_address) throttling->status_register. + address, &value, + (u32) throttling->status_register.bit_width * + 8); break; case ACPI_ADR_SPACE_FIXED_HARDWARE: - printk(KERN_ERR PREFIX "HARDWARE addr space,NOT supported yet\n"); + printk(KERN_ERR PREFIX + "HARDWARE addr space,NOT supported yet\n"); break; default: printk(KERN_ERR PREFIX "Unknown addr space %d\n", - (u32) (throttling->status_register.space_id)); + (u32) (throttling->status_register.space_id)); } return value; } -static int acpi_write_throttling_state(struct acpi_processor_throttling *throttling,int value) +static int acpi_write_throttling_state(struct acpi_processor_throttling + *throttling, int value) { int ret = -1; switch (throttling->control_register.space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: - acpi_os_write_port((acpi_io_address)throttling->control_register.address, - value, - (u32)throttling->control_register.bit_width*8); + acpi_os_write_port((acpi_io_address) throttling-> + control_register.address, value, + (u32) throttling->control_register. 
+ bit_width * 8); ret = 0; break; case ACPI_ADR_SPACE_FIXED_HARDWARE: - printk(KERN_ERR PREFIX "HARDWARE addr space,NOT supported yet\n"); + printk(KERN_ERR PREFIX + "HARDWARE addr space,NOT supported yet\n"); break; default: printk(KERN_ERR PREFIX "Unknown addr space %d\n", - (u32) (throttling->control_register.space_id)); + (u32) (throttling->control_register.space_id)); } return ret; } -static int acpi_get_throttling_state(struct acpi_processor *pr,int value) +static int acpi_get_throttling_state(struct acpi_processor *pr, int value) { int i; for (i = 0; i < pr->throttling.state_count; i++) { - struct acpi_processor_tx_tss *tx = (struct acpi_processor_tx_tss *) &(pr->throttling.states_tss[i]); - if(tx->control == value) + struct acpi_processor_tx_tss *tx = + (struct acpi_processor_tx_tss *)&(pr->throttling. + states_tss[i]); + if (tx->control == value) break; } - if(i > pr->throttling.state_count) - i=-1; + if (i > pr->throttling.state_count) + i = -1; return i; } -static int acpi_get_throttling_value(struct acpi_processor *pr,int state) +static int acpi_get_throttling_value(struct acpi_processor *pr, int state) { int value = -1; - if(state >=0 && state <= pr->throttling.state_count){ - struct acpi_processor_tx_tss *tx = (struct acpi_processor_tx_tss *) &(pr->throttling.states_tss[state]); + if (state >= 0 && state <= pr->throttling.state_count) { + struct acpi_processor_tx_tss *tx = + (struct acpi_processor_tx_tss *)&(pr->throttling. 
+ states_tss[state]); value = tx->control; } return value; @@ -375,7 +387,6 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) int state = 0; u32 value = 0; - if (!pr) return -EINVAL; @@ -385,8 +396,8 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) pr->throttling.state = 0; local_irq_disable(); value = acpi_read_throttling_status(&pr->throttling); - if(value >= 0){ - state = acpi_get_throttling_state(pr,value); + if (value >= 0) { + state = acpi_get_throttling_state(pr, value); pr->throttling.state = state; } local_irq_enable(); @@ -394,7 +405,6 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) return 0; } - static int acpi_processor_get_throttling(struct acpi_processor *pr) { return pr->throttling.acpi_processor_get_throttling(pr); @@ -406,7 +416,6 @@ int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, int state) u32 duty_mask = 0; u32 duty_value = 0; - if (!pr) return -EINVAL; @@ -494,9 +503,9 @@ int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int state) local_irq_disable(); - value = acpi_get_throttling_value(pr,state); - if(value >=0){ - acpi_write_throttling_state(&pr->throttling,value); + value = acpi_get_throttling_value(pr, state); + if (value >= 0) { + acpi_write_throttling_state(&pr->throttling, value); pr->throttling.state = state; } local_irq_enable(); @@ -506,7 +515,7 @@ int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int state) int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { - return pr->throttling.acpi_processor_set_throttling(pr,state); + return pr->throttling.acpi_processor_set_throttling(pr, state); } int acpi_processor_get_throttling_info(struct acpi_processor *pr) @@ -518,7 +527,6 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr) int no_tss = 0; int no_tsd = 0; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "pblk_address[0x%08x] duty_offset[%d] duty_width[%d]\n", pr->throttling.address, @@ 
-533,12 +541,16 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr) no_tss = acpi_processor_get_throttling_states(pr); no_tsd = acpi_processor_get_tsd(pr); - if(no_ptc || no_tss) { - pr->throttling.acpi_processor_get_throttling = &acpi_processor_get_throttling_fadt; - pr->throttling.acpi_processor_set_throttling = &acpi_processor_set_throttling_fadt; + if (no_ptc || no_tss) { + pr->throttling.acpi_processor_get_throttling = + &acpi_processor_get_throttling_fadt; + pr->throttling.acpi_processor_set_throttling = + &acpi_processor_set_throttling_fadt; } else { - pr->throttling.acpi_processor_get_throttling = &acpi_processor_get_throttling_ptc; - pr->throttling.acpi_processor_set_throttling = &acpi_processor_set_throttling_ptc; + pr->throttling.acpi_processor_get_throttling = + &acpi_processor_get_throttling_ptc; + pr->throttling.acpi_processor_set_throttling = + &acpi_processor_set_throttling_ptc; } if (!pr->throttling.address) { @@ -620,7 +632,6 @@ static int acpi_processor_throttling_seq_show(struct seq_file *seq, int i = 0; int result = 0; - if (!pr) goto end; @@ -639,24 +650,24 @@ static int acpi_processor_throttling_seq_show(struct seq_file *seq, seq_printf(seq, "state count: %d\n" "active state: T%d\n" - "state available: T%d to T%d\n", + "state available: T%d to T%d\n", pr->throttling.state_count, pr->throttling.state, - pr->throttling_platform_limit, - pr->throttling.state_count-1); + pr->throttling_platform_limit, + pr->throttling.state_count - 1); seq_puts(seq, "states:\n"); - if(acpi_processor_get_throttling == acpi_processor_get_throttling_fadt) + if (acpi_processor_get_throttling == acpi_processor_get_throttling_fadt) for (i = 0; i < pr->throttling.state_count; i++) seq_printf(seq, " %cT%d: %02d%%\n", - (i == pr->throttling.state ? '*' : ' '), i, - (pr->throttling.states[i].performance ? pr-> - throttling.states[i].performance / 10 : 0)); + (i == pr->throttling.state ? '*' : ' '), i, + (pr->throttling.states[i].performance ? 
pr-> + throttling.states[i].performance / 10 : 0)); else for (i = 0; i < pr->throttling.state_count; i++) seq_printf(seq, " %cT%d: %02d%%\n", - (i == pr->throttling.state ? '*' : ' '), i, - (int)pr->throttling.states_tss[i].freqpercentage); - + (i == pr->throttling.state ? '*' : ' '), i, + (int)pr->throttling.states_tss[i]. + freqpercentage); end: return 0; @@ -669,7 +680,7 @@ static int acpi_processor_throttling_open_fs(struct inode *inode, PDE(inode)->data); } -static ssize_t acpi_processor_write_throttling(struct file * file, +static ssize_t acpi_processor_write_throttling(struct file *file, const char __user * buffer, size_t count, loff_t * data) { @@ -678,7 +689,6 @@ static ssize_t acpi_processor_write_throttling(struct file * file, struct acpi_processor *pr = m->private; char state_string[12] = { '\0' }; - if (!pr || (count > sizeof(state_string) - 1)) return -EINVAL; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 01d2f24c224..f9f987f8e66 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -167,8 +167,9 @@ struct acpi_processor_throttling { struct acpi_processor_tx_tss *states_tss; struct acpi_tsd_package domain_info; cpumask_t shared_cpu_map; - int (*acpi_processor_get_throttling) (struct acpi_processor *pr); - int (*acpi_processor_set_throttling) (struct acpi_processor *pr, int state); + int (*acpi_processor_get_throttling) (struct acpi_processor * pr); + int (*acpi_processor_set_throttling) (struct acpi_processor * pr, + int state); u32 address; u8 duty_offset; @@ -207,7 +208,7 @@ struct acpi_processor { u32 pblk; int performance_platform_limit; int throttling_platform_limit; - /*0 - states 0..n-th satte available*/ + /* 0 - states 0..n-th state available */ struct acpi_processor_flags flags; struct acpi_processor_power power; -- cgit v1.2.3-70-g09d2 From a0a3f6c69b45dcafefdce5dbac2221c02aaae871 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 27 Mar 2007 20:25:46 -0400 Subject: ACPICA: Changes for Cygwin 
compatibility Allow generation of ACPICA apps on Cygwin. Signed-off-by: Bob Moore Signed-off-by: Len Brown --- include/acpi/platform/acenv.h | 2 +- include/acpi/platform/aclinux.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index dab2ec59a3b..c785485e62a 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -136,7 +136,7 @@ /*! [Begin] no source code translation */ -#if defined(__linux__) +#if defined(_LINUX) || defined(__linux__) #include "aclinux.h" #elif defined(_AED_EFI) diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index a568717f98c..6ed15a0978e 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -91,7 +91,10 @@ #define ACPI_USE_NATIVE_DIVIDE #endif +#ifndef __cdecl #define __cdecl +#endif + #define ACPI_FLUSH_CPU_CACHE() #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 864bdfb912e372670b5b2541dac9d273a4a7722a Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 19 Jun 2007 11:40:03 +0800 Subject: ACPI: Export events via generic netlink Upon ACPI events, send an "acpi_event" via Generic Netlink. This is in addition to /proc/acpi/event, which remains intact for now. Thanks to Jamal for his great help. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- drivers/acpi/bus.c | 4 ++ drivers/acpi/event.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++-- include/acpi/acpi_bus.h | 3 +- 3 files changed, 165 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index e5084ececb6..6b2658c9624 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -292,6 +292,10 @@ int acpi_bus_generate_event(struct acpi_device *device, u8 type, int data) if (!device) return -EINVAL; + if (acpi_bus_generate_genetlink_event(device, type, data)) + printk(KERN_WARNING PREFIX + "Failed to generate an ACPI event via genetlink!\n"); + /* drop event on the floor if no one's listening */ if (!event_is_open) return 0; diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 3b23562e6f9..98627b02f54 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("event"); @@ -48,7 +50,6 @@ acpi_system_read_event(struct file *file, char __user * buffer, size_t count, static int chars_remaining = 0; static char *ptr; - if (!chars_remaining) { memset(&event, 0, sizeof(struct acpi_bus_event)); @@ -106,23 +107,174 @@ static const struct file_operations acpi_system_event_ops = { .poll = acpi_system_poll_event, }; +#ifdef CONFIG_NET +unsigned int acpi_event_seqnum; +struct acpi_genl_event { + acpi_device_class device_class; + char bus_id[15]; + u32 type; + u32 data; +}; + +/* attributes of acpi_genl_family */ +enum { + ACPI_GENL_ATTR_UNSPEC, + ACPI_GENL_ATTR_EVENT, /* ACPI event info needed by user space */ + __ACPI_GENL_ATTR_MAX, +}; +#define ACPI_GENL_ATTR_MAX (__ACPI_GENL_ATTR_MAX - 1) + +/* commands supported by the acpi_genl_family */ +enum { + ACPI_GENL_CMD_UNSPEC, + ACPI_GENL_CMD_EVENT, /* kernel->user notifications for ACPI events */ + __ACPI_GENL_CMD_MAX, +}; +#define ACPI_GENL_CMD_MAX (__ACPI_GENL_CMD_MAX - 1) 
+ +#define ACPI_GENL_NAME "acpi_event" +#define ACPI_GENL_VERSION 0x01 + +static struct genl_family acpi_event_genl_family = { + .id = GENL_ID_GENERATE, + .name = ACPI_GENL_NAME, + .version = ACPI_GENL_VERSION, + .maxattr = ACPI_GENL_ATTR_MAX, +}; + +/* .doit: standard command callback */ +static int acpi_genl_cmd_event(struct sk_buff *skb, struct genl_info *info) +{ + struct acpi_genl_event *event = info->userhdr; + + if (!event) + ACPI_DEBUG_PRINT((ACPI_DB_WARN, "ACPI event: NULL\n")); + + return 0; +} + +static struct genl_ops acpi_event_genl_ops = { + .cmd = ACPI_GENL_CMD_EVENT, + .doit = acpi_genl_cmd_event, +}; + +int acpi_bus_generate_genetlink_event(struct acpi_device *device, + u8 type, int data) +{ + struct sk_buff *skb; + struct nlattr *attr; + struct acpi_genl_event *event; + void *msg_header; + int size; + int result; + + /* allocate memory */ + size = nla_total_size(sizeof(struct acpi_genl_event)) + + nla_total_size(0); + + skb = genlmsg_new(size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* add the genetlink message header */ + msg_header = genlmsg_put(skb, 0, acpi_event_seqnum++, + &acpi_event_genl_family, 0, + ACPI_GENL_CMD_EVENT); + if (!msg_header) { + nlmsg_free(skb); + return -ENOMEM; + } + + /* fill the data */ + attr = + nla_reserve(skb, ACPI_GENL_ATTR_EVENT, + sizeof(struct acpi_genl_event)); + if (!attr) { + nlmsg_free(skb); + return -EINVAL; + } + + event = nla_data(attr); + if (!event) { + nlmsg_free(skb); + return -EINVAL; + } + + memset(event, 0, sizeof(struct acpi_genl_event)); + + strcpy(event->device_class, device->pnp.device_class); + strcpy(event->bus_id, device->dev.bus_id); + event->type = type; + event->data = data; + + /* send multicast genetlink message */ + result = genlmsg_end(skb, msg_header); + if (result < 0) { + nlmsg_free(skb); + return result; + } + + result = + genlmsg_multicast(skb, 0, acpi_event_genl_family.id, GFP_ATOMIC); + if (result) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Failed to send a Genetlink 
message!\n")); + return 0; +} +EXPORT_SYMBOL(acpi_bus_generate_genetlink_event); + +static int acpi_event_genetlink_init(void) +{ + int result; + + result = genl_register_family(&acpi_event_genl_family); + if (result) + return result; + + result = + genl_register_ops(&acpi_event_genl_family, &acpi_event_genl_ops); + if (result) + genl_unregister_family(&acpi_event_genl_family); + + return result; +} + +#else +int acpi_bus_generate_genetlink_event(struct acpi_device *device, u8 type, + int data) +{ + return 0; +} +EXPORT_SYMBOL(acpi_bus_generate_genetlink_event); + +static int acpi_event_genetlink_init(void) +{ + return -ENODEV; +} +#endif + static int __init acpi_event_init(void) { struct proc_dir_entry *entry; int error = 0; - if (acpi_disabled) return 0; + /* create genetlink for acpi event */ + error = acpi_event_genetlink_init(); + if (error) + printk(KERN_WARNING PREFIX + "Failed to create genetlink family for ACPI event\n"); + /* 'event' [R] */ entry = create_proc_entry("event", S_IRUSR, acpi_root_dir); if (entry) entry->proc_fops = &acpi_system_event_ops; - else { - error = -ENODEV; - } - return error; + else + return -ENODEV; + + return 0; } -subsys_initcall(acpi_event_init); +fs_initcall(acpi_event_init); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c6fa5e023bc..5e3dcf3299b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -321,7 +321,8 @@ struct acpi_bus_event { }; extern struct kset acpi_subsys; - +extern int acpi_bus_generate_genetlink_event(struct acpi_device *device, + u8 type, int data); /* * External Functions */ -- cgit v1.2.3-70-g09d2 From 315a8e34f7c12609947f9b435faae451aaa5dd41 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 24 Jul 2007 15:58:41 +0800 Subject: Blackfin arch: setup aliases for some core Core A MMRs setup aliases for some core Core A MMRs to ease porting in cases where common code would actually want Core A (or Core B MMR is reserved) Signed-off-by: Mike Frysinger Signed-off-by: 
Bryan Wu --- arch/blackfin/mach-bf561/head.S | 10 +++++----- include/asm-blackfin/mach-bf561/cdefBF561.h | 6 ++++++ include/asm-blackfin/mach-bf561/defBF561.h | 4 ++++ 3 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/blackfin/mach-bf561/head.S b/arch/blackfin/mach-bf561/head.S index 2f08bcb2dde..38650a62898 100644 --- a/arch/blackfin/mach-bf561/head.S +++ b/arch/blackfin/mach-bf561/head.S @@ -440,15 +440,15 @@ ENTRY(_bfin_reset) SSYNC; /* make sure SYSCR is set to use BMODE */ - P0.h = hi(SICA_SYSCR); - P0.l = lo(SICA_SYSCR); - R0.l = 0x20; + P0.h = hi(SYSCR); + P0.l = lo(SYSCR); + R0.l = 0x20; /* on BF561, disable core b */ W[P0] = R0.l; SSYNC; /* issue a system soft reset */ - P1.h = hi(SICA_SWRST); - P1.l = lo(SICA_SWRST); + P1.h = hi(SWRST); + P1.l = lo(SWRST); R1.l = 0x0007; W[P1] = R1; SSYNC; diff --git a/include/asm-blackfin/mach-bf561/cdefBF561.h b/include/asm-blackfin/mach-bf561/cdefBF561.h index 1a8ec9e4692..6e87ab269ff 100644 --- a/include/asm-blackfin/mach-bf561/cdefBF561.h +++ b/include/asm-blackfin/mach-bf561/cdefBF561.h @@ -81,6 +81,12 @@ static __inline__ void bfin_write_VR_CTL(unsigned int val) #define bfin_write_PLL_LOCKCNT(val) bfin_write16(PLL_LOCKCNT,val) #define bfin_read_CHIPID() bfin_read32(CHIPID) +/* For MMR's that are reserved on Core B, set up defines to better integrate with other ports */ +#define bfin_read_SWRST() bfin_read_SICA_SWRST() +#define bfin_write_SWRST() bfin_write_SICA_SWRST() +#define bfin_read_SYSCR() bfin_read_SICA_SYSCR() +#define bfin_write_SYSCR() bfin_write_SICA_SYSCR() + /* System Reset and Interrupt Controller registers for core A (0xFFC0 0100-0xFFC0 01FF) */ #define bfin_read_SICA_SWRST() bfin_read16(SICA_SWRST) #define bfin_write_SICA_SWRST(val) bfin_write16(SICA_SWRST,val) diff --git a/include/asm-blackfin/mach-bf561/defBF561.h b/include/asm-blackfin/mach-bf561/defBF561.h index 89150ecb909..0f2dc6e6335 100644 --- a/include/asm-blackfin/mach-bf561/defBF561.h +++ 
b/include/asm-blackfin/mach-bf561/defBF561.h @@ -52,6 +52,10 @@ #define PLL_LOCKCNT 0xFFC00010 /* PLL Lock Count register (16-bit) */ #define CHIPID 0xFFC00014 /* Chip ID Register */ +/* For MMR's that are reserved on Core B, set up defines to better integrate with other ports */ +#define SWRST SICA_SWRST +#define SYSCR SICA_SYSCR + /* System Reset and Interrupt Controller registers for core A (0xFFC0 0100-0xFFC0 01FF) */ #define SICA_SWRST 0xFFC00100 /* Software Reset register */ #define SICA_SYSCR 0xFFC00104 /* System Reset Configuration register */ -- cgit v1.2.3-70-g09d2 From 12a7991180f44e7d993a3a0a442890fc3de67f57 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 25 Jul 2007 10:25:29 +0800 Subject: Blackfin arch: switch to using proper defines this time THREAD_SIZE and PAGE_SIZE instead of just PAGE_SIZE everywhere Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- include/asm-blackfin/thread_info.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-blackfin/thread_info.h b/include/asm-blackfin/thread_info.h index fa8f08cf283..34d3c2eec94 100644 --- a/include/asm-blackfin/thread_info.h +++ b/include/asm-blackfin/thread_info.h @@ -39,6 +39,11 @@ */ #define ALIGN_PAGE_MASK 0xffffe000 +/* + * Size of kernel stack for each process. This must be a power of 2... + */ +#define THREAD_SIZE 8192 /* 2 pages */ + #ifndef __ASSEMBLY__ typedef unsigned long mm_segment_t; @@ -76,11 +81,6 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) -/* - * Size of kernel stack for each process. This must be a power of 2... 
- */ -#define THREAD_SIZE 8192 /* 2 pages */ - /* How to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) @@ -94,7 +94,7 @@ static inline struct thread_info *current_thread_info(void) struct thread_info *ti; __asm__("%0 = sp;": "=&d"(ti): ); - return (struct thread_info *)((long)ti & ~8191UL); + return (struct thread_info *)((long)ti & ~((long)THREAD_SIZE-1)); } /* thread information allocation */ -- cgit v1.2.3-70-g09d2 From 8be80ed3f74a60e1af5a30e6b4fa4f744911676e Mon Sep 17 00:00:00 2001 From: Bernd Schmidt Date: Wed, 25 Jul 2007 14:44:49 +0800 Subject: Blackfin arch: Initialize the exception vectors early in the boot process Initialize the exception vectors early in the boot process, so that CPLB faults can be handled when memory protection is enabled. Signed-off-by: Bernd Schmidt Signed-off-by: Bryan Wu --- arch/blackfin/kernel/setup.c | 1 + arch/blackfin/mach-common/ints-priority-dc.c | 39 +++++++++++++++----------- arch/blackfin/mach-common/ints-priority-sc.c | 42 +++++++++++++++------------- include/asm-blackfin/bfin-global.h | 1 + 4 files changed, 47 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c index 75aeca8f7c2..88f221b89b3 100644 --- a/arch/blackfin/kernel/setup.c +++ b/arch/blackfin/kernel/setup.c @@ -425,6 +425,7 @@ void __init setup_arch(char **cmdline_p) BUG_ON((char *)&atomic_xor32 - (char *)&fixed_code_start != ATOMIC_XOR32 - FIXED_CODE_START); + init_exception_vectors(); bf53x_cache_init(); } diff --git a/arch/blackfin/mach-common/ints-priority-dc.c b/arch/blackfin/mach-common/ints-priority-dc.c index 6b9fd03ce83..660f881b620 100644 --- a/arch/blackfin/mach-common/ints-priority-dc.c +++ b/arch/blackfin/mach-common/ints-priority-dc.c @@ -358,26 +358,10 @@ static void bf561_demux_gpio_irq(unsigned int inta_irq, #endif /* CONFIG_IRQCHIP_DEMUX_GPIO */ -/* - * This function should be called during kernel startup to 
initialize - * the BFin IRQ handling routines. - */ -int __init init_arch_irq(void) +void __init init_exception_vectors(void) { - int irq; - unsigned long ilat = 0; - /* Disable all the peripheral intrs - page 4-29 HW Ref manual */ - bfin_write_SICA_IMASK0(SIC_UNMASK_ALL); - bfin_write_SICA_IMASK1(SIC_UNMASK_ALL); SSYNC(); - bfin_write_SICA_IWR0(IWR_ENABLE_ALL); - bfin_write_SICA_IWR1(IWR_ENABLE_ALL); - - local_irq_disable(); - - init_exception_buff(); - #ifndef CONFIG_KGDB bfin_write_EVT0(evt_emulation); #endif @@ -395,6 +379,27 @@ int __init init_arch_irq(void) bfin_write_EVT14(evt14_softirq); bfin_write_EVT15(evt_system_call); CSYNC(); +} + +/* + * This function should be called during kernel startup to initialize + * the BFin IRQ handling routines. + */ +int __init init_arch_irq(void) +{ + int irq; + unsigned long ilat = 0; + /* Disable all the peripheral intrs - page 4-29 HW Ref manual */ + bfin_write_SICA_IMASK0(SIC_UNMASK_ALL); + bfin_write_SICA_IMASK1(SIC_UNMASK_ALL); + SSYNC(); + + bfin_write_SICA_IWR0(IWR_ENABLE_ALL); + bfin_write_SICA_IWR1(IWR_ENABLE_ALL); + + local_irq_disable(); + + init_exception_buff(); for (irq = 0; irq <= SYS_IRQS; irq++) { if (irq <= IRQ_CORETMR) diff --git a/arch/blackfin/mach-common/ints-priority-sc.c b/arch/blackfin/mach-common/ints-priority-sc.c index 28a878c3577..3f6305d1a1a 100644 --- a/arch/blackfin/mach-common/ints-priority-sc.c +++ b/arch/blackfin/mach-common/ints-priority-sc.c @@ -713,6 +713,29 @@ static void bfin_demux_gpio_irq(unsigned int intb_irq, } #endif /* CONFIG_IRQCHIP_DEMUX_GPIO */ +void __init init_exception_vectors(void) +{ + SSYNC(); + +#ifndef CONFIG_KGDB + bfin_write_EVT0(evt_emulation); +#endif + bfin_write_EVT2(evt_evt2); + bfin_write_EVT3(trap); + bfin_write_EVT5(evt_ivhw); + bfin_write_EVT6(evt_timer); + bfin_write_EVT7(evt_evt7); + bfin_write_EVT8(evt_evt8); + bfin_write_EVT9(evt_evt9); + bfin_write_EVT10(evt_evt10); + bfin_write_EVT11(evt_evt11); + bfin_write_EVT12(evt_evt12); + 
bfin_write_EVT13(evt_evt13); + bfin_write_EVT14(evt14_softirq); + bfin_write_EVT15(evt_system_call); + CSYNC(); +} + /* * This function should be called during kernel startup to initialize * the BFin IRQ handling routines. @@ -733,29 +756,10 @@ int __init init_arch_irq(void) bfin_write_SIC_IMASK(SIC_UNMASK_ALL); bfin_write_SIC_IWR(IWR_ENABLE_ALL); #endif - SSYNC(); local_irq_disable(); -#ifndef CONFIG_KGDB - bfin_write_EVT0(evt_emulation); -#endif - bfin_write_EVT2(evt_evt2); - bfin_write_EVT3(trap); - bfin_write_EVT5(evt_ivhw); - bfin_write_EVT6(evt_timer); - bfin_write_EVT7(evt_evt7); - bfin_write_EVT8(evt_evt8); - bfin_write_EVT9(evt_evt9); - bfin_write_EVT10(evt_evt10); - bfin_write_EVT11(evt_evt11); - bfin_write_EVT12(evt_evt12); - bfin_write_EVT13(evt_evt13); - bfin_write_EVT14(evt14_softirq); - bfin_write_EVT15(evt_system_call); - CSYNC(); - #if defined(CONFIG_IRQCHIP_DEMUX_GPIO) && defined(CONFIG_BF54x) #ifdef CONFIG_PINTx_REASSIGN pint[0]->assign = CONFIG_PINT0_ASSIGN; diff --git a/include/asm-blackfin/bfin-global.h b/include/asm-blackfin/bfin-global.h index c4d6cbbf96d..a970781a0f9 100644 --- a/include/asm-blackfin/bfin-global.h +++ b/include/asm-blackfin/bfin-global.h @@ -61,6 +61,7 @@ extern void bfin_dcache_init(void); extern int read_iloc(void); extern int bfin_console_init(void); extern asmlinkage void lower_to_irq14(void); +extern void init_exception_vectors(void); extern void init_dma(void); extern void program_IAR(void); extern void evt14_softirq(void); -- cgit v1.2.3-70-g09d2 From 22e03f3b58dfcca30f0c8de185022132459638d1 Mon Sep 17 00:00:00 2001 From: Raphael Assenat Date: Tue, 27 Feb 2007 19:49:53 +0000 Subject: leds: Add generic GPIO LED driver This patch adds support for GPIO connected leds via the new GPIO framework. Information about leds (gpio, polarity, name, default trigger) is passed to the driver via platform_data. 
Signed-off-by: Raphael Assenat Signed-off-by: Richard Purdie --- drivers/leds/Kconfig | 8 +++ drivers/leds/Makefile | 1 + drivers/leds/leds-gpio.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/leds.h | 14 ++++ 4 files changed, 197 insertions(+) create mode 100644 drivers/leds/leds-gpio.c (limited to 'include') diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 87d2046f866..9ce3ca109c2 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -95,6 +95,14 @@ config LEDS_COBALT help This option enables support for the front LED on Cobalt Server +config LEDS_GPIO + tristate "LED Support for GPIO connected LEDs" + depends on LEDS_CLASS && GENERIC_GPIO + help + This option enables support for the LEDs connected to GPIO + outputs. To be useful the particular board must have LEDs + and they must be connected to the GPIO lines. + comment "LED Triggers" config LEDS_TRIGGERS diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index aa2c18efa5b..f8995c9bc2e 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_LEDS_NET48XX) += leds-net48xx.o obj-$(CONFIG_LEDS_WRAP) += leds-wrap.o obj-$(CONFIG_LEDS_H1940) += leds-h1940.o obj-$(CONFIG_LEDS_COBALT) += leds-cobalt.o +obj-$(CONFIG_LEDS_GPIO) += leds-gpio.o # LED Triggers obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c new file mode 100644 index 00000000000..431dcb61902 --- /dev/null +++ b/drivers/leds/leds-gpio.c @@ -0,0 +1,174 @@ +/* + * LEDs driver for GPIOs + * + * Copyright (C) 2007 8D Technologies inc. + * Raphael Assenat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include +#include +#include +#include +#include + +struct gpio_led_data { + struct led_classdev cdev; + unsigned gpio; + u8 active_low; +}; + + +static void gpio_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct gpio_led_data *led_dat = + container_of(led_cdev, struct gpio_led_data, cdev); + int level; + + if (value == LED_OFF) + level = 0; + else + level = 1; + + if (led_dat->active_low) + level = !level; + + gpio_set_value(led_dat->gpio, level); +} + +static int __init gpio_led_probe(struct platform_device *pdev) +{ + struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led *cur_led; + struct gpio_led_data *leds_data, *led_dat; + int i, ret = 0; + + if (!pdata) + return -EBUSY; + + leds_data = kzalloc(sizeof(struct gpio_led_data) * pdata->num_leds, + GFP_KERNEL); + if (!leds_data) + return -ENOMEM; + + for (i = 0; i < pdata->num_leds; i++) { + cur_led = &pdata->leds[i]; + led_dat = &leds_data[i]; + + led_dat->cdev.name = cur_led->name; + led_dat->cdev.default_trigger = cur_led->default_trigger; + led_dat->gpio = cur_led->gpio; + led_dat->active_low = cur_led->active_low; + led_dat->cdev.brightness_set = gpio_led_set; + led_dat->cdev.brightness = cur_led->active_low ? 
LED_FULL : LED_OFF; + + ret = gpio_request(led_dat->gpio, led_dat->cdev.name); + if (ret < 0) + goto err; + + gpio_direction_output(led_dat->gpio, led_dat->active_low); + + ret = led_classdev_register(&pdev->dev, &led_dat->cdev); + if (ret < 0) { + gpio_free(led_dat->gpio); + goto err; + } + } + + platform_set_drvdata(pdev, leds_data); + + return 0; + +err: + if (i > 0) { + for (i = i - 1; i >= 0; i--) { + led_classdev_unregister(&leds_data[i].cdev); + gpio_free(leds_data[i].gpio); + } + } + kfree(leds_data); + + return ret; +} + +static int __exit gpio_led_remove(struct platform_device *pdev) +{ + int i; + struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_data *leds_data; + + leds_data = platform_get_drvdata(pdev); + + for (i = 0; i < pdata->num_leds; i++) { + led_classdev_unregister(&leds_data[i].cdev); + gpio_free(leds_data[i].gpio); + } + + kfree(leds_data); + + return 0; +} + +#ifdef CONFIG_PM +static int gpio_led_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_data *leds_data; + int i; + + leds_data = platform_get_drvdata(pdev); + + for (i = 0; i < pdata->num_leds; i++) + led_classdev_suspend(&leds_data[i].cdev); + + return 0; +} + +static int gpio_led_resume(struct platform_device *pdev) +{ + struct gpio_led_platform_data *pdata = pdev->dev.platform_data; + struct gpio_led_data *leds_data; + int i; + + leds_data = platform_get_drvdata(pdev); + + for (i = 0; i < pdata->num_leds; i++) + led_classdev_resume(&leds_data[i].cdev); + + return 0; +} +#else +#define gpio_led_suspend NULL +#define gpio_led_resume NULL +#endif + +static struct platform_driver gpio_led_driver = { + .remove = __exit_p(gpio_led_remove), + .suspend = gpio_led_suspend, + .resume = gpio_led_resume, + .driver = { + .name = "leds-gpio", + .owner = THIS_MODULE, + }, +}; + +static int __init gpio_led_init(void) +{ + return platform_driver_probe(&gpio_led_driver, 
gpio_led_probe); +} + +static void __exit gpio_led_exit(void) +{ + platform_driver_unregister(&gpio_led_driver); +} + +module_init(gpio_led_init); +module_exit(gpio_led_exit); + +MODULE_AUTHOR("Raphael Assenat "); +MODULE_DESCRIPTION("GPIO LED driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/leds.h b/include/linux/leds.h index 88afceffb7c..059abfe219d 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -110,4 +110,18 @@ extern void ledtrig_ide_activity(void); #define ledtrig_ide_activity() do {} while(0) #endif +/* For the leds-gpio driver */ +struct gpio_led { + const char *name; + char *default_trigger; + unsigned gpio; + u8 active_low; +}; + +struct gpio_led_platform_data { + int num_leds; + struct gpio_led *leds; +}; + + #endif /* __LINUX_LEDS_H_INCLUDED */ -- cgit v1.2.3-70-g09d2 From f8a7c6fe14f556ca8eeddce258cb21392d0c3a2f Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Sun, 8 Jul 2007 23:19:31 +0100 Subject: leds: Convert from struct class_device to struct device Convert the LEDs class from struct class_device to struct device since class_device is scheduled for removal. 
Signed-off-by: Richard Purdie Acked-by: Greg Kroah-Hartman --- drivers/leds/led-class.c | 49 ++++++++++++++++++++------------------------ drivers/leds/led-triggers.c | 13 ++++++------ drivers/leds/leds-locomo.c | 2 +- drivers/leds/leds.h | 8 +++++--- drivers/leds/ledtrig-timer.c | 49 +++++++++++++++++++++----------------------- include/linux/leds.h | 3 +-- 6 files changed, 59 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 3c1711210e3..4211293ce86 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -2,7 +2,7 @@ * LED Class Core * * Copyright (C) 2005 John Lenz - * Copyright (C) 2005-2006 Richard Purdie + * Copyright (C) 2005-2007 Richard Purdie * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -24,9 +24,10 @@ static struct class *leds_class; -static ssize_t led_brightness_show(struct class_device *dev, char *buf) +static ssize_t led_brightness_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); ssize_t ret = 0; /* no lock needed for this */ @@ -36,10 +37,10 @@ static ssize_t led_brightness_show(struct class_device *dev, char *buf) return ret; } -static ssize_t led_brightness_store(struct class_device *dev, - const char *buf, size_t size) +static ssize_t led_brightness_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); ssize_t ret = -EINVAL; char *after; unsigned long state = simple_strtoul(buf, &after, 10); @@ -56,10 +57,9 @@ static ssize_t led_brightness_store(struct class_device *dev, return ret; } -static CLASS_DEVICE_ATTR(brightness, 0644, led_brightness_show, - led_brightness_store); +static 
DEVICE_ATTR(brightness, 0644, led_brightness_show, led_brightness_store); #ifdef CONFIG_LEDS_TRIGGERS -static CLASS_DEVICE_ATTR(trigger, 0644, led_trigger_show, led_trigger_store); +static DEVICE_ATTR(trigger, 0644, led_trigger_show, led_trigger_store); #endif /** @@ -93,16 +93,15 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) { int rc; - led_cdev->class_dev = class_device_create(leds_class, NULL, 0, - parent, "%s", led_cdev->name); - if (unlikely(IS_ERR(led_cdev->class_dev))) - return PTR_ERR(led_cdev->class_dev); + led_cdev->dev = device_create(leds_class, parent, 0, "%s", + led_cdev->name); + if (unlikely(IS_ERR(led_cdev->dev))) + return PTR_ERR(led_cdev->dev); - class_set_devdata(led_cdev->class_dev, led_cdev); + dev_set_drvdata(led_cdev->dev, led_cdev); /* register the attributes */ - rc = class_device_create_file(led_cdev->class_dev, - &class_device_attr_brightness); + rc = device_create_file(led_cdev->dev, &dev_attr_brightness); if (rc) goto err_out; @@ -114,8 +113,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) #ifdef CONFIG_LEDS_TRIGGERS rwlock_init(&led_cdev->trigger_lock); - rc = class_device_create_file(led_cdev->class_dev, - &class_device_attr_trigger); + rc = device_create_file(led_cdev->dev, &dev_attr_trigger); if (rc) goto err_out_led_list; @@ -123,18 +121,17 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) #endif printk(KERN_INFO "Registered led device: %s\n", - led_cdev->class_dev->class_id); + led_cdev->name); return 0; #ifdef CONFIG_LEDS_TRIGGERS err_out_led_list: - class_device_remove_file(led_cdev->class_dev, - &class_device_attr_brightness); + device_remove_file(led_cdev->dev, &dev_attr_brightness); list_del(&led_cdev->node); #endif err_out: - class_device_unregister(led_cdev->class_dev); + device_unregister(led_cdev->dev); return rc; } EXPORT_SYMBOL_GPL(led_classdev_register); @@ -147,18 +144,16 @@ 
EXPORT_SYMBOL_GPL(led_classdev_register); */ void led_classdev_unregister(struct led_classdev *led_cdev) { - class_device_remove_file(led_cdev->class_dev, - &class_device_attr_brightness); + device_remove_file(led_cdev->dev, &dev_attr_brightness); #ifdef CONFIG_LEDS_TRIGGERS - class_device_remove_file(led_cdev->class_dev, - &class_device_attr_trigger); + device_remove_file(led_cdev->dev, &dev_attr_trigger); write_lock(&led_cdev->trigger_lock); if (led_cdev->trigger) led_trigger_set(led_cdev, NULL); write_unlock(&led_cdev->trigger_lock); #endif - class_device_unregister(led_cdev->class_dev); + device_unregister(led_cdev->dev); write_lock(&leds_list_lock); list_del(&led_cdev->node); diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index b2438a03082..575368c2b10 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -1,7 +1,7 @@ /* * LED Triggers Core * - * Copyright 2005-2006 Openedhand Ltd. + * Copyright 2005-2007 Openedhand Ltd. * * Author: Richard Purdie * @@ -28,10 +28,10 @@ static DEFINE_RWLOCK(triggers_list_lock); static LIST_HEAD(trigger_list); -ssize_t led_trigger_store(struct class_device *dev, const char *buf, - size_t count) +ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); char trigger_name[TRIG_NAME_MAX]; struct led_trigger *trig; size_t len; @@ -67,9 +67,10 @@ ssize_t led_trigger_store(struct class_device *dev, const char *buf, } -ssize_t led_trigger_show(struct class_device *dev, char *buf) +ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr, + char *buf) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); struct led_trigger *trig; int len = 0; diff --git a/drivers/leds/leds-locomo.c b/drivers/leds/leds-locomo.c index 6f2d449ba98..bfac499f325 
100644 --- a/drivers/leds/leds-locomo.c +++ b/drivers/leds/leds-locomo.c @@ -19,7 +19,7 @@ static void locomoled_brightness_set(struct led_classdev *led_cdev, enum led_brightness value, int offset) { - struct locomo_dev *locomo_dev = LOCOMO_DEV(led_cdev->class_dev->dev); + struct locomo_dev *locomo_dev = LOCOMO_DEV(led_cdev->dev); unsigned long flags; local_irq_save(flags); diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h index a715c4ed93f..f2f3884fe06 100644 --- a/drivers/leds/leds.h +++ b/drivers/leds/leds.h @@ -13,6 +13,7 @@ #ifndef __LEDS_H_INCLUDED #define __LEDS_H_INCLUDED +#include #include static inline void led_set_brightness(struct led_classdev *led_cdev, @@ -37,8 +38,9 @@ void led_trigger_set(struct led_classdev *led_cdev, #define led_trigger_set(x, y) do {} while(0) #endif -ssize_t led_trigger_store(struct class_device *dev, const char *buf, - size_t count); -ssize_t led_trigger_show(struct class_device *dev, char *buf); +ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); +ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr, + char *buf); #endif /* __LEDS_H_INCLUDED */ diff --git a/drivers/leds/ledtrig-timer.c b/drivers/leds/ledtrig-timer.c index d756bdb01c5..ed9ff02c77e 100644 --- a/drivers/leds/ledtrig-timer.c +++ b/drivers/leds/ledtrig-timer.c @@ -52,9 +52,10 @@ static void led_timer_function(unsigned long data) mod_timer(&timer_data->timer, jiffies + msecs_to_jiffies(delay)); } -static ssize_t led_delay_on_show(struct class_device *dev, char *buf) +static ssize_t led_delay_on_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); struct timer_trig_data *timer_data = led_cdev->trigger_data; sprintf(buf, "%lu\n", timer_data->delay_on); @@ -62,10 +63,10 @@ static ssize_t led_delay_on_show(struct class_device *dev, char *buf) return 
strlen(buf) + 1; } -static ssize_t led_delay_on_store(struct class_device *dev, const char *buf, - size_t size) +static ssize_t led_delay_on_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); struct timer_trig_data *timer_data = led_cdev->trigger_data; int ret = -EINVAL; char *after; @@ -84,9 +85,10 @@ static ssize_t led_delay_on_store(struct class_device *dev, const char *buf, return ret; } -static ssize_t led_delay_off_show(struct class_device *dev, char *buf) +static ssize_t led_delay_off_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); struct timer_trig_data *timer_data = led_cdev->trigger_data; sprintf(buf, "%lu\n", timer_data->delay_off); @@ -94,10 +96,10 @@ static ssize_t led_delay_off_show(struct class_device *dev, char *buf) return strlen(buf) + 1; } -static ssize_t led_delay_off_store(struct class_device *dev, const char *buf, - size_t size) +static ssize_t led_delay_off_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { - struct led_classdev *led_cdev = class_get_devdata(dev); + struct led_classdev *led_cdev = dev_get_drvdata(dev); struct timer_trig_data *timer_data = led_cdev->trigger_data; int ret = -EINVAL; char *after; @@ -116,10 +118,8 @@ static ssize_t led_delay_off_store(struct class_device *dev, const char *buf, return ret; } -static CLASS_DEVICE_ATTR(delay_on, 0644, led_delay_on_show, - led_delay_on_store); -static CLASS_DEVICE_ATTR(delay_off, 0644, led_delay_off_show, - led_delay_off_store); +static DEVICE_ATTR(delay_on, 0644, led_delay_on_show, led_delay_on_store); +static DEVICE_ATTR(delay_off, 0644, led_delay_off_show, led_delay_off_store); static void timer_trig_activate(struct led_classdev *led_cdev) { @@ -136,18 
+136,17 @@ static void timer_trig_activate(struct led_classdev *led_cdev) timer_data->timer.function = led_timer_function; timer_data->timer.data = (unsigned long) led_cdev; - rc = class_device_create_file(led_cdev->class_dev, - &class_device_attr_delay_on); - if (rc) goto err_out; - rc = class_device_create_file(led_cdev->class_dev, - &class_device_attr_delay_off); - if (rc) goto err_out_delayon; + rc = device_create_file(led_cdev->dev, &dev_attr_delay_on); + if (rc) + goto err_out; + rc = device_create_file(led_cdev->dev, &dev_attr_delay_off); + if (rc) + goto err_out_delayon; return; err_out_delayon: - class_device_remove_file(led_cdev->class_dev, - &class_device_attr_delay_on); + device_remove_file(led_cdev->dev, &dev_attr_delay_on); err_out: led_cdev->trigger_data = NULL; kfree(timer_data); @@ -158,10 +157,8 @@ static void timer_trig_deactivate(struct led_classdev *led_cdev) struct timer_trig_data *timer_data = led_cdev->trigger_data; if (timer_data) { - class_device_remove_file(led_cdev->class_dev, - &class_device_attr_delay_on); - class_device_remove_file(led_cdev->class_dev, - &class_device_attr_delay_off); + device_remove_file(led_cdev->dev, &dev_attr_delay_on); + device_remove_file(led_cdev->dev, &dev_attr_delay_off); del_timer_sync(&timer_data->timer); kfree(timer_data); } diff --git a/include/linux/leds.h b/include/linux/leds.h index 059abfe219d..dc1178f6184 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -16,7 +16,6 @@ #include struct device; -struct class_device; /* * LED Core */ @@ -38,7 +37,7 @@ struct led_classdev { void (*brightness_set)(struct led_classdev *led_cdev, enum led_brightness brightness); - struct class_device *class_dev; + struct device *dev; struct list_head node; /* LED Device list */ char *default_trigger; /* Trigger to use */ -- cgit v1.2.3-70-g09d2 From 655bfd7aebb12481ab9275284d9500bee5ba3e70 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Mon, 9 Jul 2007 12:17:24 +0100 Subject: backlight: Convert from struct 
class_device to struct device Convert the backlight and LCD classes from struct class_device to struct device since class_device is scheduled for removal. One nasty API break is the backlight power attribute has had to be renamed to bl_power and the LCD power attribute has had to be renamed to lcd_power since the original names clash with the core. I can't see a way around this. Signed-off-by: Richard Purdie Acked-by: Greg Kroah-Hartman --- drivers/acpi/video.c | 4 +- drivers/usb/misc/appledisplay.c | 4 +- drivers/video/aty/aty128fb.c | 2 +- drivers/video/aty/atyfb_base.c | 2 +- drivers/video/aty/radeon_backlight.c | 4 +- drivers/video/backlight/backlight.c | 125 +++++++++++++++-------------------- drivers/video/backlight/cr_bllcd.c | 2 +- drivers/video/backlight/lcd.c | 112 ++++++++++++++----------------- drivers/video/nvidia/nv_backlight.c | 2 +- drivers/video/riva/fbdev.c | 2 +- include/linux/backlight.h | 11 ++- include/linux/lcd.h | 14 ++-- 12 files changed, 132 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 00d25b34725..7fd672af33b 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -278,7 +278,7 @@ static int acpi_video_get_brightness(struct backlight_device *bd) { unsigned long cur_level; struct acpi_video_device *vd = - (struct acpi_video_device *)class_get_devdata(&bd->class_dev); + (struct acpi_video_device *)bl_get_data(bd); acpi_video_device_lcd_get_level_current(vd, &cur_level); return (int) cur_level; } @@ -287,7 +287,7 @@ static int acpi_video_set_brightness(struct backlight_device *bd) { int request_level = bd->props.brightness; struct acpi_video_device *vd = - (struct acpi_video_device *)class_get_devdata(&bd->class_dev); + (struct acpi_video_device *)bl_get_data(bd); acpi_video_device_lcd_set_level(vd, request_level); return 0; } diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index cf70c16f0e3..4e88553e166 100644 --- 
a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -137,7 +137,7 @@ exit: static int appledisplay_bl_update_status(struct backlight_device *bd) { - struct appledisplay *pdata = class_get_devdata(&bd->class_dev); + struct appledisplay *pdata = bl_get_data(bd); int retval; pdata->msgdata[0] = 0x10; @@ -158,7 +158,7 @@ static int appledisplay_bl_update_status(struct backlight_device *bd) static int appledisplay_bl_get_brightness(struct backlight_device *bd) { - struct appledisplay *pdata = class_get_devdata(&bd->class_dev); + struct appledisplay *pdata = bl_get_data(bd); int retval; retval = usb_control_msg( diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 7fea4d8ae8e..cfcbe37d2d7 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -1733,7 +1733,7 @@ static int aty128_bl_get_level_brightness(struct aty128fb_par *par, static int aty128_bl_update_status(struct backlight_device *bd) { - struct aty128fb_par *par = class_get_devdata(&bd->class_dev); + struct aty128fb_par *par = bl_get_data(bd); unsigned int reg = aty_ld_le32(LVDS_GEN_CNTL); int level; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 2fbff631743..d2c68c3d8d7 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -2141,7 +2141,7 @@ static int aty_bl_get_level_brightness(struct atyfb_par *par, int level) static int aty_bl_update_status(struct backlight_device *bd) { - struct atyfb_par *par = class_get_devdata(&bd->class_dev); + struct atyfb_par *par = bl_get_data(bd); unsigned int reg = aty_ld_lcd(LCD_MISC_CNTL, par); int level; diff --git a/drivers/video/aty/radeon_backlight.c b/drivers/video/aty/radeon_backlight.c index 0be25fa5540..1a056adb61c 100644 --- a/drivers/video/aty/radeon_backlight.c +++ b/drivers/video/aty/radeon_backlight.c @@ -47,7 +47,7 @@ static int radeon_bl_get_level_brightness(struct radeon_bl_privdata *pdata, static int 
radeon_bl_update_status(struct backlight_device *bd) { - struct radeon_bl_privdata *pdata = class_get_devdata(&bd->class_dev); + struct radeon_bl_privdata *pdata = bl_get_data(bd); struct radeonfb_info *rinfo = pdata->rinfo; u32 lvds_gen_cntl, tmpPixclksCntl; int level; @@ -206,7 +206,7 @@ void radeonfb_bl_exit(struct radeonfb_info *rinfo) if (bd) { struct radeon_bl_privdata *pdata; - pdata = class_get_devdata(&bd->class_dev); + pdata = bl_get_data(bd); backlight_device_unregister(bd); kfree(pdata); rinfo->info->bl_dev = NULL; diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 7e06223bca9..b26de8cf311 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -69,18 +69,20 @@ static inline void backlight_unregister_fb(struct backlight_device *bd) } #endif /* CONFIG_FB */ -static ssize_t backlight_show_power(struct class_device *cdev, char *buf) +static ssize_t backlight_show_power(struct device *dev, + struct device_attribute *attr,char *buf) { - struct backlight_device *bd = to_backlight_device(cdev); + struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.power); } -static ssize_t backlight_store_power(struct class_device *cdev, const char *buf, size_t count) +static ssize_t backlight_store_power(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int rc = -ENXIO; char *endp; - struct backlight_device *bd = to_backlight_device(cdev); + struct backlight_device *bd = to_backlight_device(dev); int power = simple_strtoul(buf, &endp, 0); size_t size = endp - buf; @@ -101,18 +103,20 @@ static ssize_t backlight_store_power(struct class_device *cdev, const char *buf, return rc; } -static ssize_t backlight_show_brightness(struct class_device *cdev, char *buf) +static ssize_t backlight_show_brightness(struct device *dev, + struct device_attribute *attr, char *buf) { - struct backlight_device *bd = to_backlight_device(cdev); 
+ struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.brightness); } -static ssize_t backlight_store_brightness(struct class_device *cdev, const char *buf, size_t count) +static ssize_t backlight_store_brightness(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int rc = -ENXIO; char *endp; - struct backlight_device *bd = to_backlight_device(cdev); + struct backlight_device *bd = to_backlight_device(dev); int brightness = simple_strtoul(buf, &endp, 0); size_t size = endp - buf; @@ -138,18 +142,19 @@ static ssize_t backlight_store_brightness(struct class_device *cdev, const char return rc; } -static ssize_t backlight_show_max_brightness(struct class_device *cdev, char *buf) +static ssize_t backlight_show_max_brightness(struct device *dev, + struct device_attribute *attr, char *buf) { - struct backlight_device *bd = to_backlight_device(cdev); + struct backlight_device *bd = to_backlight_device(dev); return sprintf(buf, "%d\n", bd->props.max_brightness); } -static ssize_t backlight_show_actual_brightness(struct class_device *cdev, - char *buf) +static ssize_t backlight_show_actual_brightness(struct device *dev, + struct device_attribute *attr, char *buf) { int rc = -ENXIO; - struct backlight_device *bd = to_backlight_device(cdev); + struct backlight_device *bd = to_backlight_device(dev); mutex_lock(&bd->ops_lock); if (bd->ops && bd->ops->get_brightness) @@ -159,31 +164,22 @@ static ssize_t backlight_show_actual_brightness(struct class_device *cdev, return rc; } -static void backlight_class_release(struct class_device *dev) +struct class *backlight_class; + +static void bl_device_release(struct device *dev) { struct backlight_device *bd = to_backlight_device(dev); kfree(bd); } -static struct class backlight_class = { - .name = "backlight", - .release = backlight_class_release, -}; - -#define DECLARE_ATTR(_name,_mode,_show,_store) \ -{ \ - .attr = { .name = __stringify(_name), .mode = _mode }, 
\ - .show = _show, \ - .store = _store, \ -} - -static const struct class_device_attribute bl_class_device_attributes[] = { - DECLARE_ATTR(power, 0644, backlight_show_power, backlight_store_power), - DECLARE_ATTR(brightness, 0644, backlight_show_brightness, +static struct device_attribute bl_device_attributes[] = { + __ATTR(bl_power, 0644, backlight_show_power, backlight_store_power), + __ATTR(brightness, 0644, backlight_show_brightness, backlight_store_brightness), - DECLARE_ATTR(actual_brightness, 0444, backlight_show_actual_brightness, + __ATTR(actual_brightness, 0444, backlight_show_actual_brightness, NULL), - DECLARE_ATTR(max_brightness, 0444, backlight_show_max_brightness, NULL), + __ATTR(max_brightness, 0444, backlight_show_max_brightness, NULL), + __ATTR_NULL, }; /** @@ -191,22 +187,20 @@ static const struct class_device_attribute bl_class_device_attributes[] = { * backlight_device class. * @name: the name of the new object(must be the same as the name of the * respective framebuffer device). - * @devdata: an optional pointer to be stored in the class_device. The - * methods may retrieve it by using class_get_devdata(&bd->class_dev). + * @devdata: an optional pointer to be stored for private driver use. The + * methods may retrieve it by using bl_get_data(bd). * @ops: the backlight operations structure. * - * Creates and registers new backlight class_device. Returns either an + * Creates and registers new backlight device. Returns either an * ERR_PTR() or a pointer to the newly allocated device. 
*/ struct backlight_device *backlight_device_register(const char *name, - struct device *dev, - void *devdata, - struct backlight_ops *ops) + struct device *parent, void *devdata, struct backlight_ops *ops) { - int i, rc; struct backlight_device *new_bd; + int rc; - pr_debug("backlight_device_alloc: name=%s\n", name); + pr_debug("backlight_device_register: name=%s\n", name); new_bd = kzalloc(sizeof(struct backlight_device), GFP_KERNEL); if (!new_bd) @@ -214,13 +208,14 @@ struct backlight_device *backlight_device_register(const char *name, mutex_init(&new_bd->update_lock); mutex_init(&new_bd->ops_lock); - new_bd->ops = ops; - new_bd->class_dev.class = &backlight_class; - new_bd->class_dev.dev = dev; - strlcpy(new_bd->class_dev.class_id, name, KOBJ_NAME_LEN); - class_set_devdata(&new_bd->class_dev, devdata); - rc = class_device_register(&new_bd->class_dev); + new_bd->dev.class = backlight_class; + new_bd->dev.parent = parent; + new_bd->dev.release = bl_device_release; + strlcpy(new_bd->dev.bus_id, name, BUS_ID_SIZE); + dev_set_drvdata(&new_bd->dev, devdata); + + rc = device_register(&new_bd->dev); if (rc) { kfree(new_bd); return ERR_PTR(rc); @@ -228,23 +223,11 @@ struct backlight_device *backlight_device_register(const char *name, rc = backlight_register_fb(new_bd); if (rc) { - class_device_unregister(&new_bd->class_dev); + device_unregister(&new_bd->dev); return ERR_PTR(rc); } - - for (i = 0; i < ARRAY_SIZE(bl_class_device_attributes); i++) { - rc = class_device_create_file(&new_bd->class_dev, - &bl_class_device_attributes[i]); - if (rc) { - while (--i >= 0) - class_device_remove_file(&new_bd->class_dev, - &bl_class_device_attributes[i]); - class_device_unregister(&new_bd->class_dev); - /* No need to kfree(new_bd) since release() method was called */ - return ERR_PTR(rc); - } - } + new_bd->ops = ops; #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); @@ -265,42 +248,40 @@ EXPORT_SYMBOL(backlight_device_register); */ void 
backlight_device_unregister(struct backlight_device *bd) { - int i; - if (!bd) return; - pr_debug("backlight_device_unregister: name=%s\n", bd->class_dev.class_id); - #ifdef CONFIG_PMAC_BACKLIGHT mutex_lock(&pmac_backlight_mutex); if (pmac_backlight == bd) pmac_backlight = NULL; mutex_unlock(&pmac_backlight_mutex); #endif - - for (i = 0; i < ARRAY_SIZE(bl_class_device_attributes); i++) - class_device_remove_file(&bd->class_dev, - &bl_class_device_attributes[i]); - mutex_lock(&bd->ops_lock); bd->ops = NULL; mutex_unlock(&bd->ops_lock); backlight_unregister_fb(bd); - - class_device_unregister(&bd->class_dev); + device_unregister(&bd->dev); } EXPORT_SYMBOL(backlight_device_unregister); static void __exit backlight_class_exit(void) { - class_unregister(&backlight_class); + class_destroy(backlight_class); } static int __init backlight_class_init(void) { - return class_register(&backlight_class); + backlight_class = class_create(THIS_MODULE, "backlight"); + if (IS_ERR(backlight_class)) { + printk(KERN_WARNING "Unable to create backlight class; errno = %ld\n", + PTR_ERR(backlight_class)); + return PTR_ERR(backlight_class); + } + + backlight_class->dev_attrs = bl_device_attributes; + return 0; } /* diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c index e9bbc3455c9..3633b6e93e2 100644 --- a/drivers/video/backlight/cr_bllcd.c +++ b/drivers/video/backlight/cr_bllcd.c @@ -202,7 +202,7 @@ static int cr_backlight_probe(struct platform_device *pdev) } crp->cr_lcd_device = lcd_device_register("cr-lcd", - &pdev->dev, + &pdev->dev, NULL &cr_lcd_ops); if (IS_ERR(crp->cr_lcd_device)) { diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 648b53c1fde..6f652c65fae 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -61,10 +61,11 @@ static inline void lcd_unregister_fb(struct lcd_device *ld) } #endif /* CONFIG_FB */ -static ssize_t lcd_show_power(struct class_device *cdev, char *buf) +static 
ssize_t lcd_show_power(struct device *dev, struct device_attribute *attr, + char *buf) { int rc; - struct lcd_device *ld = to_lcd_device(cdev); + struct lcd_device *ld = to_lcd_device(dev); mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->get_power) @@ -76,11 +77,12 @@ static ssize_t lcd_show_power(struct class_device *cdev, char *buf) return rc; } -static ssize_t lcd_store_power(struct class_device *cdev, const char *buf, size_t count) +static ssize_t lcd_store_power(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int rc = -ENXIO; char *endp; - struct lcd_device *ld = to_lcd_device(cdev); + struct lcd_device *ld = to_lcd_device(dev); int power = simple_strtoul(buf, &endp, 0); size_t size = endp - buf; @@ -100,10 +102,11 @@ static ssize_t lcd_store_power(struct class_device *cdev, const char *buf, size_ return rc; } -static ssize_t lcd_show_contrast(struct class_device *cdev, char *buf) +static ssize_t lcd_show_contrast(struct device *dev, + struct device_attribute *attr, char *buf) { int rc = -ENXIO; - struct lcd_device *ld = to_lcd_device(cdev); + struct lcd_device *ld = to_lcd_device(dev); mutex_lock(&ld->ops_lock); if (ld->ops && ld->ops->get_contrast) @@ -113,11 +116,12 @@ static ssize_t lcd_show_contrast(struct class_device *cdev, char *buf) return rc; } -static ssize_t lcd_store_contrast(struct class_device *cdev, const char *buf, size_t count) +static ssize_t lcd_store_contrast(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { int rc = -ENXIO; char *endp; - struct lcd_device *ld = to_lcd_device(cdev); + struct lcd_device *ld = to_lcd_device(dev); int contrast = simple_strtoul(buf, &endp, 0); size_t size = endp - buf; @@ -137,53 +141,45 @@ static ssize_t lcd_store_contrast(struct class_device *cdev, const char *buf, si return rc; } -static ssize_t lcd_show_max_contrast(struct class_device *cdev, char *buf) +static ssize_t lcd_show_max_contrast(struct device *dev, + struct 
device_attribute *attr, char *buf) { - struct lcd_device *ld = to_lcd_device(cdev); + struct lcd_device *ld = to_lcd_device(dev); return sprintf(buf, "%d\n", ld->props.max_contrast); } -static void lcd_class_release(struct class_device *dev) +struct class *lcd_class; + +static void lcd_device_release(struct device *dev) { struct lcd_device *ld = to_lcd_device(dev); kfree(ld); } -static struct class lcd_class = { - .name = "lcd", - .release = lcd_class_release, -}; - -#define DECLARE_ATTR(_name,_mode,_show,_store) \ -{ \ - .attr = { .name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ -} - -static const struct class_device_attribute lcd_class_device_attributes[] = { - DECLARE_ATTR(power, 0644, lcd_show_power, lcd_store_power), - DECLARE_ATTR(contrast, 0644, lcd_show_contrast, lcd_store_contrast), - DECLARE_ATTR(max_contrast, 0444, lcd_show_max_contrast, NULL), +static struct device_attribute lcd_device_attributes[] = { + __ATTR(lcd_power, 0644, lcd_show_power, lcd_store_power), + __ATTR(contrast, 0644, lcd_show_contrast, lcd_store_contrast), + __ATTR(max_contrast, 0444, lcd_show_max_contrast, NULL), + __ATTR_NULL, }; /** * lcd_device_register - register a new object of lcd_device class. * @name: the name of the new object(must be the same as the name of the * respective framebuffer device). - * @devdata: an optional pointer to be stored in the class_device. The - * methods may retrieve it by using class_get_devdata(ld->class_dev). + * @devdata: an optional pointer to be stored in the device. The + * methods may retrieve it by using lcd_get_data(ld). * @ops: the lcd operations structure. * - * Creates and registers a new lcd class_device. Returns either an ERR_PTR() + * Creates and registers a new lcd device. Returns either an ERR_PTR() * or a pointer to the newly allocated device. 
*/ -struct lcd_device *lcd_device_register(const char *name, void *devdata, - struct lcd_ops *ops) +struct lcd_device *lcd_device_register(const char *name, struct device *parent, + void *devdata, struct lcd_ops *ops) { - int i, rc; struct lcd_device *new_ld; + int rc; pr_debug("lcd_device_register: name=%s\n", name); @@ -193,12 +189,14 @@ struct lcd_device *lcd_device_register(const char *name, void *devdata, mutex_init(&new_ld->ops_lock); mutex_init(&new_ld->update_lock); - new_ld->ops = ops; - new_ld->class_dev.class = &lcd_class; - strlcpy(new_ld->class_dev.class_id, name, KOBJ_NAME_LEN); - class_set_devdata(&new_ld->class_dev, devdata); - rc = class_device_register(&new_ld->class_dev); + new_ld->dev.class = lcd_class; + new_ld->dev.parent = parent; + new_ld->dev.release = lcd_device_release; + strlcpy(new_ld->dev.bus_id, name, BUS_ID_SIZE); + dev_set_drvdata(&new_ld->dev, devdata); + + rc = device_register(&new_ld->dev); if (rc) { kfree(new_ld); return ERR_PTR(rc); @@ -206,22 +204,11 @@ struct lcd_device *lcd_device_register(const char *name, void *devdata, rc = lcd_register_fb(new_ld); if (rc) { - class_device_unregister(&new_ld->class_dev); + device_unregister(&new_ld->dev); return ERR_PTR(rc); } - for (i = 0; i < ARRAY_SIZE(lcd_class_device_attributes); i++) { - rc = class_device_create_file(&new_ld->class_dev, - &lcd_class_device_attributes[i]); - if (rc) { - while (--i >= 0) - class_device_remove_file(&new_ld->class_dev, - &lcd_class_device_attributes[i]); - class_device_unregister(&new_ld->class_dev); - /* No need to kfree(new_ld) since release() method was called */ - return ERR_PTR(rc); - } - } + new_ld->ops = ops; return new_ld; } @@ -235,33 +222,34 @@ EXPORT_SYMBOL(lcd_device_register); */ void lcd_device_unregister(struct lcd_device *ld) { - int i; - if (!ld) return; - pr_debug("lcd_device_unregister: name=%s\n", ld->class_dev.class_id); - - for (i = 0; i < ARRAY_SIZE(lcd_class_device_attributes); i++) - class_device_remove_file(&ld->class_dev, - 
&lcd_class_device_attributes[i]); - mutex_lock(&ld->ops_lock); ld->ops = NULL; mutex_unlock(&ld->ops_lock); lcd_unregister_fb(ld); - class_device_unregister(&ld->class_dev); + + device_unregister(&ld->dev); } EXPORT_SYMBOL(lcd_device_unregister); static void __exit lcd_class_exit(void) { - class_unregister(&lcd_class); + class_destroy(lcd_class); } static int __init lcd_class_init(void) { - return class_register(&lcd_class); + lcd_class = class_create(THIS_MODULE, "lcd"); + if (IS_ERR(lcd_class)) { + printk(KERN_WARNING "Unable to create backlight class; errno = %ld\n", + PTR_ERR(lcd_class)); + return PTR_ERR(lcd_class); + } + + lcd_class->dev_attrs = lcd_device_attributes; + return 0; } /* diff --git a/drivers/video/nvidia/nv_backlight.c b/drivers/video/nvidia/nv_backlight.c index 43f62d8ee41..443e3c85a9a 100644 --- a/drivers/video/nvidia/nv_backlight.c +++ b/drivers/video/nvidia/nv_backlight.c @@ -50,7 +50,7 @@ static int nvidia_bl_get_level_brightness(struct nvidia_par *par, static int nvidia_bl_update_status(struct backlight_device *bd) { - struct nvidia_par *par = class_get_devdata(&bd->class_dev); + struct nvidia_par *par = bl_get_data(bd); u32 tmp_pcrt, tmp_pmc, fpcontrol; int level; diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index 0fe547842c6..d251174d8ba 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -307,7 +307,7 @@ static int riva_bl_get_level_brightness(struct riva_par *par, static int riva_bl_update_status(struct backlight_device *bd) { - struct riva_par *par = class_get_devdata(&bd->class_dev); + struct riva_par *par = bl_get_data(bd); U032 tmp_pcrt, tmp_pmc; int level; diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 1023ba0d6e5..c897c7b0385 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -69,8 +69,8 @@ struct backlight_device { /* The framebuffer notifier block */ struct notifier_block fb_notif; - /* The class device structure */ - struct 
class_device class_dev; + + struct device dev; }; static inline void backlight_update_status(struct backlight_device *bd) @@ -85,6 +85,11 @@ extern struct backlight_device *backlight_device_register(const char *name, struct device *dev, void *devdata, struct backlight_ops *ops); extern void backlight_device_unregister(struct backlight_device *bd); -#define to_backlight_device(obj) container_of(obj, struct backlight_device, class_dev) +#define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) + +static inline void * bl_get_data(struct backlight_device *bl_dev) +{ + return dev_get_drvdata(&bl_dev->dev); +} #endif diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 598793c0745..1d379787f2e 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -62,8 +62,8 @@ struct lcd_device { struct mutex update_lock; /* The framebuffer notifier block */ struct notifier_block fb_notif; - /* The class device structure */ - struct class_device class_dev; + + struct device dev; }; static inline void lcd_set_power(struct lcd_device *ld, int power) @@ -75,9 +75,15 @@ static inline void lcd_set_power(struct lcd_device *ld, int power) } extern struct lcd_device *lcd_device_register(const char *name, - void *devdata, struct lcd_ops *ops); + struct device *parent, void *devdata, struct lcd_ops *ops); extern void lcd_device_unregister(struct lcd_device *ld); -#define to_lcd_device(obj) container_of(obj, struct lcd_device, class_dev) +#define to_lcd_device(obj) container_of(obj, struct lcd_device, dev) + +static inline void * lcd_get_data(struct lcd_device *ld_dev) +{ + return dev_get_drvdata(&ld_dev->dev); +} + #endif -- cgit v1.2.3-70-g09d2 From c1726d6f1ad2f1d83e5db1e0142756e9255a82b3 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 17 Jul 2007 21:21:26 +0900 Subject: [IA64] Use per iosapic lock for indirect iosapic register access Use per-iosapic lock for indirect iosapic register access. It reduces lock contention. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 57 +++++++++++++++++++++++----------------------- include/asm-ia64/iosapic.h | 4 ++-- 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index b3dcdb7e7fc..29fea0a8c2c 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -125,6 +125,7 @@ static struct iosapic { #ifdef CONFIG_NUMA unsigned short node; /* numa node association via pxm */ #endif + spinlock_t lock; /* lock for indirect reg access */ } iosapic_lists[NR_IOSAPICS]; struct iosapic_rte_info { @@ -153,6 +154,16 @@ static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ static int iosapic_kmalloc_ok; static LIST_HEAD(free_rte_list); +static inline void +iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&iosapic->lock, flags); + __iosapic_write(iosapic->addr, reg, val); + spin_unlock_irqrestore(&iosapic->lock, flags); +} + /* * Find an IOSAPIC associated with a GSI */ @@ -226,7 +237,6 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) { unsigned long pol, trigger, dmode; u32 low32, high32; - char __iomem *addr; int rte_index; char redir; struct iosapic_rte_info *rte; @@ -238,7 +248,6 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) return; /* not an IOSAPIC interrupt */ rte_index = rte->rte_index; - addr = rte->iosapic->addr; pol = iosapic_intr_info[vector].polarity; trigger = iosapic_intr_info[vector].trigger; dmode = iosapic_intr_info[vector].dmode; @@ -268,8 +277,8 @@ set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask) /* dest contains both id and eid */ high32 = (dest << IOSAPIC_DEST_SHIFT); - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), 
low32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); iosapic_intr_info[vector].low32 = low32; iosapic_intr_info[vector].dest = dest; } @@ -292,7 +301,7 @@ kexec_disable_iosapic(void) iosapic_intr_info + IA64_NUM_VECTORS; ++info, ++vec) { list_for_each_entry(rte, &info->rtes, rte_list) { - iosapic_write(rte->iosapic->addr, + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), IOSAPIC_MASK|vec); iosapic_eoi(rte->iosapic->addr, vec); @@ -304,8 +313,6 @@ kexec_disable_iosapic(void) static void mask_irq (unsigned int irq) { - unsigned long flags; - char __iomem *addr; u32 low32; int rte_index; ia64_vector vec = irq_to_vector(irq); @@ -314,22 +321,17 @@ mask_irq (unsigned int irq) if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt! */ - spin_lock_irqsave(&iosapic_lock, flags); /* set only the mask bit */ low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { - addr = rte->iosapic->addr; rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); } static void unmask_irq (unsigned int irq) { - unsigned long flags; - char __iomem *addr; u32 low32; int rte_index; ia64_vector vec = irq_to_vector(irq); @@ -338,14 +340,11 @@ unmask_irq (unsigned int irq) if (list_empty(&iosapic_intr_info[vec].rtes)) return; /* not an IOSAPIC interrupt! 
*/ - spin_lock_irqsave(&iosapic_lock, flags); low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { - addr = rte->iosapic->addr; rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); } @@ -353,13 +352,12 @@ static void iosapic_set_affinity (unsigned int irq, cpumask_t mask) { #ifdef CONFIG_SMP - unsigned long flags; u32 high32, low32; int dest, rte_index; - char __iomem *addr; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; ia64_vector vec; struct iosapic_rte_info *rte; + struct iosapic *iosapic; irq &= (~IA64_IRQ_REDIRECTED); vec = irq_to_vector(irq); @@ -377,7 +375,6 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) /* dest contains both id and eid */ high32 = dest << IOSAPIC_DEST_SHIFT; - spin_lock_irqsave(&iosapic_lock, flags); low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); if (redir) /* change delivery mode to lowest priority */ @@ -389,12 +386,11 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) iosapic_intr_info[vec].low32 = low32; iosapic_intr_info[vec].dest = dest; list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list) { - addr = rte->iosapic->addr; + iosapic = rte->iosapic; rte_index = rte->rte_index; - iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); - iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } - spin_unlock_irqrestore(&iosapic_lock, flags); #endif } @@ -499,7 +495,7 @@ iosapic_version (char __iomem *addr) * unsigned int reserved2 : 8; * } */ - return iosapic_read(addr, IOSAPIC_VERSION); + return __iosapic_read(addr, IOSAPIC_VERSION); } static int iosapic_find_sharable_vector (unsigned long trigger, @@ -857,8 +853,7 @@ iosapic_unregister_intr 
(unsigned int gsi) /* Mask the interrupt */ low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK; - iosapic_write(rte->iosapic->addr, - IOSAPIC_RTE_LOW(rte->rte_index), low32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); iosapic_intr_info[vector].count--; iosapic_free_rte(rte); @@ -1060,9 +1055,14 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); + index = find_iosapic(gsi_base); + if (index >= 0) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -EBUSY; + } + addr = ioremap(phys_addr, 0); ver = iosapic_version(addr); - if ((err = iosapic_check_gsi_range(gsi_base, ver))) { iounmap(addr); spin_unlock_irqrestore(&iosapic_lock, flags); @@ -1083,6 +1083,7 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) #ifdef CONFIG_NUMA iosapic_lists[index].node = MAX_NUMNODES; #endif + spin_lock_init(&iosapic_lists[index].lock); spin_unlock_irqrestore(&iosapic_lock, flags); if ((gsi_base == 0) && pcat_compat) { diff --git a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h index 421cb6b62a7..09bdc3898df 100644 --- a/include/asm-ia64/iosapic.h +++ b/include/asm-ia64/iosapic.h @@ -53,13 +53,13 @@ #define NR_IOSAPICS 256 -static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg) +static inline unsigned int __iosapic_read(char __iomem *iosapic, unsigned int reg) { writel(reg, iosapic + IOSAPIC_REG_SELECT); return readl(iosapic + IOSAPIC_WINDOW); } -static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +static inline void __iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) { writel(reg, iosapic + IOSAPIC_REG_SELECT); writel(val, iosapic + IOSAPIC_WINDOW); -- cgit v1.2.3-70-g09d2 From e1b30a392835e92581db09a4e8b4b2ad53a0c370 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 17 Jul 2007 21:22:23 +0900 Subject: [IA64] Add mapping table between irq and vector Add mapping tables 
between irqs and vectors, and their management code. This is necessary for supporting multiple vector domains, because the 1:1 mapping between irq and vector will be changed to n:1. The irq == vector relationship is explicitly retained for percpu interrupts, platform interrupts, ISA IRQs and vectors assigned using assign_irq_vector(), because some programs might depend on it. There is one further problem to consider: when PCI drivers enable/disable devices dynamically, the device's irq number changes to a different one, which may cause problems for suspend/resume code. To fix this problem, I bound the gsi to the irq. Signed-off-by: Kenji Kaneshige Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 61 ++++++++------ arch/ia64/kernel/irq.c | 2 +- arch/ia64/kernel/irq_ia64.c | 201 ++++++++++++++++++++++++++++++++++++-------- arch/ia64/kernel/smpboot.c | 4 + include/asm-ia64/hw_irq.h | 15 +++- 5 files changed, 219 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index cffb443a557..cf27cfb4d16 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -117,6 +117,9 @@ static DEFINE_SPINLOCK(iosapic_lock); * These tables map IA-64 vectors to the IOSAPIC pin that generates this * vector.
*/ + +#define NO_REF_RTE 0 + static struct iosapic { char __iomem *addr; /* base address of IOSAPIC */ unsigned int gsi_base; /* GSI base */ @@ -204,7 +207,7 @@ inline int gsi_to_vector (unsigned int gsi) { int irq = __gsi_to_irq(gsi); - if (irq < 0) + if (check_irq_used(irq) < 0) return -1; return irq_to_vector(irq); } @@ -619,14 +622,18 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery, iosapic_intr_info[irq].count++; iosapic_lists[index].rtes_inuse++; } - else if (irq_is_shared(irq)) { + else if (rte->refcnt == NO_REF_RTE) { struct iosapic_intr_info *info = &iosapic_intr_info[irq]; - if (info->trigger != trigger || info->polarity != polarity) { + if (info->count > 0 && + (info->trigger != trigger || info->polarity != polarity)){ printk (KERN_WARNING "%s: cannot override the interrupt\n", __FUNCTION__); return -EINVAL; } + rte->refcnt++; + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; } iosapic_intr_info[irq].polarity = polarity; @@ -756,12 +763,17 @@ iosapic_register_intr (unsigned int gsi, irq = __gsi_to_irq(gsi); if (irq > 0) { rte = find_rte(irq, gsi); - rte->refcnt++; - goto unlock_iosapic_lock; - } + if(iosapic_intr_info[irq].count == 0) { + assign_irq_vector(irq); + dynamic_irq_init(irq); + } else if (rte->refcnt != NO_REF_RTE) { + rte->refcnt++; + goto unlock_iosapic_lock; + } + } else + irq = create_irq(); /* If vector is running out, we try to find a sharable vector */ - irq = create_irq(); if (irq < 0) { irq = iosapic_find_sharable_irq(trigger, polarity); if (irq < 0) @@ -832,18 +844,14 @@ iosapic_unregister_intr (unsigned int gsi) if (--rte->refcnt > 0) goto out; - /* Remove the rte entry from the list */ idesc = irq_desc + irq; - spin_lock(&idesc->lock); - list_del(&rte->rte_list); - spin_unlock(&idesc->lock); + rte->refcnt = NO_REF_RTE; /* Mask the interrupt */ low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK; iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); 
iosapic_intr_info[irq].count--; - iosapic_free_rte(rte); index = find_iosapic(gsi); iosapic_lists[index].rtes_inuse--; WARN_ON(iosapic_lists[index].rtes_inuse < 0); @@ -857,21 +865,20 @@ iosapic_unregister_intr (unsigned int gsi) (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, irq_to_vector(irq)); - if (list_empty(&iosapic_intr_info[irq].rtes)) { - /* Sanity check */ - BUG_ON(iosapic_intr_info[irq].count); + if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP /* Clear affinity */ cpus_setall(idesc->affinity); #endif /* Clear the interrupt information */ - memset(&iosapic_intr_info[irq], 0, - sizeof(struct iosapic_intr_info)); + iosapic_intr_info[irq].dest = 0; + iosapic_intr_info[irq].dmode = 0; + iosapic_intr_info[irq].polarity = 0; + iosapic_intr_info[irq].trigger = 0; iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; - INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); - /* Destroy IRQ */ - destroy_irq(irq); + /* Destroy and reserve IRQ */ + destroy_and_reserve_irq(irq); } out: spin_unlock_irqrestore(&iosapic_lock, flags); @@ -892,8 +899,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, switch (int_type) { case ACPI_INTERRUPT_PMI: - vector = iosapic_vector; - irq = vector; /* FIXME */ + irq = vector = iosapic_vector; + bind_irq_vector(irq, vector); /* * since PMI vector is alloc'd by FW(ACPI) not by kernel, * we need to make sure the vector is available @@ -909,8 +916,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, delivery = IOSAPIC_INIT; break; case ACPI_INTERRUPT_CPEI: - vector = IA64_CPE_VECTOR; - irq = vector; /* FIXME */ + irq = vector = IA64_CPE_VECTOR; + BUG_ON(bind_irq_vector(irq, vector)); delivery = IOSAPIC_LOWEST_PRIORITY; mask = 1; break; @@ -945,8 +952,8 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, int vector, irq; unsigned int dest = cpu_physical_id(smp_processor_id()); - vector = isa_irq_to_vector(isa_irq); - irq = vector; /* FIXME */ + irq = vector = 
isa_irq_to_vector(isa_irq); + BUG_ON(bind_irq_vector(irq, vector)); register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", @@ -966,6 +973,8 @@ iosapic_system_init (int system_pcat_compat) iosapic_intr_info[irq].low32 = IOSAPIC_MASK; /* mark as unused */ INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + + iosapic_intr_info[irq].count = 0; } pcat_compat = system_pcat_compat; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 407b4587048..cc3ee4ef37a 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -35,7 +35,7 @@ void ack_bad_irq(unsigned int irq) #ifdef CONFIG_IA64_GENERIC unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return (unsigned int) vec; + return __get_cpu_var(vector_irq)[vec]; } #endif diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index bc47049f060..072427c2c3f 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -46,6 +46,12 @@ #define IRQ_DEBUG 0 +#define IRQ_VECTOR_UNASSIGNED (0) + +#define IRQ_UNUSED (0) +#define IRQ_USED (1) +#define IRQ_RSVD (2) + /* These can be overridden in platform_irq_init */ int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; @@ -64,46 +70,161 @@ __u8 isa_irq_to_vector_map[16] = { }; EXPORT_SYMBOL(isa_irq_to_vector_map); -static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_MAX_DEVICE_VECTORS)]; +DEFINE_SPINLOCK(vector_lock); + +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { + [0 ... NR_IRQS - 1] = { .vector = IRQ_VECTOR_UNASSIGNED } +}; + +DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { + [0 ... IA64_NUM_VECTORS - 1] = IA64_SPURIOUS_INT_VECTOR +}; + +static int irq_status[NR_IRQS] = { + [0 ... 
NR_IRQS -1] = IRQ_UNUSED +}; + +int check_irq_used(int irq) +{ + if (irq_status[irq] == IRQ_USED) + return 1; + + return -1; +} + +static void reserve_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + irq_status[irq] = IRQ_RSVD; + spin_unlock_irqrestore(&vector_lock, flags); +} + +static inline int find_unassigned_irq(void) +{ + int irq; + + for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++) + if (irq_status[irq] == IRQ_UNUSED) + return irq; + return -ENOSPC; +} + +static inline int find_unassigned_vector(void) +{ + int vector; + + for (vector = IA64_FIRST_DEVICE_VECTOR; + vector <= IA64_LAST_DEVICE_VECTOR; vector++) + if (__get_cpu_var(vector_irq[vector]) == IA64_SPURIOUS_INT_VECTOR) + return vector; + return -ENOSPC; +} + +static int __bind_irq_vector(int irq, int vector) +{ + int cpu; + + if (irq_to_vector(irq) == vector) + return 0; + if (irq_to_vector(irq) != IRQ_VECTOR_UNASSIGNED) + return -EBUSY; + for_each_online_cpu(cpu) + per_cpu(vector_irq, cpu)[vector] = irq; + irq_cfg[irq].vector = vector; + irq_status[irq] = IRQ_USED; + return 0; +} + +int bind_irq_vector(int irq, int vector) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __bind_irq_vector(irq, vector); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +static void clear_irq_vector(int irq) +{ + unsigned long flags; + int vector, cpu; + + spin_lock_irqsave(&vector_lock, flags); + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON(irq_cfg[irq].vector == IRQ_VECTOR_UNASSIGNED); + vector = irq_cfg[irq].vector; + for_each_online_cpu(cpu) + per_cpu(vector_irq, cpu)[vector] = IA64_SPURIOUS_INT_VECTOR; + irq_cfg[irq].vector = IRQ_VECTOR_UNASSIGNED; + irq_status[irq] = IRQ_UNUSED; + spin_unlock_irqrestore(&vector_lock, flags); +} int assign_irq_vector (int irq) { - int pos, vector; - again: - pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); - vector = IA64_FIRST_DEVICE_VECTOR + pos; - if 
(vector > IA64_LAST_DEVICE_VECTOR) - return -ENOSPC; - if (test_and_set_bit(pos, ia64_vector_mask)) - goto again; + unsigned long flags; + int vector = -ENOSPC; + + if (irq < 0) { + goto out; + } + spin_lock_irqsave(&vector_lock, flags); + vector = find_unassigned_vector(); + if (vector < 0) + goto out; + BUG_ON(__bind_irq_vector(irq, vector)); + spin_unlock_irqrestore(&vector_lock, flags); + out: return vector; } void free_irq_vector (int vector) { - int pos; - - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) return; - - pos = vector - IA64_FIRST_DEVICE_VECTOR; - if (!test_and_clear_bit(pos, ia64_vector_mask)) - printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); + clear_irq_vector(vector); } int reserve_irq_vector (int vector) { - int pos; - if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) return -EINVAL; + return !!bind_irq_vector(vector, vector); +} - pos = vector - IA64_FIRST_DEVICE_VECTOR; - return test_and_set_bit(pos, ia64_vector_mask); +/* + * Initialize vector_irq on a new cpu. This function must be called + * with vector_lock held. 
+ */ +void __setup_vector_irq(int cpu) +{ + int irq, vector; + + /* Clear vector_irq */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) + per_cpu(vector_irq, cpu)[vector] = IA64_SPURIOUS_INT_VECTOR; + /* Mark the inuse vectors */ + for (irq = 0; irq < NR_IRQS; ++irq) { + if ((vector = irq_to_vector(irq)) != IRQ_VECTOR_UNASSIGNED) + per_cpu(vector_irq, cpu)[vector] = irq; + } +} + +void destroy_and_reserve_irq(unsigned int irq) +{ + dynamic_irq_cleanup(irq); + + clear_irq_vector(irq); + reserve_irq(irq); } /* @@ -111,18 +232,29 @@ reserve_irq_vector (int vector) */ int create_irq(void) { - int vector = assign_irq_vector(AUTO_ASSIGN); - - if (vector >= 0) - dynamic_irq_init(vector); - - return vector; + unsigned long flags; + int irq, vector; + + irq = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + vector = find_unassigned_vector(); + if (vector < 0) + goto out; + irq = find_unassigned_irq(); + if (irq < 0) + goto out; + BUG_ON(__bind_irq_vector(irq, vector)); + out: + spin_unlock_irqrestore(&vector_lock, flags); + if (irq >= 0) + dynamic_irq_init(irq); + return irq; } void destroy_irq(unsigned int irq) { dynamic_irq_cleanup(irq); - free_irq_vector(irq); + clear_irq_vector(irq); } #ifdef CONFIG_SMP @@ -301,14 +433,13 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action) irq_desc_t *desc; unsigned int irq; - for (irq = 0; irq < NR_IRQS; ++irq) - if (irq_to_vector(irq) == vec) { - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; - desc->chip = &irq_type_ia64_lsapic; - if (action) - setup_irq(irq, action); - } + irq = vec; + BUG_ON(bind_irq_vector(irq, vec)); + desc = irq_desc + irq; + desc->status |= IRQ_PER_CPU; + desc->chip = &irq_type_ia64_lsapic; + if (action) + setup_irq(irq, action); } void __init diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 3c9d8e6089c..9f5c90b594b 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -395,9 +395,13 @@ smp_callin (void) fix_b0_for_bsp(); 
lock_ipi_calllock(); + spin_lock(&vector_lock); + /* Setup the per cpu irq handling data structures */ + __setup_vector_irq(cpuid); cpu_set(cpuid, cpu_online_map); unlock_ipi_calllock(); per_cpu(cpu_state, cpuid) = CPU_ONLINE; + spin_unlock(&vector_lock); smp_setup_percpu_timer(); diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index c054d7a9aaa..4eff7ff2da8 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -90,13 +90,24 @@ enum { extern __u8 isa_irq_to_vector_map[16]; #define isa_irq_to_vector(x) isa_irq_to_vector_map[(x)] +struct irq_cfg { + ia64_vector vector; +}; +extern spinlock_t vector_lock; +extern struct irq_cfg irq_cfg[NR_IRQS]; +DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); + extern struct hw_interrupt_type irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ +extern int bind_irq_vector(int irq, int vector); extern int assign_irq_vector (int irq); /* allocate a free vector */ extern void free_irq_vector (int vector); extern int reserve_irq_vector (int vector); +extern void __setup_vector_irq(int cpu); extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); extern void register_percpu_irq (ia64_vector vec, struct irqaction *action); +extern int check_irq_used (int irq); +extern void destroy_and_reserve_irq (unsigned int irq); static inline void ia64_resend_irq(unsigned int vector) { @@ -113,7 +124,7 @@ extern irq_desc_t irq_desc[NR_IRQS]; static inline unsigned int __ia64_local_vector_to_irq (ia64_vector vec) { - return (unsigned int) vec; + return __get_cpu_var(vector_irq)[vec]; } #endif @@ -131,7 +142,7 @@ __ia64_local_vector_to_irq (ia64_vector vec) static inline ia64_vector irq_to_vector (int irq) { - return (ia64_vector) irq; + return irq_cfg[irq].vector; } /* -- cgit v1.2.3-70-g09d2 From 4994be1b3fe9120c88022ff5c0c33f6312b17adb Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 17 Jul 2007 21:22:33 +0900 Subject: [IA64] Add support for vector domain Add 
fundamental support for multiple vector domains. Even with this patch there is still only one vector domain, and IRQ migration across domains is not supported yet. Signed-off-by: Kenji Kaneshige Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 13 +++-- arch/ia64/kernel/irq_ia64.c | 120 ++++++++++++++++++++++++++++++++------------ arch/ia64/kernel/msi_ia64.c | 9 +++- include/asm-ia64/hw_irq.h | 4 +- include/asm-ia64/irq.h | 9 +++- 5 files changed, 113 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index cf27cfb4d16..e647254c270 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -354,6 +354,8 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) irq &= (~IA64_IRQ_REDIRECTED); + /* IRQ migration across domain is not supported yet */ + cpus_and(mask, mask, irq_to_domain(irq)); if (cpus_empty(mask)) return; @@ -663,6 +665,7 @@ get_target_cpu (unsigned int gsi, int irq) #ifdef CONFIG_SMP static int cpu = -1; extern int cpe_vector; + cpumask_t domain = irq_to_domain(irq); /* * In case of vector shared by multiple RTEs, all RTEs that @@ -701,7 +704,7 @@ get_target_cpu (unsigned int gsi, int irq) goto skip_numa_setup; cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - + cpus_and(cpu_mask, cpu_mask, domain); for_each_cpu_mask(numa_cpu, cpu_mask) { if (!cpu_online(numa_cpu)) cpu_clear(numa_cpu, cpu_mask); @@ -731,7 +734,7 @@ skip_numa_setup: do { if (++cpu >= NR_CPUS) cpu = 0; - } while (!cpu_online(cpu)); + } while (!cpu_online(cpu) || !cpu_isset(cpu, domain)); return cpu_physical_id(cpu); #else /* CONFIG_SMP */ @@ -900,7 +903,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, switch (int_type) { case ACPI_INTERRUPT_PMI: irq = vector = iosapic_vector; - bind_irq_vector(irq, vector); + bind_irq_vector(irq, vector, CPU_MASK_ALL); /* * since PMI vector is alloc'd by FW(ACPI) not by kernel,
* we need to make sure the vector is available @@ -917,7 +920,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, break; case ACPI_INTERRUPT_CPEI: irq = vector = IA64_CPE_VECTOR; - BUG_ON(bind_irq_vector(irq, vector)); + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); delivery = IOSAPIC_LOWEST_PRIORITY; mask = 1; break; @@ -953,7 +956,7 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, unsigned int dest = cpu_physical_id(smp_processor_id()); irq = vector = isa_irq_to_vector(isa_irq); - BUG_ON(bind_irq_vector(irq, vector)); + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 072427c2c3f..a3667631ed8 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -60,6 +60,8 @@ int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; void __iomem *ipi_base_addr = ((void __iomem *) (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); +static cpumask_t vector_allocation_domain(int cpu); + /* * Legacy IRQ to IA-64 vector translation table. */ @@ -73,13 +75,20 @@ EXPORT_SYMBOL(isa_irq_to_vector_map); DEFINE_SPINLOCK(vector_lock); struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { - [0 ... NR_IRQS - 1] = { .vector = IRQ_VECTOR_UNASSIGNED } + [0 ... NR_IRQS - 1] = { + .vector = IRQ_VECTOR_UNASSIGNED, + .domain = CPU_MASK_NONE + } }; DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { [0 ... IA64_NUM_VECTORS - 1] = IA64_SPURIOUS_INT_VECTOR }; +static cpumask_t vector_table[IA64_MAX_DEVICE_VECTORS] = { + [0 ... IA64_MAX_DEVICE_VECTORS - 1] = CPU_MASK_NONE +}; + static int irq_status[NR_IRQS] = { [0 ... 
NR_IRQS -1] = IRQ_UNUSED }; @@ -111,39 +120,54 @@ static inline int find_unassigned_irq(void) return -ENOSPC; } -static inline int find_unassigned_vector(void) +static inline int find_unassigned_vector(cpumask_t domain) { - int vector; + cpumask_t mask; + int pos; + + cpus_and(mask, domain, cpu_online_map); + if (cpus_empty(mask)) + return -EINVAL; - for (vector = IA64_FIRST_DEVICE_VECTOR; - vector <= IA64_LAST_DEVICE_VECTOR; vector++) - if (__get_cpu_var(vector_irq[vector]) == IA64_SPURIOUS_INT_VECTOR) - return vector; + for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { + cpus_and(mask, domain, vector_table[pos]); + if (!cpus_empty(mask)) + continue; + return IA64_FIRST_DEVICE_VECTOR + pos; + } return -ENOSPC; } -static int __bind_irq_vector(int irq, int vector) +static int __bind_irq_vector(int irq, int vector, cpumask_t domain) { - int cpu; + cpumask_t mask; + int cpu, pos; + struct irq_cfg *cfg = &irq_cfg[irq]; - if (irq_to_vector(irq) == vector) + cpus_and(mask, domain, cpu_online_map); + if (cpus_empty(mask)) + return -EINVAL; + if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain)) return 0; - if (irq_to_vector(irq) != IRQ_VECTOR_UNASSIGNED) + if (cfg->vector != IRQ_VECTOR_UNASSIGNED) return -EBUSY; - for_each_online_cpu(cpu) + for_each_cpu_mask(cpu, mask) per_cpu(vector_irq, cpu)[vector] = irq; - irq_cfg[irq].vector = vector; + cfg->vector = vector; + cfg->domain = domain; irq_status[irq] = IRQ_USED; + pos = vector - IA64_FIRST_DEVICE_VECTOR; + cpus_or(vector_table[pos], vector_table[pos], domain); return 0; } -int bind_irq_vector(int irq, int vector) +int bind_irq_vector(int irq, int vector, cpumask_t domain) { unsigned long flags; int ret; spin_lock_irqsave(&vector_lock, flags); - ret = __bind_irq_vector(irq, vector); + ret = __bind_irq_vector(irq, vector, domain); spin_unlock_irqrestore(&vector_lock, flags); return ret; } @@ -151,16 +175,24 @@ int bind_irq_vector(int irq, int vector) static void clear_irq_vector(int irq) { unsigned long 
flags; - int vector, cpu; + int vector, cpu, pos; + cpumask_t mask; + cpumask_t domain; + struct irq_cfg *cfg = &irq_cfg[irq]; spin_lock_irqsave(&vector_lock, flags); BUG_ON((unsigned)irq >= NR_IRQS); - BUG_ON(irq_cfg[irq].vector == IRQ_VECTOR_UNASSIGNED); - vector = irq_cfg[irq].vector; - for_each_online_cpu(cpu) + BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); + vector = cfg->vector; + domain = cfg->domain; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask(cpu, mask) per_cpu(vector_irq, cpu)[vector] = IA64_SPURIOUS_INT_VECTOR; - irq_cfg[irq].vector = IRQ_VECTOR_UNASSIGNED; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; irq_status[irq] = IRQ_UNUSED; + pos = vector - IA64_FIRST_DEVICE_VECTOR; + cpus_andnot(vector_table[pos], vector_table[pos], domain); spin_unlock_irqrestore(&vector_lock, flags); } @@ -168,18 +200,26 @@ int assign_irq_vector (int irq) { unsigned long flags; - int vector = -ENOSPC; + int vector, cpu; + cpumask_t domain; + + vector = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); if (irq < 0) { goto out; } - spin_lock_irqsave(&vector_lock, flags); - vector = find_unassigned_vector(); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } if (vector < 0) goto out; - BUG_ON(__bind_irq_vector(irq, vector)); - spin_unlock_irqrestore(&vector_lock, flags); + BUG_ON(__bind_irq_vector(irq, vector, domain)); out: + spin_unlock_irqrestore(&vector_lock, flags); return vector; } @@ -198,7 +238,7 @@ reserve_irq_vector (int vector) if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) return -EINVAL; - return !!bind_irq_vector(vector, vector); + return !!bind_irq_vector(vector, vector, CPU_MASK_ALL); } /* @@ -214,11 +254,19 @@ void __setup_vector_irq(int cpu) per_cpu(vector_irq, cpu)[vector] = IA64_SPURIOUS_INT_VECTOR; /* Mark the inuse vectors */ for (irq = 0; irq < NR_IRQS; ++irq) { - if ((vector = 
irq_to_vector(irq)) != IRQ_VECTOR_UNASSIGNED) - per_cpu(vector_irq, cpu)[vector] = irq; + if (!cpu_isset(cpu, irq_cfg[irq].domain)) + continue; + vector = irq_to_vector(irq); + per_cpu(vector_irq, cpu)[vector] = irq; } } +static cpumask_t vector_allocation_domain(int cpu) +{ + return CPU_MASK_ALL; +} + + void destroy_and_reserve_irq(unsigned int irq) { dynamic_irq_cleanup(irq); @@ -233,17 +281,23 @@ void destroy_and_reserve_irq(unsigned int irq) int create_irq(void) { unsigned long flags; - int irq, vector; + int irq, vector, cpu; + cpumask_t domain; - irq = -ENOSPC; + irq = vector = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); - vector = find_unassigned_vector(); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } if (vector < 0) goto out; irq = find_unassigned_irq(); if (irq < 0) goto out; - BUG_ON(__bind_irq_vector(irq, vector)); + BUG_ON(__bind_irq_vector(irq, vector, domain)); out: spin_unlock_irqrestore(&vector_lock, flags); if (irq >= 0) @@ -434,7 +488,7 @@ register_percpu_irq (ia64_vector vec, struct irqaction *action) unsigned int irq; irq = vec; - BUG_ON(bind_irq_vector(irq, vec)); + BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL)); desc = irq_desc + irq; desc->status |= IRQ_PER_CPU; desc->chip = &irq_type_ia64_lsapic; diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index c81080df70d..1d22670cc88 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -52,6 +52,11 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) struct msi_msg msg; u32 addr; + /* IRQ migration across domain is not supported yet */ + cpus_and(cpu_mask, cpu_mask, irq_to_domain(irq)); + if (cpus_empty(cpu_mask)) + return; + read_msi_msg(irq, &msg); addr = msg.address_lo; @@ -69,13 +74,15 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) struct msi_msg msg; unsigned long dest_phys_id; int irq, vector; + 
cpumask_t mask; irq = create_irq(); if (irq < 0) return irq; set_irq_msi(irq, desc); - dest_phys_id = cpu_physical_id(first_cpu(cpu_online_map)); + cpus_and(mask, irq_to_domain(irq), cpu_online_map); + dest_phys_id = cpu_physical_id(first_cpu(mask)); vector = irq_to_vector(irq); msg.address_hi = 0; diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index 4eff7ff2da8..97ea3900d74 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -92,14 +92,16 @@ extern __u8 isa_irq_to_vector_map[16]; struct irq_cfg { ia64_vector vector; + cpumask_t domain; }; extern spinlock_t vector_lock; extern struct irq_cfg irq_cfg[NR_IRQS]; +#define irq_to_domain(x) irq_cfg[(x)].domain DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); extern struct hw_interrupt_type irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ -extern int bind_irq_vector(int irq, int vector); +extern int bind_irq_vector(int irq, int vector, cpumask_t domain); extern int assign_irq_vector (int irq); /* allocate a free vector */ extern void free_irq_vector (int vector); extern int reserve_irq_vector (int vector); diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index 67221615e31..35b360b82e4 100644 --- a/include/asm-ia64/irq.h +++ b/include/asm-ia64/irq.h @@ -14,8 +14,13 @@ #include #include -#define NR_IRQS 256 -#define NR_IRQ_VECTORS NR_IRQS +#define NR_VECTORS 256 + +#if (NR_VECTORS + 32 * NR_CPUS) < 1024 +#define NR_IRQS (NR_VECTORS + 32 * NR_CPUS) +#else +#define NR_IRQS 1024 +#endif static __inline__ int irq_canonicalize (int irq) -- cgit v1.2.3-70-g09d2 From cd378f18cf73d92bf0b6e1e6b5759b5dd729a9f2 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 17 Jul 2007 21:22:48 +0900 Subject: [IA64] Support irq migration across domain Add support for IRQ migration across vector domain. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 20 +++++++++++++++++--- arch/ia64/kernel/irq_ia64.c | 42 +++++++++++++++++++++++++++++++++++++++--- arch/ia64/kernel/msi_ia64.c | 20 ++++++++++++++------ include/asm-ia64/hw_irq.h | 1 + include/asm-ia64/iosapic.h | 2 ++ 5 files changed, 73 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index e647254c270..c101c8bff27 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -354,11 +354,13 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) irq &= (~IA64_IRQ_REDIRECTED); - /* IRQ migration across domain is not supported yet */ - cpus_and(mask, mask, irq_to_domain(irq)); + cpus_and(mask, mask, cpu_online_map); if (cpus_empty(mask)) return; + if (reassign_irq_vector(irq, first_cpu(mask))) + return; + dest = cpu_physical_id(first_cpu(mask)); if (list_empty(&iosapic_intr_info[irq].rtes)) @@ -376,6 +378,8 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) else /* change delivery mode to fixed */ low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + low32 &= IOSAPIC_VECTOR_MASK; + low32 |= irq_to_vector(irq); iosapic_intr_info[irq].low32 = low32; iosapic_intr_info[irq].dest = dest; @@ -404,10 +408,20 @@ iosapic_end_level_irq (unsigned int irq) { ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; + int do_unmask_irq = 0; + + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_irq(irq); + } - move_native_irq(irq); list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) iosapic_eoi(rte->iosapic->addr, vec); + + if (unlikely(do_unmask_irq)) { + move_masked_irq(irq); + unmask_irq(irq); + } } #define iosapic_shutdown_level_irq mask_irq diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index a3667631ed8..22806b94025 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ 
b/arch/ia64/kernel/irq_ia64.c @@ -172,15 +172,13 @@ int bind_irq_vector(int irq, int vector, cpumask_t domain) return ret; } -static void clear_irq_vector(int irq) +static void __clear_irq_vector(int irq) { - unsigned long flags; int vector, cpu, pos; cpumask_t mask; cpumask_t domain; struct irq_cfg *cfg = &irq_cfg[irq]; - spin_lock_irqsave(&vector_lock, flags); BUG_ON((unsigned)irq >= NR_IRQS); BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); vector = cfg->vector; @@ -193,6 +191,14 @@ static void clear_irq_vector(int irq) irq_status[irq] = IRQ_UNUSED; pos = vector - IA64_FIRST_DEVICE_VECTOR; cpus_andnot(vector_table[pos], vector_table[pos], domain); +} + +static void clear_irq_vector(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } @@ -275,6 +281,36 @@ void destroy_and_reserve_irq(unsigned int irq) reserve_irq(irq); } +static int __reassign_irq_vector(int irq, int cpu) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + int vector; + cpumask_t domain; + + if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) + return -EINVAL; + if (cpu_isset(cpu, cfg->domain)) + return 0; + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector < 0) + return -ENOSPC; + __clear_irq_vector(irq); + BUG_ON(__bind_irq_vector(irq, vector, domain)); + return 0; +} + +int reassign_irq_vector(int irq, int cpu) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __reassign_irq_vector(irq, cpu); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + /* * Dynamic irq allocate and deallocation for MSI */ diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 1d22670cc88..2fdbd5c3f21 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -13,6 +13,7 @@ #define MSI_DATA_VECTOR_SHIFT 0 #define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) +#define 
MSI_DATA_VECTOR_MASK 0xffffff00 #define MSI_DATA_DELIVERY_SHIFT 8 #define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT) @@ -50,22 +51,29 @@ static struct irq_chip ia64_msi_chip; static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) { struct msi_msg msg; - u32 addr; + u32 addr, data; + int cpu = first_cpu(cpu_mask); - /* IRQ migration across domain is not supported yet */ - cpus_and(cpu_mask, cpu_mask, irq_to_domain(irq)); - if (cpus_empty(cpu_mask)) + if (!cpu_online(cpu)) + return; + + if (reassign_irq_vector(irq, cpu)) return; read_msi_msg(irq, &msg); addr = msg.address_lo; addr &= MSI_ADDR_DESTID_MASK; - addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(first_cpu(cpu_mask))); + addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); msg.address_lo = addr; + data = msg.data; + data &= MSI_DATA_VECTOR_MASK; + data |= MSI_DATA_VECTOR(irq_to_vector(irq)); + msg.data = data; + write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpu_mask; + irq_desc[irq].affinity = cpumask_of_cpu(cpu); } #endif /* CONFIG_SMP */ diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index 97ea3900d74..efa1b8f7251 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -106,6 +106,7 @@ extern int assign_irq_vector (int irq); /* allocate a free vector */ extern void free_irq_vector (int vector); extern int reserve_irq_vector (int vector); extern void __setup_vector_irq(int cpu); +extern int reassign_irq_vector(int irq, int cpu); extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); extern void register_percpu_irq (ia64_vector vec, struct irqaction *action); extern int check_irq_used (int irq); diff --git a/include/asm-ia64/iosapic.h b/include/asm-ia64/iosapic.h index 09bdc3898df..b8f71285914 100644 --- a/include/asm-ia64/iosapic.h +++ b/include/asm-ia64/iosapic.h @@ -47,6 +47,8 @@ #define IOSAPIC_MASK_SHIFT 16 #define IOSAPIC_MASK (1< Date: Tue, 17 Jul 2007 21:42:44 -0400 Subject: sys_fallocate() 
implementation on i386, x86_64 and powerpc fallocate() is a new system call being proposed here which will allow applications to preallocate space to any file(s) in a file system. Each file system implementation that wants to use this feature will need to support an inode operation called ->fallocate(). Applications can use this feature to avoid fragmentation to a certain level and thus get faster access speed. With preallocation, applications also get a guarantee of space for particular file(s) - even if later the system becomes full. Currently, glibc provides an interface called posix_fallocate() which can be used for a similar purpose. Though this has the advantage of working on all file systems, it is quite slow (since it writes zeroes to each block that has to be preallocated). Without a doubt, file systems can do this more efficiently within the kernel, by implementing the proposed fallocate() system call. It is expected that posix_fallocate() will be modified to call this new system call first and in case the kernel/filesystem does not implement it, it should fall back to the current implementation of writing zeroes to the new blocks. ToDos: 1. Implementation on other architectures (other than i386, x86_64, and ppc). Patches for s390(x) and ia64 are already available from previous posts, but it was decided that they should be added later once fallocate is in the mainline. Hence not including those patches in this take. 2.
Changes to glibc, a) to support fallocate() system call b) to make posix_fallocate() and posix_fallocate64() call fallocate() Signed-off-by: Amit Arora --- arch/i386/kernel/syscall_table.S | 1 + arch/powerpc/kernel/sys_ppc32.c | 7 +++++ arch/x86_64/ia32/ia32entry.S | 1 + arch/x86_64/ia32/sys_ia32.c | 8 ++++++ fs/open.c | 59 ++++++++++++++++++++++++++++++++++++++++ include/asm-i386/unistd.h | 3 +- include/asm-powerpc/systbl.h | 1 + include/asm-powerpc/unistd.h | 3 +- include/asm-x86_64/unistd.h | 2 ++ include/linux/falloc.h | 6 ++++ include/linux/fs.h | 2 ++ include/linux/syscalls.h | 1 + 12 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 include/linux/falloc.h (limited to 'include') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index bf6adce5226..8344c70adf6 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -323,3 +323,4 @@ ENTRY(sys_call_table) .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_fallocate diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b42cbf1e2d7..bd85b5fd08c 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -773,6 +773,13 @@ asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, return sys_truncate(path, (high << 32) | low); } +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, + u32 lenhi, u32 lenlo) +{ + return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + ((loff_t)lenhi << 32) | lenlo); +} + asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, unsigned long low) { diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 782dea81943..3f66e970d86 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -719,4 +719,5 @@ ia32_sys_call_table: .quad compat_sys_signalfd .quad compat_sys_timerfd .quad sys_eventfd + .quad sys32_fallocate 
ia32_syscall_end: diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 99a78a3cce7..bee96d61443 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -879,3 +879,11 @@ asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, len, advice); } + +asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, + unsigned offset_hi, unsigned len_lo, + unsigned len_hi) +{ + return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, + ((u64)len_hi << 32) | len_lo); +} diff --git a/fs/open.c b/fs/open.c index be6a457f422..a6b054edacb 100644 --- a/fs/open.c +++ b/fs/open.c @@ -26,6 +26,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -352,6 +353,64 @@ asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) } #endif +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) +{ + struct file *file; + struct inode *inode; + long ret = -EINVAL; + + if (offset < 0 || len <= 0) + goto out; + + /* Return error if mode is not supported */ + ret = -EOPNOTSUPP; + if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) + goto out; + + ret = -EBADF; + file = fget(fd); + if (!file) + goto out; + if (!(file->f_mode & FMODE_WRITE)) + goto out_fput; + /* + * Revalidate the write permissions, in case security policy has + * changed since the files were opened. + */ + ret = security_file_permission(file, MAY_WRITE); + if (ret) + goto out_fput; + + inode = file->f_path.dentry->d_inode; + + ret = -ESPIPE; + if (S_ISFIFO(inode->i_mode)) + goto out_fput; + + ret = -ENODEV; + /* + * Let individual file system decide if it supports preallocation + * for directories or not. 
+ */ + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + goto out_fput; + + ret = -EFBIG; + /* Check for wrap through zero too */ + if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) + goto out_fput; + + if (inode->i_op && inode->i_op->fallocate) + ret = inode->i_op->fallocate(inode, mode, offset, len); + else + ret = -ENOSYS; + +out_fput: + fput(file); +out: + return ret; +} + /* * access() needs to use the real uid/gid, not the effective uid/gid. * We do this by temporarily clearing all FS-related capabilities and diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index e84ace1ec8b..9b15545eb9b 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -329,10 +329,11 @@ #define __NR_signalfd 321 #define __NR_timerfd 322 #define __NR_eventfd 323 +#define __NR_fallocate 324 #ifdef __KERNEL__ -#define NR_syscalls 324 +#define NR_syscalls 325 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h index 1cc3f9cb6f4..cc6d8722825 100644 --- a/include/asm-powerpc/systbl.h +++ b/include/asm-powerpc/systbl.h @@ -308,6 +308,7 @@ COMPAT_SYS_SPU(move_pages) SYSCALL_SPU(getcpu) COMPAT_SYS(epoll_pwait) COMPAT_SYS_SPU(utimensat) +COMPAT_SYS(fallocate) COMPAT_SYS_SPU(signalfd) COMPAT_SYS_SPU(timerfd) SYSCALL_SPU(eventfd) diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index f71c6061f1e..97d82b6a940 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -331,10 +331,11 @@ #define __NR_timerfd 306 #define __NR_eventfd 307 #define __NR_sync_file_range2 308 +#define __NR_fallocate 309 #ifdef __KERNEL__ -#define __NR_syscalls 309 +#define __NR_syscalls 310 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 8696f8ad401..fc4e73f5f1f 100644 --- a/include/asm-x86_64/unistd.h +++ 
b/include/asm-x86_64/unistd.h @@ -630,6 +630,8 @@ __SYSCALL(__NR_signalfd, sys_signalfd) __SYSCALL(__NR_timerfd, sys_timerfd) #define __NR_eventfd 284 __SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 285 +__SYSCALL(__NR_fallocate, sys_fallocate) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/include/linux/falloc.h b/include/linux/falloc.h new file mode 100644 index 00000000000..8e912ab6a07 --- /dev/null +++ b/include/linux/falloc.h @@ -0,0 +1,6 @@ +#ifndef _FALLOC_H_ +#define _FALLOC_H_ + +#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ + +#endif /* _FALLOC_H_ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 98205f68047..0b806c5e32e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1147,6 +1147,8 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + long (*fallocate)(struct inode *inode, int mode, loff_t offset, + loff_t len); }; struct seq_file; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 83d0ec11235..7a8b1e3322e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -610,6 +610,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas asmlinkage long sys_timerfd(int ufd, int clockid, int flags, const struct itimerspec __user *utmr); asmlinkage long sys_eventfd(unsigned int count); +asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit v1.2.3-70-g09d2 From a2df2a63407803a833f82e1fa6693826c8c9d584 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:41 -0400 Subject: fallocate support in ext4 This patch implements ->fallocate() inode operation in ext4. 
With this patch users of ext4 file systems will be able to use fallocate() system call for persistent preallocation. Current implementation only supports preallocation for regular files (directories not supported as of date) with extent maps. This patch does not support block-mapped files currently. Only FALLOC_ALLOCATE and FALLOC_RESV_SPACE modes are being supported as of now. Signed-off-by: Amit Arora --- fs/ext4/extents.c | 249 +++++++++++++++++++++++++++++++++------- fs/ext4/file.c | 1 + include/linux/ext4_fs.h | 8 ++ include/linux/ext4_fs_extents.h | 15 +++ 4 files changed, 232 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b9ce2412907..ba25832a756 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -282,7 +283,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) } else if (path->p_ext) { ext_debug(" %d:%d:%llu ", le32_to_cpu(path->p_ext->ee_block), - le16_to_cpu(path->p_ext->ee_len), + ext4_ext_get_actual_len(path->p_ext), ext_pblock(path->p_ext)); } else ext_debug(" []"); @@ -305,7 +306,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), ext_pblock(ex)); + ext4_ext_get_actual_len(ex), ext_pblock(ex)); } ext_debug("\n"); } @@ -425,7 +426,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) ext_debug(" -> %d:%llu:%d ", le32_to_cpu(path->p_ext->ee_block), ext_pblock(path->p_ext), - le16_to_cpu(path->p_ext->ee_len)); + ext4_ext_get_actual_len(path->p_ext)); #ifdef CHECK_BINSEARCH { @@ -686,7 +687,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("move %d:%llu:%d in new leaf %llu\n", le32_to_cpu(path[depth].p_ext->ee_block), ext_pblock(path[depth].p_ext), - 
le16_to_cpu(path[depth].p_ext->ee_len), + ext4_ext_get_actual_len(path[depth].p_ext), newblock); /*memmove(ex++, path[depth].p_ext++, sizeof(struct ext4_extent)); @@ -1106,7 +1107,19 @@ static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) != + unsigned short ext1_ee_len, ext2_ee_len; + + /* + * Make sure that either both extents are uninitialized, or + * both are _not_. + */ + if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) + return 0; + + ext1_ee_len = ext4_ext_get_actual_len(ex1); + ext2_ee_len = ext4_ext_get_actual_len(ex2); + + if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != le32_to_cpu(ex2->ee_block)) return 0; @@ -1115,14 +1128,14 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) + if (ext1_ee_len + ext2_ee_len > EXT_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (le16_to_cpu(ex1->ee_len) >= 4) return 0; #endif - if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) + if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) return 1; return 0; } @@ -1144,7 +1157,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, unsigned int ret = 0; b1 = le32_to_cpu(newext->ee_block); - len1 = le16_to_cpu(newext->ee_len); + len1 = ext4_ext_get_actual_len(newext); depth = ext_depth(inode); if (!path[depth].p_ext) goto out; @@ -1191,8 +1204,9 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_extent *nearex; /* nearest extent */ struct ext4_ext_path *npath = NULL; int depth, len, err, next; + unsigned uninitialized = 0; - BUG_ON(newext->ee_len == 0); + BUG_ON(ext4_ext_get_actual_len(newext) == 0); depth = ext_depth(inode); ex = path[depth].p_ext; BUG_ON(path[depth].p_hdr == NULL); @@ 
-1200,14 +1214,24 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* try to insert block into found extent and return */ if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append %d block to %d:%d (from %llu)\n", - le16_to_cpu(newext->ee_len), + ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), ext_pblock(ex)); + ext4_ext_get_actual_len(ex), ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) return err; - ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) - + le16_to_cpu(newext->ee_len)); + + /* + * ext4_can_extents_be_merged should have checked that either + * both extents are uninitialized, or both aren't. Thus we + * need to check only one of them here. + */ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(newext)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1263,7 +1287,7 @@ has_space: ext_debug("first extent in the leaf: %d:%llu:%d\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), - le16_to_cpu(newext->ee_len)); + ext4_ext_get_actual_len(newext)); path[depth].p_ext = EXT_FIRST_EXTENT(eh); } else if (le32_to_cpu(newext->ee_block) > le32_to_cpu(nearex->ee_block)) { @@ -1276,7 +1300,7 @@ has_space: "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), - le16_to_cpu(newext->ee_len), + ext4_ext_get_actual_len(newext), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 2, nearex + 1, len); } @@ -1289,7 +1313,7 @@ has_space: "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), - le16_to_cpu(newext->ee_len), + ext4_ext_get_actual_len(newext), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 1, nearex, len); path[depth].p_ext = nearex; @@ -1308,8 +1332,13 @@ merge: if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) break; /* merge 
with next extent! */ - nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) - + le16_to_cpu(nearex[1].ee_len)); + if (ext4_ext_is_uninitialized(nearex)) + uninitialized = 1; + nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex) + + ext4_ext_get_actual_len(nearex + 1)); + if (uninitialized) + ext4_ext_mark_uninitialized(nearex); + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - nearex - 1) * sizeof(struct ext4_extent); @@ -1379,8 +1408,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, end = le32_to_cpu(ex->ee_block); if (block + num < end) end = block + num; - } else if (block >= - le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { + } else if (block >= le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex)) { /* need to allocate space after found extent */ start = block; end = block + num; @@ -1392,7 +1421,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, * by found extent */ start = block; - end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); + end = le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex); if (block + num < end) end = block + num; exists = 1; @@ -1408,7 +1438,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, cbex.ec_type = EXT4_EXT_CACHE_GAP; } else { cbex.ec_block = le32_to_cpu(ex->ee_block); - cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_len = ext4_ext_get_actual_len(ex); cbex.ec_start = ext_pblock(ex); cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } @@ -1481,15 +1511,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, ext_debug("cache gap(before): %lu [%lu:%lu]", (unsigned long) block, (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len)); + (unsigned long) ext4_ext_get_actual_len(ex)); } else if (block >= le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len)) { + + ext4_ext_get_actual_len(ex)) { lblock = le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len); + + 
ext4_ext_get_actual_len(ex); len = ext4_ext_next_allocated_block(path); ext_debug("cache gap(after): [%lu:%lu] %lu", (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) ext4_ext_get_actual_len(ex), (unsigned long) block); BUG_ON(len == lblock); len = len - lblock; @@ -1619,12 +1649,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, unsigned long from, unsigned long to) { struct buffer_head *bh; + unsigned short ee_len = ext4_ext_get_actual_len(ex); int i; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - unsigned short ee_len = le16_to_cpu(ex->ee_len); spin_lock(&sbi->s_ext_stats_lock); sbi->s_ext_blocks += ee_len; sbi->s_ext_extents++; @@ -1638,12 +1668,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, } #endif if (from >= le32_to_cpu(ex->ee_block) - && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { /* tail removal */ unsigned long num; ext4_fsblk_t start; - num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; - start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; + num = le32_to_cpu(ex->ee_block) + ee_len - from; + start = ext_pblock(ex) + ee_len - num; ext_debug("free last %lu blocks starting %llu\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); @@ -1651,12 +1681,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, } ext4_free_blocks(handle, inode, start, num); } else if (from == le32_to_cpu(ex->ee_block) - && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + from, to, le32_to_cpu(ex->ee_block), ee_len); } else { printk("strange request: removal(2) %lu-%lu from %u:%u\n", - from, to, 
le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + from, to, le32_to_cpu(ex->ee_block), ee_len); } return 0; } @@ -1671,6 +1701,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned a, b, block, num; unsigned long ex_ee_block; unsigned short ex_ee_len; + unsigned uninitialized = 0; struct ext4_extent *ex; ext_debug("truncate since %lu in leaf\n", start); @@ -1685,7 +1716,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = le16_to_cpu(ex->ee_len); + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + ex_ee_len = ext4_ext_get_actual_len(ex); while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { @@ -1753,6 +1786,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex->ee_block = cpu_to_le32(block); ex->ee_len = cpu_to_le16(num); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); err = ext4_ext_dirty(handle, inode, path + depth); if (err) @@ -1762,7 +1797,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = le16_to_cpu(ex->ee_len); + ex_ee_len = ext4_ext_get_actual_len(ex); } if (correct_index && eh->eh_entries) @@ -2038,7 +2073,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (ex) { unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); - unsigned short ee_len = le16_to_cpu(ex->ee_len); + unsigned short ee_len; /* * Allow future support for preallocated extents to be added @@ -2046,8 +2081,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * Uninitialized extents are treated as holes, except that * we avoid (fail) allocating new blocks during a write. 
*/ - if (ee_len > EXT_MAX_LEN) + if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN) goto out2; + ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; @@ -2055,8 +2091,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ee_len - (iblock - ee_block); ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ee_block, ee_len, newblock); - ext4_ext_put_in_cache(inode, ee_block, ee_len, - ee_start, EXT4_EXT_CACHE_EXTENT); + /* Do not put uninitialized extent in the cache */ + if (!ext4_ext_is_uninitialized(ex)) + ext4_ext_put_in_cache(inode, ee_block, + ee_len, ee_start, + EXT4_EXT_CACHE_EXTENT); goto out; } } @@ -2098,6 +2137,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(allocated); + if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ + ext4_ext_mark_uninitialized(&newex); err = ext4_ext_insert_extent(handle, inode, path, &newex); if (err) { /* free data blocks we just allocated */ @@ -2113,8 +2154,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = ext_pblock(&newex); __set_bit(BH_New, &bh_result->b_state); - ext4_ext_put_in_cache(inode, iblock, allocated, newblock, - EXT4_EXT_CACHE_EXTENT); + /* Cache only when it is _not_ an uninitialized extent */ + if (create != EXT4_CREATE_UNINITIALIZED_EXT) + ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + EXT4_EXT_CACHE_EXTENT); out: if (allocated > max_blocks) allocated = max_blocks; @@ -2217,3 +2260,127 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) return needed; } + +/* + * preallocate space for a file. This implements ext4's fallocate inode + * operation, which gets called from sys_fallocate system call. 
+ * For block-mapped files, posix_fallocate should fall back to the method + * of writing zeroes to the required new blocks (the same behavior which is + * expected for file systems which do not support fallocate() system call). + */ +long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) +{ + handle_t *handle; + ext4_fsblk_t block, max_blocks; + ext4_fsblk_t nblocks = 0; + int ret = 0; + int ret2 = 0; + int retries = 0; + struct buffer_head map_bh; + unsigned int credits, blkbits = inode->i_blkbits; + + /* + * currently supporting (pre)allocate mode for extent-based + * files _only_ + */ + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return -EOPNOTSUPP; + + /* preallocation to directories is currently not supported */ + if (S_ISDIR(inode->i_mode)) + return -ENODEV; + + block = offset >> blkbits; + max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) + - block; + + /* + * credits to insert 1 extent into extent tree + buffers to be able to + * modify 1 super block, 1 block bitmap and 1 group descriptor. + */ + credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; +retry: + while (ret >= 0 && ret < max_blocks) { + block = block + ret; + max_blocks = max_blocks - ret; + handle = ext4_journal_start(inode, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + break; + } + + ret = ext4_ext_get_blocks(handle, inode, block, + max_blocks, &map_bh, + EXT4_CREATE_UNINITIALIZED_EXT, 0); + WARN_ON(!ret); + if (!ret) { + ext4_error(inode->i_sb, "ext4_fallocate", + "ext4_ext_get_blocks returned 0! 
inode#%lu" + ", block=%llu, max_blocks=%llu", + inode->i_ino, block, max_blocks); + ret = -EIO; + ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_journal_stop(handle); + break; + } + if (ret > 0) { + /* check wrap through sign-bit/zero here */ + if ((block + ret) < 0 || (block + ret) < block) { + ret = -EIO; + ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_journal_stop(handle); + break; + } + if (buffer_new(&map_bh) && ((block + ret) > + (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits) + >> blkbits))) + nblocks = nblocks + ret; + } + + /* Update ctime if new blocks get allocated */ + if (nblocks) { + struct timespec now; + + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_ctime, &now)) + inode->i_ctime = now; + } + + ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_journal_stop(handle); + if (ret2) + break; + } + + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + + /* + * Time to update the file size. + * Update only when preallocation was requested beyond the file size. + */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + (offset + len) > i_size_read(inode)) { + if (ret > 0) { + /* + * if no error, we assume preallocation succeeded + * completely + */ + mutex_lock(&inode->i_mutex); + i_size_write(inode, offset + len); + EXT4_I(inode)->i_disksize = i_size_read(inode); + mutex_unlock(&inode->i_mutex); + } else if (ret < 0 && nblocks) { + /* Handle partial allocation scenario */ + loff_t newsize; + + mutex_lock(&inode->i_mutex); + newsize = (nblocks << blkbits) + i_size_read(inode); + i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); + EXT4_I(inode)->i_disksize = i_size_read(inode); + mutex_unlock(&inode->i_mutex); + } + } + + return ret > 0 ? 
ret2 : ret; +} diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d4c8186aed6..1a81cd66d63 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -134,5 +134,6 @@ const struct inode_operations ext4_file_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext4_permission, + .fallocate = ext4_fallocate, }; diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index de1f9f78625..87c2d7a05b0 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -102,6 +102,7 @@ EXT4_GOOD_OLD_FIRST_INO : \ (s)->s_first_ino) #endif +#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) /* * Macro-instructions used to manage fragments @@ -225,6 +226,11 @@ struct ext4_new_group_data { __u32 free_blocks_count; }; +/* + * Following is used by preallocation code to tell get_blocks() that we + * want uninitialzed extents. + */ +#define EXT4_CREATE_UNINITIALIZED_EXT 2 /* * ioctl commands @@ -983,6 +989,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, extern void ext4_ext_truncate(struct inode *, struct page *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); +extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, + loff_t len); static inline int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index acfe59740b0..e3d5afc6f23 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -188,6 +188,21 @@ ext4_ext_invalidate_cache(struct inode *inode) EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; } +static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) +{ + ext->ee_len |= cpu_to_le16(0x8000); +} + +static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) +{ + return (int)(le16_to_cpu((ext)->ee_len) & 0x8000); +} + 
+static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) +{ + return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF); +} + extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); -- cgit v1.2.3-70-g09d2 From 56055d3ae4cc7fa6d2b10885f20269de8a989ed7 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:38 -0400 Subject: write support for preallocated blocks This patch adds write support to the uninitialized extents that get created when a preallocation is done using fallocate(). It takes care of splitting the extents into multiple (upto three) extents and merging the new split extents with neighbouring ones, if possible. Signed-off-by: Amit Arora --- fs/ext4/extents.c | 254 +++++++++++++++++++++++++++++++++++----- include/linux/ext4_fs_extents.h | 3 + 2 files changed, 225 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ba25832a756..ded3d469f97 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1140,6 +1140,53 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, return 0; } +/* + * This function tries to merge the "ex" extent to the next extent in the tree. + * It always tries to merge towards right. If you want to merge towards + * left, pass "ex - 1" as argument instead of "ex". + * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns + * 1 if they got merged. 
+ */ +int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) +{ + struct ext4_extent_header *eh; + unsigned int depth, len; + int merge_done = 0; + int uninitialized = 0; + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + eh = path[depth].p_hdr; + + while (ex < EXT_LAST_EXTENT(eh)) { + if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) + break; + /* merge with next extent! */ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(ex + 1)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); + + if (ex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - ex - 1) + * sizeof(struct ext4_extent); + memmove(ex + 1, ex + 2, len); + } + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); + merge_done = 1; + WARN_ON(eh->eh_entries == 0); + if (!eh->eh_entries) + ext4_error(inode->i_sb, "ext4_ext_try_to_merge", + "inode#%lu, eh->eh_entries = 0!", inode->i_ino); + } + + return merge_done; +} + /* * check if a portion of the "newext" extent overlaps with an * existing extent. @@ -1328,25 +1375,7 @@ has_space: merge: /* try to merge extents to the right */ - while (nearex < EXT_LAST_EXTENT(eh)) { - if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) - break; - /* merge with next extent! 
*/ - if (ext4_ext_is_uninitialized(nearex)) - uninitialized = 1; - nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex) - + ext4_ext_get_actual_len(nearex + 1)); - if (uninitialized) - ext4_ext_mark_uninitialized(nearex); - - if (nearex + 1 < EXT_LAST_EXTENT(eh)) { - len = (EXT_LAST_EXTENT(eh) - nearex - 1) - * sizeof(struct ext4_extent); - memmove(nearex + 1, nearex + 2, len); - } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); - BUG_ON(eh->eh_entries == 0); - } + ext4_ext_try_to_merge(inode, path, nearex); /* try to merge extents to the left */ @@ -2012,15 +2041,158 @@ void ext4_ext_release(struct super_block *sb) #endif } +/* + * This function is called by ext4_ext_get_blocks() if someone tries to write + * to an uninitialized extent. It may result in splitting the uninitialized + * extent into multiple extents (upto three - one initialized and two + * uninitialized). + * There are three possibilities: + * a> There is no split required: Entire extent should be initialized + * b> Splits in two extents: Write is happening at either end of the extent + * c> Splits in three extents: Somone is writing in middle of the extent + */ +int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + ext4_fsblk_t iblock, + unsigned long max_blocks) +{ + struct ext4_extent *ex, newex; + struct ext4_extent *ex1 = NULL; + struct ext4_extent *ex2 = NULL; + struct ext4_extent *ex3 = NULL; + struct ext4_extent_header *eh; + unsigned int allocated, ee_block, ee_len, depth; + ext4_fsblk_t newblock; + int err = 0; + int ret = 0; + + depth = ext_depth(inode); + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + allocated = ee_len - (iblock - ee_block); + newblock = iblock - ee_block + ext_pblock(ex); + ex2 = ex; + + /* ex1: ee_block to iblock - 1 : uninitialized */ + if (iblock > ee_block) { + ex1 = ex; + ex1->ee_len = cpu_to_le16(iblock - 
ee_block); + ext4_ext_mark_uninitialized(ex1); + ex2 = &newex; + } + /* + * for sanity, update the length of the ex2 extent before + * we insert ex3, if ex1 is NULL. This is to avoid temporary + * overlap of blocks. + */ + if (!ex1 && allocated > max_blocks) + ex2->ee_len = cpu_to_le16(max_blocks); + /* ex3: to ee_block + ee_len : uninitialised */ + if (allocated > max_blocks) { + unsigned int newdepth; + ex3 = &newex; + ex3->ee_block = cpu_to_le32(iblock + max_blocks); + ext4_ext_store_pblock(ex3, newblock + max_blocks); + ex3->ee_len = cpu_to_le16(allocated - max_blocks); + ext4_ext_mark_uninitialized(ex3); + err = ext4_ext_insert_extent(handle, inode, path, ex3); + if (err) + goto out; + /* + * The depth, and hence eh & ex might change + * as part of the insert above. + */ + newdepth = ext_depth(inode); + if (newdepth != depth) { + depth = newdepth; + path = ext4_ext_find_extent(inode, iblock, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out; + } + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + if (ex2 != &newex) + ex2 = ex; + } + allocated = max_blocks; + } + /* + * If there was a change of depth as part of the + * insertion of ex3 above, we need to update the length + * of the ex1 extent again here + */ + if (ex1 && ex1 != ex) { + ex1 = ex; + ex1->ee_len = cpu_to_le16(iblock - ee_block); + ext4_ext_mark_uninitialized(ex1); + ex2 = &newex; + } + /* ex2: iblock to iblock + maxblocks-1 : initialised */ + ex2->ee_block = cpu_to_le32(iblock); + ex2->ee_start = cpu_to_le32(newblock); + ext4_ext_store_pblock(ex2, newblock); + ex2->ee_len = cpu_to_le16(allocated); + if (ex2 != ex) + goto insert; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + /* + * New (initialized) extent starts from the first block + * in the current extent. i.e., ex2 == ex + * We have to see if it can be merged with the extent + * on the left. 
+ */ + if (ex2 > EXT_FIRST_EXTENT(eh)) { + /* + * To merge left, pass "ex2 - 1" to try_to_merge(), + * since it merges towards right _only_. + */ + ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); + if (ret) { + err = ext4_ext_correct_indexes(handle, inode, path); + if (err) + goto out; + depth = ext_depth(inode); + ex2--; + } + } + /* + * Try to Merge towards right. This might be required + * only when the whole extent is being written to. + * i.e. ex2 == ex and ex3 == NULL. + */ + if (!ex3) { + ret = ext4_ext_try_to_merge(inode, path, ex2); + if (ret) { + err = ext4_ext_correct_indexes(handle, inode, path); + if (err) + goto out; + } + } + /* Mark modified extent as dirty */ + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; +insert: + err = ext4_ext_insert_extent(handle, inode, path, &newex); +out: + return err ? err : allocated; +} + int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create, int extend_disksize) { struct ext4_ext_path *path = NULL; + struct ext4_extent_header *eh; struct ext4_extent newex, *ex; ext4_fsblk_t goal, newblock; - int err = 0, depth; + int err = 0, depth, ret; unsigned long allocated = 0; __clear_bit(BH_New, &bh_result->b_state); @@ -2033,8 +2205,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (goal) { if (goal == EXT4_EXT_CACHE_GAP) { if (!create) { - /* block isn't allocated yet and - * user doesn't want to allocate it */ + /* + * block isn't allocated yet and + * user doesn't want to allocate it + */ goto out2; } /* we should allocate requested block */ @@ -2068,6 +2242,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this is why assert can't be put in ext4_ext_find_extent() */ BUG_ON(path[depth].p_ext == NULL && depth != 0); + eh = path[depth].p_hdr; ex = path[depth].p_ext; if (ex) { @@ -2076,13 +2251,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, unsigned 
short ee_len; /* - * Allow future support for preallocated extents to be added - * as an RO_COMPAT feature: * Uninitialized extents are treated as holes, except that - * we avoid (fail) allocating new blocks during a write. + * we split out initialized portions during a write. */ - if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN) - goto out2; ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { @@ -2091,12 +2262,27 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ee_len - (iblock - ee_block); ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ee_block, ee_len, newblock); + /* Do not put uninitialized extent in the cache */ - if (!ext4_ext_is_uninitialized(ex)) + if (!ext4_ext_is_uninitialized(ex)) { ext4_ext_put_in_cache(inode, ee_block, ee_len, ee_start, EXT4_EXT_CACHE_EXTENT); - goto out; + goto out; + } + if (create == EXT4_CREATE_UNINITIALIZED_EXT) + goto out; + if (!create) + goto out2; + + ret = ext4_ext_convert_to_initialized(handle, inode, + path, iblock, + max_blocks); + if (ret <= 0) + goto out2; + else + allocated = ret; + goto outnew; } } @@ -2105,8 +2291,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if (!create) { - /* put just found gap into cache to speed up - * subsequent requests */ + /* + * put just found gap into cache to speed up + * subsequent requests + */ ext4_ext_put_gap_in_cache(inode, path, iblock); goto out2; } @@ -2152,6 +2340,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); +outnew: __set_bit(BH_New, &bh_result->b_state); /* Cache only when it is _not_ an uninitialized extent */ @@ -2221,7 +2410,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) err = ext4_ext_remove_space(inode, last_block); /* In a multi-transaction 
truncate, we only make the final - * transaction synchronous. */ + * transaction synchronous. + */ if (IS_SYNC(inode)) handle->h_sync = 1; diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index e3d5afc6f23..edf49ec89ea 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -205,6 +205,9 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); -- cgit v1.2.3-70-g09d2 From 920c3ed741340a88f2042ab0c44a25b8c743a379 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2007 21:37:35 -0700 Subject: [SPARC64]: Add basic infrastructure for MD add/remove notification. And add dummy handlers for the VIO device layer. These will be filled in with real code after the vdc, vnet, and ds drivers are reworked to have simpler dependencies on the VIO device tree. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/mdesc.c | 78 +++++++++++++++++++++++++++++++++++++++++++-- arch/sparc64/kernel/vio.c | 33 +++++++++++++++++++ include/asm-sparc64/mdesc.h | 10 ++++++ 3 files changed, 118 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index de5310ffdb4..302ba5e5a0b 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -137,7 +137,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) sizeof(struct mdesc_hdr) + mdesc_size); - base = kmalloc(handle_size + 15, GFP_KERNEL); + base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); if (base) { struct mdesc_handle *hp; unsigned long addr; @@ -214,18 +214,83 @@ void mdesc_release(struct mdesc_handle *hp) } EXPORT_SYMBOL(mdesc_release); +static DEFINE_MUTEX(mdesc_mutex); +static struct mdesc_notifier_client *client_list; + +void mdesc_register_notifier(struct mdesc_notifier_client *client) +{ + u64 node; + + mutex_lock(&mdesc_mutex); + client->next = client_list; + client_list = client; + + mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name) + client->add(cur_mdesc, node); + + mutex_unlock(&mdesc_mutex); +} + +/* Run 'func' on nodes which are in A but not in B. 
*/ +static void invoke_on_missing(const char *name, + struct mdesc_handle *a, + struct mdesc_handle *b, + void (*func)(struct mdesc_handle *, u64)) +{ + u64 node; + + mdesc_for_each_node_by_name(a, node, name) { + const u64 *id = mdesc_get_property(a, node, "id", NULL); + int found = 0; + u64 fnode; + + mdesc_for_each_node_by_name(b, fnode, name) { + const u64 *fid = mdesc_get_property(b, fnode, + "id", NULL); + + if (*id == *fid) { + found = 1; + break; + } + } + if (!found) + func(a, node); + } +} + +static void notify_one(struct mdesc_notifier_client *p, + struct mdesc_handle *old_hp, + struct mdesc_handle *new_hp) +{ + invoke_on_missing(p->node_name, old_hp, new_hp, p->remove); + invoke_on_missing(p->node_name, new_hp, old_hp, p->add); +} + +static void mdesc_notify_clients(struct mdesc_handle *old_hp, + struct mdesc_handle *new_hp) +{ + struct mdesc_notifier_client *p = client_list; + + while (p) { + notify_one(p, old_hp, new_hp); + p = p->next; + } +} + void mdesc_update(void) { unsigned long len, real_len, status; struct mdesc_handle *hp, *orig_hp; unsigned long flags; + mutex_lock(&mdesc_mutex); + (void) sun4v_mach_desc(0UL, 0UL, &len); hp = mdesc_alloc(len, &kmalloc_mdesc_memops); if (!hp) { printk(KERN_ERR "MD: mdesc alloc fails\n"); - return; + goto out; } status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); @@ -234,18 +299,25 @@ void mdesc_update(void) status); atomic_dec(&hp->refcnt); mdesc_free(hp); - return; + goto out; } spin_lock_irqsave(&mdesc_lock, flags); orig_hp = cur_mdesc; cur_mdesc = hp; + spin_unlock_irqrestore(&mdesc_lock, flags); + mdesc_notify_clients(orig_hp, hp); + + spin_lock_irqsave(&mdesc_lock, flags); if (atomic_dec_and_test(&orig_hp->refcnt)) mdesc_free(orig_hp); else list_add(&orig_hp->list, &mdesc_zombie_list); spin_unlock_irqrestore(&mdesc_lock, flags); + +out: + mutex_unlock(&mdesc_mutex); } static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c 
index 49569b44ea1..d487be093b4 100644 --- a/arch/sparc64/kernel/vio.c +++ b/arch/sparc64/kernel/vio.c @@ -172,6 +172,36 @@ struct device_node *cdev_node; static struct vio_dev *root_vdev; static u64 cdev_cfg_handle; +static void vio_add(struct mdesc_handle *hp, u64 node) +{ + const char *name = mdesc_get_property(hp, node, "name", NULL); + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + printk(KERN_ERR "VIO: Device add (%s) ID[%lx]\n", + name, *id); +} + +static void vio_remove(struct mdesc_handle *hp, u64 node) +{ + const char *name = mdesc_get_property(hp, node, "name", NULL); + const u64 *id = mdesc_get_property(hp, node, "id", NULL); + + printk(KERN_ERR "VIO: Device remove (%s) ID[%lx]\n", + name, *id); +} + +static struct mdesc_notifier_client vio_device_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "virtual-device-port", +}; + +static struct mdesc_notifier_client vio_ds_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "domain-services-port", +}; + static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, struct vio_dev *vdev) { @@ -381,6 +411,9 @@ static int __init vio_init(void) cdev_cfg_handle = *cfg_handle; + mdesc_register_notifier(&vio_device_notifier); + mdesc_register_notifier(&vio_ds_notifier); + create_devices(hp, root); mdesc_release(hp); diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h index e97c4313375..1acc7272e53 100644 --- a/include/asm-sparc64/mdesc.h +++ b/include/asm-sparc64/mdesc.h @@ -61,6 +61,16 @@ extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc); extern void mdesc_update(void); +struct mdesc_notifier_client { + void (*add)(struct mdesc_handle *handle, u64 node); + void (*remove)(struct mdesc_handle *handle, u64 node); + + const char *node_name; + struct mdesc_notifier_client *next; +}; + +extern void mdesc_register_notifier(struct mdesc_notifier_client *client); + extern void mdesc_fill_in_cpu_data(cpumask_t mask); extern void 
sun4v_mdesc_init(void); -- cgit v1.2.3-70-g09d2 From 6160f63518406485c7009cb0f2e1588ea3abccc1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Jul 2007 23:03:47 -0700 Subject: [SPARC64]: Massively simplify VIO device layer and support hot add/remove. Create and destroy VIO devices in response to MD update events. These run synchronously inside of the MD update mutex so the VIO layer doesn't need to do internal locking of any sort. Signed-off-by: David S. Miller --- arch/sparc64/kernel/vio.c | 123 ++++++++++++++++++++++------------------------ include/asm-sparc64/vio.h | 2 +- 2 files changed, 60 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c index d487be093b4..8d3cc4fdb55 100644 --- a/arch/sparc64/kernel/vio.c +++ b/arch/sparc64/kernel/vio.c @@ -172,36 +172,6 @@ struct device_node *cdev_node; static struct vio_dev *root_vdev; static u64 cdev_cfg_handle; -static void vio_add(struct mdesc_handle *hp, u64 node) -{ - const char *name = mdesc_get_property(hp, node, "name", NULL); - const u64 *id = mdesc_get_property(hp, node, "id", NULL); - - printk(KERN_ERR "VIO: Device add (%s) ID[%lx]\n", - name, *id); -} - -static void vio_remove(struct mdesc_handle *hp, u64 node) -{ - const char *name = mdesc_get_property(hp, node, "name", NULL); - const u64 *id = mdesc_get_property(hp, node, "id", NULL); - - printk(KERN_ERR "VIO: Device remove (%s) ID[%lx]\n", - name, *id); -} - -static struct mdesc_notifier_client vio_device_notifier = { - .add = vio_add, - .remove = vio_remove, - .node_name = "virtual-device-port", -}; - -static struct mdesc_notifier_client vio_ds_notifier = { - .add = vio_add, - .remove = vio_remove, - .node_name = "domain-services-port", -}; - static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, struct vio_dev *vdev) { @@ -231,10 +201,11 @@ static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, static struct vio_dev *vio_create_one(struct 
mdesc_handle *hp, u64 mp, struct device *parent) { - const char *type, *compat; + const char *type, *compat, *bus_id_name; struct device_node *dp; struct vio_dev *vdev; int err, tlen, clen; + const u64 *id; type = mdesc_get_property(hp, mp, "device-type", &tlen); if (!type) { @@ -250,6 +221,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, return NULL; } + bus_id_name = type; + if (!strcmp(type, "domain-services-port")) + bus_id_name = "ds"; + + if (strlen(bus_id_name) >= KOBJ_NAME_LEN - 4) { + printk(KERN_ERR "VIO: bus_id_name [%s] is too long.\n", + bus_id_name); + return NULL; + } + compat = mdesc_get_property(hp, mp, "device-type", &clen); if (!compat) { clen = 0; @@ -279,7 +260,14 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, vio_fill_channel_info(hp, mp, vdev); - snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp); + id = mdesc_get_property(hp, mp, "id", NULL); + if (!id) + snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s", + bus_id_name); + else + snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s-%lu", + bus_id_name, *id); + vdev->dev.parent = parent; vdev->dev.bus = &vio_bus_type; vdev->dev.release = vio_dev_release; @@ -299,6 +287,8 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, } vdev->dp = dp; + printk(KERN_ERR "VIO: Adding device %s\n", vdev->dev.bus_id); + err = device_register(&vdev->dev); if (err) { printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", @@ -313,46 +303,46 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, return vdev; } -static void walk_tree(struct mdesc_handle *hp, u64 n, struct vio_dev *parent) +static void vio_add(struct mdesc_handle *hp, u64 node) { - u64 a; - - mdesc_for_each_arc(a, hp, n, MDESC_ARC_TYPE_FWD) { - struct vio_dev *vdev; - u64 target; - - target = mdesc_arc_target(hp, a); - vdev = vio_create_one(hp, target, &parent->dev); - if (vdev) - walk_tree(hp, target, vdev); - } + (void) vio_create_one(hp, node, 
&root_vdev->dev); } -static void create_devices(struct mdesc_handle *hp, u64 root) +static int vio_md_node_match(struct device *dev, void *arg) { - u64 mp; + struct vio_dev *vdev = to_vio_dev(dev); - root_vdev = vio_create_one(hp, root, NULL); - if (!root_vdev) { - printk(KERN_ERR "VIO: Coult not create root device.\n"); - return; - } + if (vdev->mp == (u64) arg) + return 1; - walk_tree(hp, root, root_vdev); + return 0; +} - /* Domain services is odd as it doesn't sit underneath the - * channel-devices node, so we plug it in manually. - */ - mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "domain-services"); - if (mp != MDESC_NODE_NULL) { - struct vio_dev *parent = vio_create_one(hp, mp, - &root_vdev->dev); +static void vio_remove(struct mdesc_handle *hp, u64 node) +{ + struct device *dev; - if (parent) - walk_tree(hp, mp, parent); + dev = device_find_child(&root_vdev->dev, (void *) node, + vio_md_node_match); + if (dev) { + printk(KERN_INFO "VIO: Removing device %s\n", dev->bus_id); + + device_unregister(dev); } } +static struct mdesc_notifier_client vio_device_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "virtual-device-port", +}; + +static struct mdesc_notifier_client vio_ds_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "domain-services-port", +}; + const char *channel_devices_node = "channel-devices"; const char *channel_devices_compat = "SUNW,sun4v-channel-devices"; const char *cfg_handle_prop = "cfg-handle"; @@ -411,14 +401,19 @@ static int __init vio_init(void) cdev_cfg_handle = *cfg_handle; + root_vdev = vio_create_one(hp, root, NULL); + err = -ENODEV; + if (!root_vdev) { + printk(KERN_ERR "VIO: Coult not create root device.\n"); + goto out_release; + } + mdesc_register_notifier(&vio_device_notifier); mdesc_register_notifier(&vio_ds_notifier); - create_devices(hp, root); - mdesc_release(hp); - return 0; + return err; out_release: mdesc_release(hp); diff --git a/include/asm-sparc64/vio.h 
b/include/asm-sparc64/vio.h index 83c96422e9d..c0a8d4ed5bc 100644 --- a/include/asm-sparc64/vio.h +++ b/include/asm-sparc64/vio.h @@ -264,7 +264,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, ((dr->prod - dr->cons) & (ring_size - 1))); } -#define VIO_MAX_TYPE_LEN 64 +#define VIO_MAX_TYPE_LEN 32 #define VIO_MAX_COMPAT_LEN 64 struct vio_dev { -- cgit v1.2.3-70-g09d2 From 0785b9dcdc3d93e67529e4bd819a427776d3a07e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 18 Jul 2007 00:09:30 -0700 Subject: [SPARC]: Mark sparc and sparc64 as not having virt_to_bus Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 3 +++ arch/sparc64/Kconfig | 3 +++ include/asm-sparc64/io.h | 5 ----- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 73df7115325..603d83ad65c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -21,6 +21,9 @@ config GENERIC_ISA_DMA bool default y +config ARCH_NO_VIRT_TO_BUS + def_bool y + source "init/Kconfig" menu "General machine setup" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index b84b6af1241..df6ee71894d 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -62,6 +62,9 @@ config AUDIT_ARCH bool default y +config ARCH_NO_VIRT_TO_BUS + def_bool y + choice prompt "Kernel page size" default SPARC64_PAGE_SIZE_8KB diff --git a/include/asm-sparc64/io.h b/include/asm-sparc64/io.h index ad595b67984..9565a892801 100644 --- a/include/asm-sparc64/io.h +++ b/include/asm-sparc64/io.h @@ -14,11 +14,6 @@ #define __SLOW_DOWN_IO do { } while (0) #define SLOW_DOWN_IO do { } while (0) -extern unsigned long virt_to_bus_not_defined_use_pci_map(volatile void *addr); -#define virt_to_bus virt_to_bus_not_defined_use_pci_map -extern unsigned long bus_to_virt_not_defined_use_pci_map(volatile void *addr); -#define bus_to_virt bus_to_virt_not_defined_use_pci_map - /* BIO layer definitions. 
*/ extern unsigned long kern_base, kern_size; #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -- cgit v1.2.3-70-g09d2 From 16751347a060a10c09b11593bb179fd5b0240c04 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Jul 2007 18:35:52 -0700 Subject: [TCP]: remove unused argument to cong_avoid op None of the existing TCP congestion controls use the rtt value passed in the ca_ops->cong_avoid interface. Which is lucky because seq_rtt could have been -1 when handling a duplicate ack. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++---- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cong.c | 3 +-- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_highspeed.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_hybla.c | 4 ++-- net/ipv4/tcp_illinois.c | 2 +- net/ipv4/tcp_input.c | 8 ++++---- net/ipv4/tcp_lp.c | 5 ++--- net/ipv4/tcp_scalable.c | 2 +- net/ipv4/tcp_vegas.c | 6 +++--- net/ipv4/tcp_veno.c | 6 +++--- net/ipv4/tcp_yeah.c | 2 +- 14 files changed, 24 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a8af9ae0017..8b404b1ef7c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -652,8 +652,7 @@ struct tcp_congestion_ops { /* lower bound for congestion window (optional) */ u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, - u32 rtt, u32 in_flight, int good_ack); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int good_ack); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -684,8 +683,7 @@ extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); -extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, - u32 rtt, u32 in_flight, int flag); +extern void tcp_reno_cong_avoid(struct 
sock *sk, u32 ack, u32 in_flight, int flag); extern u32 tcp_reno_min_cwnd(const struct sock *sk); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index dd9ef65ad3f..519de091a94 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -137,7 +137,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int data_acked) + u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1260e52ad77..55fca1820c3 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -324,8 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ebfaac2f9f4..d17da30d82d 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -270,7 +270,7 @@ static inline void measure_delay(struct sock *sk) } static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int data_acked) + u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 43d624e5043..14a073d8b60 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,7 +109,7 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); diff --git 
a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 4ba4a7ae0a8..632c05a7588 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, s32 rtt, u32 in_flight, int data_acked) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index e5be3511722..b3e55cf5617 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -85,7 +85,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -103,7 +103,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, return; if (!ca->hybla_en) - return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); if (ca->rho == 0) hybla_recalc_param(sk); diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index b2b2256d3b8..cc5de6f69d4 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -258,7 +258,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. 
*/ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4e5884ac8f2..fec8a7a4dba 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2323,11 +2323,11 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int good) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); + icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -2826,11 +2826,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); + tcp_cong_avoid(sk, ack, prior_in_flight, 0); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) - tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, prior_in_flight, 1); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index e49836ce012..80e140e3ec2 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. 
*/ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, - int flag) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } /** diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 4624501e968..be27a33a1c6 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,7 +15,7 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index e218a51cece..914e0307f7a 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -163,13 +163,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -228,7 +228,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. 
*/ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } else { u32 rtt, target_cwnd, diff; diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ec854cc5fad..7a55ddf8603 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -115,13 +115,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) } static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) - return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight, flag); /* limited by applications */ if (!tcp_is_cwnd_limited(sk, in_flight)) @@ -132,7 +132,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight, flag); } else { u32 rtt, target_cwnd; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 545ed237ab5..c04b7c6ec70 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -70,7 +70,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, - u32 seq_rtt, u32 in_flight, int flag) + u32 in_flight, int flag) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); -- cgit v1.2.3-70-g09d2 From bd0bf0765ea1fba80d7085e1f0375ec045631dc1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 18 Jul 2007 01:55:52 -0700 Subject: [XFRM]: Fix crash introduced by struct dst_entry reordering XFRM expects xfrm_dst->u.next to be same pointer as dst->next, which was broken by the dst_entry reordering in commit 1e19e02c~, causing an oops in xfrm_bundle_ok when walking the bundle upwards. 
Kill xfrm_dst->u.next and change the only user to use dst->next instead. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ae959e95017..a5f80bfbaaa 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -585,7 +585,6 @@ static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ct struct xfrm_dst { union { - struct xfrm_dst *next; struct dst_entry dst; struct rtable rt; struct rt6_info rt6; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 157bfbd250b..b48f06fc9fd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2141,7 +2141,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (last == first) break; - last = last->u.next; + last = (struct xfrm_dst *)last->u.dst.next; last->child_mtu_cached = mtu; } -- cgit v1.2.3-70-g09d2 From 456ad75c89cdb72e11dcdb6b0794802a6f50c8a3 Mon Sep 17 00:00:00 2001 From: Denis Cheng Date: Wed, 18 Jul 2007 02:10:54 -0700 Subject: [NET]: move dev_mc_discard from dev_mcast.c to dev.c Because this function is only called by unregister_netdevice, moving it allows this non-global function to be made static and its declaration in netdevice.h to be removed. In addition, __dev_addr_discard is only called by dev_mc_discard and dev_unicast_discard, so keeping these two functions in the same C file allows __dev_addr_discard to be made static as well and its declaration in netdevice.h to be removed. Furthermore, the sequential calls to dev_unicast_discard and then dev_mc_discard in unregister_netdevice follow the same pattern (netif_tx_lock_bh / __dev_addr_discard / netif_tx_unlock_bh); they should be merged into one to avoid acquiring and releasing dev->_xmit_lock twice. This will be done in a following patch. Signed-off-by: Denis Cheng Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 -- net/core/dev.c | 14 +++++++++++++- net/core/dev_mcast.c | 12 ------------ 3 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index da7a13c97eb..9820ca1e45e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1098,10 +1098,8 @@ extern int dev_mc_delete(struct net_device *dev, void *addr, int alen, int all extern int dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly); extern int dev_mc_sync(struct net_device *to, struct net_device *from); extern void dev_mc_unsync(struct net_device *to, struct net_device *from); -extern void dev_mc_discard(struct net_device *dev); extern int __dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all); extern int __dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly); -extern void __dev_addr_discard(struct dev_addr_list **list); extern void dev_set_promiscuity(struct net_device *dev, int inc); extern void dev_set_allmulti(struct net_device *dev, int inc); extern void netdev_state_change(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 13a0d9f6da5..3ba63aaa300 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2715,7 +2715,7 @@ int __dev_addr_add(struct dev_addr_list **list, int *count, return 0; } -void __dev_addr_discard(struct dev_addr_list **list) +static void __dev_addr_discard(struct dev_addr_list **list) { struct dev_addr_list *tmp; @@ -2785,6 +2785,18 @@ static void dev_unicast_discard(struct net_device *dev) netif_tx_unlock_bh(dev); } +/* + * Discard multicast list when a device is downed + */ + +static void dev_mc_discard(struct net_device *dev) +{ + netif_tx_lock_bh(dev); + __dev_addr_discard(&dev->mc_list); + dev->mc_count = 0; + netif_tx_unlock_bh(dev); +} + unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; diff --git 
a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 235a2a8a0d0..99aece1aecc 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -177,18 +177,6 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) } EXPORT_SYMBOL(dev_mc_unsync); -/* - * Discard multicast list when a device is downed - */ - -void dev_mc_discard(struct net_device *dev) -{ - netif_tx_lock_bh(dev); - __dev_addr_discard(&dev->mc_list); - dev->mc_count = 0; - netif_tx_unlock_bh(dev); -} - #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) { -- cgit v1.2.3-70-g09d2 From ebd61cc042b16e6cf2486aafbfff9e4be8c213ee Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 18 Jul 2007 02:21:50 -0700 Subject: [NETFILTER]: ipt_iprange.h must #include <linux/types.h> ipt_iprange.h must #include <linux/types.h> since it uses __be32. This patch fixes kernel Bugzilla #7604. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_iprange.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h index 34ab0fb736e..a92fefc3c7e 100644 --- a/include/linux/netfilter_ipv4/ipt_iprange.h +++ b/include/linux/netfilter_ipv4/ipt_iprange.h @@ -1,6 +1,8 @@ #ifndef _IPT_IPRANGE_H #define _IPT_IPRANGE_H +#include <linux/types.h> + #define IPRANGE_SRC 0x01 /* Match source IP address */ #define IPRANGE_DST 0x02 /* Match destination IP address */ #define IPRANGE_SRC_INV 0x10 /* Negate the condition */ -- cgit v1.2.3-70-g09d2 From 749269facaf87f6e516c3af12763e03181b9c139 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Wed, 18 Jul 2007 09:02:56 -0400 Subject: Change on-disk format to support 2^15 uninitialized extents This change was suggested by Andreas Dilger. This patch changes the EXT_MAX_LEN value and extent code which marks/checks uninitialized extents. 
With this change it will be possible to have initialized extents with 2^15 blocks (earlier the max blocks we could have was 2^15 - 1). This way we can have better extent-to-block alignment. Now, maximum number of blocks we can have in an initialized extent is 2^15 and in an uninitialized extent is 2^15 - 1. Signed-off-by: Amit Arora --- fs/ext4/extents.c | 28 +++++++++++++++++++++++++--- include/linux/ext4_fs_extents.h | 31 +++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ded3d469f97..77146b826a1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1107,7 +1107,7 @@ static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len; + unsigned short ext1_ee_len, ext2_ee_len, max_len; /* * Make sure that either both extents are uninitialized, or @@ -1116,6 +1116,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) return 0; + if (ext4_ext_is_uninitialized(ex1)) + max_len = EXT_UNINIT_MAX_LEN; + else + max_len = EXT_INIT_MAX_LEN; + ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1128,7 +1133,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > EXT_MAX_LEN) + if (ext1_ee_len + ext2_ee_len > max_len) return 0; #ifdef AGGRESSIVE_TEST if (le16_to_cpu(ex1->ee_len) >= 4) @@ -1815,7 +1820,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex->ee_block = cpu_to_le32(block); ex->ee_len = cpu_to_le16(num); - if (uninitialized) + /* + * Do not mark uninitialized if all the blocks in the + * extent have been removed. 
+ */ + if (uninitialized && num) ext4_ext_mark_uninitialized(ex); err = ext4_ext_dirty(handle, inode, path + depth); @@ -2308,6 +2317,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* allocate new block */ goal = ext4_ext_find_goal(inode, path, iblock); + /* + * See if request is beyond maximum number of blocks we can have in + * a single extent. For an initialized extent this limit is + * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is + * EXT_UNINIT_MAX_LEN. + */ + if (max_blocks > EXT_INIT_MAX_LEN && + create != EXT4_CREATE_UNINITIALIZED_EXT) + max_blocks = EXT_INIT_MAX_LEN; + else if (max_blocks > EXT_UNINIT_MAX_LEN && + create == EXT4_CREATE_UNINITIALIZED_EXT) + max_blocks = EXT_UNINIT_MAX_LEN; + /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ newex.ee_block = cpu_to_le32(iblock); newex.ee_len = cpu_to_le16(max_blocks); diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index edf49ec89ea..81406f3655d 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, #define EXT_MAX_BLOCK 0xffffffff -#define EXT_MAX_LEN ((1UL << 15) - 1) +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. 
+ * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ @@ -190,17 +208,22 @@ ext4_ext_invalidate_cache(struct inode *inode) static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) { - ext->ee_len |= cpu_to_le16(0x8000); + /* We can not have an uninitialized extent of zero length! */ + BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); + ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) { - return (int)(le16_to_cpu((ext)->ee_len) & 0x8000); + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); } static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) { - return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF); + return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? + le16_to_cpu(ext->ee_len) : + (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } extern int ext4_extent_tree_init(handle_t *, struct inode *); -- cgit v1.2.3-70-g09d2 From ff9ddf7e847c4dc533f119efb6c77a6e57ab6397 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Jul 2007 09:24:20 -0400 Subject: ext4: copy i_flags to inode flags on write Propagate flags such as S_APPEND, S_IMMUTABLE, etc. from i_flags into ext4-specific i_flags. Quota code changes these flags on quota files (to make it harder for sysadmin to screw himself) and these changes were not correctly propagated into the filesystem. 
(This is a forward port patch from ext3) Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 20 ++++++++++++++++++++ fs/ext4/ioctl.c | 1 + include/linux/ext4_fs.h | 1 + 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8416fa28c42..49035c5a2c4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2583,6 +2583,25 @@ void ext4_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } +/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ +void ext4_get_inode_flags(struct ext4_inode_info *ei) +{ + unsigned int flags = ei->vfs_inode.i_flags; + + ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| + EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT4_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT4_APPEND_FL; + if (flags & S_IMMUTABLE) + ei->i_flags |= EXT4_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT4_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT4_DIRSYNC_FL; +} + void ext4_read_inode(struct inode * inode) { struct ext4_iloc iloc; @@ -2744,6 +2763,7 @@ static int ext4_do_update_inode(handle_t *handle, if (ei->i_state & EXT4_STATE_NEW) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7b4aa4543c8..9737432f079 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -28,6 +28,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, switch (cmd) { case EXT4_IOC_GETFLAGS: + ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 87c2d7a05b0..33b2b1a2d79 100644 --- 
a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -868,6 +868,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern void ext4_truncate (struct inode *); extern void ext4_set_inode_flags(struct inode *); +extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_block_truncate_page(handle_t *handle, struct page *page, -- cgit v1.2.3-70-g09d2 From e23291b9120c11aafb2ee76fb71a062eb3c1056c Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Wed, 18 Jul 2007 08:57:06 -0400 Subject: jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG When the JBD code was forked to create the new JBD2 code base, the references to CONFIG_JBD_DEBUG were never changed to CONFIG_JBD2_DEBUG. This patch fixes that. Signed-off-by: Jose R. Santos Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 4 ++-- fs/ext4/ioctl.c | 4 ++-- fs/jbd2/journal.c | 14 +++++++------- fs/jbd2/recovery.c | 2 +- include/linux/ext4_fs.h | 4 ++-- include/linux/ext4_fs_sb.h | 2 +- include/linux/jbd2.h | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9de54ae48de..e53b4af52f1 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -517,7 +517,7 @@ do_more: /* * An HJ special. This is expensive... 
*/ -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG jbd_unlock_bh_state(bitmap_bh); { struct buffer_head *debug_bh; @@ -1597,7 +1597,7 @@ allocated: performed_allocation = 1; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG { struct buffer_head *debug_bh; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9737432f079..5b00775d509 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -141,7 +141,7 @@ flags_err: ext4_journal_stop(handle); return err; } -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG case EXT4_IOC_WAIT_FOR_READONLY: /* * This is racy - by the time we're woken up and running, @@ -283,7 +283,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC32_SETVERSION_OLD: cmd = EXT4_IOC_SETVERSION_OLD; break; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG case EXT4_IOC32_WAIT_FOR_READONLY: cmd = EXT4_IOC_WAIT_FOR_READONLY; break; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 78d63b818f0..8f530cc66d3 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -528,7 +528,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG spin_lock(&journal->j_state_lock); if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG @@ -1709,7 +1709,7 @@ void jbd2_slab_free(void *ptr, size_t size) * Journal_head storage management */ static struct kmem_cache *jbd2_journal_head_cache; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -1747,7 +1747,7 @@ static struct journal_head *journal_alloc_journal_head(void) struct journal_head *ret; static unsigned long last_warning; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG atomic_inc(&nr_journal_heads); #endif ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); @@ -1768,7 +1768,7 @@ static struct journal_head *journal_alloc_journal_head(void) static void journal_free_journal_head(struct journal_head *jh) { -#ifdef 
CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG atomic_dec(&nr_journal_heads); memset(jh, JBD_POISON_FREE, sizeof(*jh)); #endif @@ -1953,12 +1953,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) /* * /proc tunables */ -#if defined(CONFIG_JBD_DEBUG) +#if defined(CONFIG_JBD2_DEBUG) int jbd2_journal_enable_debug; EXPORT_SYMBOL(jbd2_journal_enable_debug); #endif -#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_PROC_FS) static struct proc_dir_entry *proc_jbd_debug; @@ -2073,7 +2073,7 @@ static int __init journal_init(void) static void __exit journal_exit(void) { -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG int n = atomic_read(&nr_journal_heads); if (n) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 395c92a04ac..e7730a045b9 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -295,7 +295,7 @@ int jbd2_journal_skip_recovery(journal_t *journal) printk(KERN_ERR "JBD: error %d scanning journal\n", err); ++journal->j_transaction_sequence; } else { -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); #endif jbd_debug(0, diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 33b2b1a2d79..45ec7258b2b 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -243,7 +243,7 @@ struct ext4_new_group_data { #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG #define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) @@ -259,7 +259,7 @@ struct ext4_new_group_data { #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef 
CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) #endif #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 2347557a327..0f7dc15924b 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -73,7 +73,7 @@ struct ext4_sb_info { struct list_head s_orphan; unsigned long s_commit_interval; struct block_device *journal_bdev; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0e0fedd2039..a37aca31de4 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -50,11 +50,11 @@ */ #define JBD_DEFAULT_MAX_COMMIT_AGE 5 -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG /* * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal * consistency checks. By default we don't do this unless - * CONFIG_JBD_DEBUG is on. + * CONFIG_JBD2_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING extern int jbd2_journal_enable_debug; -- cgit v1.2.3-70-g09d2 From 0f49d5d019afa4e94253bfc92f0daca3badb990b Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Wed, 18 Jul 2007 08:50:18 -0400 Subject: jbd2: Move jbd2-debug file to debugfs The jbd2-debug file used to be located in /proc/sys/fs/jbd2-debug, but it incorrectly used create_proc_entry() instead of the sysctl routines, and no proc entry was ever created. Instead of fixing this we might as well move the jbd2-debug file to debugfs which would be the preferred location for this kind of tunable. The new location is now /sys/kernel/debug/jbd2/jbd2-debug. Signed-off-by: Jose R. 
Santos Signed-off-by: "Theodore Ts'o" --- fs/Kconfig | 10 ++++---- fs/jbd2/journal.c | 67 +++++++++++++++++++++------------------------------- include/linux/jbd2.h | 2 +- 3 files changed, 33 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index 613df554728..6a649902c5a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -251,7 +251,7 @@ config JBD2 config JBD2_DEBUG bool "JBD2 (ext4dev/ext4) debugging support" - depends on JBD2 + depends on JBD2 && DEBUG_FS help If you are using the ext4dev/ext4 journaled file system (or potentially any other filesystem/device using JBD2), this option @@ -260,10 +260,10 @@ config JBD2_DEBUG By default, the debugging output will be turned off. If you select Y here, then you will be able to turn on debugging - with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between - 1 and 5. The higher the number, the more debugging output is - generated. To turn debugging off again, do - "echo 0 > /proc/sys/fs/jbd2-debug". + with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a + number between 1 and 5. The higher the number, the more debugging + output is generated. To turn debugging off again, do + "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". 
config FS_MBCACHE # Meta block cache for Extended Attributes (ext2/ext3/ext4) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8f530cc66d3..f290cb7cb83 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1951,64 +1952,50 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) } /* - * /proc tunables + * debugfs tunables */ #if defined(CONFIG_JBD2_DEBUG) -int jbd2_journal_enable_debug; +u8 jbd2_journal_enable_debug; EXPORT_SYMBOL(jbd2_journal_enable_debug); #endif -#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_DEBUG_FS) -static struct proc_dir_entry *proc_jbd_debug; +#define JBD2_DEBUG_NAME "jbd2-debug" -static int read_jbd_debug(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int ret; +struct dentry *jbd2_debugfs_dir, *jbd2_debug; - ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug); - *eof = 1; - return ret; +static void __init jbd2_create_debugfs_entry(void) +{ + jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL); + if (jbd2_debugfs_dir) + jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO, + jbd2_debugfs_dir, + &jbd2_journal_enable_debug); } -static int write_jbd_debug(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static void __exit jbd2_remove_debugfs_entry(void) { - char buf[32]; - - if (count > ARRAY_SIZE(buf) - 1) - count = ARRAY_SIZE(buf) - 1; - if (copy_from_user(buf, buffer, count)) - return -EFAULT; - buf[ARRAY_SIZE(buf) - 1] = '\0'; - jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10); - return count; + if (jbd2_debug) + debugfs_remove(jbd2_debug); + if (jbd2_debugfs_dir) + debugfs_remove(jbd2_debugfs_dir); } -#define JBD_PROC_NAME "sys/fs/jbd2-debug" +#else -static void __init create_jbd_proc_entry(void) +static void __init jbd2_create_debugfs_entry(void) { - proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 
0644, NULL); - if (proc_jbd_debug) { - /* Why is this so hard? */ - proc_jbd_debug->read_proc = read_jbd_debug; - proc_jbd_debug->write_proc = write_jbd_debug; - } + do { + } while (0); } -static void __exit jbd2_remove_jbd_proc_entry(void) +static void __exit jbd2_remove_debugfs_entry(void) { - if (proc_jbd_debug) - remove_proc_entry(JBD_PROC_NAME, NULL); + do { + } while (0); } -#else - -#define create_jbd_proc_entry() do {} while (0) -#define jbd2_remove_jbd_proc_entry() do {} while (0) - #endif struct kmem_cache *jbd2_handle_cache; @@ -2067,7 +2054,7 @@ static int __init journal_init(void) ret = journal_init_caches(); if (ret != 0) jbd2_journal_destroy_caches(); - create_jbd_proc_entry(); + jbd2_create_debugfs_entry(); return ret; } @@ -2078,7 +2065,7 @@ static void __exit journal_exit(void) if (n) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); #endif - jbd2_remove_jbd_proc_entry(); + jbd2_remove_debugfs_entry(); jbd2_journal_destroy_caches(); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index a37aca31de4..260d6d76c5f 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -57,7 +57,7 @@ * CONFIG_JBD2_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING -extern int jbd2_journal_enable_debug; +extern u8 jbd2_journal_enable_debug; #define jbd_debug(n, f, a...) \ do { \ -- cgit v1.2.3-70-g09d2 From ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80 Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:15:20 -0400 Subject: ext4: Add nanosecond timestamps This patch adds nanosecond timestamps for ext4. This involves adding *time_extra fields to the ext4_inode to extend the timestamps to 64-bits. Creation time is also added by this patch. These extended fields will fit into an inode if the filesystem was formatted with large inodes (-I 256 or larger) and there are currently no EAs consuming all of the available space. 
For new inodes we always reserve enough space for the kernel's known extended fields, but for inodes created with an old kernel this might not have been the case. So this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature flag (ro-compat, so that older kernels can't create inodes with a smaller extra_isize), which indicates whether the fields fitting inside s_min_extra_isize are available or not. If the expansion of inodes is unsuccessful then this feature will be disabled. This feature is only enabled if requested by the sysadmin. None of the extended inode fields is critical for correct filesystem operation. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Eric Sandeen Signed-off-by: Dave Kleikamp Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 8 ++--- fs/ext4/inode.c | 22 +++++++----- fs/ext4/ioctl.c | 4 +-- fs/ext4/namei.c | 16 ++++----- fs/ext4/super.c | 28 +++++++++++++++ fs/ext4/xattr.c | 2 +- include/linux/ext4_fs.h | 86 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/ext4_fs_i.h | 5 +++ include/linux/ext4_fs_sb.h | 1 + 9 files changed, 147 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c88b439ba5c..427f83066a0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -563,7 +563,8 @@ got: inode->i_ino = ino; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = + ext4_current_time(inode); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; @@ -595,9 +596,8 @@ got: spin_unlock(&sbi->s_next_gen_lock); ei->i_state = EXT4_STATE_NEW; - ei->i_extra_isize = - (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ?
- sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0; + + ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; if(DQUOT_ALLOC_INODE(inode)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49035c5a2c4..b83f91edebd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, /* We are done with atomic stuff, now do the rest of housekeeping */ - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); /* had we spliced it onto indirect block? */ @@ -2375,7 +2375,7 @@ do_indirects: ext4_discard_reservation(inode); mutex_unlock(&ei->truncate_mutex); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); /* @@ -2629,10 +2629,6 @@ void ext4_read_inode(struct inode * inode) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; ei->i_state = 0; ei->i_dir_start_lookup = 0; @@ -2710,6 +2706,11 @@ void ext4_read_inode(struct inode * inode) } else ei->i_extra_isize = 0; + EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); + if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; @@ -2791,9 +2792,12 @@ static int ext4_do_update_inode(handle_t *handle, } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = 
cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5b00775d509..c04c7ccba9e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -97,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, ei->i_flags = flags; ext4_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: @@ -134,7 +134,7 @@ flags_err: return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2de339dd755..40106b7ea4b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + dir->i_mtime = dir->i_ctime = ext4_current_time(dir); ext4_update_dx_flag(dir); dir->i_version++; ext4_mark_inode_dirty(handle, dir); @@ -2056,7 +2056,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) * recovery. 
*/ inode->i_size = 0; ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); drop_nlink(dir); ext4_update_dx_flag(dir); @@ -2106,13 +2106,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; + inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); retval = 0; @@ -2203,7 +2203,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); inc_nlink(inode); atomic_inc(&inode->i_count); @@ -2305,7 +2305,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. 
*/ - old_inode->i_ctime = CURRENT_TIME_SEC; + old_inode->i_ctime = ext4_current_time(old_inode); ext4_mark_inode_dirty(handle, old_inode); /* @@ -2338,9 +2338,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, if (new_inode) { drop_nlink(new_inode); - new_inode->i_ctime = CURRENT_TIME_SEC; + new_inode->i_ctime = ext4_current_time(new_inode); } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); ext4_update_dx_flag(old_dir); if (dir_bh) { BUFFER_TRACE(dir_bh, "get_write_access"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index af0835187e7..b47259f6f39 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1651,6 +1651,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_inode_size); goto failed_mount; } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) + sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << le32_to_cpu(es->s_log_frag_size); @@ -1874,6 +1876,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + + /* determine the minimum size of new large inodes, if present */ + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + printk(KERN_INFO "EXT4-fs: 
required extra inode space not" + "available.\n"); + } + /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e832e96095b..fe16a569d06 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1013,7 +1013,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, } if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 45ec7258b2b..df5e38faa15 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -288,7 +288,7 @@ struct ext4_inode { __le16 i_uid; /* Low 16 bits of Owner Uid */ __le32 i_size; /* Size in bytes */ __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Creation time */ + __le32 i_ctime; /* Inode Change time */ __le32 i_mtime; /* Modification time */ __le32 i_dtime; /* Deletion Time */ __le16 i_gid; /* Low 16 bits of Group Id */ @@ -337,10 +337,85 @@ struct ext4_inode { } osd2; /* OS dependent 2 */ __le16 i_extra_isize; __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ }; #define i_size_high i_dir_acl +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available 
space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define 
EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_frag osd2.linux2.l_i_frag @@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) return container_of(inode, struct ext4_inode_info, vfs_inode); } +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || @@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) /* diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 9de49440699..1a511e9905a 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -153,6 +153,11 @@ struct ext4_inode_info { unsigned long i_ext_generation; struct ext4_ext_cache i_cached_extent; + /* + * File creation time. 
Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 0f7dc15924b..1b2ffee12be 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -81,6 +81,7 @@ struct ext4_sb_info { char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ #ifdef EXTENTS_STATS /* ext4 extents stats */ -- cgit v1.2.3-70-g09d2 From 6dd4ee7cab7e3a17c571aebd444f4344c8c4946e Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:19:57 -0400 Subject: ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields We need to make sure that existing ext3 filesystems can also make use of the new fields that have been added to the ext4 inode. We use s_want_extra_isize and s_min_extra_isize to decide by how much we should expand the inode. If the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature is set then we expand the inode by max(s_want_extra_isize, s_min_extra_isize, sizeof(ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE) bytes. It is still an open question whether users should be able to set s_*_extra_isize smaller than the known fields or not. This patch also adds the functionality to expand inodes to include the newly added fields. We start by trying to expand by s_want_extra_isize bytes and, if that fails, we try to expand by s_min_extra_isize bytes. This is done by changing the i_extra_isize if enough space is available in the inode and no EAs are present. If EAs are present and there is enough space in the inode then the EAs in the inode are shifted to make space. If enough space is not available in the inode due to the EAs then 1 or more EAs are shifted to the external EA block.
In the worst case when even the external EA block does not have enough space we inform the user that some EA would need to be deleted or s_min_extra_isize would have to be reduced. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 63 ++++++++++- fs/ext4/xattr.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++-- fs/ext4/xattr.h | 17 +++ include/linux/ext4_fs.h | 1 + 4 files changed, 347 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b83f91edebd..f6d8528c4f5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3105,6 +3105,39 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, return err; } +/* + * Expand an inode by new_extra_isize bytes. + * Returns 0 on success or negative error number on failure. + */ +int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, + struct ext4_iloc iloc, handle_t *handle) +{ + struct ext4_inode *raw_inode; + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) + return 0; + + raw_inode = ext4_raw_inode(&iloc); + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + + /* No extended attributes present */ + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, + new_extra_isize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + return 0; + } + + /* try to expand with EAs present */ + return ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); +} + /* * What we do here is to mark the in-core inode as clean with respect to inode * dirtiness (it may still be data-dirty). 
@@ -3129,10 +3162,38 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { struct ext4_iloc iloc; - int err; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; + int err, ret; might_sleep(); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && + !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { + /* + * We need extra buffer credits since we may write into EA block + * with this same handle. If journal_extend fails, then it will + * only result in a minor loss of functionality for that inode. + * If this is felt to be critical, then e2fsck should be run to + * force a large enough s_min_extra_isize. + */ + if ((jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { + ret = ext4_expand_extra_isize(inode, + sbi->s_want_extra_isize, + iloc, handle); + if (ret) { + EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + if (mnt_count != sbi->s_es->s_mnt_count) { + ext4_warning(inode->i_sb, __FUNCTION__, + "Unable to expand inode %lu. Delete" + " some EAs or run e2fsck.", + inode->i_ino); + mnt_count = sbi->s_es->s_mnt_count; + } + } + } + } if (!err) err = ext4_mark_iloc_dirty(handle, inode, &iloc); return err; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fe16a569d06..b10d68fffb5 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -66,13 +66,6 @@ #define BFIRST(bh) ENTRY(BHDR(bh)+1) #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -#define IHDR(inode, raw_inode) \ - ((struct ext4_xattr_ibody_header *) \ - ((void *)raw_inode + \ - EXT4_GOOD_OLD_INODE_SIZE + \ - EXT4_I(inode)->i_extra_isize)) -#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) - #ifdef EXT4_XATTR_DEBUG # define ea_idebug(inode, f...) do { \ printk(KERN_DEBUG "inode %s:%lu: ", \ @@ -508,6 +501,24 @@ out: return; } +/* + * Find the available free space for EAs. 
This also returns the total number of + * bytes used by EA entries. + */ +static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, + size_t *min_offs, void *base, int *total) +{ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + *total += EXT4_XATTR_LEN(last->e_name_len); + if (!last->e_value_block && last->e_value_size) { + size_t offs = le16_to_cpu(last->e_value_offs); + if (offs < *min_offs) + *min_offs = offs; + } + } + return (*min_offs - ((void *)last - base) - sizeof(__u32)); +} + struct ext4_xattr_info { int name_index; const char *name; @@ -1014,6 +1025,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); + if (!value) + EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1066,6 +1079,253 @@ retry: return error; } +/* + * Shift the EA entries in the inode to create space for the increased + * i_extra_isize. + */ +static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, + int value_offs_shift, void *to, + void *from, size_t n, int blocksize) +{ + struct ext4_xattr_entry *last = entry; + int new_offs; + + /* Adjust the value offsets of the entries */ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + if (!last->e_value_block && last->e_value_size) { + new_offs = le16_to_cpu(last->e_value_offs) + + value_offs_shift; + BUG_ON(new_offs + le32_to_cpu(last->e_value_size) + > blocksize); + last->e_value_offs = cpu_to_le16(new_offs); + } + } + /* Shift the entries by n bytes */ + memmove(to, from, n); +} + +/* + * Expand an inode by new_extra_isize bytes when EAs are present. + * Returns 0 on success or negative error number on failure. 
+ */ +int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry, *last, *first; + struct buffer_head *bh = NULL; + struct ext4_xattr_ibody_find *is = NULL; + struct ext4_xattr_block_find *bs = NULL; + char *buffer = NULL, *b_entry_name = NULL; + size_t min_offs, free; + int total_ino, total_blk; + void *base, *start, *end; + int extra_isize = 0, error = 0, tried_min_extra_isize = 0; + int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize; + + down_write(&EXT4_I(inode)->xattr_sem); +retry: + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + } + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + + /* + * Check if enough free space is available in the inode to shift the + * entries ahead by new_extra_isize. + */ + + base = start = entry; + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + min_offs = end - base; + last = entry; + total_ino = sizeof(struct ext4_xattr_ibody_header); + + free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); + if (free >= new_extra_isize) { + entry = IFIRST(header); + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize + - new_extra_isize, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, + (void *)header, total_ino, + inode->i_sb->s_blocksize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + error = 0; + goto cleanup; + } + + /* + * Enough free space isn't available in the inode, check if + * EA block can hold new_extra_isize bytes. 
+ */ + if (EXT4_I(inode)->i_file_acl) { + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + if (ext4_xattr_check_block(bh)) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + base = BHDR(bh); + first = BFIRST(bh); + end = bh->b_data + bh->b_size; + min_offs = end - base; + free = ext4_xattr_free_space(first, &min_offs, base, + &total_blk); + if (free < new_extra_isize) { + if (!tried_min_extra_isize && s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + brelse(bh); + goto retry; + } + error = -1; + goto cleanup; + } + } else { + free = inode->i_sb->s_blocksize; + } + + while (new_extra_isize > 0) { + size_t offs, size, entry_size; + struct ext4_xattr_entry *small_entry = NULL; + struct ext4_xattr_info i = { + .value = NULL, + .value_len = 0, + }; + unsigned int total_size; /* EA entry size + value size */ + unsigned int shift_bytes; /* No. of bytes to shift EAs by? 
*/ + unsigned int min_total_size = ~0U; + + is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); + bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); + if (!is || !bs) { + error = -ENOMEM; + goto cleanup; + } + + is->s.not_found = -ENODATA; + bs->s.not_found = -ENODATA; + is->iloc.bh = NULL; + bs->bh = NULL; + + last = IFIRST(header); + /* Find the entry best suited to be pushed into EA block */ + entry = NULL; + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + total_size = + EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + + EXT4_XATTR_LEN(last->e_name_len); + if (total_size <= free && total_size < min_total_size) { + if (total_size < new_extra_isize) { + small_entry = last; + } else { + entry = last; + min_total_size = total_size; + } + } + } + + if (entry == NULL) { + if (small_entry) { + entry = small_entry; + } else { + if (!tried_min_extra_isize && + s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + goto retry; + } + error = -1; + goto cleanup; + } + } + offs = le16_to_cpu(entry->e_value_offs); + size = le32_to_cpu(entry->e_value_size); + entry_size = EXT4_XATTR_LEN(entry->e_name_len); + i.name_index = entry->e_name_index, + buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS); + b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); + if (!buffer || !b_entry_name) { + error = -ENOMEM; + goto cleanup; + } + /* Save the entry name and the entry value */ + memcpy(buffer, (void *)IFIRST(header) + offs, + EXT4_XATTR_SIZE(size)); + memcpy(b_entry_name, entry->e_name, entry->e_name_len); + b_entry_name[entry->e_name_len] = '\0'; + i.name = b_entry_name; + + error = ext4_get_inode_loc(inode, &is->iloc); + if (error) + goto cleanup; + + error = ext4_xattr_ibody_find(inode, &i, is); + if (error) + goto cleanup; + + /* Remove the chosen entry from the inode */ + error = ext4_xattr_ibody_set(handle, inode, &i, is); + + entry = IFIRST(header); + if (entry_size + EXT4_XATTR_SIZE(size) >= 
new_extra_isize) + shift_bytes = new_extra_isize; + else + shift_bytes = entry_size + size; + /* Adjust the offsets and shift the remaining entries ahead */ + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - + shift_bytes, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, + (void *)header, total_ino - entry_size, + inode->i_sb->s_blocksize); + + extra_isize += shift_bytes; + new_extra_isize -= shift_bytes; + EXT4_I(inode)->i_extra_isize = extra_isize; + + i.name = b_entry_name; + i.value = buffer; + i.value_len = cpu_to_le32(size); + error = ext4_xattr_block_find(inode, &i, bs); + if (error) + goto cleanup; + + /* Add entry which was removed from the inode into the block */ + error = ext4_xattr_block_set(handle, inode, &i, bs); + if (error) + goto cleanup; + kfree(b_entry_name); + kfree(buffer); + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + } + brelse(bh); + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + +cleanup: + kfree(b_entry_name); + kfree(buffer); + if (is) + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + brelse(bh); + up_write(&EXT4_I(inode)->xattr_sem); + return error; +} + + + /* * ext4_xattr_delete_inode() * diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 79432b35398..d7f5d6a1265 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -56,6 +56,13 @@ struct ext4_xattr_entry { #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) +#define IHDR(inode, raw_inode) \ + ((struct ext4_xattr_ibody_header *) \ + ((void *)raw_inode + \ + EXT4_GOOD_OLD_INODE_SIZE + \ + EXT4_I(inode)->i_extra_isize)) +#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) + # ifdef CONFIG_EXT4DEV_FS_XATTR extern struct xattr_handler ext4_xattr_user_handler; @@ -74,6 +81,9 @@ extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, extern void ext4_xattr_delete_inode(handle_t *, struct inode *); extern void ext4_xattr_put_super(struct super_block *); +extern int 
ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle); + extern int init_ext4_xattr(void); extern void exit_ext4_xattr(void); @@ -129,6 +139,13 @@ exit_ext4_xattr(void) { } +static inline int +ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle) +{ + return -EOPNOTSUPP; +} + #define ext4_xattr_handlers NULL # endif /* CONFIG_EXT4DEV_FS_XATTR */ diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index df5e38faa15..52dcc24dd98 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -202,6 +202,7 @@ struct ext4_group_desc #define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { -- cgit v1.2.3-70-g09d2 From f8628a14a27eb4512a1ede43de1d9db4d9f92bc3 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 18 Jul 2007 08:38:01 -0400 Subject: ext4: Remove 65000 subdirectory limit This patch adds support to ext4 for allowing more than 65000 subdirectories. Currently the maximum number of subdirectories is capped at 32000. If we exceed 65000 subdirectories in an htree directory it sets the inode link count to 1 and no longer counts subdirectories. The directory link count is not actually used when determining if a directory is empty, as that only counts subdirectories and not regular files that might be in there. An EXT4_FEATURE_RO_COMPAT_DIR_NLINK flag has been added and it is set if the subdir count for any directory crosses 65000. A later fsck will clear EXT4_FEATURE_RO_COMPAT_DIR_NLINK if there are no longer any directories with >65000 subdirs. 
Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 60 +++++++++++++++++++++++++++++++++++++------------ include/linux/ext4_fs.h | 4 +++- 2 files changed, 49 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 40106b7ea4b..da224974af7 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1629,6 +1629,35 @@ static int ext4_delete_entry (handle_t *handle, return -ENOENT; } +/* + * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, + * since this indicates that nlinks count was previously 1. + */ +static void ext4_inc_count(handle_t *handle, struct inode *inode) +{ + inc_nlink(inode); + if (is_dx(inode) && inode->i_nlink > 1) { + /* limit is 16-bit i_links_count */ + if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { + inode->i_nlink = 1; + EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_DIR_NLINK); + } + } +} + +/* + * If a directory had nlink == 1, then we should let it be 1. This indicates + * directory has >EXT4_LINK_MAX subdirs. + */ +static void ext4_dec_count(handle_t *handle, struct inode *inode) +{ + drop_nlink(inode); + if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) + inc_nlink(inode); +} + + static int ext4_add_nondir(handle_t *handle, struct dentry *dentry, struct inode *inode) { @@ -1725,7 +1754,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) struct ext4_dir_entry_2 * de; int err, retries = 0; - if (dir->i_nlink >= EXT4_LINK_MAX) + if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; retry: @@ -1748,7 +1777,7 @@ retry: inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; dir_block = ext4_bread (handle, inode, 0, 1, &err); if (!dir_block) { - drop_nlink(inode); /* is this nlink == 0? */ + ext4_dec_count(handle, inode); /* is this nlink == 0? 
*/ ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1780,7 +1809,7 @@ retry: iput (inode); goto out_stop; } - inc_nlink(dir); + ext4_inc_count(handle, dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); @@ -2045,9 +2074,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; - if (inode->i_nlink != 2) + if (!EXT4_DIR_LINK_EMPTY(inode)) ext4_warning (inode->i_sb, "ext4_rmdir", - "empty directory has nlink!=2 (%d)", + "empty directory has too many links (%d)", inode->i_nlink); inode->i_version++; clear_nlink(inode); @@ -2058,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) ext4_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); - drop_nlink(dir); + ext4_dec_count(handle, dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); @@ -2109,7 +2138,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + ext4_dec_count(handle, inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = ext4_current_time(inode); @@ -2159,7 +2188,7 @@ retry: err = __page_symlink(inode, symname, l, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { - drop_nlink(inode); + ext4_dec_count(handle, inode); ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2185,8 +2214,9 @@ static int ext4_link (struct dentry * old_dentry, struct inode *inode = old_dentry->d_inode; int err, retries = 0; - if (inode->i_nlink >= EXT4_LINK_MAX) + if (EXT4_DIR_LINK_MAX(inode)) return -EMLINK; + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. 
@@ -2204,7 +2234,7 @@ retry: handle->h_sync = 1; inode->i_ctime = ext4_current_time(inode); - inc_nlink(inode); + ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); err = ext4_add_nondir(handle, dentry, inode); @@ -2337,7 +2367,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, } if (new_inode) { - drop_nlink(new_inode); + ext4_dec_count(handle, new_inode); new_inode->i_ctime = ext4_current_time(new_inode); } old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); @@ -2348,11 +2378,13 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, dir_bh); - drop_nlink(old_dir); + ext4_dec_count(handle, old_dir); if (new_inode) { - drop_nlink(new_inode); + /* checked empty_dir above, can't have another parent, + * ext4_dec_count() won't work for many-linked dirs */ + new_inode->i_nlink = 0; } else { - inc_nlink(new_dir); + ext4_inc_count(handle, new_dir); ext4_update_dx_flag(new_dir); ext4_mark_inode_dirty(handle, new_dir); } diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 52dcc24dd98..cdee7aaa57a 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -71,7 +71,7 @@ /* * Maximal count of links to a file */ -#define EXT4_LINK_MAX 32000 +#define EXT4_LINK_MAX 65000 /* * Macro-instructions used to manage several block sizes @@ -692,6 +692,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -710,6 +711,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) 
EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) -- cgit v1.2.3-70-g09d2 From 3261ebd7d4194ff30d0eae7ba8d937dcccf7235d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 May 2007 17:41:46 +0300 Subject: UBI: kill homegrown endian macros Kill UBI's homegrown endianness handling and replace it with the standard kernel endianness handling. Signed-off-by: Christoph Hellwig Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/debug.c | 38 +++++++++--------- drivers/mtd/ubi/eba.c | 62 ++++++++++++++--------------- drivers/mtd/ubi/io.c | 46 ++++++++++----------- drivers/mtd/ubi/scan.c | 56 +++++++++++++------------- drivers/mtd/ubi/vmt.c | 18 ++++----- drivers/mtd/ubi/vtbl.c | 40 +++++++++---------- drivers/mtd/ubi/wl.c | 4 +- include/mtd/ubi-header.h | 101 +++++++++++++++++------------------------------ 8 files changed, 168 insertions(+), 197 deletions(-) (limited to 'include') diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 86364221faf..9a996c5280f 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -35,12 +35,12 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { dbg_msg("erase counter header dump:"); - dbg_msg("magic %#08x", ubi32_to_cpu(ec_hdr->magic)); + dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); dbg_msg("version %d", (int)ec_hdr->version); - dbg_msg("ec %llu", (long long)ubi64_to_cpu(ec_hdr->ec)); - dbg_msg("vid_hdr_offset %d", ubi32_to_cpu(ec_hdr->vid_hdr_offset)); - dbg_msg("data_offset %d", ubi32_to_cpu(ec_hdr->data_offset)); - dbg_msg("hdr_crc 
%#08x", be32_to_cpu(ec_hdr->hdr_crc)); dbg_msg("erase counter header hexdump:"); ubi_dbg_hexdump(ec_hdr, UBI_EC_HDR_SIZE); } @@ -52,20 +52,20 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { dbg_msg("volume identifier header dump:"); - dbg_msg("magic %08x", ubi32_to_cpu(vid_hdr->magic)); + dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); dbg_msg("version %d", (int)vid_hdr->version); dbg_msg("vol_type %d", (int)vid_hdr->vol_type); dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); dbg_msg("compat %d", (int)vid_hdr->compat); - dbg_msg("vol_id %d", ubi32_to_cpu(vid_hdr->vol_id)); - dbg_msg("lnum %d", ubi32_to_cpu(vid_hdr->lnum)); - dbg_msg("leb_ver %u", ubi32_to_cpu(vid_hdr->leb_ver)); - dbg_msg("data_size %d", ubi32_to_cpu(vid_hdr->data_size)); - dbg_msg("used_ebs %d", ubi32_to_cpu(vid_hdr->used_ebs)); - dbg_msg("data_pad %d", ubi32_to_cpu(vid_hdr->data_pad)); + dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); + dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); + dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); + dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); + dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); + dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad)); dbg_msg("sqnum %llu", - (unsigned long long)ubi64_to_cpu(vid_hdr->sqnum)); - dbg_msg("hdr_crc %08x", ubi32_to_cpu(vid_hdr->hdr_crc)); + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); + dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); dbg_msg("volume identifier header hexdump:"); } @@ -106,12 +106,12 @@ void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) */ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { - int name_len = ubi16_to_cpu(r->name_len); + int name_len = be16_to_cpu(r->name_len); dbg_msg("volume table record %d dump:", idx); - dbg_msg("reserved_pebs %d", ubi32_to_cpu(r->reserved_pebs)); - dbg_msg("alignment %d", ubi32_to_cpu(r->alignment)); - 
dbg_msg("data_pad %d", ubi32_to_cpu(r->data_pad)); + dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); + dbg_msg("alignment %d", be32_to_cpu(r->alignment)); + dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); dbg_msg("vol_type %d", (int)r->vol_type); dbg_msg("upd_marker %d", (int)r->upd_marker); dbg_msg("name_len %d", name_len); @@ -129,7 +129,7 @@ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) r->name[0], r->name[1], r->name[2], r->name[3], r->name[4]); } - dbg_msg("crc %#08x", ubi32_to_cpu(r->crc)); + dbg_msg("crc %#08x", be32_to_cpu(r->crc)); } /** diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 6964fe4ab41..a1820151e9f 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -425,10 +425,10 @@ retry: } else if (err == UBI_IO_BITFLIPS) scrub = 1; - ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs)); - ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size)); + ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs)); + ubi_assert(len == be32_to_cpu(vid_hdr->data_size)); - crc = ubi32_to_cpu(vid_hdr->data_crc); + crc = be32_to_cpu(vid_hdr->data_crc); ubi_free_vid_hdr(ubi, vid_hdr); } @@ -518,7 +518,7 @@ retry: goto out_put; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); if (err) goto write_error; @@ -634,11 +634,11 @@ int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, } vid_hdr->vol_type = UBI_VID_DYNAMIC; - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); - vid_hdr->vol_id = cpu_to_ubi32(vol_id); - vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); - vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); retry: pnum = ubi_wl_get_peb(ubi, dtype); @@ -692,7 +692,7 @@ write_error: return err; } - 
vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -748,17 +748,17 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); - vid_hdr->vol_id = cpu_to_ubi32(vol_id); - vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); - vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); crc = crc32(UBI_CRC32_INIT, buf, data_size); vid_hdr->vol_type = UBI_VID_STATIC; - vid_hdr->data_size = cpu_to_ubi32(data_size); - vid_hdr->used_ebs = cpu_to_ubi32(used_ebs); - vid_hdr->data_crc = cpu_to_ubi32(crc); + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->used_ebs = cpu_to_be32(used_ebs); + vid_hdr->data_crc = cpu_to_be32(crc); retry: pnum = ubi_wl_get_peb(ubi, dtype); @@ -813,7 +813,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -854,17 +854,17 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); - vid_hdr->vol_id = cpu_to_ubi32(vol_id); - vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); - vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); crc = crc32(UBI_CRC32_INIT, buf, len); vid_hdr->vol_type = UBI_VID_STATIC; - vid_hdr->data_size = cpu_to_ubi32(len); + vid_hdr->data_size = cpu_to_be32(len); vid_hdr->copy_flag = 1; - vid_hdr->data_crc = cpu_to_ubi32(crc); + vid_hdr->data_crc = 
cpu_to_be32(crc); retry: pnum = ubi_wl_get_peb(ubi, dtype); @@ -924,7 +924,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -965,17 +965,17 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, uint32_t crc; void *buf, *buf1 = NULL; - vol_id = ubi32_to_cpu(vid_hdr->vol_id); - lnum = ubi32_to_cpu(vid_hdr->lnum); + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); if (vid_hdr->vol_type == UBI_VID_STATIC) { - data_size = ubi32_to_cpu(vid_hdr->data_size); + data_size = be32_to_cpu(vid_hdr->data_size); aldata_size = ALIGN(data_size, ubi->min_io_size); } else data_size = aldata_size = - ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad); + ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); buf = vmalloc(aldata_size); if (!buf) @@ -1054,10 +1054,10 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, */ if (data_size > 0) { vid_hdr->copy_flag = 1; - vid_hdr->data_size = cpu_to_ubi32(data_size); - vid_hdr->data_crc = cpu_to_ubi32(crc); + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->data_crc = cpu_to_be32(crc); } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); if (err) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 7bb473e646e..20e297ad7f6 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -557,9 +557,9 @@ static int validate_ec_hdr(const struct ubi_device *ubi, long long ec; int vid_hdr_offset, leb_start; - ec = ubi64_to_cpu(ec_hdr->ec); - vid_hdr_offset = ubi32_to_cpu(ec_hdr->vid_hdr_offset); - leb_start = ubi32_to_cpu(ec_hdr->data_offset); + ec = be64_to_cpu(ec_hdr->ec); + vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset); + leb_start = be32_to_cpu(ec_hdr->data_offset); if (ec_hdr->version != 
UBI_VERSION) { ubi_err("node with incompatible UBI version found: " @@ -640,7 +640,7 @@ int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, read_err = err; } - magic = ubi32_to_cpu(ec_hdr->magic); + magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { /* * The magic field is wrong. Let's check if we have read all @@ -684,7 +684,7 @@ int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, } crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { @@ -729,12 +729,12 @@ int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum, dbg_io("write EC header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - ec_hdr->magic = cpu_to_ubi32(UBI_EC_HDR_MAGIC); + ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC); ec_hdr->version = UBI_VERSION; - ec_hdr->vid_hdr_offset = cpu_to_ubi32(ubi->vid_hdr_offset); - ec_hdr->data_offset = cpu_to_ubi32(ubi->leb_start); + ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset); + ec_hdr->data_offset = cpu_to_be32(ubi->leb_start); crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); - ec_hdr->hdr_crc = cpu_to_ubi32(crc); + ec_hdr->hdr_crc = cpu_to_be32(crc); err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); if (err) @@ -757,13 +757,13 @@ static int validate_vid_hdr(const struct ubi_device *ubi, { int vol_type = vid_hdr->vol_type; int copy_flag = vid_hdr->copy_flag; - int vol_id = ubi32_to_cpu(vid_hdr->vol_id); - int lnum = ubi32_to_cpu(vid_hdr->lnum); + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int lnum = be32_to_cpu(vid_hdr->lnum); int compat = vid_hdr->compat; - int data_size = ubi32_to_cpu(vid_hdr->data_size); - int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); - int data_pad = ubi32_to_cpu(vid_hdr->data_pad); - int data_crc = ubi32_to_cpu(vid_hdr->data_crc); + int data_size = be32_to_cpu(vid_hdr->data_size); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int 
data_pad = be32_to_cpu(vid_hdr->data_pad); + int data_crc = be32_to_cpu(vid_hdr->data_crc); int usable_leb_size = ubi->leb_size - data_pad; if (copy_flag != 0 && copy_flag != 1) { @@ -914,7 +914,7 @@ int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum, read_err = err; } - magic = ubi32_to_cpu(vid_hdr->magic); + magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { /* * If we have read all 0xFF bytes, the VID header probably does @@ -957,7 +957,7 @@ int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum, } crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc); + hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { @@ -1007,10 +1007,10 @@ int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum, if (err) return err > 0 ? -EINVAL: err; - vid_hdr->magic = cpu_to_ubi32(UBI_VID_HDR_MAGIC); + vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); vid_hdr->version = UBI_VERSION; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); - vid_hdr->hdr_crc = cpu_to_ubi32(crc); + vid_hdr->hdr_crc = cpu_to_be32(crc); err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); if (err) @@ -1060,7 +1060,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - magic = ubi32_to_cpu(ec_hdr->magic); + magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { ubi_err("bad magic %#08x, must be %#08x", magic, UBI_EC_HDR_MAGIC); @@ -1105,7 +1105,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) goto exit; crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc); ubi_err("paranoid check failed for PEB %d", pnum); @@ -1137,7 +1137,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t 
magic; - magic = ubi32_to_cpu(vid_hdr->magic); + magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x", magic, pnum, UBI_VID_HDR_MAGIC); @@ -1187,7 +1187,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) goto exit; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc); + hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err("bad VID header CRC at PEB %d, calculated %#08x, " "read %#08x", pnum, crc, hdr_crc); diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index cbd588d6016..23e30ac089c 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -132,9 +132,9 @@ static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr, const struct ubi_scan_volume *sv, int pnum) { int vol_type = vid_hdr->vol_type; - int vol_id = ubi32_to_cpu(vid_hdr->vol_id); - int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); - int data_pad = ubi32_to_cpu(vid_hdr->data_pad); + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); if (sv->leb_count != 0) { int sv_vol_type; @@ -200,7 +200,7 @@ static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, struct ubi_scan_volume *sv; struct rb_node **p = &si->volumes.rb_node, *parent = NULL; - ubi_assert(vol_id == ubi32_to_cpu(vid_hdr->vol_id)); + ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id)); /* Walk the volume RB-tree to look if this volume is already present */ while (*p) { @@ -225,8 +225,8 @@ static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, si->max_sqnum = 0; sv->vol_id = vol_id; sv->root = RB_ROOT; - sv->used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); - sv->data_pad = ubi32_to_cpu(vid_hdr->data_pad); + sv->used_ebs = be32_to_cpu(vid_hdr->used_ebs); + sv->data_pad = be32_to_cpu(vid_hdr->data_pad); sv->compat = 
vid_hdr->compat; sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; @@ -268,10 +268,10 @@ static int compare_lebs(const struct ubi_device *ubi, int len, err, second_is_newer, bitflips = 0, corrupted = 0; uint32_t data_crc, crc; struct ubi_vid_hdr *vidh = NULL; - unsigned long long sqnum2 = ubi64_to_cpu(vid_hdr->sqnum); + unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); if (seb->sqnum == 0 && sqnum2 == 0) { - long long abs, v1 = seb->leb_ver, v2 = ubi32_to_cpu(vid_hdr->leb_ver); + long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver); /* * UBI constantly increases the logical eraseblock version @@ -355,7 +355,7 @@ static int compare_lebs(const struct ubi_device *ubi, /* Read the data of the copy and check the CRC */ - len = ubi32_to_cpu(vid_hdr->data_size); + len = be32_to_cpu(vid_hdr->data_size); buf = vmalloc(len); if (!buf) { err = -ENOMEM; @@ -366,7 +366,7 @@ static int compare_lebs(const struct ubi_device *ubi, if (err && err != UBI_IO_BITFLIPS) goto out_free_buf; - data_crc = ubi32_to_cpu(vid_hdr->data_crc); + data_crc = be32_to_cpu(vid_hdr->data_crc); crc = crc32(UBI_CRC32_INIT, buf, len); if (crc != data_crc) { dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", @@ -425,10 +425,10 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, struct ubi_scan_leb *seb; struct rb_node **p, *parent = NULL; - vol_id = ubi32_to_cpu(vid_hdr->vol_id); - lnum = ubi32_to_cpu(vid_hdr->lnum); - sqnum = ubi64_to_cpu(vid_hdr->sqnum); - leb_ver = ubi32_to_cpu(vid_hdr->leb_ver); + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + sqnum = be64_to_cpu(vid_hdr->sqnum); + leb_ver = be32_to_cpu(vid_hdr->leb_ver); dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); @@ -523,7 +523,7 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, if (sv->highest_lnum == lnum) 
sv->last_data_size = - ubi32_to_cpu(vid_hdr->data_size); + be32_to_cpu(vid_hdr->data_size); return 0; } else { @@ -560,7 +560,7 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, if (sv->highest_lnum <= lnum) { sv->highest_lnum = lnum; - sv->last_data_size = ubi32_to_cpu(vid_hdr->data_size); + sv->last_data_size = be32_to_cpu(vid_hdr->data_size); } if (si->max_sqnum < sqnum) @@ -687,7 +687,7 @@ int ubi_scan_erase_peb(const struct ubi_device *ubi, return -EINVAL; } - ec_hdr->ec = cpu_to_ubi64(ec); + ec_hdr->ec = cpu_to_be64(ec); err = ubi_io_sync_erase(ubi, pnum, 0); if (err < 0) @@ -818,7 +818,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum return -EINVAL; } - ec = ubi64_to_cpu(ech->ec); + ec = be64_to_cpu(ech->ec); if (ec > UBI_MAX_ERASECOUNTER) { /* * Erase counter overflow. The EC headers have 64 bits @@ -856,9 +856,9 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum goto adjust_mean_ec; } - vol_id = ubi32_to_cpu(vidh->vol_id); + vol_id = be32_to_cpu(vidh->vol_id); if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) { - int lnum = ubi32_to_cpu(vidh->lnum); + int lnum = be32_to_cpu(vidh->lnum); /* Unsupported internal volume */ switch (vidh->compat) { @@ -1261,12 +1261,12 @@ static int paranoid_check_si(const struct ubi_device *ubi, goto bad_vid_hdr; } - if (seb->sqnum != ubi64_to_cpu(vidh->sqnum)) { + if (seb->sqnum != be64_to_cpu(vidh->sqnum)) { ubi_err("bad sqnum %llu", seb->sqnum); goto bad_vid_hdr; } - if (sv->vol_id != ubi32_to_cpu(vidh->vol_id)) { + if (sv->vol_id != be32_to_cpu(vidh->vol_id)) { ubi_err("bad vol_id %d", sv->vol_id); goto bad_vid_hdr; } @@ -1276,22 +1276,22 @@ static int paranoid_check_si(const struct ubi_device *ubi, goto bad_vid_hdr; } - if (seb->lnum != ubi32_to_cpu(vidh->lnum)) { + if (seb->lnum != be32_to_cpu(vidh->lnum)) { ubi_err("bad lnum %d", seb->lnum); goto bad_vid_hdr; } - if (sv->used_ebs != 
ubi32_to_cpu(vidh->used_ebs)) { + if (sv->used_ebs != be32_to_cpu(vidh->used_ebs)) { ubi_err("bad used_ebs %d", sv->used_ebs); goto bad_vid_hdr; } - if (sv->data_pad != ubi32_to_cpu(vidh->data_pad)) { + if (sv->data_pad != be32_to_cpu(vidh->data_pad)) { ubi_err("bad data_pad %d", sv->data_pad); goto bad_vid_hdr; } - if (seb->leb_ver != ubi32_to_cpu(vidh->leb_ver)) { + if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { ubi_err("bad leb_ver %u", seb->leb_ver); goto bad_vid_hdr; } @@ -1300,12 +1300,12 @@ static int paranoid_check_si(const struct ubi_device *ubi, if (!last_seb) continue; - if (sv->highest_lnum != ubi32_to_cpu(vidh->lnum)) { + if (sv->highest_lnum != be32_to_cpu(vidh->lnum)) { ubi_err("bad highest_lnum %d", sv->highest_lnum); goto bad_vid_hdr; } - if (sv->last_data_size != ubi32_to_cpu(vidh->data_size)) { + if (sv->last_data_size != be32_to_cpu(vidh->data_size)) { ubi_err("bad last_data_size %d", sv->last_data_size); goto bad_vid_hdr; } diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 4add5c816b1..6e135996e42 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -319,10 +319,10 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) /* Fill volume table record */ memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); - vtbl_rec.reserved_pebs = cpu_to_ubi32(vol->reserved_pebs); - vtbl_rec.alignment = cpu_to_ubi32(vol->alignment); - vtbl_rec.data_pad = cpu_to_ubi32(vol->data_pad); - vtbl_rec.name_len = cpu_to_ubi16(vol->name_len); + vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs); + vtbl_rec.alignment = cpu_to_be32(vol->alignment); + vtbl_rec.data_pad = cpu_to_be32(vol->data_pad); + vtbl_rec.name_len = cpu_to_be16(vol->name_len); if (vol->vol_type == UBI_DYNAMIC_VOLUME) vtbl_rec.vol_type = UBI_VID_DYNAMIC; else @@ -502,7 +502,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) /* Change volume table record */ memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct 
ubi_vtbl_record)); - vtbl_rec.reserved_pebs = cpu_to_ubi32(reserved_pebs); + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) goto out_acc; @@ -650,7 +650,7 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) long long n; const char *name; - reserved_pebs = ubi32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + reserved_pebs = be32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); if (!vol) { if (reserved_pebs) { @@ -764,9 +764,9 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) } } - alignment = ubi32_to_cpu(ubi->vtbl[vol_id].alignment); - data_pad = ubi32_to_cpu(ubi->vtbl[vol_id].data_pad); - name_len = ubi16_to_cpu(ubi->vtbl[vol_id].name_len); + alignment = be32_to_cpu(ubi->vtbl[vol_id].alignment); + data_pad = be32_to_cpu(ubi->vtbl[vol_id].data_pad); + name_len = be16_to_cpu(ubi->vtbl[vol_id].name_len); upd_marker = ubi->vtbl[vol_id].upd_marker; name = &ubi->vtbl[vol_id].name[0]; if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index e3557b987ef..800ce940a82 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -93,7 +93,7 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, vtbl_rec = &empty_vtbl_record; else { crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); - vtbl_rec->crc = cpu_to_ubi32(crc); + vtbl_rec->crc = cpu_to_be32(crc); } dbg_msg("change record %d", idx); @@ -141,18 +141,18 @@ static int vtbl_check(const struct ubi_device *ubi, for (i = 0; i < ubi->vtbl_slots; i++) { cond_resched(); - reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); - alignment = ubi32_to_cpu(vtbl[i].alignment); - data_pad = ubi32_to_cpu(vtbl[i].data_pad); + reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + alignment = be32_to_cpu(vtbl[i].alignment); + data_pad = be32_to_cpu(vtbl[i].data_pad); upd_marker = vtbl[i].upd_marker; vol_type = vtbl[i].vol_type; - name_len = 
ubi16_to_cpu(vtbl[i].name_len); + name_len = be16_to_cpu(vtbl[i].name_len); name = &vtbl[i].name[0]; crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); - if (ubi32_to_cpu(vtbl[i].crc) != crc) { + if (be32_to_cpu(vtbl[i].crc) != crc) { ubi_err("bad CRC at record %u: %#08x, not %#08x", - i, crc, ubi32_to_cpu(vtbl[i].crc)); + i, crc, be32_to_cpu(vtbl[i].crc)); ubi_dbg_dump_vtbl_record(&vtbl[i], i); return 1; } @@ -225,8 +225,8 @@ static int vtbl_check(const struct ubi_device *ubi, /* Checks that all names are unique */ for (i = 0; i < ubi->vtbl_slots - 1; i++) { for (n = i + 1; n < ubi->vtbl_slots; n++) { - int len1 = ubi16_to_cpu(vtbl[i].name_len); - int len2 = ubi16_to_cpu(vtbl[n].name_len); + int len1 = be16_to_cpu(vtbl[i].name_len); + int len2 = be16_to_cpu(vtbl[n].name_len); if (len1 > 0 && len1 == len2 && !strncmp(vtbl[i].name, vtbl[n].name, len1)) { @@ -288,13 +288,13 @@ retry: } vid_hdr->vol_type = UBI_VID_DYNAMIC; - vid_hdr->vol_id = cpu_to_ubi32(UBI_LAYOUT_VOL_ID); + vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOL_ID); vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; vid_hdr->data_size = vid_hdr->used_ebs = - vid_hdr->data_pad = cpu_to_ubi32(0); - vid_hdr->lnum = cpu_to_ubi32(copy); - vid_hdr->sqnum = cpu_to_ubi64(++si->max_sqnum); - vid_hdr->leb_ver = cpu_to_ubi32(old_seb ? old_seb->leb_ver + 1: 0); + vid_hdr->data_pad = cpu_to_be32(0); + vid_hdr->lnum = cpu_to_be32(copy); + vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); + vid_hdr->leb_ver = cpu_to_be32(old_seb ? 
old_seb->leb_ver + 1: 0); /* The EC header is already there, write the VID header */ err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); @@ -503,19 +503,19 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, for (i = 0; i < ubi->vtbl_slots; i++) { cond_resched(); - if (ubi32_to_cpu(vtbl[i].reserved_pebs) == 0) + if (be32_to_cpu(vtbl[i].reserved_pebs) == 0) continue; /* Empty record */ vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); if (!vol) return -ENOMEM; - vol->reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); - vol->alignment = ubi32_to_cpu(vtbl[i].alignment); - vol->data_pad = ubi32_to_cpu(vtbl[i].data_pad); + vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = be32_to_cpu(vtbl[i].alignment); + vol->data_pad = be32_to_cpu(vtbl[i].data_pad); vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; - vol->name_len = ubi16_to_cpu(vtbl[i].name_len); + vol->name_len = be16_to_cpu(vtbl[i].name_len); vol->usable_leb_size = ubi->leb_size - vol->data_pad; memcpy(vol->name, vtbl[i].name, vol->name_len); vol->name[vol->name_len] = '\0'; @@ -721,7 +721,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) int i, err; struct ubi_scan_volume *sv; - empty_vtbl_record.crc = cpu_to_ubi32(0xf116c36b); + empty_vtbl_record.crc = cpu_to_be32(0xf116c36b); /* * The number of supported volumes is limited by the eraseblock size diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index ab2174a56bc..d512cf16350 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -667,7 +667,7 @@ static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int tortur dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); - ec_hdr->ec = cpu_to_ubi64(ec); + ec_hdr->ec = cpu_to_be64(ec); err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); if (err) @@ -1634,7 +1634,7 @@ static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec) goto 
out_free; } - read_ec = ubi64_to_cpu(ec_hdr->ec); + read_ec = be64_to_cpu(ec_hdr->ec); if (ec != read_ec) { ubi_err("paranoid check failed for PEB %d", pnum); ubi_err("read EC is %lld, should be %d", read_ec, ec); diff --git a/include/mtd/ubi-header.h b/include/mtd/ubi-header.h index fa479c71aa3..74efa776347 100644 --- a/include/mtd/ubi-header.h +++ b/include/mtd/ubi-header.h @@ -74,42 +74,13 @@ enum { UBI_COMPAT_REJECT = 5 }; -/* - * ubi16_t/ubi32_t/ubi64_t - 16, 32, and 64-bit integers used in UBI on-flash - * data structures. - */ -typedef struct { - uint16_t int16; -} __attribute__ ((packed)) ubi16_t; - -typedef struct { - uint32_t int32; -} __attribute__ ((packed)) ubi32_t; - -typedef struct { - uint64_t int64; -} __attribute__ ((packed)) ubi64_t; - -/* - * In this implementation of UBI uses the big-endian format for on-flash - * integers. The below are the corresponding conversion macros. - */ -#define cpu_to_ubi16(x) ((ubi16_t){__cpu_to_be16(x)}) -#define ubi16_to_cpu(x) ((uint16_t)__be16_to_cpu((x).int16)) - -#define cpu_to_ubi32(x) ((ubi32_t){__cpu_to_be32(x)}) -#define ubi32_to_cpu(x) ((uint32_t)__be32_to_cpu((x).int32)) - -#define cpu_to_ubi64(x) ((ubi64_t){__cpu_to_be64(x)}) -#define ubi64_to_cpu(x) ((uint64_t)__be64_to_cpu((x).int64)) - /* Sizes of UBI headers */ #define UBI_EC_HDR_SIZE sizeof(struct ubi_ec_hdr) #define UBI_VID_HDR_SIZE sizeof(struct ubi_vid_hdr) /* Sizes of UBI headers without the ending CRC */ -#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(ubi32_t)) -#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(ubi32_t)) +#define UBI_EC_HDR_SIZE_CRC (UBI_EC_HDR_SIZE - sizeof(__be32)) +#define UBI_VID_HDR_SIZE_CRC (UBI_VID_HDR_SIZE - sizeof(__be32)) /** * struct ubi_ec_hdr - UBI erase counter header. @@ -137,14 +108,14 @@ typedef struct { * eraseblocks. */ struct ubi_ec_hdr { - ubi32_t magic; - uint8_t version; - uint8_t padding1[3]; - ubi64_t ec; /* Warning: the current limit is 31-bit anyway! 
*/ - ubi32_t vid_hdr_offset; - ubi32_t data_offset; - uint8_t padding2[36]; - ubi32_t hdr_crc; + __be32 magic; + __u8 version; + __u8 padding1[3]; + __be64 ec; /* Warning: the current limit is 31-bit anyway! */ + __be32 vid_hdr_offset; + __be32 data_offset; + __u8 padding2[36]; + __be32 hdr_crc; } __attribute__ ((packed)); /** @@ -262,22 +233,22 @@ struct ubi_ec_hdr { * software (say, cramfs) on top of the UBI volume. */ struct ubi_vid_hdr { - ubi32_t magic; - uint8_t version; - uint8_t vol_type; - uint8_t copy_flag; - uint8_t compat; - ubi32_t vol_id; - ubi32_t lnum; - ubi32_t leb_ver; /* obsolete, to be removed, don't use */ - ubi32_t data_size; - ubi32_t used_ebs; - ubi32_t data_pad; - ubi32_t data_crc; - uint8_t padding1[4]; - ubi64_t sqnum; - uint8_t padding2[12]; - ubi32_t hdr_crc; + __be32 magic; + __u8 version; + __u8 vol_type; + __u8 copy_flag; + __u8 compat; + __be32 vol_id; + __be32 lnum; + __be32 leb_ver; /* obsolete, to be removed, don't use */ + __be32 data_size; + __be32 used_ebs; + __be32 data_pad; + __be32 data_crc; + __u8 padding1[4]; + __be64 sqnum; + __u8 padding2[12]; + __be32 hdr_crc; } __attribute__ ((packed)); /* Internal UBI volumes count */ @@ -306,7 +277,7 @@ struct ubi_vid_hdr { #define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record) /* Size of the volume table record without the ending CRC */ -#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(ubi32_t)) +#define UBI_VTBL_RECORD_SIZE_CRC (UBI_VTBL_RECORD_SIZE - sizeof(__be32)) /** * struct ubi_vtbl_record - a record in the volume table. @@ -346,15 +317,15 @@ struct ubi_vid_hdr { * Empty records contain all zeroes and the CRC checksum of those zeroes. 
*/ struct ubi_vtbl_record { - ubi32_t reserved_pebs; - ubi32_t alignment; - ubi32_t data_pad; - uint8_t vol_type; - uint8_t upd_marker; - ubi16_t name_len; - uint8_t name[UBI_VOL_NAME_MAX+1]; - uint8_t padding2[24]; - ubi32_t crc; + __be32 reserved_pebs; + __be32 alignment; + __be32 data_pad; + __u8 vol_type; + __u8 upd_marker; + __be16 name_len; + __u8 name[UBI_VOL_NAME_MAX+1]; + __u8 padding2[24]; + __be32 crc; } __attribute__ ((packed)); #endif /* !__UBI_HEADER_H__ */ -- cgit v1.2.3-70-g09d2 From b187f180cc942e50007aa039f8e3a620ee5f3171 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 18 Jul 2007 00:49:10 -0700 Subject: serial: add early_serial_setup() back to header file early_serial_setup was removed from serial.h, but forgot to put in serial_8250.h Signed-off-by: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/setup.c | 1 + arch/mips/basler/excite/excite_setup.c | 1 + arch/mips/gt64120/wrppmc/setup.c | 1 + arch/mips/mips-boards/atlas/atlas_setup.c | 1 + arch/mips/mips-boards/sead/sead_setup.c | 1 + arch/mips/mipssim/sim_setup.c | 1 + arch/mips/pmc-sierra/msp71xx/msp_serial.c | 1 + arch/mips/pmc-sierra/yosemite/setup.c | 1 + arch/ppc/platforms/4xx/bamboo.c | 1 + arch/ppc/platforms/4xx/bubinga.c | 1 + arch/ppc/platforms/4xx/cpci405.c | 1 + arch/ppc/platforms/4xx/ebony.c | 1 + arch/ppc/platforms/4xx/luan.c | 1 + arch/ppc/platforms/4xx/ocotea.c | 1 + arch/ppc/platforms/4xx/taishan.c | 1 + arch/ppc/platforms/4xx/yucca.c | 1 + arch/ppc/platforms/85xx/sbc8560.c | 1 + arch/ppc/platforms/chestnut.c | 1 + arch/ppc/platforms/ev64260.c | 1 + arch/ppc/platforms/radstone_ppc7d.c | 1 + arch/ppc/platforms/spruce.c | 1 + drivers/parisc/superio.c | 1 + drivers/serial/8250_hp300.c | 1 + include/linux/serial_8250.h | 2 ++ 24 files changed, 25 insertions(+) (limited to 'include') diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c index c1c32e4c863..a74c08786b2 100644 --- a/arch/frv/kernel/setup.c +++ 
b/arch/frv/kernel/setup.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/basler/excite/excite_setup.c b/arch/mips/basler/excite/excite_setup.c index 2f0e4c08eb0..56003188f17 100644 --- a/arch/mips/basler/excite/excite_setup.c +++ b/arch/mips/basler/excite/excite_setup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/gt64120/wrppmc/setup.c b/arch/mips/gt64120/wrppmc/setup.c index ea965529e5e..ed58c13b603 100644 --- a/arch/mips/gt64120/wrppmc/setup.c +++ b/arch/mips/gt64120/wrppmc/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mips-boards/atlas/atlas_setup.c b/arch/mips/mips-boards/atlas/atlas_setup.c index 1cc6ebbedfd..c68358a476d 100644 --- a/arch/mips/mips-boards/atlas/atlas_setup.c +++ b/arch/mips/mips-boards/atlas/atlas_setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mips-boards/sead/sead_setup.c b/arch/mips/mips-boards/sead/sead_setup.c index bb801409d39..5f70eaf01fa 100644 --- a/arch/mips/mips-boards/sead/sead_setup.c +++ b/arch/mips/mips-boards/sead/sead_setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mipssim/sim_setup.c b/arch/mips/mipssim/sim_setup.c index 60e66906be6..17819b59410 100644 --- a/arch/mips/mipssim/sim_setup.c +++ b/arch/mips/mipssim/sim_setup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/pmc-sierra/msp71xx/msp_serial.c b/arch/mips/pmc-sierra/msp71xx/msp_serial.c index c41b53faa8f..e25bac537d7 100644 --- a/arch/mips/pmc-sierra/msp71xx/msp_serial.c +++ b/arch/mips/pmc-sierra/msp71xx/msp_serial.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 6a6e15e4000..f7f93ae24c3 100644 --- 
a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/4xx/bamboo.c b/arch/ppc/platforms/4xx/bamboo.c index 349660b84a0..017623c9bc4 100644 --- a/arch/ppc/platforms/4xx/bamboo.c +++ b/arch/ppc/platforms/4xx/bamboo.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/4xx/bubinga.c b/arch/ppc/platforms/4xx/bubinga.c index 1a7f075b754..cd696be55ac 100644 --- a/arch/ppc/platforms/4xx/bubinga.c +++ b/arch/ppc/platforms/4xx/bubinga.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/4xx/cpci405.c b/arch/ppc/platforms/4xx/cpci405.c index 8474b05b795..2e7e25dd84c 100644 --- a/arch/ppc/platforms/4xx/cpci405.c +++ b/arch/ppc/platforms/4xx/cpci405.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ppc/platforms/4xx/ebony.c b/arch/ppc/platforms/4xx/ebony.c index f0f9cc8480c..05d7184d7e1 100644 --- a/arch/ppc/platforms/4xx/ebony.c +++ b/arch/ppc/platforms/4xx/ebony.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/4xx/luan.c b/arch/ppc/platforms/4xx/luan.c index 61706ef3711..4b169610f15 100644 --- a/arch/ppc/platforms/4xx/luan.c +++ b/arch/ppc/platforms/4xx/luan.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/4xx/ocotea.c b/arch/ppc/platforms/4xx/ocotea.c index 5e994e146ba..fd0f971881d 100644 --- a/arch/ppc/platforms/4xx/ocotea.c +++ b/arch/ppc/platforms/4xx/ocotea.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/4xx/taishan.c b/arch/ppc/platforms/4xx/taishan.c index 5d9af8ddb15..888c492b4a4 100644 --- a/arch/ppc/platforms/4xx/taishan.c +++ b/arch/ppc/platforms/4xx/taishan.c @@ -30,6 +30,7 @@ #include #include #include 
+#include #include #include #include diff --git a/arch/ppc/platforms/4xx/yucca.c b/arch/ppc/platforms/4xx/yucca.c index 346787df0dd..a83b0baea01 100644 --- a/arch/ppc/platforms/4xx/yucca.c +++ b/arch/ppc/platforms/4xx/yucca.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/ppc/platforms/85xx/sbc8560.c b/arch/ppc/platforms/85xx/sbc8560.c index 1d10ab98f66..3d7addbdecf 100644 --- a/arch/ppc/platforms/85xx/sbc8560.c +++ b/arch/ppc/platforms/85xx/sbc8560.c @@ -26,6 +26,7 @@ #include #include /* for linux/serial_core.h */ #include +#include #include #include #include diff --git a/arch/ppc/platforms/chestnut.c b/arch/ppc/platforms/chestnut.c index a764ae71cbc..248684f50dd 100644 --- a/arch/ppc/platforms/chestnut.c +++ b/arch/ppc/platforms/chestnut.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ppc/platforms/ev64260.c b/arch/ppc/platforms/ev64260.c index 4957a7bcde2..976270d537c 100644 --- a/arch/ppc/platforms/ev64260.c +++ b/arch/ppc/platforms/ev64260.c @@ -35,6 +35,7 @@ #include #include #include +#include #else #include #endif diff --git a/arch/ppc/platforms/radstone_ppc7d.c b/arch/ppc/platforms/radstone_ppc7d.c index b55860734a7..44d4398a36f 100644 --- a/arch/ppc/platforms/radstone_ppc7d.c +++ b/arch/ppc/platforms/radstone_ppc7d.c @@ -35,6 +35,7 @@ #include #include /* for linux/serial_core.h */ #include +#include #include #include #include diff --git a/arch/ppc/platforms/spruce.c b/arch/ppc/platforms/spruce.c index 3c784278487..f4de50ba292 100644 --- a/arch/ppc/platforms/spruce.c +++ b/arch/ppc/platforms/spruce.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c index a708c329675..38cdf9fa36a 100644 --- a/drivers/parisc/superio.c +++ b/drivers/parisc/superio.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include diff --git a/drivers/serial/8250_hp300.c 
b/drivers/serial/8250_hp300.c index 53e81a44c1a..2cf0953fe0e 100644 --- a/drivers/serial/8250_hp300.c +++ b/drivers/serial/8250_hp300.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 706ee9a4c80..8518fa2a6f8 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -60,6 +60,8 @@ void serial8250_unregister_port(int line); void serial8250_suspend_port(int line); void serial8250_resume_port(int line); +extern int early_serial_setup(struct uart_port *port); + extern int serial8250_find_port(struct uart_port *p); extern int serial8250_find_port_for_earlycon(void); extern int setup_early_serial8250_console(char *cmdline); -- cgit v1.2.3-70-g09d2 From 8b4a40809e5330c9da5d20107d693d92d73b31dc Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 18 Jul 2007 00:49:11 -0700 Subject: zs: move to the serial subsystem This is a reimplementation of the zs driver for the serial subsystem. Any resemblance to the old driver is purely coincidental. ;-) I do hope I got the handling of modem lines right -- better do not tackle me about the issue unless you feel too good... Any users of the old driver: please note the numbers of the serial lines have now been swapped, i.e. ttyS0 <-> ttyS1 and ttyS2 <-> ttyS3. It has to do with the modem lines mentioned above; basically the port A in a given chip has to be initialised before the port B if you want to use the latter as the serial console (which is usually the case), as operations on modem lines of the serial line associated with the port B access both ports (see the comment at the top of the driver for the details of wiring used). Please update your scripts. 
This is also the reason each SCC now requests an IRQ once only (as seen in "/proc/interrupts") -- the handler takes care of both ports at once as the line associated with the port B has to take status update interrupts from both ports (and yet the line of the port A takes its own for itself too). The old driver never got it right... Signed-off-by: Maciej W. Rozycki Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 5 + drivers/char/Kconfig | 33 - drivers/char/decserial.c | 67 -- drivers/serial/Kconfig | 30 + drivers/serial/Makefile | 1 + drivers/serial/zs.c | 1287 ++++++++++++++++++++++++ drivers/serial/zs.h | 284 ++++++ drivers/tc/Makefile | 1 - drivers/tc/zs.c | 2203 ----------------------------------------- drivers/tc/zs.h | 404 -------- include/asm-mips/dec/serial.h | 36 - include/linux/serial_core.h | 5 +- 12 files changed, 1610 insertions(+), 2746 deletions(-) delete mode 100644 drivers/char/decserial.c create mode 100644 drivers/serial/zs.c create mode 100644 drivers/serial/zs.h delete mode 100644 drivers/tc/zs.c delete mode 100644 drivers/tc/zs.h delete mode 100644 include/asm-mips/dec/serial.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 368a7181fa1..a9615a567da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4110,6 +4110,11 @@ W: http://www.polyware.nl/~middelin/En/hobbies.html W: http://www.polyware.nl/~middelin/hobbies.html S: Maintained +ZS DECSTATION Z85C30 SERIAL DRIVER +P: Maciej W. Rozycki +M: macro@linux-mips.org +S: Maintained + THE REST P: Linus Torvalds S: Buried alive in reporters diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index d8d7125529c..97bd71bc3ae 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -372,39 +372,6 @@ config ISTALLION To compile this driver as a module, choose M here: the module will be called istallion. 
-config SERIAL_DEC - bool "DECstation serial support" - depends on MACH_DECSTATION - default y - help - This selects whether you want to be asked about drivers for - DECstation serial ports. - - Note that the answer to this question won't directly affect the - kernel: saying N will just cause the configurator to skip all - the questions about DECstation serial ports. - -config SERIAL_DEC_CONSOLE - bool "Support for console on a DECstation serial port" - depends on SERIAL_DEC - default y - help - If you say Y here, it will be possible to use a serial port as the - system console (the system console is the device which receives all - kernel messages and warnings and which allows logins in single user - mode). Note that the firmware uses ttyS0 as the serial console on - the Maxine and ttyS2 on the others. - - If unsure, say Y. - -config ZS - bool "Z85C30 Serial Support" - depends on SERIAL_DEC - default y - help - Documentation on the Zilog 85C350 serial communications controller - is downloadable at - config A2232 tristate "Commodore A2232 serial support (EXPERIMENTAL)" depends on EXPERIMENTAL && ZORRO && BROKEN_ON_SMP diff --git a/drivers/char/decserial.c b/drivers/char/decserial.c deleted file mode 100644 index 8ea2bea2b18..00000000000 --- a/drivers/char/decserial.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * sercons.c - * choose the right serial device at boot time - * - * triemer 6-SEP-1998 - * sercons.c is designed to allow the three different kinds - * of serial devices under the decstation world to co-exist - * in the same kernel. The idea here is to abstract - * the pieces of the drivers that are common to this file - * so that they do not clash at compile time and runtime. - * - * HK 16-SEP-1998 v0.002 - * removed the PROM console as this is not a real serial - * device. Added support for PROM console in drivers/char/tty_io.c - * instead. Although it may work to enable more than one - * console device I strongly recommend to use only one. 
- */ - -#include -#include - -#ifdef CONFIG_ZS -extern int zs_init(void); -#endif - -#ifdef CONFIG_SERIAL_CONSOLE - -#ifdef CONFIG_ZS -extern void zs_serial_console_init(void); -#endif - -#endif - -/* rs_init - starts up the serial interface - - handle normal case of starting up the serial interface */ - -#ifdef CONFIG_SERIAL - -int __init rs_init(void) -{ -#ifdef CONFIG_ZS - if (IOASIC) - return zs_init(); -#endif - return -ENXIO; -} - -__initcall(rs_init); - -#endif - -#ifdef CONFIG_SERIAL_CONSOLE - -/* serial_console_init handles the special case of starting - * up the console on the serial port - */ -static int __init decserial_console_init(void) -{ -#ifdef CONFIG_ZS - if (IOASIC) - zs_serial_console_init(); -#endif - return 0; -} -console_initcall(decserial_console_init); - -#endif diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 7fa413ddccf..18f62970644 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -486,6 +486,36 @@ config SERIAL_DZ_CONSOLE If unsure, say Y. +config SERIAL_ZS + tristate "DECstation Z85C30 serial support" + depends on MACH_DECSTATION + select SERIAL_CORE + default y + ---help--- + Support for the Zilog 85C30 serial communications controller used + for serial ports in newer DECstation systems. These include the + DECsystem 5900 and all models of the DECstation and DECsystem 5000 + systems except for model 200. + + If unsure, say Y. To compile this driver as a module, choose M here: + the module will be called zs. + +config SERIAL_ZS_CONSOLE + bool "Support for console on a DECstation Z85C30 serial port" + depends on SERIAL_ZS=y + select SERIAL_CORE_CONSOLE + default y + ---help--- + If you say Y here, it will be possible to use a serial port as the + system console (the system console is the device which receives all + kernel messages and warnings and which allows logins in single user + mode). 
+ + Note that the firmware uses ttyS1 as the serial console on the + Maxine and ttyS3 on the others using this driver. + + If unsure, say Y. + config SERIAL_21285 tristate "DC21285 serial port support" depends on ARM && FOOTBRIDGE diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index c48cdd61b73..af6377d480d 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_V850E_UART) += v850e_uart.o obj-$(CONFIG_SERIAL_PMACZILOG) += pmac_zilog.o obj-$(CONFIG_SERIAL_LH7A40X) += serial_lh7a40x.o obj-$(CONFIG_SERIAL_DZ) += dz.o +obj-$(CONFIG_SERIAL_ZS) += zs.o obj-$(CONFIG_SERIAL_SH_SCI) += sh-sci.o obj-$(CONFIG_SERIAL_SGI_L1_CONSOLE) += sn_console.o obj-$(CONFIG_SERIAL_CPM) += cpm_uart/ diff --git a/drivers/serial/zs.c b/drivers/serial/zs.c new file mode 100644 index 00000000000..65f1294fd27 --- /dev/null +++ b/drivers/serial/zs.c @@ -0,0 +1,1287 @@ +/* + * zs.c: Serial port driver for IOASIC DECstations. + * + * Derived from drivers/sbus/char/sunserial.c by Paul Mackerras. + * Derived from drivers/macintosh/macserial.c by Harald Koerfgen. + * + * DECstation changes + * Copyright (C) 1998-2000 Harald Koerfgen + * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007 Maciej W. Rozycki + * + * For the rest of the code the original Copyright applies: + * Copyright (C) 1996 Paul Mackerras (Paul.Mackerras@cs.anu.edu.au) + * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) + * + * + * Note: for IOASIC systems the wiring is as follows: + * + * mouse/keyboard: + * DIN-7 MJ-4 signal SCC + * 2 1 TxD <- A.TxD + * 3 4 RxD -> A.RxD + * + * EIA-232/EIA-423: + * DB-25 MMJ-6 signal SCC + * 2 2 TxD <- B.TxD + * 3 5 RxD -> B.RxD + * 4 RTS <- ~A.RTS + * 5 CTS -> ~B.CTS + * 6 6 DSR -> ~A.SYNC + * 8 CD -> ~B.DCD + * 12 DSRS(DCE) -> ~A.CTS (*) + * 15 TxC -> B.TxC + * 17 RxC -> B.RxC + * 20 1 DTR <- ~A.DTR + * 22 RI -> ~A.DCD + * 23 DSRS(DTE) <- ~B.RTS + * + * (*) EIA-232 defines the signal at this pin to be SCD, while DSRS(DCE) + * is shared with DSRS(DTE) at pin 23. + * + * As you can immediately notice the wiring of the RTS, DTR and DSR signals + * is a bit odd. This makes the handling of port B unnecessarily + * complicated and prevents the use of some automatic modes of operation. + */ + +#if defined(CONFIG_SERIAL_ZS_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "zs.h" + + +MODULE_AUTHOR("Maciej W. Rozycki "); +MODULE_DESCRIPTION("DECstation Z85C30 serial driver"); +MODULE_LICENSE("GPL"); + + +static char zs_name[] __initdata = "DECstation Z85C30 serial driver version "; +static char zs_version[] __initdata = "0.10"; + +/* + * It would be nice to dynamically allocate everything that + * depends on ZS_NUM_SCCS, so we could support any number of + * Z85C30s, but for now... + */ +#define ZS_NUM_SCCS 2 /* Max # of ZS chips supported. */ +#define ZS_NUM_CHAN 2 /* 2 channels per chip. */ +#define ZS_CHAN_A 0 /* Index of the channel A. */ +#define ZS_CHAN_B 1 /* Index of the channel B. */ +#define ZS_CHAN_IO_SIZE 8 /* IOMEM space size. */ +#define ZS_CHAN_IO_STRIDE 4 /* Register alignment. 
*/ +#define ZS_CHAN_IO_OFFSET 1 /* The SCC resides on the high byte + of the 16-bit IOBUS. */ +#define ZS_CLOCK 7372800 /* Z85C30 PCLK input clock rate. */ + +#define to_zport(uport) container_of(uport, struct zs_port, port) + +struct zs_parms { + resource_size_t scc[ZS_NUM_SCCS]; + int irq[ZS_NUM_SCCS]; +}; + +static struct zs_scc zs_sccs[ZS_NUM_SCCS]; + +static u8 zs_init_regs[ZS_NUM_REGS] __initdata = { + 0, /* write 0 */ + PAR_SPEC, /* write 1 */ + 0, /* write 2 */ + 0, /* write 3 */ + X16CLK | SB1, /* write 4 */ + 0, /* write 5 */ + 0, 0, 0, /* write 6, 7, 8 */ + MIE | DLC | NV, /* write 9 */ + NRZ, /* write 10 */ + TCBR | RCBR, /* write 11 */ + 0, 0, /* BRG time constant, write 12 + 13 */ + BRSRC | BRENABL, /* write 14 */ + 0, /* write 15 */ +}; + +/* + * Debugging. + */ +#undef ZS_DEBUG_REGS + + +/* + * Reading and writing Z85C30 registers. + */ +static void recovery_delay(void) +{ + udelay(2); +} + +static u8 read_zsreg(struct zs_port *zport, int reg) +{ + void __iomem *control = zport->port.membase + ZS_CHAN_IO_OFFSET; + u8 retval; + + if (reg != 0) { + writeb(reg & 0xf, control); + fast_iob(); + recovery_delay(); + } + retval = readb(control); + recovery_delay(); + return retval; +} + +static void write_zsreg(struct zs_port *zport, int reg, u8 value) +{ + void __iomem *control = zport->port.membase + ZS_CHAN_IO_OFFSET; + + if (reg != 0) { + writeb(reg & 0xf, control); + fast_iob(); recovery_delay(); + } + writeb(value, control); + fast_iob(); + recovery_delay(); + return; +} + +static u8 read_zsdata(struct zs_port *zport) +{ + void __iomem *data = zport->port.membase + + ZS_CHAN_IO_STRIDE + ZS_CHAN_IO_OFFSET; + u8 retval; + + retval = readb(data); + recovery_delay(); + return retval; +} + +static void write_zsdata(struct zs_port *zport, u8 value) +{ + void __iomem *data = zport->port.membase + + ZS_CHAN_IO_STRIDE + ZS_CHAN_IO_OFFSET; + + writeb(value, data); + fast_iob(); + recovery_delay(); + return; +} + +#ifdef ZS_DEBUG_REGS +void zs_dump(void) +{ + 
struct zs_port *zport; + int i, j; + + for (i = 0; i < ZS_NUM_SCCS * ZS_NUM_CHAN; i++) { + zport = &zs_sccs[i / ZS_NUM_CHAN].zport[i % ZS_NUM_CHAN]; + + if (!zport->scc) + continue; + + for (j = 0; j < 16; j++) + printk("W%-2d = 0x%02x\t", j, zport->regs[j]); + printk("\n"); + for (j = 0; j < 16; j++) + printk("R%-2d = 0x%02x\t", j, read_zsreg(zport, j)); + printk("\n\n"); + } +} +#endif + + +static void zs_spin_lock_cond_irq(spinlock_t *lock, int irq) +{ + if (irq) + spin_lock_irq(lock); + else + spin_lock(lock); +} + +static void zs_spin_unlock_cond_irq(spinlock_t *lock, int irq) +{ + if (irq) + spin_unlock_irq(lock); + else + spin_unlock(lock); +} + +static int zs_receive_drain(struct zs_port *zport) +{ + int loops = 10000; + + while ((read_zsreg(zport, R0) & Rx_CH_AV) && loops--) + read_zsdata(zport); + return loops; +} + +static int zs_transmit_drain(struct zs_port *zport, int irq) +{ + struct zs_scc *scc = zport->scc; + int loops = 10000; + + while (!(read_zsreg(zport, R0) & Tx_BUF_EMP) && loops--) { + zs_spin_unlock_cond_irq(&scc->zlock, irq); + udelay(2); + zs_spin_lock_cond_irq(&scc->zlock, irq); + } + return loops; +} + +static int zs_line_drain(struct zs_port *zport, int irq) +{ + struct zs_scc *scc = zport->scc; + int loops = 10000; + + while (!(read_zsreg(zport, R1) & ALL_SNT) && loops--) { + zs_spin_unlock_cond_irq(&scc->zlock, irq); + udelay(2); + zs_spin_lock_cond_irq(&scc->zlock, irq); + } + return loops; +} + + +static void load_zsregs(struct zs_port *zport, u8 *regs, int irq) +{ + /* Let the current transmission finish. */ + zs_line_drain(zport, irq); + /* Load 'em up. 
*/ + write_zsreg(zport, R3, regs[3] & ~RxENABLE); + write_zsreg(zport, R5, regs[5] & ~TxENAB); + write_zsreg(zport, R4, regs[4]); + write_zsreg(zport, R9, regs[9]); + write_zsreg(zport, R1, regs[1]); + write_zsreg(zport, R2, regs[2]); + write_zsreg(zport, R10, regs[10]); + write_zsreg(zport, R14, regs[14] & ~BRENABL); + write_zsreg(zport, R11, regs[11]); + write_zsreg(zport, R12, regs[12]); + write_zsreg(zport, R13, regs[13]); + write_zsreg(zport, R14, regs[14]); + write_zsreg(zport, R15, regs[15]); + if (regs[3] & RxENABLE) + write_zsreg(zport, R3, regs[3]); + if (regs[5] & TxENAB) + write_zsreg(zport, R5, regs[5]); + return; +} + + +/* + * Status handling routines. + */ + +/* + * zs_tx_empty() -- get the transmitter empty status + * + * Purpose: Let user call ioctl() to get info when the UART physically + * is emptied. On bus types like RS485, the transmitter must + * release the bus after transmitting. This must be done when + * the transmit shift register is empty, not be done when the + * transmit holding register is empty. This functionality + * allows an RS485 driver to be written in user space. + */ +static unsigned int zs_tx_empty(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + unsigned long flags; + u8 status; + + spin_lock_irqsave(&scc->zlock, flags); + status = read_zsreg(zport, R1); + spin_unlock_irqrestore(&scc->zlock, flags); + + return status & ALL_SNT ? TIOCSER_TEMT : 0; +} + +static unsigned int zs_raw_get_ab_mctrl(struct zs_port *zport_a, + struct zs_port *zport_b) +{ + u8 status_a, status_b; + unsigned int mctrl; + + status_a = read_zsreg(zport_a, R0); + status_b = read_zsreg(zport_b, R0); + + mctrl = ((status_b & CTS) ? TIOCM_CTS : 0) | + ((status_b & DCD) ? TIOCM_CAR : 0) | + ((status_a & DCD) ? TIOCM_RNG : 0) | + ((status_a & SYNC_HUNT) ? 
TIOCM_DSR : 0); + + return mctrl; +} + +static unsigned int zs_raw_get_mctrl(struct zs_port *zport) +{ + struct zs_port *zport_a = &zport->scc->zport[ZS_CHAN_A]; + + return zport != zport_a ? zs_raw_get_ab_mctrl(zport_a, zport) : 0; +} + +static unsigned int zs_raw_xor_mctrl(struct zs_port *zport) +{ + struct zs_port *zport_a = &zport->scc->zport[ZS_CHAN_A]; + unsigned int mmask, mctrl, delta; + u8 mask_a, mask_b; + + if (zport == zport_a) + return 0; + + mask_a = zport_a->regs[15]; + mask_b = zport->regs[15]; + + mmask = ((mask_b & CTSIE) ? TIOCM_CTS : 0) | + ((mask_b & DCDIE) ? TIOCM_CAR : 0) | + ((mask_a & DCDIE) ? TIOCM_RNG : 0) | + ((mask_a & SYNCIE) ? TIOCM_DSR : 0); + + mctrl = zport->mctrl; + if (mmask) { + mctrl &= ~mmask; + mctrl |= zs_raw_get_ab_mctrl(zport_a, zport) & mmask; + } + + delta = mctrl ^ zport->mctrl; + if (delta) + zport->mctrl = mctrl; + + return delta; +} + +static unsigned int zs_get_mctrl(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + unsigned int mctrl; + + spin_lock(&scc->zlock); + mctrl = zs_raw_get_mctrl(zport); + spin_unlock(&scc->zlock); + + return mctrl; +} + +static void zs_set_mctrl(struct uart_port *uport, unsigned int mctrl) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + struct zs_port *zport_a = &scc->zport[ZS_CHAN_A]; + u8 oldloop, newloop; + + spin_lock(&scc->zlock); + if (zport != zport_a) { + if (mctrl & TIOCM_DTR) + zport_a->regs[5] |= DTR; + else + zport_a->regs[5] &= ~DTR; + if (mctrl & TIOCM_RTS) + zport_a->regs[5] |= RTS; + else + zport_a->regs[5] &= ~RTS; + write_zsreg(zport_a, R5, zport_a->regs[5]); + } + + /* Rarely modified, so don't poke at hardware unless necessary. 
*/ + oldloop = zport->regs[14]; + newloop = oldloop; + if (mctrl & TIOCM_LOOP) + newloop |= LOOPBAK; + else + newloop &= ~LOOPBAK; + if (newloop != oldloop) { + zport->regs[14] = newloop; + write_zsreg(zport, R14, zport->regs[14]); + } + spin_unlock(&scc->zlock); +} + +static void zs_raw_stop_tx(struct zs_port *zport) +{ + write_zsreg(zport, R0, RES_Tx_P); + zport->tx_stopped = 1; +} + +static void zs_stop_tx(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + + spin_lock(&scc->zlock); + zs_raw_stop_tx(zport); + spin_unlock(&scc->zlock); +} + +static void zs_raw_transmit_chars(struct zs_port *); + +static void zs_start_tx(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + + spin_lock(&scc->zlock); + if (zport->tx_stopped) { + zs_transmit_drain(zport, 0); + zport->tx_stopped = 0; + zs_raw_transmit_chars(zport); + } + spin_unlock(&scc->zlock); +} + +static void zs_stop_rx(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + struct zs_port *zport_a = &scc->zport[ZS_CHAN_A]; + + spin_lock(&scc->zlock); + zport->regs[15] &= ~BRKIE; + zport->regs[1] &= ~(RxINT_MASK | TxINT_ENAB); + zport->regs[1] |= RxINT_DISAB; + + if (zport != zport_a) { + /* A-side DCD tracks RI and SYNC tracks DSR. */ + zport_a->regs[15] &= ~(DCDIE | SYNCIE); + write_zsreg(zport_a, R15, zport_a->regs[15]); + if (!(zport_a->regs[15] & BRKIE)) { + zport_a->regs[1] &= ~EXT_INT_ENAB; + write_zsreg(zport_a, R1, zport_a->regs[1]); + } + + /* This-side DCD tracks DCD and CTS tracks CTS. */ + zport->regs[15] &= ~(DCDIE | CTSIE); + zport->regs[1] &= ~EXT_INT_ENAB; + } else { + /* DCD tracks RI and SYNC tracks DSR for the B side. 
*/ + if (!(zport->regs[15] & (DCDIE | SYNCIE))) + zport->regs[1] &= ~EXT_INT_ENAB; + } + + write_zsreg(zport, R15, zport->regs[15]); + write_zsreg(zport, R1, zport->regs[1]); + spin_unlock(&scc->zlock); +} + +static void zs_enable_ms(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + struct zs_port *zport_a = &scc->zport[ZS_CHAN_A]; + + if (zport == zport_a) + return; + + spin_lock(&scc->zlock); + + /* Clear Ext interrupts if not being handled already. */ + if (!(zport_a->regs[1] & EXT_INT_ENAB)) + write_zsreg(zport_a, R0, RES_EXT_INT); + + /* A-side DCD tracks RI and SYNC tracks DSR. */ + zport_a->regs[1] |= EXT_INT_ENAB; + zport_a->regs[15] |= DCDIE | SYNCIE; + + /* This-side DCD tracks DCD and CTS tracks CTS. */ + zport->regs[15] |= DCDIE | CTSIE; + + zs_raw_xor_mctrl(zport); + + write_zsreg(zport_a, R1, zport_a->regs[1]); + write_zsreg(zport_a, R15, zport_a->regs[15]); + write_zsreg(zport, R15, zport->regs[15]); + spin_unlock(&scc->zlock); +} + +static void zs_break_ctl(struct uart_port *uport, int break_state) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + unsigned long flags; + + spin_lock_irqsave(&scc->zlock, flags); + if (break_state == -1) + zport->regs[5] |= SND_BRK; + else + zport->regs[5] &= ~SND_BRK; + write_zsreg(zport, R5, zport->regs[5]); + spin_unlock_irqrestore(&scc->zlock, flags); +} + + +/* + * Interrupt handling routines. + */ +#define Rx_BRK 0x0100 /* BREAK event software flag. */ +#define Rx_SYS 0x0200 /* SysRq event software flag. 
*/ + +static void zs_receive_chars(struct zs_port *zport) +{ + struct uart_port *uport = &zport->port; + struct zs_scc *scc = zport->scc; + struct uart_icount *icount; + unsigned int avail, status, ch, flag; + int count; + + for (count = 16; count; count--) { + spin_lock(&scc->zlock); + avail = read_zsreg(zport, R0) & Rx_CH_AV; + spin_unlock(&scc->zlock); + if (!avail) + break; + + spin_lock(&scc->zlock); + status = read_zsreg(zport, R1) & (Rx_OVR | FRM_ERR | PAR_ERR); + ch = read_zsdata(zport); + spin_unlock(&scc->zlock); + + flag = TTY_NORMAL; + + icount = &uport->icount; + icount->rx++; + + /* Handle the null char got when BREAK is removed. */ + if (!ch) + status |= zport->tty_break; + if (unlikely(status & + (Rx_OVR | FRM_ERR | PAR_ERR | Rx_SYS | Rx_BRK))) { + zport->tty_break = 0; + + /* Reset the error indication. */ + if (status & (Rx_OVR | FRM_ERR | PAR_ERR)) { + spin_lock(&scc->zlock); + write_zsreg(zport, R0, ERR_RES); + spin_unlock(&scc->zlock); + } + + if (status & (Rx_SYS | Rx_BRK)) { + icount->brk++; + /* SysRq discards the null char. */ + if (status & Rx_SYS) + continue; + } else if (status & FRM_ERR) + icount->frame++; + else if (status & PAR_ERR) + icount->parity++; + if (status & Rx_OVR) + icount->overrun++; + + status &= uport->read_status_mask; + if (status & Rx_BRK) + flag = TTY_BREAK; + else if (status & FRM_ERR) + flag = TTY_FRAME; + else if (status & PAR_ERR) + flag = TTY_PARITY; + } + + if (uart_handle_sysrq_char(uport, ch)) + continue; + + uart_insert_char(uport, status, Rx_OVR, ch, flag); + } + + tty_flip_buffer_push(uport->info->tty); +} + +static void zs_raw_transmit_chars(struct zs_port *zport) +{ + struct circ_buf *xmit = &zport->port.info->xmit; + + /* XON/XOFF chars. */ + if (zport->port.x_char) { + write_zsdata(zport, zport->port.x_char); + zport->port.icount.tx++; + zport->port.x_char = 0; + return; + } + + /* If nothing to do or stopped or hardware stopped. 
*/ + if (uart_circ_empty(xmit) || uart_tx_stopped(&zport->port)) { + zs_raw_stop_tx(zport); + return; + } + + /* Send char. */ + write_zsdata(zport, xmit->buf[xmit->tail]); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + zport->port.icount.tx++; + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&zport->port); + + /* Are we are done? */ + if (uart_circ_empty(xmit)) + zs_raw_stop_tx(zport); +} + +static void zs_transmit_chars(struct zs_port *zport) +{ + struct zs_scc *scc = zport->scc; + + spin_lock(&scc->zlock); + zs_raw_transmit_chars(zport); + spin_unlock(&scc->zlock); +} + +static void zs_status_handle(struct zs_port *zport, struct zs_port *zport_a) +{ + struct uart_port *uport = &zport->port; + struct zs_scc *scc = zport->scc; + unsigned int delta; + u8 status, brk; + + spin_lock(&scc->zlock); + + /* Get status from Read Register 0. */ + status = read_zsreg(zport, R0); + + if (zport->regs[15] & BRKIE) { + brk = status & BRK_ABRT; + if (brk && !zport->brk) { + spin_unlock(&scc->zlock); + if (uart_handle_break(uport)) + zport->tty_break = Rx_SYS; + else + zport->tty_break = Rx_BRK; + spin_lock(&scc->zlock); + } + zport->brk = brk; + } + + if (zport != zport_a) { + delta = zs_raw_xor_mctrl(zport); + spin_unlock(&scc->zlock); + + if (delta & TIOCM_CTS) + uart_handle_cts_change(uport, + zport->mctrl & TIOCM_CTS); + if (delta & TIOCM_CAR) + uart_handle_dcd_change(uport, + zport->mctrl & TIOCM_CAR); + if (delta & TIOCM_RNG) + uport->icount.dsr++; + if (delta & TIOCM_DSR) + uport->icount.rng++; + + if (delta) + wake_up_interruptible(&uport->info->delta_msr_wait); + + spin_lock(&scc->zlock); + } + + /* Clear the status condition... */ + write_zsreg(zport, R0, RES_EXT_INT); + + spin_unlock(&scc->zlock); +} + +/* + * This is the Z85C30 driver's generic interrupt routine. 
+ */ +static irqreturn_t zs_interrupt(int irq, void *dev_id) +{ + struct zs_scc *scc = dev_id; + struct zs_port *zport_a = &scc->zport[ZS_CHAN_A]; + struct zs_port *zport_b = &scc->zport[ZS_CHAN_B]; + irqreturn_t status = IRQ_NONE; + u8 zs_intreg; + int count; + + /* + * NOTE: The read register 3, which holds the irq status, + * does so for both channels on each chip. Although + * the status value itself must be read from the A + * channel and is only valid when read from channel A. + * Yes... broken hardware... + */ + for (count = 16; count; count--) { + spin_lock(&scc->zlock); + zs_intreg = read_zsreg(zport_a, R3); + spin_unlock(&scc->zlock); + if (!zs_intreg) + break; + + /* + * We do not like losing characters, so we prioritise + * interrupt sources a little bit differently than + * the SCC would, was it allowed to. + */ + if (zs_intreg & CHBRxIP) + zs_receive_chars(zport_b); + if (zs_intreg & CHARxIP) + zs_receive_chars(zport_a); + if (zs_intreg & CHBEXT) + zs_status_handle(zport_b, zport_a); + if (zs_intreg & CHAEXT) + zs_status_handle(zport_a, zport_a); + if (zs_intreg & CHBTxIP) + zs_transmit_chars(zport_b); + if (zs_intreg & CHATxIP) + zs_transmit_chars(zport_a); + + status = IRQ_HANDLED; + } + + return status; +} + + +/* + * Finally, routines used to initialize the serial port. + */ +static int zs_startup(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + unsigned long flags; + int irq_guard; + int ret; + + irq_guard = atomic_add_return(1, &scc->irq_guard); + if (irq_guard == 1) { + ret = request_irq(zport->port.irq, zs_interrupt, + IRQF_SHARED, "scc", scc); + if (ret) { + atomic_add(-1, &scc->irq_guard); + printk(KERN_ERR "zs: can't get irq %d\n", + zport->port.irq); + return ret; + } + } + + spin_lock_irqsave(&scc->zlock, flags); + + /* Clear the receive FIFO. */ + zs_receive_drain(zport); + + /* Clear the interrupt registers. 
*/ + write_zsreg(zport, R0, ERR_RES); + write_zsreg(zport, R0, RES_Tx_P); + /* But Ext only if not being handled already. */ + if (!(zport->regs[1] & EXT_INT_ENAB)) + write_zsreg(zport, R0, RES_EXT_INT); + + /* Finally, enable sequencing and interrupts. */ + zport->regs[1] &= ~RxINT_MASK; + zport->regs[1] |= RxINT_ALL | TxINT_ENAB | EXT_INT_ENAB; + zport->regs[3] |= RxENABLE; + zport->regs[5] |= TxENAB; + zport->regs[15] |= BRKIE; + write_zsreg(zport, R1, zport->regs[1]); + write_zsreg(zport, R3, zport->regs[3]); + write_zsreg(zport, R5, zport->regs[5]); + write_zsreg(zport, R15, zport->regs[15]); + + /* Record the current state of RR0. */ + zport->mctrl = zs_raw_get_mctrl(zport); + zport->brk = read_zsreg(zport, R0) & BRK_ABRT; + + zport->tx_stopped = 1; + + spin_unlock_irqrestore(&scc->zlock, flags); + + return 0; +} + +static void zs_shutdown(struct uart_port *uport) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + unsigned long flags; + int irq_guard; + + spin_lock_irqsave(&scc->zlock, flags); + + zport->regs[5] &= ~TxENAB; + zport->regs[3] &= ~RxENABLE; + write_zsreg(zport, R5, zport->regs[5]); + write_zsreg(zport, R3, zport->regs[3]); + + spin_unlock_irqrestore(&scc->zlock, flags); + + irq_guard = atomic_add_return(-1, &scc->irq_guard); + if (!irq_guard) + free_irq(zport->port.irq, scc); +} + + +static void zs_reset(struct zs_port *zport) +{ + struct zs_scc *scc = zport->scc; + int irq; + unsigned long flags; + + spin_lock_irqsave(&scc->zlock, flags); + irq = !irqs_disabled_flags(flags); + if (!scc->initialised) { + /* Reset the pointer first, just in case... */ + read_zsreg(zport, R0); + /* And let the current transmission finish. 
*/ + zs_line_drain(zport, irq); + write_zsreg(zport, R9, FHWRES); + udelay(10); + write_zsreg(zport, R9, 0); + scc->initialised = 1; + } + load_zsregs(zport, zport->regs, irq); + spin_unlock_irqrestore(&scc->zlock, flags); +} + +static void zs_set_termios(struct uart_port *uport, struct ktermios *termios, + struct ktermios *old_termios) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + struct zs_port *zport_a = &scc->zport[ZS_CHAN_A]; + int irq; + unsigned int baud, brg; + unsigned long flags; + + spin_lock_irqsave(&scc->zlock, flags); + irq = !irqs_disabled_flags(flags); + + /* Byte size. */ + zport->regs[3] &= ~RxNBITS_MASK; + zport->regs[5] &= ~TxNBITS_MASK; + switch (termios->c_cflag & CSIZE) { + case CS5: + zport->regs[3] |= Rx5; + zport->regs[5] |= Tx5; + break; + case CS6: + zport->regs[3] |= Rx6; + zport->regs[5] |= Tx6; + break; + case CS7: + zport->regs[3] |= Rx7; + zport->regs[5] |= Tx7; + break; + case CS8: + default: + zport->regs[3] |= Rx8; + zport->regs[5] |= Tx8; + break; + } + + /* Parity and stop bits. 
*/ + zport->regs[4] &= ~(XCLK_MASK | SB_MASK | PAR_ENA | PAR_EVEN); + if (termios->c_cflag & CSTOPB) + zport->regs[4] |= SB2; + else + zport->regs[4] |= SB1; + if (termios->c_cflag & PARENB) + zport->regs[4] |= PAR_ENA; + if (!(termios->c_cflag & PARODD)) + zport->regs[4] |= PAR_EVEN; + switch (zport->clk_mode) { + case 64: + zport->regs[4] |= X64CLK; + break; + case 32: + zport->regs[4] |= X32CLK; + break; + case 16: + zport->regs[4] |= X16CLK; + break; + case 1: + zport->regs[4] |= X1CLK; + break; + default: + BUG(); + } + + baud = uart_get_baud_rate(uport, termios, old_termios, 0, + uport->uartclk / zport->clk_mode / 4); + + brg = ZS_BPS_TO_BRG(baud, uport->uartclk / zport->clk_mode); + zport->regs[12] = brg & 0xff; + zport->regs[13] = (brg >> 8) & 0xff; + + uart_update_timeout(uport, termios->c_cflag, baud); + + uport->read_status_mask = Rx_OVR; + if (termios->c_iflag & INPCK) + uport->read_status_mask |= FRM_ERR | PAR_ERR; + if (termios->c_iflag & (BRKINT | PARMRK)) + uport->read_status_mask |= Rx_BRK; + + uport->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + uport->ignore_status_mask |= FRM_ERR | PAR_ERR; + if (termios->c_iflag & IGNBRK) { + uport->ignore_status_mask |= Rx_BRK; + if (termios->c_iflag & IGNPAR) + uport->ignore_status_mask |= Rx_OVR; + } + + if (termios->c_cflag & CREAD) + zport->regs[3] |= RxENABLE; + else + zport->regs[3] &= ~RxENABLE; + + if (zport != zport_a) { + if (!(termios->c_cflag & CLOCAL)) { + zport->regs[15] |= DCDIE; + } else + zport->regs[15] &= ~DCDIE; + if (termios->c_cflag & CRTSCTS) { + zport->regs[15] |= CTSIE; + } else + zport->regs[15] &= ~CTSIE; + zs_raw_xor_mctrl(zport); + } + + /* Load up the new values. 
*/ + load_zsregs(zport, zport->regs, irq); + + spin_unlock_irqrestore(&scc->zlock, flags); +} + + +static const char *zs_type(struct uart_port *uport) +{ + return "Z85C30 SCC"; +} + +static void zs_release_port(struct uart_port *uport) +{ + iounmap(uport->membase); + uport->membase = 0; + release_mem_region(uport->mapbase, ZS_CHAN_IO_SIZE); +} + +static int zs_map_port(struct uart_port *uport) +{ + if (!uport->membase) + uport->membase = ioremap_nocache(uport->mapbase, + ZS_CHAN_IO_SIZE); + if (!uport->membase) { + printk(KERN_ERR "zs: Cannot map MMIO\n"); + return -ENOMEM; + } + return 0; +} + +static int zs_request_port(struct uart_port *uport) +{ + int ret; + + if (!request_mem_region(uport->mapbase, ZS_CHAN_IO_SIZE, "scc")) { + printk(KERN_ERR "zs: Unable to reserve MMIO resource\n"); + return -EBUSY; + } + ret = zs_map_port(uport); + if (ret) { + release_mem_region(uport->mapbase, ZS_CHAN_IO_SIZE); + return ret; + } + return 0; +} + +static void zs_config_port(struct uart_port *uport, int flags) +{ + struct zs_port *zport = to_zport(uport); + + if (flags & UART_CONFIG_TYPE) { + if (zs_request_port(uport)) + return; + + uport->type = PORT_ZS; + + zs_reset(zport); + } +} + +static int zs_verify_port(struct uart_port *uport, struct serial_struct *ser) +{ + struct zs_port *zport = to_zport(uport); + int ret = 0; + + if (ser->type != PORT_UNKNOWN && ser->type != PORT_ZS) + ret = -EINVAL; + if (ser->irq != uport->irq) + ret = -EINVAL; + if (ser->baud_base != uport->uartclk / zport->clk_mode / 4) + ret = -EINVAL; + return ret; +} + + +static struct uart_ops zs_ops = { + .tx_empty = zs_tx_empty, + .set_mctrl = zs_set_mctrl, + .get_mctrl = zs_get_mctrl, + .stop_tx = zs_stop_tx, + .start_tx = zs_start_tx, + .stop_rx = zs_stop_rx, + .enable_ms = zs_enable_ms, + .break_ctl = zs_break_ctl, + .startup = zs_startup, + .shutdown = zs_shutdown, + .set_termios = zs_set_termios, + .type = zs_type, + .release_port = zs_release_port, + .request_port = zs_request_port, + 
.config_port = zs_config_port, + .verify_port = zs_verify_port, +}; + +/* + * Initialize Z85C30 port structures. + */ +static int __init zs_probe_sccs(void) +{ + static int probed; + struct zs_parms zs_parms; + int chip, side, irq; + int n_chips = 0; + int i; + + if (probed) + return 0; + + irq = dec_interrupt[DEC_IRQ_SCC0]; + if (irq >= 0) { + zs_parms.scc[n_chips] = IOASIC_SCC0; + zs_parms.irq[n_chips] = dec_interrupt[DEC_IRQ_SCC0]; + n_chips++; + } + irq = dec_interrupt[DEC_IRQ_SCC1]; + if (irq >= 0) { + zs_parms.scc[n_chips] = IOASIC_SCC1; + zs_parms.irq[n_chips] = dec_interrupt[DEC_IRQ_SCC1]; + n_chips++; + } + if (!n_chips) + return -ENXIO; + + probed = 1; + + for (chip = 0; chip < n_chips; chip++) { + spin_lock_init(&zs_sccs[chip].zlock); + for (side = 0; side < ZS_NUM_CHAN; side++) { + struct zs_port *zport = &zs_sccs[chip].zport[side]; + struct uart_port *uport = &zport->port; + + zport->scc = &zs_sccs[chip]; + zport->clk_mode = 16; + + uport->irq = zs_parms.irq[chip]; + uport->uartclk = ZS_CLOCK; + uport->fifosize = 1; + uport->iotype = UPIO_MEM; + uport->flags = UPF_BOOT_AUTOCONF; + uport->ops = &zs_ops; + uport->line = chip * ZS_NUM_CHAN + side; + uport->mapbase = dec_kn_slot_base + + zs_parms.scc[chip] + + (side ^ ZS_CHAN_B) * ZS_CHAN_IO_SIZE; + + for (i = 0; i < ZS_NUM_REGS; i++) + zport->regs[i] = zs_init_regs[i]; + } + } + + return 0; +} + + +#ifdef CONFIG_SERIAL_ZS_CONSOLE +static void zs_console_putchar(struct uart_port *uport, int ch) +{ + struct zs_port *zport = to_zport(uport); + struct zs_scc *scc = zport->scc; + int irq; + unsigned long flags; + + spin_lock_irqsave(&scc->zlock, flags); + irq = !irqs_disabled_flags(flags); + if (zs_transmit_drain(zport, irq)) + write_zsdata(zport, ch); + spin_unlock_irqrestore(&scc->zlock, flags); +} + +/* + * Print a string to the serial port trying not to disturb + * any possible real use of the port... 
+ */ +static void zs_console_write(struct console *co, const char *s, + unsigned int count) +{ + int chip = co->index / ZS_NUM_CHAN, side = co->index % ZS_NUM_CHAN; + struct zs_port *zport = &zs_sccs[chip].zport[side]; + struct zs_scc *scc = zport->scc; + unsigned long flags; + u8 txint, txenb; + int irq; + + /* Disable transmit interrupts and enable the transmitter. */ + spin_lock_irqsave(&scc->zlock, flags); + txint = zport->regs[1]; + txenb = zport->regs[5]; + if (txint & TxINT_ENAB) { + zport->regs[1] = txint & ~TxINT_ENAB; + write_zsreg(zport, R1, zport->regs[1]); + } + if (!(txenb & TxENAB)) { + zport->regs[5] = txenb | TxENAB; + write_zsreg(zport, R5, zport->regs[5]); + } + spin_unlock_irqrestore(&scc->zlock, flags); + + uart_console_write(&zport->port, s, count, zs_console_putchar); + + /* Restore transmit interrupts and the transmitter enable. */ + spin_lock_irqsave(&scc->zlock, flags); + irq = !irqs_disabled_flags(flags); + zs_line_drain(zport, irq); + if (!(txenb & TxENAB)) { + zport->regs[5] &= ~TxENAB; + write_zsreg(zport, R5, zport->regs[5]); + } + if (txint & TxINT_ENAB) { + zport->regs[1] |= TxINT_ENAB; + write_zsreg(zport, R1, zport->regs[1]); + } + spin_unlock_irqrestore(&scc->zlock, flags); +} + +/* + * Setup serial console baud/bits/parity. We do two things here: + * - construct a cflag setting for the first uart_open() + * - initialise the serial port + * Return non-zero if we didn't find a serial port. 
+ */ +static int __init zs_console_setup(struct console *co, char *options) +{ + int chip = co->index / ZS_NUM_CHAN, side = co->index % ZS_NUM_CHAN; + struct zs_port *zport = &zs_sccs[chip].zport[side]; + struct uart_port *uport = &zport->port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + int ret; + + ret = zs_map_port(uport); + if (ret) + return ret; + + zs_reset(zport); + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + return uart_set_options(uport, co, baud, parity, bits, flow); +} + +static struct uart_driver zs_reg; +static struct console zs_console = { + .name = "ttyS", + .write = zs_console_write, + .device = uart_console_device, + .setup = zs_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &zs_reg, +}; + +/* + * Register console. + */ +static int __init zs_serial_console_init(void) +{ + int ret; + + ret = zs_probe_sccs(); + if (ret) + return ret; + register_console(&zs_console); + + return 0; +} + +console_initcall(zs_serial_console_init); + +#define SERIAL_ZS_CONSOLE &zs_console +#else +#define SERIAL_ZS_CONSOLE NULL +#endif /* CONFIG_SERIAL_ZS_CONSOLE */ + +static struct uart_driver zs_reg = { + .owner = THIS_MODULE, + .driver_name = "serial", + .dev_name = "ttyS", + .major = TTY_MAJOR, + .minor = 64, + .nr = ZS_NUM_SCCS * ZS_NUM_CHAN, + .cons = SERIAL_ZS_CONSOLE, +}; + +/* zs_init inits the driver. */ +static int __init zs_init(void) +{ + int i, ret; + + pr_info("%s%s\n", zs_name, zs_version); + + /* Find out how many Z85C30 SCCs we have. 
*/ + ret = zs_probe_sccs(); + if (ret) + return ret; + + ret = uart_register_driver(&zs_reg); + if (ret) + return ret; + + for (i = 0; i < ZS_NUM_SCCS * ZS_NUM_CHAN; i++) { + struct zs_scc *scc = &zs_sccs[i / ZS_NUM_CHAN]; + struct zs_port *zport = &scc->zport[i % ZS_NUM_CHAN]; + struct uart_port *uport = &zport->port; + + if (zport->scc) + uart_add_one_port(&zs_reg, uport); + } + + return 0; +} + +static void __exit zs_exit(void) +{ + int i; + + for (i = ZS_NUM_SCCS * ZS_NUM_CHAN - 1; i >= 0; i--) { + struct zs_scc *scc = &zs_sccs[i / ZS_NUM_CHAN]; + struct zs_port *zport = &scc->zport[i % ZS_NUM_CHAN]; + struct uart_port *uport = &zport->port; + + if (zport->scc) + uart_remove_one_port(&zs_reg, uport); + } + + uart_unregister_driver(&zs_reg); +} + +module_init(zs_init); +module_exit(zs_exit); diff --git a/drivers/serial/zs.h b/drivers/serial/zs.h new file mode 100644 index 00000000000..aa921b57d82 --- /dev/null +++ b/drivers/serial/zs.h @@ -0,0 +1,284 @@ +/* + * zs.h: Definitions for the DECstation Z85C30 serial driver. + * + * Adapted from drivers/sbus/char/sunserial.h by Paul Mackerras. + * Adapted from drivers/macintosh/macserial.h by Harald Koerfgen. + * + * Copyright (C) 1996 Paul Mackerras (Paul.Mackerras@cs.anu.edu.au) + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2004, 2005, 2007 Maciej W. Rozycki + */ +#ifndef _SERIAL_ZS_H +#define _SERIAL_ZS_H + +#ifdef __KERNEL__ + +#define ZS_NUM_REGS 16 + +/* + * This is our internal structure for each serial port's state. + */ +struct zs_port { + struct zs_scc *scc; /* Containing SCC. */ + struct uart_port port; /* Underlying UART. */ + + int clk_mode; /* May be 1, 16, 32, or 64. */ + + unsigned int tty_break; /* Set on BREAK condition. */ + int tx_stopped; /* Output is suspended. */ + + unsigned int mctrl; /* State of modem lines. */ + u8 brk; /* BREAK state from RR0. */ + + u8 regs[ZS_NUM_REGS]; /* Channel write registers. 
*/ +}; + +/* + * Per-SCC state for locking and the interrupt handler. + */ +struct zs_scc { + struct zs_port zport[2]; + spinlock_t zlock; + atomic_t irq_guard; + int initialised; +}; + +#endif /* __KERNEL__ */ + +/* + * Conversion routines to/from brg time constants from/to bits per second. + */ +#define ZS_BRG_TO_BPS(brg, freq) ((freq) / 2 / ((brg) + 2)) +#define ZS_BPS_TO_BRG(bps, freq) ((((freq) + (bps)) / (2 * (bps))) - 2) + +/* + * The Zilog register set. + */ + +/* Write Register 0 (Command) */ +#define R0 0 /* Register selects */ +#define R1 1 +#define R2 2 +#define R3 3 +#define R4 4 +#define R5 5 +#define R6 6 +#define R7 7 +#define R8 8 +#define R9 9 +#define R10 10 +#define R11 11 +#define R12 12 +#define R13 13 +#define R14 14 +#define R15 15 + +#define NULLCODE 0 /* Null Code */ +#define POINT_HIGH 0x8 /* Select upper half of registers */ +#define RES_EXT_INT 0x10 /* Reset Ext. Status Interrupts */ +#define SEND_ABORT 0x18 /* HDLC Abort */ +#define RES_RxINT_FC 0x20 /* Reset RxINT on First Character */ +#define RES_Tx_P 0x28 /* Reset TxINT Pending */ +#define ERR_RES 0x30 /* Error Reset */ +#define RES_H_IUS 0x38 /* Reset highest IUS */ + +#define RES_Rx_CRC 0x40 /* Reset Rx CRC Checker */ +#define RES_Tx_CRC 0x80 /* Reset Tx CRC Checker */ +#define RES_EOM_L 0xC0 /* Reset EOM latch */ + +/* Write Register 1 (Tx/Rx/Ext Int Enable and WAIT/DMA Commands) */ +#define EXT_INT_ENAB 0x1 /* Ext Int Enable */ +#define TxINT_ENAB 0x2 /* Tx Int Enable */ +#define PAR_SPEC 0x4 /* Parity is special condition */ + +#define RxINT_DISAB 0 /* Rx Int Disable */ +#define RxINT_FCERR 0x8 /* Rx Int on First Character Only or Error */ +#define RxINT_ALL 0x10 /* Int on all Rx Characters or error */ +#define RxINT_ERR 0x18 /* Int on error only */ +#define RxINT_MASK 0x18 + +#define WT_RDY_RT 0x20 /* Wait/Ready on R/T */ +#define WT_FN_RDYFN 0x40 /* Wait/FN/Ready FN */ +#define WT_RDY_ENAB 0x80 /* Wait/Ready Enable */ + +/* Write Register 2 (Interrupt Vector) */ + +/* Write 
Register 3 (Receive Parameters and Control) */ +#define RxENABLE 0x1 /* Rx Enable */ +#define SYNC_L_INH 0x2 /* Sync Character Load Inhibit */ +#define ADD_SM 0x4 /* Address Search Mode (SDLC) */ +#define RxCRC_ENAB 0x8 /* Rx CRC Enable */ +#define ENT_HM 0x10 /* Enter Hunt Mode */ +#define AUTO_ENAB 0x20 /* Auto Enables */ +#define Rx5 0x0 /* Rx 5 Bits/Character */ +#define Rx7 0x40 /* Rx 7 Bits/Character */ +#define Rx6 0x80 /* Rx 6 Bits/Character */ +#define Rx8 0xc0 /* Rx 8 Bits/Character */ +#define RxNBITS_MASK 0xc0 + +/* Write Register 4 (Transmit/Receive Miscellaneous Parameters and Modes) */ +#define PAR_ENA 0x1 /* Parity Enable */ +#define PAR_EVEN 0x2 /* Parity Even/Odd* */ + +#define SYNC_ENAB 0 /* Sync Modes Enable */ +#define SB1 0x4 /* 1 stop bit/char */ +#define SB15 0x8 /* 1.5 stop bits/char */ +#define SB2 0xc /* 2 stop bits/char */ +#define SB_MASK 0xc + +#define MONSYNC 0 /* 8 Bit Sync character */ +#define BISYNC 0x10 /* 16 bit sync character */ +#define SDLC 0x20 /* SDLC Mode (01111110 Sync Flag) */ +#define EXTSYNC 0x30 /* External Sync Mode */ + +#define X1CLK 0x0 /* x1 clock mode */ +#define X16CLK 0x40 /* x16 clock mode */ +#define X32CLK 0x80 /* x32 clock mode */ +#define X64CLK 0xc0 /* x64 clock mode */ +#define XCLK_MASK 0xc0 + +/* Write Register 5 (Transmit Parameters and Controls) */ +#define TxCRC_ENAB 0x1 /* Tx CRC Enable */ +#define RTS 0x2 /* RTS */ +#define SDLC_CRC 0x4 /* SDLC/CRC-16 */ +#define TxENAB 0x8 /* Tx Enable */ +#define SND_BRK 0x10 /* Send Break */ +#define Tx5 0x0 /* Tx 5 bits (or less)/character */ +#define Tx7 0x20 /* Tx 7 bits/character */ +#define Tx6 0x40 /* Tx 6 bits/character */ +#define Tx8 0x60 /* Tx 8 bits/character */ +#define TxNBITS_MASK 0x60 +#define DTR 0x80 /* DTR */ + +/* Write Register 6 (Sync bits 0-7/SDLC Address Field) */ + +/* Write Register 7 (Sync bits 8-15/SDLC 01111110) */ + +/* Write Register 8 (Transmit Buffer) */ + +/* Write Register 9 (Master Interrupt Control) */ +#define VIS 1 /* 
Vector Includes Status */ +#define NV 2 /* No Vector */ +#define DLC 4 /* Disable Lower Chain */ +#define MIE 8 /* Master Interrupt Enable */ +#define STATHI 0x10 /* Status high */ +#define SOFTACK 0x20 /* Software Interrupt Acknowledge */ +#define NORESET 0 /* No reset on write to R9 */ +#define CHRB 0x40 /* Reset channel B */ +#define CHRA 0x80 /* Reset channel A */ +#define FHWRES 0xc0 /* Force hardware reset */ + +/* Write Register 10 (Miscellaneous Transmitter/Receiver Control Bits) */ +#define BIT6 1 /* 6 bit/8bit sync */ +#define LOOPMODE 2 /* SDLC Loop mode */ +#define ABUNDER 4 /* Abort/flag on SDLC xmit underrun */ +#define MARKIDLE 8 /* Mark/flag on idle */ +#define GAOP 0x10 /* Go active on poll */ +#define NRZ 0 /* NRZ mode */ +#define NRZI 0x20 /* NRZI mode */ +#define FM1 0x40 /* FM1 (transition = 1) */ +#define FM0 0x60 /* FM0 (transition = 0) */ +#define CRCPS 0x80 /* CRC Preset I/O */ + +/* Write Register 11 (Clock Mode Control) */ +#define TRxCXT 0 /* TRxC = Xtal output */ +#define TRxCTC 1 /* TRxC = Transmit clock */ +#define TRxCBR 2 /* TRxC = BR Generator Output */ +#define TRxCDP 3 /* TRxC = DPLL output */ +#define TRxCOI 4 /* TRxC O/I */ +#define TCRTxCP 0 /* Transmit clock = RTxC pin */ +#define TCTRxCP 8 /* Transmit clock = TRxC pin */ +#define TCBR 0x10 /* Transmit clock = BR Generator output */ +#define TCDPLL 0x18 /* Transmit clock = DPLL output */ +#define RCRTxCP 0 /* Receive clock = RTxC pin */ +#define RCTRxCP 0x20 /* Receive clock = TRxC pin */ +#define RCBR 0x40 /* Receive clock = BR Generator output */ +#define RCDPLL 0x60 /* Receive clock = DPLL output */ +#define RTxCX 0x80 /* RTxC Xtal/No Xtal */ + +/* Write Register 12 (Lower Byte of Baud Rate Generator Time Constant) */ + +/* Write Register 13 (Upper Byte of Baud Rate Generator Time Constant) */ + +/* Write Register 14 (Miscellaneous Control Bits) */ +#define BRENABL 1 /* Baud rate generator enable */ +#define BRSRC 2 /* Baud rate generator source */ +#define DTRREQ 4 /* 
DTR/Request function */ +#define AUTOECHO 8 /* Auto Echo */ +#define LOOPBAK 0x10 /* Local loopback */ +#define SEARCH 0x20 /* Enter search mode */ +#define RMC 0x40 /* Reset missing clock */ +#define DISDPLL 0x60 /* Disable DPLL */ +#define SSBR 0x80 /* Set DPLL source = BR generator */ +#define SSRTxC 0xa0 /* Set DPLL source = RTxC */ +#define SFMM 0xc0 /* Set FM mode */ +#define SNRZI 0xe0 /* Set NRZI mode */ + +/* Write Register 15 (External/Status Interrupt Control) */ +#define WR7P_EN 1 /* WR7 Prime SDLC Feature Enable */ +#define ZCIE 2 /* Zero count IE */ +#define DCDIE 8 /* DCD IE */ +#define SYNCIE 0x10 /* Sync/hunt IE */ +#define CTSIE 0x20 /* CTS IE */ +#define TxUIE 0x40 /* Tx Underrun/EOM IE */ +#define BRKIE 0x80 /* Break/Abort IE */ + + +/* Read Register 0 (Transmit/Receive Buffer Status and External Status) */ +#define Rx_CH_AV 0x1 /* Rx Character Available */ +#define ZCOUNT 0x2 /* Zero count */ +#define Tx_BUF_EMP 0x4 /* Tx Buffer empty */ +#define DCD 0x8 /* DCD */ +#define SYNC_HUNT 0x10 /* Sync/hunt */ +#define CTS 0x20 /* CTS */ +#define TxEOM 0x40 /* Tx underrun */ +#define BRK_ABRT 0x80 /* Break/Abort */ + +/* Read Register 1 (Special Receive Condition Status) */ +#define ALL_SNT 0x1 /* All sent */ +/* Residue Data for 8 Rx bits/char programmed */ +#define RES3 0x8 /* 0/3 */ +#define RES4 0x4 /* 0/4 */ +#define RES5 0xc /* 0/5 */ +#define RES6 0x2 /* 0/6 */ +#define RES7 0xa /* 0/7 */ +#define RES8 0x6 /* 0/8 */ +#define RES18 0xe /* 1/8 */ +#define RES28 0x0 /* 2/8 */ +/* Special Rx Condition Interrupts */ +#define PAR_ERR 0x10 /* Parity Error */ +#define Rx_OVR 0x20 /* Rx Overrun Error */ +#define FRM_ERR 0x40 /* CRC/Framing Error */ +#define END_FR 0x80 /* End of Frame (SDLC) */ + +/* Read Register 2 (Interrupt Vector (WR2) -- channel A). */ + +/* Read Register 2 (Modified Interrupt Vector -- channel B). */ + +/* Read Register 3 (Interrupt Pending Bits -- channel A only). 
*/ +#define CHBEXT 0x1 /* Channel B Ext/Stat IP */ +#define CHBTxIP 0x2 /* Channel B Tx IP */ +#define CHBRxIP 0x4 /* Channel B Rx IP */ +#define CHAEXT 0x8 /* Channel A Ext/Stat IP */ +#define CHATxIP 0x10 /* Channel A Tx IP */ +#define CHARxIP 0x20 /* Channel A Rx IP */ + +/* Read Register 6 (SDLC FIFO Status and Byte Count LSB) */ + +/* Read Register 7 (SDLC FIFO Status and Byte Count MSB) */ + +/* Read Register 8 (Receive Data) */ + +/* Read Register 10 (Miscellaneous Status Bits) */ +#define ONLOOP 2 /* On loop */ +#define LOOPSEND 0x10 /* Loop sending */ +#define CLK2MIS 0x40 /* Two clocks missing */ +#define CLK1MIS 0x80 /* One clock missing */ + +/* Read Register 12 (Lower Byte of Baud Rate Generator Constant (WR12)) */ + +/* Read Register 13 (Upper Byte of Baud Rate Generator Constant (WR13) */ + +/* Read Register 15 (External/Status Interrupt Control (WR15)) */ + +#endif /* _SERIAL_ZS_H */ diff --git a/drivers/tc/Makefile b/drivers/tc/Makefile index 96734269221..c899246bd36 100644 --- a/drivers/tc/Makefile +++ b/drivers/tc/Makefile @@ -5,7 +5,6 @@ # Object file lists. obj-$(CONFIG_TC) += tc.o tc-driver.o -obj-$(CONFIG_ZS) += zs.o obj-$(CONFIG_VT) += lk201.o lk201-map.o lk201-remap.o $(obj)/lk201-map.o: $(obj)/lk201-map.c diff --git a/drivers/tc/zs.c b/drivers/tc/zs.c deleted file mode 100644 index ed979f13908..00000000000 --- a/drivers/tc/zs.c +++ /dev/null @@ -1,2203 +0,0 @@ -/* - * decserial.c: Serial port driver for IOASIC DECstations. - * - * Derived from drivers/sbus/char/sunserial.c by Paul Mackerras. - * Derived from drivers/macintosh/macserial.c by Harald Koerfgen. - * - * DECstation changes - * Copyright (C) 1998-2000 Harald Koerfgen - * Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Maciej W. Rozycki - * - * For the rest of the code the original Copyright applies: - * Copyright (C) 1996 Paul Mackerras (Paul.Mackerras@cs.anu.edu.au) - * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) - * - * - * Note: for IOASIC systems the wiring is as follows: - * - * mouse/keyboard: - * DIN-7 MJ-4 signal SCC - * 2 1 TxD <- A.TxD - * 3 4 RxD -> A.RxD - * - * EIA-232/EIA-423: - * DB-25 MMJ-6 signal SCC - * 2 2 TxD <- B.TxD - * 3 5 RxD -> B.RxD - * 4 RTS <- ~A.RTS - * 5 CTS -> ~B.CTS - * 6 6 DSR -> ~A.SYNC - * 8 CD -> ~B.DCD - * 12 DSRS(DCE) -> ~A.CTS (*) - * 15 TxC -> B.TxC - * 17 RxC -> B.RxC - * 20 1 DTR <- ~A.DTR - * 22 RI -> ~A.DCD - * 23 DSRS(DTE) <- ~B.RTS - * - * (*) EIA-232 defines the signal at this pin to be SCD, while DSRS(DCE) - * is shared with DSRS(DTE) at pin 23. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_SERIAL_DEC_CONSOLE -#include -#endif - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_KGDB -#include -#endif -#ifdef CONFIG_MAGIC_SYSRQ -#include -#endif - -#include "zs.h" - -/* - * It would be nice to dynamically allocate everything that - * depends on NUM_SERIAL, so we could support any number of - * Z8530s, but for now... 
- */ -#define NUM_SERIAL 2 /* Max number of ZS chips supported */ -#define NUM_CHANNELS (NUM_SERIAL * 2) /* 2 channels per chip */ -#define CHANNEL_A_NR (zs_parms->channel_a_offset > zs_parms->channel_b_offset) - /* Number of channel A in the chip */ -#define ZS_CHAN_IO_SIZE 8 -#define ZS_CLOCK 7372800 /* Z8530 RTxC input clock rate */ - -#define RECOVERY_DELAY udelay(2) - -struct zs_parms { - unsigned long scc0; - unsigned long scc1; - int channel_a_offset; - int channel_b_offset; - int irq0; - int irq1; - int clock; -}; - -static struct zs_parms *zs_parms; - -#ifdef CONFIG_MACH_DECSTATION -static struct zs_parms ds_parms = { - scc0 : IOASIC_SCC0, - scc1 : IOASIC_SCC1, - channel_a_offset : 1, - channel_b_offset : 9, - irq0 : -1, - irq1 : -1, - clock : ZS_CLOCK -}; -#endif - -#ifdef CONFIG_MACH_DECSTATION -#define DS_BUS_PRESENT (IOASIC) -#else -#define DS_BUS_PRESENT 0 -#endif - -#define BUS_PRESENT (DS_BUS_PRESENT) - -DEFINE_SPINLOCK(zs_lock); - -struct dec_zschannel zs_channels[NUM_CHANNELS]; -struct dec_serial zs_soft[NUM_CHANNELS]; -int zs_channels_found; -struct dec_serial *zs_chain; /* list of all channels */ - -struct tty_struct zs_ttys[NUM_CHANNELS]; - -#ifdef CONFIG_SERIAL_DEC_CONSOLE -static struct console zs_console; -#endif -#if defined(CONFIG_SERIAL_DEC_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) && \ - !defined(MODULE) -static unsigned long break_pressed; /* break, really ... 
*/ -#endif - -static unsigned char zs_init_regs[16] __initdata = { - 0, /* write 0 */ - 0, /* write 1 */ - 0, /* write 2 */ - 0, /* write 3 */ - (X16CLK), /* write 4 */ - 0, /* write 5 */ - 0, 0, 0, /* write 6, 7, 8 */ - (MIE | DLC | NV), /* write 9 */ - (NRZ), /* write 10 */ - (TCBR | RCBR), /* write 11 */ - 0, 0, /* BRG time constant, write 12 + 13 */ - (BRSRC | BRENABL), /* write 14 */ - 0 /* write 15 */ -}; - -static struct tty_driver *serial_driver; - -/* serial subtype definitions */ -#define SERIAL_TYPE_NORMAL 1 - -/* number of characters left in xmit buffer before we ask for more */ -#define WAKEUP_CHARS 256 - -/* - * Debugging. - */ -#undef SERIAL_DEBUG_OPEN -#undef SERIAL_DEBUG_FLOW -#undef SERIAL_DEBUG_THROTTLE -#undef SERIAL_PARANOIA_CHECK - -#undef ZS_DEBUG_REGS - -#ifdef SERIAL_DEBUG_THROTTLE -#define _tty_name(tty,buf) tty_name(tty,buf) -#endif - -#define RS_STROBE_TIME 10 -#define RS_ISR_PASS_LIMIT 256 - -static void probe_sccs(void); -static void change_speed(struct dec_serial *info); -static void rs_wait_until_sent(struct tty_struct *tty, int timeout); - -static inline int serial_paranoia_check(struct dec_serial *info, - char *name, const char *routine) -{ -#ifdef SERIAL_PARANOIA_CHECK - static const char *badmagic = - "Warning: bad magic number for serial struct %s in %s\n"; - static const char *badinfo = - "Warning: null mac_serial for %s in %s\n"; - - if (!info) { - printk(badinfo, name, routine); - return 1; - } - if (info->magic != SERIAL_MAGIC) { - printk(badmagic, name, routine); - return 1; - } -#endif - return 0; -} - -/* - * This is used to figure out the divisor speeds and the timeouts - */ -static int baud_table[] = { - 0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800, - 9600, 19200, 38400, 57600, 115200, 0 }; - -/* - * Reading and writing Z8530 registers. 
- */ -static inline unsigned char read_zsreg(struct dec_zschannel *channel, - unsigned char reg) -{ - unsigned char retval; - - if (reg != 0) { - *channel->control = reg & 0xf; - fast_iob(); RECOVERY_DELAY; - } - retval = *channel->control; - RECOVERY_DELAY; - return retval; -} - -static inline void write_zsreg(struct dec_zschannel *channel, - unsigned char reg, unsigned char value) -{ - if (reg != 0) { - *channel->control = reg & 0xf; - fast_iob(); RECOVERY_DELAY; - } - *channel->control = value; - fast_iob(); RECOVERY_DELAY; - return; -} - -static inline unsigned char read_zsdata(struct dec_zschannel *channel) -{ - unsigned char retval; - - retval = *channel->data; - RECOVERY_DELAY; - return retval; -} - -static inline void write_zsdata(struct dec_zschannel *channel, - unsigned char value) -{ - *channel->data = value; - fast_iob(); RECOVERY_DELAY; - return; -} - -static inline void load_zsregs(struct dec_zschannel *channel, - unsigned char *regs) -{ -/* ZS_CLEARERR(channel); - ZS_CLEARFIFO(channel); */ - /* Load 'em up */ - write_zsreg(channel, R3, regs[R3] & ~RxENABLE); - write_zsreg(channel, R5, regs[R5] & ~TxENAB); - write_zsreg(channel, R4, regs[R4]); - write_zsreg(channel, R9, regs[R9]); - write_zsreg(channel, R1, regs[R1]); - write_zsreg(channel, R2, regs[R2]); - write_zsreg(channel, R10, regs[R10]); - write_zsreg(channel, R11, regs[R11]); - write_zsreg(channel, R12, regs[R12]); - write_zsreg(channel, R13, regs[R13]); - write_zsreg(channel, R14, regs[R14]); - write_zsreg(channel, R15, regs[R15]); - write_zsreg(channel, R3, regs[R3]); - write_zsreg(channel, R5, regs[R5]); - return; -} - -/* Sets or clears DTR/RTS on the requested line */ -static inline void zs_rtsdtr(struct dec_serial *info, int which, int set) -{ - unsigned long flags; - - spin_lock_irqsave(&zs_lock, flags); - if (info->zs_channel != info->zs_chan_a) { - if (set) { - info->zs_chan_a->curregs[5] |= (which & (RTS | DTR)); - } else { - info->zs_chan_a->curregs[5] &= ~(which & (RTS | DTR)); - } 
- write_zsreg(info->zs_chan_a, 5, info->zs_chan_a->curregs[5]); - } - spin_unlock_irqrestore(&zs_lock, flags); -} - -/* Utility routines for the Zilog */ -static inline int get_zsbaud(struct dec_serial *ss) -{ - struct dec_zschannel *channel = ss->zs_channel; - int brg; - - /* The baud rate is split up between two 8-bit registers in - * what is termed 'BRG time constant' format in my docs for - * the chip, it is a function of the clk rate the chip is - * receiving which happens to be constant. - */ - brg = (read_zsreg(channel, 13) << 8); - brg |= read_zsreg(channel, 12); - return BRG_TO_BPS(brg, (zs_parms->clock/(ss->clk_divisor))); -} - -/* On receive, this clears errors and the receiver interrupts */ -static inline void rs_recv_clear(struct dec_zschannel *zsc) -{ - write_zsreg(zsc, 0, ERR_RES); - write_zsreg(zsc, 0, RES_H_IUS); /* XXX this is unnecessary */ -} - -/* - * ---------------------------------------------------------------------- - * - * Here starts the interrupt handling routines. All of the following - * subroutines are declared as inline and are folded into - * rs_interrupt(). They were separated out for readability's sake. - * - * - Ted Ts'o (tytso@mit.edu), 7-Mar-93 - * ----------------------------------------------------------------------- - */ - -/* - * This routine is used by the interrupt handler to schedule - * processing in the software interrupt portion of the driver. 
- */ -static void rs_sched_event(struct dec_serial *info, int event) -{ - info->event |= 1 << event; - tasklet_schedule(&info->tlet); -} - -static void receive_chars(struct dec_serial *info) -{ - struct tty_struct *tty = info->tty; - unsigned char ch, stat, flag; - - while ((read_zsreg(info->zs_channel, R0) & Rx_CH_AV) != 0) { - - stat = read_zsreg(info->zs_channel, R1); - ch = read_zsdata(info->zs_channel); - - if (!tty && (!info->hook || !info->hook->rx_char)) - continue; - - flag = TTY_NORMAL; - if (info->tty_break) { - info->tty_break = 0; - flag = TTY_BREAK; - if (info->flags & ZILOG_SAK) - do_SAK(tty); - /* Ignore the null char got when BREAK is removed. */ - if (ch == 0) - continue; - } else { - if (stat & Rx_OVR) { - flag = TTY_OVERRUN; - } else if (stat & FRM_ERR) { - flag = TTY_FRAME; - } else if (stat & PAR_ERR) { - flag = TTY_PARITY; - } - if (flag != TTY_NORMAL) - /* reset the error indication */ - write_zsreg(info->zs_channel, R0, ERR_RES); - } - -#if defined(CONFIG_SERIAL_DEC_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) && \ - !defined(MODULE) - if (break_pressed && info->line == zs_console.index) { - /* Ignore the null char got when BREAK is removed. 
*/ - if (ch == 0) - continue; - if (time_before(jiffies, break_pressed + HZ * 5)) { - handle_sysrq(ch, NULL); - break_pressed = 0; - continue; - } - break_pressed = 0; - } -#endif - - if (info->hook && info->hook->rx_char) { - (*info->hook->rx_char)(ch, flag); - return; - } - - tty_insert_flip_char(tty, ch, flag); - } - if (tty) - tty_flip_buffer_push(tty); -} - -static void transmit_chars(struct dec_serial *info) -{ - if ((read_zsreg(info->zs_channel, R0) & Tx_BUF_EMP) == 0) - return; - info->tx_active = 0; - - if (info->x_char) { - /* Send next char */ - write_zsdata(info->zs_channel, info->x_char); - info->x_char = 0; - info->tx_active = 1; - return; - } - - if ((info->xmit_cnt <= 0) || (info->tty && info->tty->stopped) - || info->tx_stopped) { - write_zsreg(info->zs_channel, R0, RES_Tx_P); - return; - } - /* Send char */ - write_zsdata(info->zs_channel, info->xmit_buf[info->xmit_tail++]); - info->xmit_tail = info->xmit_tail & (SERIAL_XMIT_SIZE-1); - info->xmit_cnt--; - info->tx_active = 1; - - if (info->xmit_cnt < WAKEUP_CHARS) - rs_sched_event(info, RS_EVENT_WRITE_WAKEUP); -} - -static void status_handle(struct dec_serial *info) -{ - unsigned char stat; - - /* Get status from Read Register 0 */ - stat = read_zsreg(info->zs_channel, R0); - - if ((stat & BRK_ABRT) && !(info->read_reg_zero & BRK_ABRT)) { -#if defined(CONFIG_SERIAL_DEC_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) && \ - !defined(MODULE) - if (info->line == zs_console.index) { - if (!break_pressed) - break_pressed = jiffies; - } else -#endif - info->tty_break = 1; - } - - if (info->zs_channel != info->zs_chan_a) { - - /* Check for DCD transitions */ - if (info->tty && !C_CLOCAL(info->tty) && - ((stat ^ info->read_reg_zero) & DCD) != 0 ) { - if (stat & DCD) { - wake_up_interruptible(&info->open_wait); - } else { - tty_hangup(info->tty); - } - } - - /* Check for CTS transitions */ - if (info->tty && C_CRTSCTS(info->tty)) { - if ((stat & CTS) != 0) { - if (info->tx_stopped) { - info->tx_stopped = 0; - if 
(!info->tx_active) - transmit_chars(info); - } - } else { - info->tx_stopped = 1; - } - } - - } - - /* Clear status condition... */ - write_zsreg(info->zs_channel, R0, RES_EXT_INT); - info->read_reg_zero = stat; -} - -/* - * This is the serial driver's generic interrupt routine - */ -static irqreturn_t rs_interrupt(int irq, void *dev_id) -{ - struct dec_serial *info = (struct dec_serial *) dev_id; - irqreturn_t status = IRQ_NONE; - unsigned char zs_intreg; - int shift; - - /* NOTE: The read register 3, which holds the irq status, - * does so for both channels on each chip. Although - * the status value itself must be read from the A - * channel and is only valid when read from channel A. - * Yes... broken hardware... - */ -#define CHAN_IRQMASK (CHBRxIP | CHBTxIP | CHBEXT) - - if (info->zs_chan_a == info->zs_channel) - shift = 3; /* Channel A */ - else - shift = 0; /* Channel B */ - - for (;;) { - zs_intreg = read_zsreg(info->zs_chan_a, R3) >> shift; - if ((zs_intreg & CHAN_IRQMASK) == 0) - break; - - status = IRQ_HANDLED; - - if (zs_intreg & CHBRxIP) { - receive_chars(info); - } - if (zs_intreg & CHBTxIP) { - transmit_chars(info); - } - if (zs_intreg & CHBEXT) { - status_handle(info); - } - } - - /* Why do we need this ? */ - write_zsreg(info->zs_channel, 0, RES_H_IUS); - - return status; -} - -#ifdef ZS_DEBUG_REGS -void zs_dump (void) { - int i, j; - for (i = 0; i < zs_channels_found; i++) { - struct dec_zschannel *ch = &zs_channels[i]; - if ((long)ch->control == UNI_IO_BASE+UNI_SCC1A_CTRL) { - for (j = 0; j < 15; j++) { - printk("W%d = 0x%x\t", - j, (int)ch->curregs[j]); - } - for (j = 0; j < 15; j++) { - printk("R%d = 0x%x\t", - j, (int)read_zsreg(ch,j)); - } - printk("\n\n"); - } - } -} -#endif - -/* - * ------------------------------------------------------------------- - * Here ends the serial interrupt routines. 
- * ------------------------------------------------------------------- - */ - -/* - * ------------------------------------------------------------ - * rs_stop() and rs_start() - * - * This routines are called before setting or resetting tty->stopped. - * ------------------------------------------------------------ - */ -static void rs_stop(struct tty_struct *tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - - if (serial_paranoia_check(info, tty->name, "rs_stop")) - return; - -#if 1 - spin_lock_irqsave(&zs_lock, flags); - if (info->zs_channel->curregs[5] & TxENAB) { - info->zs_channel->curregs[5] &= ~TxENAB; - write_zsreg(info->zs_channel, 5, info->zs_channel->curregs[5]); - } - spin_unlock_irqrestore(&zs_lock, flags); -#endif -} - -static void rs_start(struct tty_struct *tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - - if (serial_paranoia_check(info, tty->name, "rs_start")) - return; - - spin_lock_irqsave(&zs_lock, flags); -#if 1 - if (info->xmit_cnt && info->xmit_buf && !(info->zs_channel->curregs[5] & TxENAB)) { - info->zs_channel->curregs[5] |= TxENAB; - write_zsreg(info->zs_channel, 5, info->zs_channel->curregs[5]); - } -#else - if (info->xmit_cnt && info->xmit_buf && !info->tx_active) { - transmit_chars(info); - } -#endif - spin_unlock_irqrestore(&zs_lock, flags); -} - -/* - * This routine is used to handle the "bottom half" processing for the - * serial driver, known also the "software interrupt" processing. - * This processing is done at the kernel interrupt level, after the - * rs_interrupt() has returned, BUT WITH INTERRUPTS TURNED ON. This - * is where time-consuming activities which can not be done in the - * interrupt driver proper are done; the interrupt driver schedules - * them using rs_sched_event(), and they get done here. 
- */ - -static void do_softint(unsigned long private_) -{ - struct dec_serial *info = (struct dec_serial *) private_; - struct tty_struct *tty; - - tty = info->tty; - if (!tty) - return; - - if (test_and_clear_bit(RS_EVENT_WRITE_WAKEUP, &info->event)) - tty_wakeup(tty); -} - -static int zs_startup(struct dec_serial * info) -{ - unsigned long flags; - - if (info->flags & ZILOG_INITIALIZED) - return 0; - - if (!info->xmit_buf) { - info->xmit_buf = (unsigned char *) get_zeroed_page(GFP_KERNEL); - if (!info->xmit_buf) - return -ENOMEM; - } - - spin_lock_irqsave(&zs_lock, flags); - -#ifdef SERIAL_DEBUG_OPEN - printk("starting up ttyS%d (irq %d)...", info->line, info->irq); -#endif - - /* - * Clear the receive FIFO. - */ - ZS_CLEARFIFO(info->zs_channel); - info->xmit_fifo_size = 1; - - /* - * Clear the interrupt registers. - */ - write_zsreg(info->zs_channel, R0, ERR_RES); - write_zsreg(info->zs_channel, R0, RES_H_IUS); - - /* - * Set the speed of the serial port - */ - change_speed(info); - - /* - * Turn on RTS and DTR. - */ - zs_rtsdtr(info, RTS | DTR, 1); - - /* - * Finally, enable sequencing and interrupts - */ - info->zs_channel->curregs[R1] &= ~RxINT_MASK; - info->zs_channel->curregs[R1] |= (RxINT_ALL | TxINT_ENAB | - EXT_INT_ENAB); - info->zs_channel->curregs[R3] |= RxENABLE; - info->zs_channel->curregs[R5] |= TxENAB; - info->zs_channel->curregs[R15] |= (DCDIE | CTSIE | TxUIE | BRKIE); - write_zsreg(info->zs_channel, R1, info->zs_channel->curregs[R1]); - write_zsreg(info->zs_channel, R3, info->zs_channel->curregs[R3]); - write_zsreg(info->zs_channel, R5, info->zs_channel->curregs[R5]); - write_zsreg(info->zs_channel, R15, info->zs_channel->curregs[R15]); - - /* - * And clear the interrupt registers again for luck. 
- */ - write_zsreg(info->zs_channel, R0, ERR_RES); - write_zsreg(info->zs_channel, R0, RES_H_IUS); - - /* Save the current value of RR0 */ - info->read_reg_zero = read_zsreg(info->zs_channel, R0); - - if (info->tty) - clear_bit(TTY_IO_ERROR, &info->tty->flags); - info->xmit_cnt = info->xmit_head = info->xmit_tail = 0; - - info->flags |= ZILOG_INITIALIZED; - spin_unlock_irqrestore(&zs_lock, flags); - return 0; -} - -/* - * This routine will shutdown a serial port; interrupts are disabled, and - * DTR is dropped if the hangup on close termio flag is on. - */ -static void shutdown(struct dec_serial * info) -{ - unsigned long flags; - - if (!(info->flags & ZILOG_INITIALIZED)) - return; - -#ifdef SERIAL_DEBUG_OPEN - printk("Shutting down serial port %d (irq %d)....", info->line, - info->irq); -#endif - - spin_lock_irqsave(&zs_lock, flags); - - if (info->xmit_buf) { - free_page((unsigned long) info->xmit_buf); - info->xmit_buf = 0; - } - - info->zs_channel->curregs[1] = 0; - write_zsreg(info->zs_channel, 1, info->zs_channel->curregs[1]); /* no interrupts */ - - info->zs_channel->curregs[3] &= ~RxENABLE; - write_zsreg(info->zs_channel, 3, info->zs_channel->curregs[3]); - - info->zs_channel->curregs[5] &= ~TxENAB; - write_zsreg(info->zs_channel, 5, info->zs_channel->curregs[5]); - if (!info->tty || C_HUPCL(info->tty)) { - zs_rtsdtr(info, RTS | DTR, 0); - } - - if (info->tty) - set_bit(TTY_IO_ERROR, &info->tty->flags); - - info->flags &= ~ZILOG_INITIALIZED; - spin_unlock_irqrestore(&zs_lock, flags); -} - -/* - * This routine is called to set the UART divisor registers to match - * the specified baud rate for a serial port. 
- */ -static void change_speed(struct dec_serial *info) -{ - unsigned cflag; - int i; - int brg, bits; - unsigned long flags; - - if (!info->hook) { - if (!info->tty || !info->tty->termios) - return; - cflag = info->tty->termios->c_cflag; - if (!info->port) - return; - } else { - cflag = info->hook->cflags; - } - - i = cflag & CBAUD; - if (i & CBAUDEX) { - i &= ~CBAUDEX; - if (i < 1 || i > 2) { - if (!info->hook) - info->tty->termios->c_cflag &= ~CBAUDEX; - else - info->hook->cflags &= ~CBAUDEX; - } else - i += 15; - } - - spin_lock_irqsave(&zs_lock, flags); - info->zs_baud = baud_table[i]; - if (info->zs_baud) { - brg = BPS_TO_BRG(info->zs_baud, zs_parms->clock/info->clk_divisor); - info->zs_channel->curregs[12] = (brg & 255); - info->zs_channel->curregs[13] = ((brg >> 8) & 255); - zs_rtsdtr(info, DTR, 1); - } else { - zs_rtsdtr(info, RTS | DTR, 0); - return; - } - - /* byte size and parity */ - info->zs_channel->curregs[3] &= ~RxNBITS_MASK; - info->zs_channel->curregs[5] &= ~TxNBITS_MASK; - switch (cflag & CSIZE) { - case CS5: - bits = 7; - info->zs_channel->curregs[3] |= Rx5; - info->zs_channel->curregs[5] |= Tx5; - break; - case CS6: - bits = 8; - info->zs_channel->curregs[3] |= Rx6; - info->zs_channel->curregs[5] |= Tx6; - break; - case CS7: - bits = 9; - info->zs_channel->curregs[3] |= Rx7; - info->zs_channel->curregs[5] |= Tx7; - break; - case CS8: - default: /* defaults to 8 bits */ - bits = 10; - info->zs_channel->curregs[3] |= Rx8; - info->zs_channel->curregs[5] |= Tx8; - break; - } - - info->timeout = ((info->xmit_fifo_size*HZ*bits) / info->zs_baud); - info->timeout += HZ/50; /* Add .02 seconds of slop */ - - info->zs_channel->curregs[4] &= ~(SB_MASK | PAR_ENA | PAR_EVEN); - if (cflag & CSTOPB) { - info->zs_channel->curregs[4] |= SB2; - } else { - info->zs_channel->curregs[4] |= SB1; - } - if (cflag & PARENB) { - info->zs_channel->curregs[4] |= PAR_ENA; - } - if (!(cflag & PARODD)) { - info->zs_channel->curregs[4] |= PAR_EVEN; - } - - if (!(cflag & 
CLOCAL)) { - if (!(info->zs_channel->curregs[15] & DCDIE)) - info->read_reg_zero = read_zsreg(info->zs_channel, 0); - info->zs_channel->curregs[15] |= DCDIE; - } else - info->zs_channel->curregs[15] &= ~DCDIE; - if (cflag & CRTSCTS) { - info->zs_channel->curregs[15] |= CTSIE; - if ((read_zsreg(info->zs_channel, 0) & CTS) == 0) - info->tx_stopped = 1; - } else { - info->zs_channel->curregs[15] &= ~CTSIE; - info->tx_stopped = 0; - } - - /* Load up the new values */ - load_zsregs(info->zs_channel, info->zs_channel->curregs); - - spin_unlock_irqrestore(&zs_lock, flags); -} - -static void rs_flush_chars(struct tty_struct *tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - - if (serial_paranoia_check(info, tty->name, "rs_flush_chars")) - return; - - if (info->xmit_cnt <= 0 || tty->stopped || info->tx_stopped || - !info->xmit_buf) - return; - - /* Enable transmitter */ - spin_lock_irqsave(&zs_lock, flags); - transmit_chars(info); - spin_unlock_irqrestore(&zs_lock, flags); -} - -static int rs_write(struct tty_struct * tty, - const unsigned char *buf, int count) -{ - int c, total = 0; - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - - if (serial_paranoia_check(info, tty->name, "rs_write")) - return 0; - - if (!tty || !info->xmit_buf) - return 0; - - while (1) { - spin_lock_irqsave(&zs_lock, flags); - c = min(count, min(SERIAL_XMIT_SIZE - info->xmit_cnt - 1, - SERIAL_XMIT_SIZE - info->xmit_head)); - if (c <= 0) - break; - - memcpy(info->xmit_buf + info->xmit_head, buf, c); - info->xmit_head = (info->xmit_head + c) & (SERIAL_XMIT_SIZE-1); - info->xmit_cnt += c; - spin_unlock_irqrestore(&zs_lock, flags); - buf += c; - count -= c; - total += c; - } - - if (info->xmit_cnt && !tty->stopped && !info->tx_stopped - && !info->tx_active) - transmit_chars(info); - spin_unlock_irqrestore(&zs_lock, flags); - return total; -} - -static int rs_write_room(struct tty_struct *tty) -{ - struct 
dec_serial *info = (struct dec_serial *)tty->driver_data; - int ret; - - if (serial_paranoia_check(info, tty->name, "rs_write_room")) - return 0; - ret = SERIAL_XMIT_SIZE - info->xmit_cnt - 1; - if (ret < 0) - ret = 0; - return ret; -} - -static int rs_chars_in_buffer(struct tty_struct *tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - - if (serial_paranoia_check(info, tty->name, "rs_chars_in_buffer")) - return 0; - return info->xmit_cnt; -} - -static void rs_flush_buffer(struct tty_struct *tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - - if (serial_paranoia_check(info, tty->name, "rs_flush_buffer")) - return; - spin_lock_irq(&zs_lock); - info->xmit_cnt = info->xmit_head = info->xmit_tail = 0; - spin_unlock_irq(&zs_lock); - tty_wakeup(tty); -} - -/* - * ------------------------------------------------------------ - * rs_throttle() - * - * This routine is called by the upper-layer tty layer to signal that - * incoming characters should be throttled. 
- * ------------------------------------------------------------ - */ -static void rs_throttle(struct tty_struct * tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - -#ifdef SERIAL_DEBUG_THROTTLE - char buf[64]; - - printk("throttle %s: %d....\n", _tty_name(tty, buf), - tty->ldisc.chars_in_buffer(tty)); -#endif - - if (serial_paranoia_check(info, tty->name, "rs_throttle")) - return; - - if (I_IXOFF(tty)) { - spin_lock_irqsave(&zs_lock, flags); - info->x_char = STOP_CHAR(tty); - if (!info->tx_active) - transmit_chars(info); - spin_unlock_irqrestore(&zs_lock, flags); - } - - if (C_CRTSCTS(tty)) { - zs_rtsdtr(info, RTS, 0); - } -} - -static void rs_unthrottle(struct tty_struct * tty) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - -#ifdef SERIAL_DEBUG_THROTTLE - char buf[64]; - - printk("unthrottle %s: %d....\n", _tty_name(tty, buf), - tty->ldisc.chars_in_buffer(tty)); -#endif - - if (serial_paranoia_check(info, tty->name, "rs_unthrottle")) - return; - - if (I_IXOFF(tty)) { - spin_lock_irqsave(&zs_lock, flags); - if (info->x_char) - info->x_char = 0; - else { - info->x_char = START_CHAR(tty); - if (!info->tx_active) - transmit_chars(info); - } - spin_unlock_irqrestore(&zs_lock, flags); - } - - if (C_CRTSCTS(tty)) { - zs_rtsdtr(info, RTS, 1); - } -} - -/* - * ------------------------------------------------------------ - * rs_ioctl() and friends - * ------------------------------------------------------------ - */ - -static int get_serial_info(struct dec_serial * info, - struct serial_struct * retinfo) -{ - struct serial_struct tmp; - - if (!retinfo) - return -EFAULT; - memset(&tmp, 0, sizeof(tmp)); - tmp.type = info->type; - tmp.line = info->line; - tmp.port = info->port; - tmp.irq = info->irq; - tmp.flags = info->flags; - tmp.baud_base = info->baud_base; - tmp.close_delay = info->close_delay; - tmp.closing_wait = info->closing_wait; - tmp.custom_divisor = 
info->custom_divisor; - return copy_to_user(retinfo,&tmp,sizeof(*retinfo)) ? -EFAULT : 0; -} - -static int set_serial_info(struct dec_serial * info, - struct serial_struct * new_info) -{ - struct serial_struct new_serial; - struct dec_serial old_info; - int retval = 0; - - if (!new_info) - return -EFAULT; - copy_from_user(&new_serial,new_info,sizeof(new_serial)); - old_info = *info; - - if (!capable(CAP_SYS_ADMIN)) { - if ((new_serial.baud_base != info->baud_base) || - (new_serial.type != info->type) || - (new_serial.close_delay != info->close_delay) || - ((new_serial.flags & ~ZILOG_USR_MASK) != - (info->flags & ~ZILOG_USR_MASK))) - return -EPERM; - info->flags = ((info->flags & ~ZILOG_USR_MASK) | - (new_serial.flags & ZILOG_USR_MASK)); - info->custom_divisor = new_serial.custom_divisor; - goto check_and_exit; - } - - if (info->count > 1) - return -EBUSY; - - /* - * OK, past this point, all the error checking has been done. - * At this point, we start making changes..... - */ - - info->baud_base = new_serial.baud_base; - info->flags = ((info->flags & ~ZILOG_FLAGS) | - (new_serial.flags & ZILOG_FLAGS)); - info->type = new_serial.type; - info->close_delay = new_serial.close_delay; - info->closing_wait = new_serial.closing_wait; - -check_and_exit: - retval = zs_startup(info); - return retval; -} - -/* - * get_lsr_info - get line status register info - * - * Purpose: Let user call ioctl() to get info when the UART physically - * is emptied. On bus types like RS485, the transmitter must - * release the bus after transmitting. This must be done when - * the transmit shift register is empty, not be done when the - * transmit holding register is empty. This functionality - * allows an RS485 driver to be written in user space. 
- */ -static int get_lsr_info(struct dec_serial * info, unsigned int *value) -{ - unsigned char status; - - spin_lock(&zs_lock); - status = read_zsreg(info->zs_channel, 0); - spin_unlock_irq(&zs_lock); - put_user(status,value); - return 0; -} - -static int rs_tiocmget(struct tty_struct *tty, struct file *file) -{ - struct dec_serial * info = (struct dec_serial *)tty->driver_data; - unsigned char control, status_a, status_b; - unsigned int result; - - if (info->hook) - return -ENODEV; - - if (serial_paranoia_check(info, tty->name, __FUNCTION__)) - return -ENODEV; - - if (tty->flags & (1 << TTY_IO_ERROR)) - return -EIO; - - if (info->zs_channel == info->zs_chan_a) - result = 0; - else { - spin_lock(&zs_lock); - control = info->zs_chan_a->curregs[5]; - status_a = read_zsreg(info->zs_chan_a, 0); - status_b = read_zsreg(info->zs_channel, 0); - spin_unlock_irq(&zs_lock); - result = ((control & RTS) ? TIOCM_RTS: 0) - | ((control & DTR) ? TIOCM_DTR: 0) - | ((status_b & DCD) ? TIOCM_CAR: 0) - | ((status_a & DCD) ? TIOCM_RNG: 0) - | ((status_a & SYNC_HUNT) ? TIOCM_DSR: 0) - | ((status_b & CTS) ? 
TIOCM_CTS: 0); - } - return result; -} - -static int rs_tiocmset(struct tty_struct *tty, struct file *file, - unsigned int set, unsigned int clear) -{ - struct dec_serial * info = (struct dec_serial *)tty->driver_data; - - if (info->hook) - return -ENODEV; - - if (serial_paranoia_check(info, tty->name, __FUNCTION__)) - return -ENODEV; - - if (tty->flags & (1 << TTY_IO_ERROR)) - return -EIO; - - if (info->zs_channel == info->zs_chan_a) - return 0; - - spin_lock(&zs_lock); - if (set & TIOCM_RTS) - info->zs_chan_a->curregs[5] |= RTS; - if (set & TIOCM_DTR) - info->zs_chan_a->curregs[5] |= DTR; - if (clear & TIOCM_RTS) - info->zs_chan_a->curregs[5] &= ~RTS; - if (clear & TIOCM_DTR) - info->zs_chan_a->curregs[5] &= ~DTR; - write_zsreg(info->zs_chan_a, 5, info->zs_chan_a->curregs[5]); - spin_unlock_irq(&zs_lock); - return 0; -} - -/* - * rs_break - turn transmit break condition on/off - */ -static void rs_break(struct tty_struct *tty, int break_state) -{ - struct dec_serial *info = (struct dec_serial *) tty->driver_data; - unsigned long flags; - - if (serial_paranoia_check(info, tty->name, "rs_break")) - return; - if (!info->port) - return; - - spin_lock_irqsave(&zs_lock, flags); - if (break_state == -1) - info->zs_channel->curregs[5] |= SND_BRK; - else - info->zs_channel->curregs[5] &= ~SND_BRK; - write_zsreg(info->zs_channel, 5, info->zs_channel->curregs[5]); - spin_unlock_irqrestore(&zs_lock, flags); -} - -static int rs_ioctl(struct tty_struct *tty, struct file * file, - unsigned int cmd, unsigned long arg) -{ - struct dec_serial * info = (struct dec_serial *)tty->driver_data; - - if (info->hook) - return -ENODEV; - - if (serial_paranoia_check(info, tty->name, "rs_ioctl")) - return -ENODEV; - - if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && - (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGWILD) && - (cmd != TIOCSERSWILD) && (cmd != TIOCSERGSTRUCT)) { - if (tty->flags & (1 << TTY_IO_ERROR)) - return -EIO; - } - - switch (cmd) { - case TIOCGSERIAL: - if 
(!access_ok(VERIFY_WRITE, (void *)arg, - sizeof(struct serial_struct))) - return -EFAULT; - return get_serial_info(info, (struct serial_struct *)arg); - - case TIOCSSERIAL: - return set_serial_info(info, (struct serial_struct *)arg); - - case TIOCSERGETLSR: /* Get line status register */ - if (!access_ok(VERIFY_WRITE, (void *)arg, - sizeof(unsigned int))) - return -EFAULT; - return get_lsr_info(info, (unsigned int *)arg); - - case TIOCSERGSTRUCT: - if (!access_ok(VERIFY_WRITE, (void *)arg, - sizeof(struct dec_serial))) - return -EFAULT; - copy_from_user((struct dec_serial *)arg, info, - sizeof(struct dec_serial)); - return 0; - - default: - return -ENOIOCTLCMD; - } - return 0; -} - -static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) -{ - struct dec_serial *info = (struct dec_serial *)tty->driver_data; - int was_stopped; - - if (tty->termios->c_cflag == old_termios->c_cflag) - return; - was_stopped = info->tx_stopped; - - change_speed(info); - - if (was_stopped && !info->tx_stopped) - rs_start(tty); -} - -/* - * ------------------------------------------------------------ - * rs_close() - * - * This routine is called when the serial port gets closed. - * Wait for the last remaining data to be sent. - * ------------------------------------------------------------ - */ -static void rs_close(struct tty_struct *tty, struct file * filp) -{ - struct dec_serial * info = (struct dec_serial *)tty->driver_data; - unsigned long flags; - - if (!info || serial_paranoia_check(info, tty->name, "rs_close")) - return; - - spin_lock_irqsave(&zs_lock, flags); - - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&zs_lock, flags); - return; - } - -#ifdef SERIAL_DEBUG_OPEN - printk("rs_close ttyS%d, count = %d\n", info->line, info->count); -#endif - if ((tty->count == 1) && (info->count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. Info->count should always - * be one in these conditions. 
If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - printk("rs_close: bad serial port count; tty->count is 1, " - "info->count is %d\n", info->count); - info->count = 1; - } - if (--info->count < 0) { - printk("rs_close: bad serial port count for ttyS%d: %d\n", - info->line, info->count); - info->count = 0; - } - if (info->count) { - spin_unlock_irqrestore(&zs_lock, flags); - return; - } - info->flags |= ZILOG_CLOSING; - /* - * Now we wait for the transmit buffer to clear; and we notify - * the line discipline to only process XON/XOFF characters. - */ - tty->closing = 1; - if (info->closing_wait != ZILOG_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, info->closing_wait); - /* - * At this point we stop accepting input. To do this, we - * disable the receiver and receive interrupts. - */ - info->zs_channel->curregs[3] &= ~RxENABLE; - write_zsreg(info->zs_channel, 3, info->zs_channel->curregs[3]); - info->zs_channel->curregs[1] = 0; /* disable any rx ints */ - write_zsreg(info->zs_channel, 1, info->zs_channel->curregs[1]); - ZS_CLEARFIFO(info->zs_channel); - if (info->flags & ZILOG_INITIALIZED) { - /* - * Before we drop DTR, make sure the SCC transmitter - * has completely drained. 
- */ - rs_wait_until_sent(tty, info->timeout); - } - - shutdown(info); - if (tty->driver->flush_buffer) - tty->driver->flush_buffer(tty); - tty_ldisc_flush(tty); - tty->closing = 0; - info->event = 0; - info->tty = 0; - if (info->blocked_open) { - if (info->close_delay) { - msleep_interruptible(jiffies_to_msecs(info->close_delay)); - } - wake_up_interruptible(&info->open_wait); - } - info->flags &= ~(ZILOG_NORMAL_ACTIVE|ZILOG_CLOSING); - wake_up_interruptible(&info->close_wait); - spin_unlock_irqrestore(&zs_lock, flags); -} - -/* - * rs_wait_until_sent() --- wait until the transmitter is empty - */ -static void rs_wait_until_sent(struct tty_struct *tty, int timeout) -{ - struct dec_serial *info = (struct dec_serial *) tty->driver_data; - unsigned long orig_jiffies; - int char_time; - - if (serial_paranoia_check(info, tty->name, "rs_wait_until_sent")) - return; - - orig_jiffies = jiffies; - /* - * Set the check interval to be 1/5 of the estimated time to - * send a single character, and make it at least 1. The check - * interval should also be less than the timeout. - */ - char_time = (info->timeout - HZ/50) / info->xmit_fifo_size; - char_time = char_time / 5; - if (char_time == 0) - char_time = 1; - if (timeout) - char_time = min(char_time, timeout); - while ((read_zsreg(info->zs_channel, 1) & Tx_BUF_EMP) == 0) { - msleep_interruptible(jiffies_to_msecs(char_time)); - if (signal_pending(current)) - break; - if (timeout && time_after(jiffies, orig_jiffies + timeout)) - break; - } - current->state = TASK_RUNNING; -} - -/* - * rs_hangup() --- called by tty_hangup() when a hangup is signaled. 
- */ -static void rs_hangup(struct tty_struct *tty) -{ - struct dec_serial * info = (struct dec_serial *)tty->driver_data; - - if (serial_paranoia_check(info, tty->name, "rs_hangup")) - return; - - rs_flush_buffer(tty); - shutdown(info); - info->event = 0; - info->count = 0; - info->flags &= ~ZILOG_NORMAL_ACTIVE; - info->tty = 0; - wake_up_interruptible(&info->open_wait); -} - -/* - * ------------------------------------------------------------ - * rs_open() and friends - * ------------------------------------------------------------ - */ -static int block_til_ready(struct tty_struct *tty, struct file * filp, - struct dec_serial *info) -{ - DECLARE_WAITQUEUE(wait, current); - int retval; - int do_clocal = 0; - - /* - * If the device is in the middle of being closed, then block - * until it's done, and then try again. - */ - if (info->flags & ZILOG_CLOSING) { - interruptible_sleep_on(&info->close_wait); -#ifdef SERIAL_DO_RESTART - return ((info->flags & ZILOG_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS); -#else - return -EAGAIN; -#endif - } - - /* - * If non-blocking mode is set, or the port is not enabled, - * then make the check up front and then exit. - */ - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { - info->flags |= ZILOG_NORMAL_ACTIVE; - return 0; - } - - if (tty->termios->c_cflag & CLOCAL) - do_clocal = 1; - - /* - * Block waiting for the carrier detect and the line to become - * free (i.e., not in use by the callout). While we are in - * this loop, info->count is dropped by one, so that - * rs_close() knows when to free things. We restore it upon - * exit, either normal or abnormal. 
- */ - retval = 0; - add_wait_queue(&info->open_wait, &wait); -#ifdef SERIAL_DEBUG_OPEN - printk("block_til_ready before block: ttyS%d, count = %d\n", - info->line, info->count); -#endif - spin_lock(&zs_lock); - if (!tty_hung_up_p(filp)) - info->count--; - spin_unlock_irq(&zs_lock); - info->blocked_open++; - while (1) { - spin_lock(&zs_lock); - if (tty->termios->c_cflag & CBAUD) - zs_rtsdtr(info, RTS | DTR, 1); - spin_unlock_irq(&zs_lock); - set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || - !(info->flags & ZILOG_INITIALIZED)) { -#ifdef SERIAL_DO_RESTART - if (info->flags & ZILOG_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; -#else - retval = -EAGAIN; -#endif - break; - } - if (!(info->flags & ZILOG_CLOSING) && - (do_clocal || (read_zsreg(info->zs_channel, 0) & DCD))) - break; - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } -#ifdef SERIAL_DEBUG_OPEN - printk("block_til_ready blocking: ttyS%d, count = %d\n", - info->line, info->count); -#endif - schedule(); - } - current->state = TASK_RUNNING; - remove_wait_queue(&info->open_wait, &wait); - if (!tty_hung_up_p(filp)) - info->count++; - info->blocked_open--; -#ifdef SERIAL_DEBUG_OPEN - printk("block_til_ready after blocking: ttyS%d, count = %d\n", - info->line, info->count); -#endif - if (retval) - return retval; - info->flags |= ZILOG_NORMAL_ACTIVE; - return 0; -} - -/* - * This routine is called whenever a serial port is opened. It - * enables interrupts for a serial port, linking in its ZILOG structure into - * the IRQ chain. It also performs the serial-specific - * initialization for the tty structure. 
- */ -static int rs_open(struct tty_struct *tty, struct file * filp) -{ - struct dec_serial *info; - int retval, line; - - line = tty->index; - if ((line < 0) || (line >= zs_channels_found)) - return -ENODEV; - info = zs_soft + line; - - if (info->hook) - return -ENODEV; - - if (serial_paranoia_check(info, tty->name, "rs_open")) - return -ENODEV; -#ifdef SERIAL_DEBUG_OPEN - printk("rs_open %s, count = %d\n", tty->name, info->count); -#endif - - info->count++; - tty->driver_data = info; - info->tty = tty; - - /* - * If the port is the middle of closing, bail out now - */ - if (tty_hung_up_p(filp) || - (info->flags & ZILOG_CLOSING)) { - if (info->flags & ZILOG_CLOSING) - interruptible_sleep_on(&info->close_wait); -#ifdef SERIAL_DO_RESTART - return ((info->flags & ZILOG_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS); -#else - return -EAGAIN; -#endif - } - - /* - * Start up serial port - */ - retval = zs_startup(info); - if (retval) - return retval; - - retval = block_til_ready(tty, filp, info); - if (retval) { -#ifdef SERIAL_DEBUG_OPEN - printk("rs_open returning after block_til_ready with %d\n", - retval); -#endif - return retval; - } - -#ifdef CONFIG_SERIAL_DEC_CONSOLE - if (zs_console.cflag && zs_console.index == line) { - tty->termios->c_cflag = zs_console.cflag; - zs_console.cflag = 0; - change_speed(info); - } -#endif - -#ifdef SERIAL_DEBUG_OPEN - printk("rs_open %s successful...", tty->name); -#endif -/* tty->low_latency = 1; */ - return 0; -} - -/* Finally, routines used to initialize the serial driver. */ - -static void __init show_serial_version(void) -{ - printk("DECstation Z8530 serial driver version 0.09\n"); -} - -/* Initialize Z8530s zs_channels - */ - -static void __init probe_sccs(void) -{ - struct dec_serial **pp; - int i, n, n_chips = 0, n_channels, chip, channel; - unsigned long flags; - - /* - * did we get here by accident? 
- */ - if(!BUS_PRESENT) { - printk("Not on JUNKIO machine, skipping probe_sccs\n"); - return; - } - - switch(mips_machtype) { -#ifdef CONFIG_MACH_DECSTATION - case MACH_DS5000_2X0: - case MACH_DS5900: - n_chips = 2; - zs_parms = &ds_parms; - zs_parms->irq0 = dec_interrupt[DEC_IRQ_SCC0]; - zs_parms->irq1 = dec_interrupt[DEC_IRQ_SCC1]; - break; - case MACH_DS5000_1XX: - n_chips = 2; - zs_parms = &ds_parms; - zs_parms->irq0 = dec_interrupt[DEC_IRQ_SCC0]; - zs_parms->irq1 = dec_interrupt[DEC_IRQ_SCC1]; - break; - case MACH_DS5000_XX: - n_chips = 1; - zs_parms = &ds_parms; - zs_parms->irq0 = dec_interrupt[DEC_IRQ_SCC0]; - break; -#endif - default: - panic("zs: unsupported bus"); - } - if (!zs_parms) - panic("zs: uninitialized parms"); - - pp = &zs_chain; - - n_channels = 0; - - for (chip = 0; chip < n_chips; chip++) { - for (channel = 0; channel <= 1; channel++) { - /* - * The sccs reside on the high byte of the 16 bit IOBUS - */ - zs_channels[n_channels].control = - (volatile void *)CKSEG1ADDR(dec_kn_slot_base + - (0 == chip ? zs_parms->scc0 : zs_parms->scc1) + - (0 == channel ? zs_parms->channel_a_offset : - zs_parms->channel_b_offset)); - zs_channels[n_channels].data = - zs_channels[n_channels].control + 4; - -#ifndef CONFIG_SERIAL_DEC_CONSOLE - /* - * We're called early and memory managment isn't up, yet. - * Thus request_region would fail. - */ - if (!request_region((unsigned long) - zs_channels[n_channels].control, - ZS_CHAN_IO_SIZE, "SCC")) - panic("SCC I/O region is not free"); -#endif - zs_soft[n_channels].zs_channel = &zs_channels[n_channels]; - /* HACK alert! */ - if (!(chip & 1)) - zs_soft[n_channels].irq = zs_parms->irq0; - else - zs_soft[n_channels].irq = zs_parms->irq1; - - /* - * Identification of channel A. Location of channel A - * inside chip depends on mapping of internal address - * the chip decodes channels by. - * CHANNEL_A_NR returns either 0 (in case of - * DECstations) or 1 (in case of Baget). 
- */ - if (CHANNEL_A_NR == channel) - zs_soft[n_channels].zs_chan_a = - &zs_channels[n_channels+1-2*CHANNEL_A_NR]; - else - zs_soft[n_channels].zs_chan_a = - &zs_channels[n_channels]; - - *pp = &zs_soft[n_channels]; - pp = &zs_soft[n_channels].zs_next; - n_channels++; - } - } - - *pp = 0; - zs_channels_found = n_channels; - - for (n = 0; n < zs_channels_found; n++) { - for (i = 0; i < 16; i++) { - zs_soft[n].zs_channel->curregs[i] = zs_init_regs[i]; - } - } - - spin_lock_irqsave(&zs_lock, flags); - for (n = 0; n < zs_channels_found; n++) { - if (n % 2 == 0) { - write_zsreg(zs_soft[n].zs_chan_a, R9, FHWRES); - udelay(10); - write_zsreg(zs_soft[n].zs_chan_a, R9, 0); - } - load_zsregs(zs_soft[n].zs_channel, - zs_soft[n].zs_channel->curregs); - } - spin_unlock_irqrestore(&zs_lock, flags); -} - -static const struct tty_operations serial_ops = { - .open = rs_open, - .close = rs_close, - .write = rs_write, - .flush_chars = rs_flush_chars, - .write_room = rs_write_room, - .chars_in_buffer = rs_chars_in_buffer, - .flush_buffer = rs_flush_buffer, - .ioctl = rs_ioctl, - .throttle = rs_throttle, - .unthrottle = rs_unthrottle, - .set_termios = rs_set_termios, - .stop = rs_stop, - .start = rs_start, - .hangup = rs_hangup, - .break_ctl = rs_break, - .wait_until_sent = rs_wait_until_sent, - .tiocmget = rs_tiocmget, - .tiocmset = rs_tiocmset, -}; - -/* zs_init inits the driver */ -int __init zs_init(void) -{ - int channel, i; - struct dec_serial *info; - - if(!BUS_PRESENT) - return -ENODEV; - - /* Find out how many Z8530 SCCs we have */ - if (zs_chain == 0) - probe_sccs(); - serial_driver = alloc_tty_driver(zs_channels_found); - if (!serial_driver) - return -ENOMEM; - - show_serial_version(); - - /* Initialize the tty_driver structure */ - /* Not all of this is exactly right for us. 
*/ - - serial_driver->owner = THIS_MODULE; - serial_driver->name = "ttyS"; - serial_driver->major = TTY_MAJOR; - serial_driver->minor_start = 64; - serial_driver->type = TTY_DRIVER_TYPE_SERIAL; - serial_driver->subtype = SERIAL_TYPE_NORMAL; - serial_driver->init_termios = tty_std_termios; - serial_driver->init_termios.c_cflag = - B9600 | CS8 | CREAD | HUPCL | CLOCAL; - serial_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; - tty_set_operations(serial_driver, &serial_ops); - - if (tty_register_driver(serial_driver)) - panic("Couldn't register serial driver"); - - for (info = zs_chain, i = 0; info; info = info->zs_next, i++) { - - /* Needed before interrupts are enabled. */ - info->tty = 0; - info->x_char = 0; - - if (info->hook && info->hook->init_info) { - (*info->hook->init_info)(info); - continue; - } - - info->magic = SERIAL_MAGIC; - info->port = (int) info->zs_channel->control; - info->line = i; - info->custom_divisor = 16; - info->close_delay = 50; - info->closing_wait = 3000; - info->event = 0; - info->count = 0; - info->blocked_open = 0; - tasklet_init(&info->tlet, do_softint, (unsigned long)info); - init_waitqueue_head(&info->open_wait); - init_waitqueue_head(&info->close_wait); - printk("ttyS%02d at 0x%08x (irq = %d) is a Z85C30 SCC\n", - info->line, info->port, info->irq); - tty_register_device(serial_driver, info->line, NULL); - - } - - for (channel = 0; channel < zs_channels_found; ++channel) { - zs_soft[channel].clk_divisor = 16; - zs_soft[channel].zs_baud = get_zsbaud(&zs_soft[channel]); - - if (request_irq(zs_soft[channel].irq, rs_interrupt, IRQF_SHARED, - "scc", &zs_soft[channel])) - printk(KERN_ERR "decserial: can't get irq %d\n", - zs_soft[channel].irq); - - if (zs_soft[channel].hook) { - zs_startup(&zs_soft[channel]); - if (zs_soft[channel].hook->init_channel) - (*zs_soft[channel].hook->init_channel) - (&zs_soft[channel]); - } - } - - return 0; -} - -/* - * polling I/O routines - */ -static int zs_poll_tx_char(void *handle, unsigned 
char ch) -{ - struct dec_serial *info = handle; - struct dec_zschannel *chan = info->zs_channel; - int ret; - - if(chan) { - int loops = 10000; - - while (loops && !(read_zsreg(chan, 0) & Tx_BUF_EMP)) - loops--; - - if (loops) { - write_zsdata(chan, ch); - ret = 0; - } else - ret = -EAGAIN; - - return ret; - } else - return -ENODEV; -} - -static int zs_poll_rx_char(void *handle) -{ - struct dec_serial *info = handle; - struct dec_zschannel *chan = info->zs_channel; - int ret; - - if(chan) { - int loops = 10000; - - while (loops && !(read_zsreg(chan, 0) & Rx_CH_AV)) - loops--; - - if (loops) - ret = read_zsdata(chan); - else - ret = -EAGAIN; - - return ret; - } else - return -ENODEV; -} - -int register_zs_hook(unsigned int channel, struct dec_serial_hook *hook) -{ - struct dec_serial *info = &zs_soft[channel]; - - if (info->hook) { - printk("%s: line %d has already a hook registered\n", - __FUNCTION__, channel); - - return 0; - } else { - hook->poll_rx_char = zs_poll_rx_char; - hook->poll_tx_char = zs_poll_tx_char; - info->hook = hook; - - return 1; - } -} - -int unregister_zs_hook(unsigned int channel) -{ - struct dec_serial *info = &zs_soft[channel]; - - if (info->hook) { - info->hook = NULL; - return 1; - } else { - printk("%s: trying to unregister hook on line %d," - " but none is registered\n", __FUNCTION__, channel); - return 0; - } -} - -/* - * ------------------------------------------------------------ - * Serial console driver - * ------------------------------------------------------------ - */ -#ifdef CONFIG_SERIAL_DEC_CONSOLE - - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... 
- */ -static void serial_console_write(struct console *co, const char *s, - unsigned count) -{ - struct dec_serial *info; - int i; - - info = zs_soft + co->index; - - for (i = 0; i < count; i++, s++) { - if(*s == '\n') - zs_poll_tx_char(info, '\r'); - zs_poll_tx_char(info, *s); - } -} - -static struct tty_driver *serial_console_device(struct console *c, int *index) -{ - *index = c->index; - return serial_driver; -} - -/* - * Setup initial baud/bits/parity. We do two things here: - * - construct a cflag setting for the first rs_open() - * - initialize the serial port - * Return non-zero if we didn't find a serial port. - */ -static int __init serial_console_setup(struct console *co, char *options) -{ - struct dec_serial *info; - int baud = 9600; - int bits = 8; - int parity = 'n'; - int cflag = CREAD | HUPCL | CLOCAL; - int clk_divisor = 16; - int brg; - char *s; - unsigned long flags; - - if(!BUS_PRESENT) - return -ENODEV; - - info = zs_soft + co->index; - - if (zs_chain == 0) - probe_sccs(); - - info->is_cons = 1; - - if (options) { - baud = simple_strtoul(options, NULL, 10); - s = options; - while(*s >= '0' && *s <= '9') - s++; - if (*s) - parity = *s++; - if (*s) - bits = *s - '0'; - } - - /* - * Now construct a cflag setting. - */ - switch(baud) { - case 1200: - cflag |= B1200; - break; - case 2400: - cflag |= B2400; - break; - case 4800: - cflag |= B4800; - break; - case 19200: - cflag |= B19200; - break; - case 38400: - cflag |= B38400; - break; - case 57600: - cflag |= B57600; - break; - case 115200: - cflag |= B115200; - break; - case 9600: - default: - cflag |= B9600; - /* - * Set this to a sane value to prevent a divide error. 
- */ - baud = 9600; - break; - } - switch(bits) { - case 7: - cflag |= CS7; - break; - default: - case 8: - cflag |= CS8; - break; - } - switch(parity) { - case 'o': case 'O': - cflag |= PARODD; - break; - case 'e': case 'E': - cflag |= PARENB; - break; - } - co->cflag = cflag; - - spin_lock_irqsave(&zs_lock, flags); - - /* - * Set up the baud rate generator. - */ - brg = BPS_TO_BRG(baud, zs_parms->clock / clk_divisor); - info->zs_channel->curregs[R12] = (brg & 255); - info->zs_channel->curregs[R13] = ((brg >> 8) & 255); - - /* - * Set byte size and parity. - */ - if (bits == 7) { - info->zs_channel->curregs[R3] |= Rx7; - info->zs_channel->curregs[R5] |= Tx7; - } else { - info->zs_channel->curregs[R3] |= Rx8; - info->zs_channel->curregs[R5] |= Tx8; - } - if (cflag & PARENB) { - info->zs_channel->curregs[R4] |= PAR_ENA; - } - if (!(cflag & PARODD)) { - info->zs_channel->curregs[R4] |= PAR_EVEN; - } - info->zs_channel->curregs[R4] |= SB1; - - /* - * Turn on RTS and DTR. - */ - zs_rtsdtr(info, RTS | DTR, 1); - - /* - * Finally, enable sequencing. - */ - info->zs_channel->curregs[R3] |= RxENABLE; - info->zs_channel->curregs[R5] |= TxENAB; - - /* - * Clear the interrupt registers. - */ - write_zsreg(info->zs_channel, R0, ERR_RES); - write_zsreg(info->zs_channel, R0, RES_H_IUS); - - /* - * Load up the new values. - */ - load_zsregs(info->zs_channel, info->zs_channel->curregs); - - /* Save the current value of RR0 */ - info->read_reg_zero = read_zsreg(info->zs_channel, R0); - - zs_soft[co->index].clk_divisor = clk_divisor; - zs_soft[co->index].zs_baud = get_zsbaud(&zs_soft[co->index]); - - spin_unlock_irqrestore(&zs_lock, flags); - - return 0; -} - -static struct console zs_console = { - .name = "ttyS", - .write = serial_console_write, - .device = serial_console_device, - .setup = serial_console_setup, - .flags = CON_PRINTBUFFER, - .index = -1, -}; - -/* - * Register console. 
- */ -void __init zs_serial_console_init(void) -{ - register_console(&zs_console); -} -#endif /* ifdef CONFIG_SERIAL_DEC_CONSOLE */ - -#ifdef CONFIG_KGDB -struct dec_zschannel *zs_kgdbchan; -static unsigned char scc_inittab[] = { - 9, 0x80, /* reset A side (CHRA) */ - 13, 0, /* set baud rate divisor */ - 12, 1, - 14, 1, /* baud rate gen enable, src=rtxc (BRENABL) */ - 11, 0x50, /* clocks = br gen (RCBR | TCBR) */ - 5, 0x6a, /* tx 8 bits, assert RTS (Tx8 | TxENAB | RTS) */ - 4, 0x44, /* x16 clock, 1 stop (SB1 | X16CLK)*/ - 3, 0xc1, /* rx enable, 8 bits (RxENABLE | Rx8)*/ -}; - -/* These are for receiving and sending characters under the kgdb - * source level kernel debugger. - */ -void putDebugChar(char kgdb_char) -{ - struct dec_zschannel *chan = zs_kgdbchan; - while ((read_zsreg(chan, 0) & Tx_BUF_EMP) == 0) - RECOVERY_DELAY; - write_zsdata(chan, kgdb_char); -} -char getDebugChar(void) -{ - struct dec_zschannel *chan = zs_kgdbchan; - while((read_zsreg(chan, 0) & Rx_CH_AV) == 0) - eieio(); /*barrier();*/ - return read_zsdata(chan); -} -void kgdb_interruptible(int yes) -{ - struct dec_zschannel *chan = zs_kgdbchan; - int one, nine; - nine = read_zsreg(chan, 9); - if (yes == 1) { - one = EXT_INT_ENAB|RxINT_ALL; - nine |= MIE; - printk("turning serial ints on\n"); - } else { - one = RxINT_DISAB; - nine &= ~MIE; - printk("turning serial ints off\n"); - } - write_zsreg(chan, 1, one); - write_zsreg(chan, 9, nine); -} - -static int kgdbhook_init_channel(void *handle) -{ - return 0; -} - -static void kgdbhook_init_info(void *handle) -{ -} - -static void kgdbhook_rx_char(void *handle, unsigned char ch, unsigned char fl) -{ - struct dec_serial *info = handle; - - if (fl != TTY_NORMAL) - return; - if (ch == 0x03 || ch == '$') - breakpoint(); -} - -/* This sets up the serial port we're using, and turns on - * interrupts for that channel, so kgdb is usable once we're done. 
- */ -static inline void kgdb_chaninit(struct dec_zschannel *ms, int intson, int bps) -{ - int brg; - int i, x; - volatile char *sccc = ms->control; - brg = BPS_TO_BRG(bps, zs_parms->clock/16); - printk("setting bps on kgdb line to %d [brg=%x]\n", bps, brg); - for (i = 20000; i != 0; --i) { - x = *sccc; eieio(); - } - for (i = 0; i < sizeof(scc_inittab); ++i) { - write_zsreg(ms, scc_inittab[i], scc_inittab[i+1]); - i++; - } -} -/* This is called at boot time to prime the kgdb serial debugging - * serial line. The 'tty_num' argument is 0 for /dev/ttya and 1 - * for /dev/ttyb which is determined in setup_arch() from the - * boot command line flags. - */ -struct dec_serial_hook zs_kgdbhook = { - .init_channel = kgdbhook_init_channel, - .init_info = kgdbhook_init_info, - .rx_char = kgdbhook_rx_char, - .cflags = B38400 | CS8 | CLOCAL, -}; - -void __init zs_kgdb_hook(int tty_num) -{ - /* Find out how many Z8530 SCCs we have */ - if (zs_chain == 0) - probe_sccs(); - zs_soft[tty_num].zs_channel = &zs_channels[tty_num]; - zs_kgdbchan = zs_soft[tty_num].zs_channel; - zs_soft[tty_num].change_needed = 0; - zs_soft[tty_num].clk_divisor = 16; - zs_soft[tty_num].zs_baud = 38400; - zs_soft[tty_num].hook = &zs_kgdbhook; /* This runs kgdb */ - /* Turn on transmitter/receiver at 8-bits/char */ - kgdb_chaninit(zs_soft[tty_num].zs_channel, 1, 38400); - printk("KGDB: on channel %d initialized\n", tty_num); - set_debug_traps(); /* init stub */ -} -#endif /* ifdef CONFIG_KGDB */ diff --git a/drivers/tc/zs.h b/drivers/tc/zs.h deleted file mode 100644 index 13512200ceb..00000000000 --- a/drivers/tc/zs.h +++ /dev/null @@ -1,404 +0,0 @@ -/* - * drivers/tc/zs.h: Definitions for the DECstation Z85C30 serial driver. - * - * Adapted from drivers/sbus/char/sunserial.h by Paul Mackerras. - * Adapted from drivers/macintosh/macserial.h by Harald Koerfgen. - * - * Copyright (C) 1996 Paul Mackerras (Paul.Mackerras@cs.anu.edu.au) - * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) - * Copyright (C) 2004, 2005 Maciej W. Rozycki - */ -#ifndef _DECSERIAL_H -#define _DECSERIAL_H - -#include - -#define NUM_ZSREGS 16 - -struct serial_struct { - int type; - int line; - int port; - int irq; - int flags; - int xmit_fifo_size; - int custom_divisor; - int baud_base; - unsigned short close_delay; - char reserved_char[2]; - int hub6; - unsigned short closing_wait; /* time to wait before closing */ - unsigned short closing_wait2; /* no longer used... */ - int reserved[4]; -}; - -/* - * For the close wait times, 0 means wait forever for serial port to - * flush its output. 65535 means don't wait at all. - */ -#define ZILOG_CLOSING_WAIT_INF 0 -#define ZILOG_CLOSING_WAIT_NONE 65535 - -/* - * Definitions for ZILOG_struct (and serial_struct) flags field - */ -#define ZILOG_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes - on the callout port */ -#define ZILOG_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ -#define ZILOG_SAK 0x0004 /* Secure Attention Key (Orange book) */ -#define ZILOG_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ - -#define ZILOG_SPD_MASK 0x0030 -#define ZILOG_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ - -#define ZILOG_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ -#define ZILOG_SPD_CUST 0x0030 /* Use user-specified divisor */ - -#define ZILOG_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ -#define ZILOG_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ -#define ZILOG_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ -#define ZILOG_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ -#define ZILOG_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ - -#define ZILOG_FLAGS 0x0FFF /* Possible legal ZILOG flags */ -#define ZILOG_USR_MASK 0x0430 /* Legal flags that non-privileged - * users can set or reset */ - -/* Internal flags used only by kernel/chr_drv/serial.c */ -#define ZILOG_INITIALIZED 
0x80000000 /* Serial port was initialized */ -#define ZILOG_CALLOUT_ACTIVE 0x40000000 /* Call out device is active */ -#define ZILOG_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ -#define ZILOG_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ -#define ZILOG_CLOSING 0x08000000 /* Serial port is closing */ -#define ZILOG_CTS_FLOW 0x04000000 /* Do CTS flow control */ -#define ZILOG_CHECK_CD 0x02000000 /* i.e., CLOCAL */ - -/* Software state per channel */ - -#ifdef __KERNEL__ -/* - * This is our internal structure for each serial port's state. - * - * Many fields are paralleled by the structure used by the serial_struct - * structure. - * - * For definitions of the flags field, see tty.h - */ - -struct dec_zschannel { - volatile unsigned char *control; - volatile unsigned char *data; - - /* Current write register values */ - unsigned char curregs[NUM_ZSREGS]; -}; - -struct dec_serial { - struct dec_serial *zs_next; /* For IRQ servicing chain. */ - struct dec_zschannel *zs_channel; /* Channel registers. */ - struct dec_zschannel *zs_chan_a; /* A side registers. */ - unsigned char read_reg_zero; - - struct dec_serial_hook *hook; /* Hook on this channel. */ - int tty_break; /* Set on BREAK condition. */ - int is_cons; /* Is this our console. */ - int tx_active; /* Char is being xmitted. */ - int tx_stopped; /* Output is suspended. */ - - /* - * We need to know the current clock divisor - * to read the bps rate the chip has currently loaded. - */ - int clk_divisor; /* May be 1, 16, 32, or 64. */ - int zs_baud; - - char change_needed; - - int magic; - int baud_base; - int port; - int irq; - int flags; /* Defined in tty.h. */ - int type; /* UART type. */ - struct tty_struct *tty; - int read_status_mask; - int ignore_status_mask; - int timeout; - int xmit_fifo_size; - int custom_divisor; - int x_char; /* XON/XOFF character. 
*/ - int close_delay; - unsigned short closing_wait; - unsigned short closing_wait2; - unsigned long event; - unsigned long last_active; - int line; - int count; /* # of fds on device. */ - int blocked_open; /* # of blocked opens. */ - unsigned char *xmit_buf; - int xmit_head; - int xmit_tail; - int xmit_cnt; - struct tasklet_struct tlet; - wait_queue_head_t open_wait; - wait_queue_head_t close_wait; -}; - - -#define SERIAL_MAGIC 0x5301 - -/* - * The size of the serial xmit buffer is 1 page, or 4096 bytes - */ -#define SERIAL_XMIT_SIZE 4096 - -/* - * Events are used to schedule things to happen at timer-interrupt - * time, instead of at rs interrupt time. - */ -#define RS_EVENT_WRITE_WAKEUP 0 - -#endif /* __KERNEL__ */ - -/* Conversion routines to/from brg time constants from/to bits - * per second. - */ -#define BRG_TO_BPS(brg, freq) ((freq) / 2 / ((brg) + 2)) -#define BPS_TO_BRG(bps, freq) ((((freq) + (bps)) / (2 * (bps))) - 2) - -/* The Zilog register set */ - -#define FLAG 0x7e - -/* Write Register 0 */ -#define R0 0 /* Register selects */ -#define R1 1 -#define R2 2 -#define R3 3 -#define R4 4 -#define R5 5 -#define R6 6 -#define R7 7 -#define R8 8 -#define R9 9 -#define R10 10 -#define R11 11 -#define R12 12 -#define R13 13 -#define R14 14 -#define R15 15 - -#define NULLCODE 0 /* Null Code */ -#define POINT_HIGH 0x8 /* Select upper half of registers */ -#define RES_EXT_INT 0x10 /* Reset Ext. 
Status Interrupts */ -#define SEND_ABORT 0x18 /* HDLC Abort */ -#define RES_RxINT_FC 0x20 /* Reset RxINT on First Character */ -#define RES_Tx_P 0x28 /* Reset TxINT Pending */ -#define ERR_RES 0x30 /* Error Reset */ -#define RES_H_IUS 0x38 /* Reset highest IUS */ - -#define RES_Rx_CRC 0x40 /* Reset Rx CRC Checker */ -#define RES_Tx_CRC 0x80 /* Reset Tx CRC Checker */ -#define RES_EOM_L 0xC0 /* Reset EOM latch */ - -/* Write Register 1 */ - -#define EXT_INT_ENAB 0x1 /* Ext Int Enable */ -#define TxINT_ENAB 0x2 /* Tx Int Enable */ -#define PAR_SPEC 0x4 /* Parity is special condition */ - -#define RxINT_DISAB 0 /* Rx Int Disable */ -#define RxINT_FCERR 0x8 /* Rx Int on First Character Only or Error */ -#define RxINT_ALL 0x10 /* Int on all Rx Characters or error */ -#define RxINT_ERR 0x18 /* Int on error only */ -#define RxINT_MASK 0x18 - -#define WT_RDY_RT 0x20 /* Wait/Ready on R/T */ -#define WT_FN_RDYFN 0x40 /* Wait/FN/Ready FN */ -#define WT_RDY_ENAB 0x80 /* Wait/Ready Enable */ - -/* Write Register #2 (Interrupt Vector) */ - -/* Write Register 3 */ - -#define RxENABLE 0x1 /* Rx Enable */ -#define SYNC_L_INH 0x2 /* Sync Character Load Inhibit */ -#define ADD_SM 0x4 /* Address Search Mode (SDLC) */ -#define RxCRC_ENAB 0x8 /* Rx CRC Enable */ -#define ENT_HM 0x10 /* Enter Hunt Mode */ -#define AUTO_ENAB 0x20 /* Auto Enables */ -#define Rx5 0x0 /* Rx 5 Bits/Character */ -#define Rx7 0x40 /* Rx 7 Bits/Character */ -#define Rx6 0x80 /* Rx 6 Bits/Character */ -#define Rx8 0xc0 /* Rx 8 Bits/Character */ -#define RxNBITS_MASK 0xc0 - -/* Write Register 4 */ - -#define PAR_ENA 0x1 /* Parity Enable */ -#define PAR_EVEN 0x2 /* Parity Even/Odd* */ - -#define SYNC_ENAB 0 /* Sync Modes Enable */ -#define SB1 0x4 /* 1 stop bit/char */ -#define SB15 0x8 /* 1.5 stop bits/char */ -#define SB2 0xc /* 2 stop bits/char */ -#define SB_MASK 0xc - -#define MONSYNC 0 /* 8 Bit Sync character */ -#define BISYNC 0x10 /* 16 bit sync character */ -#define SDLC 0x20 /* SDLC Mode (01111110 Sync 
Flag) */ -#define EXTSYNC 0x30 /* External Sync Mode */ - -#define X1CLK 0x0 /* x1 clock mode */ -#define X16CLK 0x40 /* x16 clock mode */ -#define X32CLK 0x80 /* x32 clock mode */ -#define X64CLK 0xC0 /* x64 clock mode */ -#define XCLK_MASK 0xC0 - -/* Write Register 5 */ - -#define TxCRC_ENAB 0x1 /* Tx CRC Enable */ -#define RTS 0x2 /* RTS */ -#define SDLC_CRC 0x4 /* SDLC/CRC-16 */ -#define TxENAB 0x8 /* Tx Enable */ -#define SND_BRK 0x10 /* Send Break */ -#define Tx5 0x0 /* Tx 5 bits (or less)/character */ -#define Tx7 0x20 /* Tx 7 bits/character */ -#define Tx6 0x40 /* Tx 6 bits/character */ -#define Tx8 0x60 /* Tx 8 bits/character */ -#define TxNBITS_MASK 0x60 -#define DTR 0x80 /* DTR */ - -/* Write Register 6 (Sync bits 0-7/SDLC Address Field) */ - -/* Write Register 7 (Sync bits 8-15/SDLC 01111110) */ - -/* Write Register 8 (transmit buffer) */ - -/* Write Register 9 (Master interrupt control) */ -#define VIS 1 /* Vector Includes Status */ -#define NV 2 /* No Vector */ -#define DLC 4 /* Disable Lower Chain */ -#define MIE 8 /* Master Interrupt Enable */ -#define STATHI 0x10 /* Status high */ -#define SOFTACK 0x20 /* Software Interrupt Acknowledge */ -#define NORESET 0 /* No reset on write to R9 */ -#define CHRB 0x40 /* Reset channel B */ -#define CHRA 0x80 /* Reset channel A */ -#define FHWRES 0xc0 /* Force hardware reset */ - -/* Write Register 10 (misc control bits) */ -#define BIT6 1 /* 6 bit/8bit sync */ -#define LOOPMODE 2 /* SDLC Loop mode */ -#define ABUNDER 4 /* Abort/flag on SDLC xmit underrun */ -#define MARKIDLE 8 /* Mark/flag on idle */ -#define GAOP 0x10 /* Go active on poll */ -#define NRZ 0 /* NRZ mode */ -#define NRZI 0x20 /* NRZI mode */ -#define FM1 0x40 /* FM1 (transition = 1) */ -#define FM0 0x60 /* FM0 (transition = 0) */ -#define CRCPS 0x80 /* CRC Preset I/O */ - -/* Write Register 11 (Clock Mode control) */ -#define TRxCXT 0 /* TRxC = Xtal output */ -#define TRxCTC 1 /* TRxC = Transmit clock */ -#define TRxCBR 2 /* TRxC = BR Generator 
Output */ -#define TRxCDP 3 /* TRxC = DPLL output */ -#define TRxCOI 4 /* TRxC O/I */ -#define TCRTxCP 0 /* Transmit clock = RTxC pin */ -#define TCTRxCP 8 /* Transmit clock = TRxC pin */ -#define TCBR 0x10 /* Transmit clock = BR Generator output */ -#define TCDPLL 0x18 /* Transmit clock = DPLL output */ -#define RCRTxCP 0 /* Receive clock = RTxC pin */ -#define RCTRxCP 0x20 /* Receive clock = TRxC pin */ -#define RCBR 0x40 /* Receive clock = BR Generator output */ -#define RCDPLL 0x60 /* Receive clock = DPLL output */ -#define RTxCX 0x80 /* RTxC Xtal/No Xtal */ - -/* Write Register 12 (lower byte of baud rate generator time constant) */ - -/* Write Register 13 (upper byte of baud rate generator time constant) */ - -/* Write Register 14 (Misc control bits) */ -#define BRENABL 1 /* Baud rate generator enable */ -#define BRSRC 2 /* Baud rate generator source */ -#define DTRREQ 4 /* DTR/Request function */ -#define AUTOECHO 8 /* Auto Echo */ -#define LOOPBAK 0x10 /* Local loopback */ -#define SEARCH 0x20 /* Enter search mode */ -#define RMC 0x40 /* Reset missing clock */ -#define DISDPLL 0x60 /* Disable DPLL */ -#define SSBR 0x80 /* Set DPLL source = BR generator */ -#define SSRTxC 0xa0 /* Set DPLL source = RTxC */ -#define SFMM 0xc0 /* Set FM mode */ -#define SNRZI 0xe0 /* Set NRZI mode */ - -/* Write Register 15 (external/status interrupt control) */ -#define ZCIE 2 /* Zero count IE */ -#define DCDIE 8 /* DCD IE */ -#define SYNCIE 0x10 /* Sync/hunt IE */ -#define CTSIE 0x20 /* CTS IE */ -#define TxUIE 0x40 /* Tx Underrun/EOM IE */ -#define BRKIE 0x80 /* Break/Abort IE */ - - -/* Read Register 0 */ -#define Rx_CH_AV 0x1 /* Rx Character Available */ -#define ZCOUNT 0x2 /* Zero count */ -#define Tx_BUF_EMP 0x4 /* Tx Buffer empty */ -#define DCD 0x8 /* DCD */ -#define SYNC_HUNT 0x10 /* Sync/hunt */ -#define CTS 0x20 /* CTS */ -#define TxEOM 0x40 /* Tx underrun */ -#define BRK_ABRT 0x80 /* Break/Abort */ - -/* Read Register 1 */ -#define ALL_SNT 0x1 /* All sent */ -/* 
Residue Data for 8 Rx bits/char programmed */ -#define RES3 0x8 /* 0/3 */ -#define RES4 0x4 /* 0/4 */ -#define RES5 0xc /* 0/5 */ -#define RES6 0x2 /* 0/6 */ -#define RES7 0xa /* 0/7 */ -#define RES8 0x6 /* 0/8 */ -#define RES18 0xe /* 1/8 */ -#define RES28 0x0 /* 2/8 */ -/* Special Rx Condition Interrupts */ -#define PAR_ERR 0x10 /* Parity error */ -#define Rx_OVR 0x20 /* Rx Overrun Error */ -#define FRM_ERR 0x40 /* CRC/Framing Error */ -#define END_FR 0x80 /* End of Frame (SDLC) */ - -/* Read Register 2 (channel b only) - Interrupt vector */ - -/* Read Register 3 (interrupt pending register) ch a only */ -#define CHBEXT 0x1 /* Channel B Ext/Stat IP */ -#define CHBTxIP 0x2 /* Channel B Tx IP */ -#define CHBRxIP 0x4 /* Channel B Rx IP */ -#define CHAEXT 0x8 /* Channel A Ext/Stat IP */ -#define CHATxIP 0x10 /* Channel A Tx IP */ -#define CHARxIP 0x20 /* Channel A Rx IP */ - -/* Read Register 8 (receive data register) */ - -/* Read Register 10 (misc status bits) */ -#define ONLOOP 2 /* On loop */ -#define LOOPSEND 0x10 /* Loop sending */ -#define CLK2MIS 0x40 /* Two clocks missing */ -#define CLK1MIS 0x80 /* One clock missing */ - -/* Read Register 12 (lower byte of baud rate generator constant) */ - -/* Read Register 13 (upper byte of baud rate generator constant) */ - -/* Read Register 15 (value of WR 15) */ - -/* Misc macros */ -#define ZS_CLEARERR(channel) (write_zsreg(channel, 0, ERR_RES)) -#define ZS_CLEARFIFO(channel) do { volatile unsigned char garbage; \ - garbage = read_zsdata(channel); \ - garbage = read_zsdata(channel); \ - garbage = read_zsdata(channel); \ - } while(0) - -#endif /* !(_DECSERIAL_H) */ diff --git a/include/asm-mips/dec/serial.h b/include/asm-mips/dec/serial.h deleted file mode 100644 index acad75890a0..00000000000 --- a/include/asm-mips/dec/serial.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * include/asm-mips/dec/serial.h - * - * Definitions common to all DECstation serial devices. - * - * Copyright (C) 2004 Maciej W. 
Rozycki - * - * Based on bits extracted from drivers/tc/zs.h for which - * the following copyrights apply: - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996 Paul Mackerras (Paul.Mackerras@cs.anu.edu.au) - * Copyright (C) Harald Koerfgen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef __ASM_MIPS_DEC_SERIAL_H -#define __ASM_MIPS_DEC_SERIAL_H - -struct dec_serial_hook { - int (*init_channel)(void *handle); - void (*init_info)(void *handle); - void (*rx_char)(unsigned char ch, unsigned char fl); - int (*poll_rx_char)(void *handle); - int (*poll_tx_char)(void *handle, unsigned char ch); - unsigned int cflags; -}; - -extern int register_dec_serial_hook(unsigned int channel, - struct dec_serial_hook *hook); -extern int unregister_dec_serial_hook(unsigned int channel); - -#endif /* __ASM_MIPS_DEC_SERIAL_H */ diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9c721cd2c9d..773d8d8828a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -62,8 +62,9 @@ /* NEC v850. */ #define PORT_V850E_UART 40 -/* DZ */ -#define PORT_DZ 47 +/* DEC */ +#define PORT_DZ 46 +#define PORT_ZS 47 /* Parisc type numbers. */ #define PORT_MUX 48 -- cgit v1.2.3-70-g09d2 From 1e66df3ee301209f4a38df097d7cc5cb9b367a3f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: add kstrndup Add a kstrndup function, modelled on strndup. Like strndup this returns a string copied into its own allocated memory, but it copies no more than the specified number of bytes from the source. Remove private strndup() from irda code. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap Cc: YOSHIFUJI Hideaki Cc: Akinobu Mita Cc: Arnaldo Carvalho de Melo Cc: Al Viro Cc: Panagiotis Issaris Cc: Rene Scharfe --- include/linux/string.h | 1 + mm/util.c | 26 ++++++++++++++++++++++++-- net/irda/irias_object.c | 43 +++++-------------------------------------- 3 files changed, 30 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 7f2eb6a477f..ee5e9ccc4aa 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -105,6 +105,7 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif extern char *kstrdup(const char *s, gfp_t gfp); +extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); #ifdef __cplusplus diff --git a/mm/util.c b/mm/util.c index 78f3783bdcc..bf340d80686 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,7 +6,6 @@ /** * kstrdup - allocate space for and copy an existing string - * * @s: the string to duplicate * @gfp: the GFP mask used in the kmalloc() call when allocating memory */ @@ -26,6 +25,30 @@ char *kstrdup(const char *s, gfp_t gfp) } EXPORT_SYMBOL(kstrdup); +/** + * kstrndup - allocate space for and copy an existing string + * @s: the string to duplicate + * @max: read at most @max chars from @s + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kstrndup(const char *s, size_t max, gfp_t gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strnlen(s, max); + buf = kmalloc_track_caller(len+1, gfp); + if (buf) { + memcpy(buf, s, len); + buf[len] = '\0'; + } + return buf; +} +EXPORT_SYMBOL(kstrndup); + /** * kmemdup - duplicate region of memory * @@ -80,7 +103,6 @@ EXPORT_SYMBOL(krealloc); /* * strndup_user - duplicate an existing string from user space - * * @s: The string to duplicate * @n: Maximum number of bytes to copy, including the 
trailing NUL. */ diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c index 4adaae242b9..cf302457097 100644 --- a/net/irda/irias_object.c +++ b/net/irda/irias_object.c @@ -36,39 +36,6 @@ hashbin_t *irias_objects; */ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; -/* - * Function strndup (str, max) - * - * My own kernel version of strndup! - * - * Faster, check boundary... Jean II - */ -static char *strndup(char *str, size_t max) -{ - char *new_str; - int len; - - /* Check string */ - if (str == NULL) - return NULL; - /* Check length, truncate */ - len = strlen(str); - if(len > max) - len = max; - - /* Allocate new string */ - new_str = kmalloc(len + 1, GFP_ATOMIC); - if (new_str == NULL) { - IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); - return NULL; - } - - /* Copy and truncate */ - memcpy(new_str, str, len); - new_str[len] = '\0'; - - return new_str; -} /* * Function ias_new_object (name, id) @@ -90,7 +57,7 @@ struct ias_object *irias_new_object( char *name, int id) } obj->magic = IAS_OBJECT_MAGIC; - obj->name = strndup(name, IAS_MAX_CLASSNAME); + obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC); if (!obj->name) { IRDA_WARNING("%s(), Unable to allocate name!\n", __FUNCTION__); @@ -360,7 +327,7 @@ void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); /* Insert value */ attrib->value = irias_new_integer_value(value); @@ -404,7 +371,7 @@ void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_octseq_value( octets, len); if (!attrib->name || !attrib->value) { @@ -446,7 +413,7 @@ void irias_add_string_attrib(struct ias_object *obj, char *name, char 
*value, } attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); + attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); attrib->value = irias_new_string_value(value); if (!attrib->name || !attrib->value) { @@ -506,7 +473,7 @@ struct ias_value *irias_new_string_value(char *string) value->type = IAS_STRING; value->charset = CS_ASCII; - value->t.string = strndup(string, IAS_MAX_STRING); + value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC); if (!value->t.string) { IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); kfree(value); -- cgit v1.2.3-70-g09d2 From d84d1cc7647c7e4f77d517e2d87b4a106a0420d9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: add argv_split() argv_split() is a helper function which takes a string, splits it at whitespace, and returns a NULL-terminated argv vector. This is deliberately simple - it does no quote processing of any kind. [ Seems to me that this is something which is already being done in the kernel, but I couldn't find any other implementations, either to steal or replace. Keep an eye out. 
] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap --- include/linux/string.h | 3 ++ lib/Makefile | 2 +- lib/argv_split.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 lib/argv_split.c (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index ee5e9ccc4aa..836062b7582 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -108,6 +108,9 @@ extern char *kstrdup(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern char **argv_split(gfp_t gfp, const char *str, int *argcp); +extern void argv_free(char **argv); + #ifdef __cplusplus } #endif diff --git a/lib/Makefile b/lib/Makefile index da68b2ca060..61496638740 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,7 +5,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o reciprocal_div.o + sha1.o irq_regs.o reciprocal_div.o argv_split.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/argv_split.c b/lib/argv_split.c new file mode 100644 index 00000000000..4096ed42f49 --- /dev/null +++ b/lib/argv_split.c @@ -0,0 +1,105 @@ +/* + * Helper function for splitting a string into an argv-like array. 
+ */ + +#include +#include +#include + +static const char *skip_sep(const char *cp) +{ + while (*cp && isspace(*cp)) + cp++; + + return cp; +} + +static const char *skip_arg(const char *cp) +{ + while (*cp && !isspace(*cp)) + cp++; + + return cp; +} + +static int count_argc(const char *str) +{ + int count = 0; + + while (*str) { + str = skip_sep(str); + if (*str) { + count++; + str = skip_arg(str); + } + } + + return count; +} + +/** + * argv_free - free an argv + * @argv - the argument vector to be freed + * + * Frees an argv and the strings it points to. + */ +void argv_free(char **argv) +{ + char **p; + for (p = argv; *p; p++) + kfree(*p); + + kfree(argv); +} +EXPORT_SYMBOL(argv_free); + +/** + * argv_split - split a string at whitespace, returning an argv + * @gfp: the GFP mask used to allocate memory + * @str: the string to be split + * @argcp: returned argument count + * + * Returns an array of pointers to strings which are split out from + * @str. This is performed by strictly splitting on white-space; no + * quote processing is performed. Multiple whitespace characters are + * considered to be a single argument separator. The returned array + * is always NULL-terminated. Returns NULL on memory allocation + * failure. 
+ */ +char **argv_split(gfp_t gfp, const char *str, int *argcp) +{ + int argc = count_argc(str); + char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp); + char **argvp; + + if (argv == NULL) + goto out; + + *argcp = argc; + argvp = argv; + + while (*str) { + str = skip_sep(str); + + if (*str) { + const char *p = str; + char *t; + + str = skip_arg(str); + + t = kstrndup(p, str-p, gfp); + if (t == NULL) + goto fail; + *argvp++ = t; + } + } + *argvp = NULL; + + out: + return argv; + + fail: + argv_free(argv); + return NULL; +} +EXPORT_SYMBOL(argv_split); -- cgit v1.2.3-70-g09d2 From 0ab4dc92278a0f3816e486d6350c6652a72e06c8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: usermodehelper: split setup from execution Rather than having hundreds of variations of call_usermodehelper for various pieces of usermode state which could be set up, split the info allocation and initialization from the actual process execution. This means the general pattern becomes: info = call_usermodehelper_setup(path, argv, envp); /* basic state */ call_usermodehelper_<SET EXTRA STATE>(info, stuff...); /* extra state */ call_usermodehelper_exec(info, wait); /* run process and free info */ This patch introduces wrappers for all the existing calling styles for call_usermodehelper_*, but folds their implementations into one. Signed-off-by: Jeremy Fitzhardinge Cc: Andi Kleen Cc: Rusty Russell Cc: David Howells Cc: Björn Steinbrink Cc: Randy Dunlap --- include/linux/kmod.h | 44 +++++++++++- kernel/kmod.c | 191 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 176 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 10f505c8431..c4cbe59d9c6 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -36,13 +36,51 @@ static inline int request_module(const char * name, ...) { return -ENOSYS; } #define try_then_request_module(x, mod...)
((x) ?: (request_module(mod), (x))) struct key; -extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[], - struct key *session_keyring, int wait); +struct file; +struct subprocess_info; + +/* Allocate a subprocess_info structure */ +struct subprocess_info *call_usermodehelper_setup(char *path, + char **argv, char **envp); + +/* Set various pieces of state into the subprocess_info structure */ +void call_usermodehelper_setkeys(struct subprocess_info *info, + struct key *session_keyring); +int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, + struct file **filp); +void call_usermodehelper_setcleanup(struct subprocess_info *info, + void (*cleanup)(char **argv, char **envp)); + +/* Actually execute the sub-process */ +int call_usermodehelper_exec(struct subprocess_info *info, int wait); + +/* Free the subprocess_info. This is only needed if you're not going + to call call_usermodehelper_exec */ +void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int call_usermodehelper(char *path, char **argv, char **envp, int wait) { - return call_usermodehelper_keys(path, argv, envp, NULL, wait); + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp); + if (info == NULL) + return -ENOMEM; + return call_usermodehelper_exec(info, wait); +} + +static inline int +call_usermodehelper_keys(char *path, char **argv, char **envp, + struct key *session_keyring, int wait) +{ + struct subprocess_info *info; + + info = call_usermodehelper_setup(path, argv, envp); + if (info == NULL) + return -ENOMEM; + + call_usermodehelper_setkeys(info, session_keyring); + return call_usermodehelper_exec(info, wait); } extern void usermodehelper_init(void); diff --git a/kernel/kmod.c b/kernel/kmod.c index 4d32eb07717..d2dce71115d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -122,6 +122,7 @@ struct subprocess_info { int wait; int retval; struct file *stdin; + void (*cleanup)(char **argv, char **envp); }; /* @@ 
-180,6 +181,14 @@ static int ____call_usermodehelper(void *data) do_exit(0); } +void call_usermodehelper_freeinfo(struct subprocess_info *info) +{ + if (info->cleanup) + (*info->cleanup)(info->argv, info->envp); + kfree(info); +} +EXPORT_SYMBOL(call_usermodehelper_freeinfo); + /* Keventd can't block, but this (a child) can. */ static int wait_for_helper(void *data) { @@ -217,7 +226,7 @@ static int wait_for_helper(void *data) } if (sub_info->wait < 0) - kfree(sub_info); + call_usermodehelper_freeinfo(sub_info); else complete(sub_info->complete); return 0; @@ -252,11 +261,94 @@ static void __call_usermodehelper(struct work_struct *work) } /** - * call_usermodehelper_keys - start a usermode application - * @path: pathname for the application - * @argv: null-terminated argument list - * @envp: null-terminated environment list - * @session_keyring: session keyring for process (NULL for an empty keyring) + * call_usermodehelper_setup - prepare to call a usermode helper + * @path - path to usermode executable + * @argv - arg vector for process + * @envp - environment for process + * + * Returns either NULL on allocation failure, or a subprocess_info + * structure. This should be passed to call_usermodehelper_exec to + * exec the process and free the structure. 
+ */ +struct subprocess_info *call_usermodehelper_setup(char *path, + char **argv, char **envp) +{ + struct subprocess_info *sub_info; + sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); + if (!sub_info) + goto out; + + INIT_WORK(&sub_info->work, __call_usermodehelper); + sub_info->path = path; + sub_info->argv = argv; + sub_info->envp = envp; + + out: + return sub_info; +} +EXPORT_SYMBOL(call_usermodehelper_setup); + +/** + * call_usermodehelper_setkeys - set the session keys for usermode helper + * @info: a subprocess_info returned by call_usermodehelper_setup + * @session_keyring: the session keyring for the process + */ +void call_usermodehelper_setkeys(struct subprocess_info *info, + struct key *session_keyring) +{ + info->ring = session_keyring; +} +EXPORT_SYMBOL(call_usermodehelper_setkeys); + +/** + * call_usermodehelper_setcleanup - set a cleanup function + * @info: a subprocess_info returned by call_usermodehelper_setup + * @cleanup: a cleanup function + * + * The cleanup function is called just before the subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. + */ +void call_usermodehelper_setcleanup(struct subprocess_info *info, + void (*cleanup)(char **argv, char **envp)) +{ + info->cleanup = cleanup; +} +EXPORT_SYMBOL(call_usermodehelper_setcleanup); + +/** + * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin + * @sub_info: a subprocess_info returned by call_usermodehelper_setup + * @filp: set to the write-end of a pipe + * + * This constructs a pipe, and sets the read end to be the stdin of the + * subprocess, and returns the write-end in *@filp.
+ */ +int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, + struct file **filp) +{ + struct file *f; + + f = create_write_pipe(); + if (IS_ERR(f)) + return PTR_ERR(f); + *filp = f; + + f = create_read_pipe(f); + if (IS_ERR(f)) { + free_write_pipe(*filp); + return PTR_ERR(f); + } + sub_info->stdin = f; + + return 0; +} +EXPORT_SYMBOL(call_usermodehelper_stdinpipe); + +/** + * call_usermodehelper_exec - start a usermode application + * @sub_info: information about the subprocess * @wait: wait for the application to finish and return status. * when -1 don't wait at all, but you get no useful error back when * the program couldn't be exec'ed. This makes it safe to call @@ -265,33 +357,24 @@ static void __call_usermodehelper(struct work_struct *work) * Runs a user-space application. The application is started * asynchronously if wait is not set, and runs as a child of keventd. * (ie. it runs with full root capabilities). - * - * Must be called from process context. Returns a negative error code - * if program was not execed successfully, or 0.
*/ -int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, int wait) +int call_usermodehelper_exec(struct subprocess_info *sub_info, + int wait) { DECLARE_COMPLETION_ONSTACK(done); - struct subprocess_info *sub_info; int retval; - if (!khelper_wq) - return -EBUSY; - - if (path[0] == '\0') - return 0; + if (sub_info->path[0] == '\0') { + retval = 0; + goto out; + } - sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); - if (!sub_info) - return -ENOMEM; + if (!khelper_wq) { + retval = -EBUSY; + goto out; + } - INIT_WORK(&sub_info->work, __call_usermodehelper); sub_info->complete = &done; - sub_info->path = path; - sub_info->argv = argv; - sub_info->envp = envp; - sub_info->ring = session_keyring; sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); @@ -299,47 +382,43 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, return 0; wait_for_completion(&done); retval = sub_info->retval; - kfree(sub_info); + + out: + call_usermodehelper_freeinfo(sub_info); return retval; } -EXPORT_SYMBOL(call_usermodehelper_keys); +EXPORT_SYMBOL(call_usermodehelper_exec); +/** + * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @filp: set to the write-end of a pipe + * + * This is a simple wrapper which executes a usermode-helper function + * with a pipe as stdin. It is implemented entirely in terms of + * lower-level call_usermodehelper_* functions. 
+ */ int call_usermodehelper_pipe(char *path, char **argv, char **envp, struct file **filp) { - DECLARE_COMPLETION(done); - struct subprocess_info sub_info = { - .work = __WORK_INITIALIZER(sub_info.work, - __call_usermodehelper), - .complete = &done, - .path = path, - .argv = argv, - .envp = envp, - .retval = 0, - }; - struct file *f; - - if (!khelper_wq) - return -EBUSY; + struct subprocess_info *sub_info; + int ret; - if (path[0] == '\0') - return 0; + sub_info = call_usermodehelper_setup(path, argv, envp); + if (sub_info == NULL) + return -ENOMEM; - f = create_write_pipe(); - if (IS_ERR(f)) - return PTR_ERR(f); - *filp = f; + ret = call_usermodehelper_stdinpipe(sub_info, filp); + if (ret < 0) + goto out; - f = create_read_pipe(f); - if (IS_ERR(f)) { - free_write_pipe(*filp); - return PTR_ERR(f); - } - sub_info.stdin = f; + return call_usermodehelper_exec(sub_info, 1); - queue_work(khelper_wq, &sub_info.work); - wait_for_completion(&done); - return sub_info.retval; + out: + call_usermodehelper_freeinfo(sub_info); + return ret; } EXPORT_SYMBOL(call_usermodehelper_pipe); -- cgit v1.2.3-70-g09d2 From 10a0a8d4e3f6bf2d077f94344441909abe670f5a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:02 -0700 Subject: Add common orderly_poweroff() Various pieces of code around the kernel want to be able to trigger an orderly poweroff. This pulls them together into a single implementation. By default the poweroff command is /sbin/poweroff, but it can be set via sysctl: kernel/poweroff_cmd. This is split at whitespace, so it can include command-line arguments. This patch replaces four other instances of invoking either "poweroff" or "shutdown -h now": two sbus drivers, and acpi thermal management. sparc64 has its own "powerd"; still need to determine whether it should be replaced by orderly_poweroff(). 
Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Chris Wright Cc: Andrew Morton Cc: Randy Dunlap Cc: Andi Kleen Cc: Al Viro Cc: Arnd Bergmann Cc: David S. Miller --- drivers/acpi/thermal.c | 24 ++--------------- drivers/sbus/char/bbc_envctrl.c | 5 ++-- drivers/sbus/char/envctrl.c | 7 ++--- include/linux/reboot.h | 5 ++++ kernel/sys.c | 58 +++++++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 10 +++++++ 6 files changed, 79 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 88a6fc7fd27..58f1338981b 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -59,7 +60,6 @@ #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0 #define ACPI_THERMAL_NOTIFY_HOT 0xF1 #define ACPI_THERMAL_MODE_ACTIVE 0x00 -#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff" #define ACPI_THERMAL_MAX_ACTIVE 10 #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65 @@ -419,26 +419,6 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz) return 0; } -static int acpi_thermal_call_usermode(char *path) -{ - char *argv[2] = { NULL, NULL }; - char *envp[3] = { NULL, NULL, NULL }; - - - if (!path) - return -EINVAL; - - argv[0] = path; - - /* minimal command environment */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - - call_usermodehelper(argv[0], argv, envp, 0); - - return 0; -} - static int acpi_thermal_critical(struct acpi_thermal *tz) { if (!tz || !tz->trips.critical.flags.valid) @@ -456,7 +436,7 @@ static int acpi_thermal_critical(struct acpi_thermal *tz) acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL, tz->trips.critical.flags.enabled); - acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF); + orderly_poweroff(true); return 0; } diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c index a54e4140683..e821a155b65 100644 --- a/drivers/sbus/char/bbc_envctrl.c +++ 
b/drivers/sbus/char/bbc_envctrl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -170,8 +171,6 @@ static void get_current_temps(struct bbc_cpu_temperature *tp) static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) { static int shutting_down = 0; - static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; char *type = "???"; s8 val = -1; @@ -195,7 +194,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); shutting_down = 1; - if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) + if (orderly_poweroff(true) < 0) printk(KERN_CRIT "envctrl: shutdown execution failed\n"); } diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 8328acab47f..dadabef116b 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -966,10 +967,6 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type) static void envctrl_do_shutdown(void) { static int inprog = 0; - static char *envp[] = { - "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { - "/sbin/shutdown", "-h", "now", NULL }; int ret; if (inprog != 0) @@ -977,7 +974,7 @@ static void envctrl_do_shutdown(void) inprog = 1; printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); - ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0); + ret = orderly_poweroff(true); if (ret < 0) { printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); inprog = 0; /* unlikely to succeed, but we could try again */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 1dd1c707311..85ea63f462a 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -67,6 +67,11 @@ extern void kernel_power_off(void); void 
ctrl_alt_del(void); +#define POWEROFF_CMD_PATH_LEN 256 +extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; + +extern int orderly_poweroff(bool force); + /* * Emergency restart, callable from an interrupt handler. */ diff --git a/kernel/sys.c b/kernel/sys.c index 4d141ae3e80..aeded9ad66c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2286,3 +2286,61 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, } return err ? -EFAULT : 0; } + +char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; + +static void argv_cleanup(char **argv, char **envp) +{ + argv_free(argv); +} + +/** + * orderly_poweroff - Trigger an orderly system poweroff + * @force: force poweroff if command execution fails + * + * This may be called from any context to trigger a system shutdown. + * If the orderly shutdown fails, it will force an immediate shutdown. + */ +int orderly_poweroff(bool force) +{ + int argc; + char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); + static char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL + }; + int ret = -ENOMEM; + struct subprocess_info *info; + + if (argv == NULL) { + printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", + __func__, poweroff_cmd); + goto out; + } + + info = call_usermodehelper_setup(argv[0], argv, envp); + if (info == NULL) { + argv_free(argv); + goto out; + } + + call_usermodehelper_setcleanup(info, argv_cleanup); + + ret = call_usermodehelper_exec(info, -1); + + out: + if (ret && force) { + printk(KERN_WARNING "Failed to start orderly shutdown: " + "forcing the issue\n"); + + /* I guess this should try to kick off some daemon to + sync and poweroff asap. Or not even bother syncing + if we're doing an emergency shutdown? 
*/ + emergency_sync(); + kernel_power_off(); + } + + return ret; +} +EXPORT_SYMBOL_GPL(orderly_poweroff); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7063ebc6db0..44a1d699aad 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -705,6 +706,15 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "poweroff_cmd", + .data = &poweroff_cmd, + .maxlen = POWEROFF_CMD_PATH_LEN, + .mode = 0644, + .proc_handler = &proc_dostring, + .strategy = &sysctl_string, + }, { .ctl_name = 0 } }; -- cgit v1.2.3-70-g09d2 From 86313c488a6848b7ec2ba04e74f25f79dd32a0b7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: usermodehelper: Tidy up waiting Rather than using a tri-state integer for the wait flag in call_usermodehelper_exec, define a proper enum, and use that. I've preserved the integer values so that any callers I've missed should still work OK. 
Signed-off-by: Jeremy Fitzhardinge Cc: James Bottomley Cc: Randy Dunlap Cc: Christoph Hellwig Cc: Andi Kleen Cc: Paul Mackerras Cc: Johannes Berg Cc: Ralf Baechle Cc: Bjorn Helgaas Cc: Joel Becker Cc: Tony Luck Cc: Kay Sievers Cc: Srivatsa Vaddagiri Cc: Oleg Nesterov Cc: David Howells --- arch/i386/mach-voyager/voyager_thread.c | 2 +- arch/x86_64/kernel/mce.c | 2 +- drivers/macintosh/therm_pm72.c | 3 ++- drivers/macintosh/windfarm_core.c | 3 ++- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/pnp/pnpbios/core.c | 2 +- fs/ocfs2/heartbeat.c | 2 +- include/linux/kmod.h | 12 +++++++++--- kernel/cpuset.c | 2 +- kernel/kmod.c | 27 ++++++++++++++++----------- kernel/sys.c | 2 +- lib/kobject_uevent.c | 2 +- net/bridge/br_stp_if.c | 2 +- security/keys/request_key.c | 3 ++- 14 files changed, 40 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index b4b24e0e45e..f9d59533815 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -52,7 +52,7 @@ execute(const char *string) NULL, }; - if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) { + if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, ret); } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index aa1d1599179..f3fb8174559 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -174,7 +174,7 @@ static void do_mce_trigger(void) if (events != atomic_read(&mce_logged) && trigger[0]) { /* Small race window, but should be harmless. 
*/ atomic_set(&mce_logged, events); - call_usermodehelper(trigger, trigger_argv, NULL, -1); + call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); } } diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index dbb22403979..3d90fc00209 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index e18d265d5d3..516d943227e 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -80,7 +80,8 @@ int wf_critical_overtemp(void) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } EXPORT_SYMBOL_GPL(wf_critical_overtemp); diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 84aa2117c0e..355c6cf3d11 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc) sprintf(portarg, "%ld", bc->pdev->port->base); printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); - return call_usermodehelper(eppconfig_path, argv, envp, 1); + return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC); } /* ---------------------------------------------------------------------- */ diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 03baf1c64a2..ed112ee1601 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info) 
info->location_id, info->serial, info->capabilities); envp[i] = NULL; - value = call_usermodehelper (argv [0], argv, envp, 0); + value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); kfree (buf); kfree (envp); return 0; diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 352eb4a13f9..c4c36171240 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -209,7 +209,7 @@ void ocfs2_stop_heartbeat(struct ocfs2_super *osb) envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[2] = NULL; - ret = call_usermodehelper(argv[0], argv, envp, 1); + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); if (ret < 0) mlog_errno(ret); } diff --git a/include/linux/kmod.h b/include/linux/kmod.h index c4cbe59d9c6..5dc13848891 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -51,15 +51,21 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, void call_usermodehelper_setcleanup(struct subprocess_info *info, void (*cleanup)(char **argv, char **envp)); +enum umh_wait { + UMH_NO_WAIT = -1, /* don't wait at all */ + UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ + UMH_WAIT_PROC = 1, /* wait for the process to complete */ +}; + /* Actually execute the sub-process */ -int call_usermodehelper_exec(struct subprocess_info *info, int wait); +int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); /* Free the subprocess_info. 
This is only needed if you're not going to call call_usermodehelper_exec */ void call_usermodehelper_freeinfo(struct subprocess_info *info); static inline int -call_usermodehelper(char *path, char **argv, char **envp, int wait) +call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) { struct subprocess_info *info; @@ -71,7 +77,7 @@ call_usermodehelper(char *path, char **argv, char **envp, int wait) static inline int call_usermodehelper_keys(char *path, char **argv, char **envp, - struct key *session_keyring, int wait) + struct key *session_keyring, enum umh_wait wait) { struct subprocess_info *info; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b4796d85014..57e6448b171 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -516,7 +516,7 @@ static void cpuset_release_agent(const char *pathbuf) envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; envp[i] = NULL; - call_usermodehelper(argv[0], argv, envp, 0); + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); kfree(pathbuf); } diff --git a/kernel/kmod.c b/kernel/kmod.c index d2dce71115d..78d365c524e 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -119,7 +119,7 @@ struct subprocess_info { char **argv; char **envp; struct key *ring; - int wait; + enum umh_wait wait; int retval; struct file *stdin; void (*cleanup)(char **argv, char **envp); @@ -225,7 +225,7 @@ static int wait_for_helper(void *data) sub_info->retval = ret; } - if (sub_info->wait < 0) + if (sub_info->wait == UMH_NO_WAIT) call_usermodehelper_freeinfo(sub_info); else complete(sub_info->complete); @@ -238,26 +238,31 @@ static void __call_usermodehelper(struct work_struct *work) struct subprocess_info *sub_info = container_of(work, struct subprocess_info, work); pid_t pid; - int wait = sub_info->wait; + enum umh_wait wait = sub_info->wait; /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. 
*/ - if (wait) + if (wait == UMH_WAIT_PROC || wait == UMH_NO_WAIT) pid = kernel_thread(wait_for_helper, sub_info, CLONE_FS | CLONE_FILES | SIGCHLD); else pid = kernel_thread(____call_usermodehelper, sub_info, CLONE_VFORK | SIGCHLD); - if (wait < 0) - return; + switch (wait) { + case UMH_NO_WAIT: + break; - if (pid < 0) { + case UMH_WAIT_PROC: + if (pid > 0) + break; sub_info->retval = pid; + /* FALLTHROUGH */ + + case UMH_WAIT_EXEC: complete(sub_info->complete); - } else if (!wait) - complete(sub_info->complete); + } } /** @@ -359,7 +364,7 @@ EXPORT_SYMBOL(call_usermodehelper_stdinpipe); * (ie. it runs with full root capabilities). */ int call_usermodehelper_exec(struct subprocess_info *sub_info, - int wait) + enum umh_wait wait) { DECLARE_COMPLETION_ONSTACK(done); int retval; @@ -378,7 +383,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); - if (wait < 0) /* task has freed sub_info */ + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ return 0; wait_for_completion(&done); retval = sub_info->retval; diff --git a/kernel/sys.c b/kernel/sys.c index aeded9ad66c..18987c7f6ad 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2327,7 +2327,7 @@ int orderly_poweroff(bool force) call_usermodehelper_setcleanup(info, argv_cleanup); - ret = call_usermodehelper_exec(info, -1); + ret = call_usermodehelper_exec(info, UMH_NO_WAIT); out: if (ret && force) { diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 12e311dc664..bd5ecbbafab 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -208,7 +208,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, argv [0] = uevent_helper; argv [1] = (char *)subsystem; argv [2] = NULL; - call_usermodehelper (argv[0], argv, envp, 0); + call_usermodehelper (argv[0], argv, envp, UMH_WAIT_EXEC); } exit: diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a786e786320..1ea2f86f768 100644 --- 
a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -125,7 +125,7 @@ static void br_stp_start(struct net_bridge *br) char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; - r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); + r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); if (r == 0) { br->stp_enabled = BR_USER_STP; printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index f573ac189a0..557500110a1 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -108,7 +108,8 @@ static int call_sbin_request_key(struct key *key, argv[i] = NULL; /* do it */ - ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1); + ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, + UMH_WAIT_PROC); error_link: key_put(keyring); -- cgit v1.2.3-70-g09d2 From 810bab448e563ffd1718d78e9a3756806b626acc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: use elfnote.h to generate vsyscall notes. Use existing elfnote.h to generate vsyscall notes, rather than doing it locally. Changes elfnote.h a bit to suit, since this is the first asm user, and it wasn't quite right. Signed-off-by: Jeremy Fitzhardinge Cc: "Eric W. Biederman" Cc: Roland McGrath Cc: Andrew Morton --- arch/i386/kernel/vsyscall-note.S | 23 ++++++----------------- include/linux/elfnote.h | 22 +++++++++++++++------- 2 files changed, 21 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S index d4b5be4f3d5..52e0cbbac70 100644 --- a/arch/i386/kernel/vsyscall-note.S +++ b/arch/i386/kernel/vsyscall-note.S @@ -3,23 +3,12 @@ * Here we can supply some information useful to userland. 
*/ -#include #include +#include -#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ - .section name, flags; \ - .balign 4; \ - .long 1f - 0f; /* name length */ \ - .long 3f - 2f; /* data length */ \ - .long type; /* note type */ \ -0: .asciz vendor; /* vendor name */ \ -1: .balign 4; \ -2: - -#define ASM_ELF_NOTE_END \ -3: .balign 4; /* pad out section */ \ - .previous - - ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) +/* Ideally this would use UTS_NAME, but using a quoted string here + doesn't work. Remember to change this when changing the + kernel's name. */ +ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE - ASM_ELF_NOTE_END +ELFNOTE_END diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h index 9a1e0674e56..e831759b2fb 100644 --- a/include/linux/elfnote.h +++ b/include/linux/elfnote.h @@ -38,17 +38,25 @@ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) */ -#define ELFNOTE(name, type, desctype, descdata) \ -.pushsection .note.name, "",@note ; \ - .align 4 ; \ +#define ELFNOTE_START(name, type, flags) \ +.pushsection .note.name, flags,@note ; \ + .balign 4 ; \ .long 2f - 1f /* namesz */ ; \ - .long 4f - 3f /* descsz */ ; \ + .long 4484f - 3f /* descsz */ ; \ .long type ; \ 1:.asciz #name ; \ -2:.align 4 ; \ -3:desctype descdata ; \ -4:.align 4 ; \ +2:.balign 4 ; \ +3: + +#define ELFNOTE_END \ +4484:.balign 4 ; \ .popsection ; + +#define ELFNOTE(name, type, desc) \ + ELFNOTE_START(name, type, "") \ + desc ; \ + ELFNOTE_END + #else /* !__ASSEMBLER__ */ #include /* -- cgit v1.2.3-70-g09d2 From fdb4c338c8d1d494e17c3422a3ea2129f6791596 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: paravirt: add an "mm" argument to alloc_pt It's useful to know which mm is allocating a pagetable. Xen uses this to determine whether the pagetable being added to is pinned or not. 
Signed-off-by: Jeremy Fitzhardinge --- arch/i386/kernel/vmi.c | 2 +- arch/i386/mm/init.c | 2 +- arch/i386/mm/pageattr.c | 2 +- include/asm-i386/paravirt.h | 6 +++--- include/asm-i386/pgalloc.h | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index c12720d7cbc..234bd6ff518 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pt(u32 pfn) +static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 7135946d366..f9b6a887854 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -87,7 +87,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); + paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 2eb14a73be9..37992ffb163 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -60,7 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); - paravirt_alloc_pt(page_to_pfn(base)); + paravirt_alloc_pt(&init_mm, page_to_pfn(base)); for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, addr == address ? 
prot : ref_prot)); diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 7f846a7d6bc..99bf661a65f 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -173,7 +173,7 @@ struct paravirt_ops unsigned long va); /* Hooks for allocating/releasing pagetable pages */ - void (*alloc_pt)(u32 pfn); + void (*alloc_pt)(struct mm_struct *mm, u32 pfn); void (*alloc_pd)(u32 pfn); void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); void (*release_pt)(u32 pfn); @@ -725,9 +725,9 @@ static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va); } -static inline void paravirt_alloc_pt(unsigned pfn) +static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn) { - PVOP_VCALL1(alloc_pt, pfn); + PVOP_VCALL2(alloc_pt, mm, pfn); } static inline void paravirt_release_pt(unsigned pfn) { diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h index d07b7afc269..f2fc33ceb9f 100644 --- a/include/asm-i386/pgalloc.h +++ b/include/asm-i386/pgalloc.h @@ -7,7 +7,7 @@ #ifdef CONFIG_PARAVIRT #include #else -#define paravirt_alloc_pt(pfn) do { } while (0) +#define paravirt_alloc_pt(mm, pfn) do { } while (0) #define paravirt_alloc_pd(pfn) do { } while (0) #define paravirt_alloc_pd(pfn) do { } while (0) #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) @@ -17,13 +17,13 @@ #define pmd_populate_kernel(mm, pmd, pte) \ do { \ - paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ } while (0) #define pmd_populate(mm, pmd, pte) \ do { \ - paravirt_alloc_pt(page_to_pfn(pte)); \ + paravirt_alloc_pt(mm, page_to_pfn(pte)); \ set_pmd(pmd, __pmd(_PAGE_TABLE + \ ((unsigned long long)page_to_pfn(pte) << \ (unsigned long long) PAGE_SHIFT))); \ -- cgit v1.2.3-70-g09d2 From 6996d3b63fd9a64341bc80dad1b556fd3eb81272 Mon Sep 17 00:00:00 2001 From: Jeremy 
Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: paravirt: add a hook for once the allocator is ready Add a hook so that the paravirt backend knows when the allocator is ready. This is useful for the obvious reason that the allocator is available, but the other side-effect of having the bootmem allocator available is that each page now has an associated "struct page". Signed-off-by: Jeremy Fitzhardinge --- arch/i386/kernel/setup.c | 2 ++ include/asm-i386/paravirt.h | 8 ++++++++ include/asm-i386/setup.h | 4 ++++ 3 files changed, 14 insertions(+) (limited to 'include') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2d61e65eeb5..74871d066c2 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -601,6 +601,8 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ + paravirt_post_allocator_init(); + dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 99bf661a65f..786856950b1 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -52,6 +52,8 @@ struct paravirt_ops /* Basic arch-specific setup */ void (*arch_setup)(void); char *(*memory_setup)(void); + void (*post_allocator_init)(void); + void (*init_IRQ)(void); void (*time_init)(void); @@ -669,6 +671,12 @@ static inline void setup_secondary_clock(void) } #endif +static inline void paravirt_post_allocator_init(void) +{ + if (paravirt_ops.post_allocator_init) + (*paravirt_ops.post_allocator_init)(); +} + static inline void paravirt_pagetable_setup_start(pgd_t *base) { if (paravirt_ops.pagetable_setup_start) diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 0d5bff9dc4a..7862fe858a9 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -81,6 +81,10 @@ void __init add_memory_region(unsigned long long start, extern unsigned long init_pg_tables_end; +#ifndef CONFIG_PARAVIRT +#define 
paravirt_post_allocator_init() do {} while (0) +#endif + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 03f0c2f950f813e3b26c56ed041ba170479d479c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: paravirt: increase IRQ limit When running with CONFIG_PARAVIRT, we may want lots of IRQs even if there's no IO APIC. Signed-off-by: Jeremy Fitzhardinge Cc: "Eric W. Biederman" --- include/asm-i386/mach-default/irq_vectors_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/mach-default/irq_vectors_limits.h b/include/asm-i386/mach-default/irq_vectors_limits.h index 7f161e760be..a90c7a60109 100644 --- a/include/asm-i386/mach-default/irq_vectors_limits.h +++ b/include/asm-i386/mach-default/irq_vectors_limits.h @@ -1,7 +1,7 @@ #ifndef _ASM_IRQ_VECTORS_LIMITS_H #define _ASM_IRQ_VECTORS_LIMITS_H -#ifdef CONFIG_X86_IO_APIC +#if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) #define NR_IRQS 224 # if (224 >= 32 * NR_CPUS) # define NR_IRQ_VECTORS NR_IRQS -- cgit v1.2.3-70-g09d2 From 53787013248f52af81d99f63454e5a5cf34d6f12 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: paravirt: unstatic leave_mm Make leave_mm globally visible, specifically so that Xen can use it to shoot down lazy uses of cr3.
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright --- arch/i386/kernel/smp.c | 5 +++-- include/asm-i386/mmu_context.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6299c080f6e..2d35d850202 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -22,6 +22,7 @@ #include #include +#include #include /* @@ -249,13 +250,13 @@ static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); /* - * We cannot call mmdrop() because we are in interrupt context, + * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. * * We need to reload %cr3 since the page tables may be going * away from under us.. */ -static inline void leave_mm (unsigned long cpu) +void leave_mm(unsigned long cpu) { if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) BUG(); diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 8198d1cca1f..7eb0b0b1fb3 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -32,6 +32,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) #endif } +void leave_mm(unsigned long cpu); + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) -- cgit v1.2.3-70-g09d2 From 724faa89ccd8fae65f3d41a47b0e1034cf07918b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: paravirt: unstatic smp_store_cpu_info Paravirt implementations need to store cpu info when bringing up cpus. 
Signed-off-by: Jeremy Fitzhardinge --- arch/i386/kernel/smpboot.c | 2 +- include/asm-i386/smp.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 0b2954534b8..26752931023 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -148,7 +148,7 @@ void __init smp_alloc_memory(void) * a given CPU */ -static void __cpuinit smp_store_cpu_info(int id) +void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = cpu_data + id; diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 0c713278706..0f54f44b472 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -129,6 +129,8 @@ extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern unsigned int num_processors; +void __cpuinit smp_store_cpu_info(int id); + #endif /* !__ASSEMBLY__ */ #else /* CONFIG_SMP */ -- cgit v1.2.3-70-g09d2 From c70df74376c1e29a04e07e23dd3f4c384d6166dd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:03 -0700 Subject: paravirt: make siblingmap functions visible Paravirt implementations need to set the sibling map on new cpus. 
Signed-off-by: Jeremy Fitzhardinge --- arch/i386/kernel/smpboot.c | 6 ++---- include/asm-i386/smp.h | 3 +++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 26752931023..5910d3fac56 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu) /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; -static inline void -set_cpu_sibling_map(int cpu) +void set_cpu_sibling_map(int cpu) { int i; struct cpuinfo_x86 *c = cpu_data; @@ -1144,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void) } #ifdef CONFIG_HOTPLUG_CPU -static void -remove_siblinginfo(int cpu) +void remove_siblinginfo(int cpu) { int sibling; struct cpuinfo_x86 *c = cpu_data; diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 0f54f44b472..1f73bde165b 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -43,9 +43,12 @@ extern u8 x86_cpu_to_apicid[]; #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +extern void set_cpu_sibling_map(int cpu); + #ifdef CONFIG_HOTPLUG_CPU extern void cpu_exit_clear(void); extern void cpu_uninit(void); +extern void remove_siblinginfo(int cpu); #endif struct smp_ops -- cgit v1.2.3-70-g09d2 From 5f4352fbffd6c45123dbce9e195efd54df4e177e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:04 -0700 Subject: Allocate and free vmalloc areas Allocate/release a chunk of vmalloc address space: alloc_vm_area reserves a chunk of address space, and makes sure all the pagetables are constructed for that address range - but no pages. free_vm_area releases the address space range. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright Cc: "Jan Beulich" Cc: "Andi Kleen" --- include/linux/vmalloc.h | 4 ++++ mm/vmalloc.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 132b260aef1..c2b10cae5da 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -70,6 +70,10 @@ extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); extern void unmap_kernel_range(unsigned long addr, unsigned long size); +/* Allocate/destroy a 'vmalloc' VM area. */ +extern struct vm_struct *alloc_vm_area(size_t size); +extern void free_vm_area(struct vm_struct *area); + /* * Internals. Dont't use.. */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8e05a11155c..3130c343088 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -767,3 +767,56 @@ EXPORT_SYMBOL(remap_vmalloc_range); void __attribute__((weak)) vmalloc_sync_all(void) { } + + +static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) +{ + /* apply_to_page_range() does all the hard work. */ + return 0; +} + +/** + * alloc_vm_area - allocate a range of kernel address space + * @size: size of the area + * @returns: NULL on failure, vm_struct on success + * + * This function reserves a range of kernel address space, and + * allocates pagetables to map that range. No actual mappings + * are created. If the kernel address space is not shared + * between processes, it syncs the pagetable across all + * processes. + */ +struct vm_struct *alloc_vm_area(size_t size) +{ + struct vm_struct *area; + + area = get_vm_area(size, VM_IOREMAP); + if (area == NULL) + return NULL; + + /* + * This ensures that page tables are constructed for this region + * of kernel virtual address space and mapped into init_mm. 
+ */ + if (apply_to_page_range(&init_mm, (unsigned long)area->addr, + area->size, f, NULL)) { + free_vm_area(area); + return NULL; + } + + /* Make sure the pagetables are constructed in process kernel + mappings */ + vmalloc_sync_all(); + + return area; +} +EXPORT_SYMBOL_GPL(alloc_vm_area); + +void free_vm_area(struct vm_struct *area) +{ + struct vm_struct *ret; + ret = remove_vm_area(area->addr); + BUG_ON(ret != area); + kfree(area); +} +EXPORT_SYMBOL_GPL(free_vm_area); -- cgit v1.2.3-70-g09d2 From d572929cdd12a60732c3522f7cf011bfa29165cf Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:04 -0700 Subject: paravirt: helper to disable all IO space In a virtual environment, device drivers such as legacy IDE will waste quite a lot of time probing for their devices which will never appear. This helper function allows a paravirt implementation to lay claim to the whole iomem and ioport space, thereby disabling all device drivers trying to claim IO resources. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Rusty Russell --- arch/i386/kernel/paravirt.c | 35 +++++++++++++++++++++++++++++++++++ include/asm-i386/paravirt.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index faab09abca5..60e08b9b50a 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -228,6 +228,41 @@ static int __init print_banner(void) } core_initcall(print_banner); +static struct resource reserve_ioports = { + .start = 0, + .end = IO_SPACE_LIMIT, + .name = "paravirt-ioport", + .flags = IORESOURCE_IO | IORESOURCE_BUSY, +}; + +static struct resource reserve_iomem = { + .start = 0, + .end = -1, + .name = "paravirt-iomem", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + +/* + * Reserve the whole legacy IO space to prevent any legacy drivers + * from wasting time probing for their hardware. 
This is a fairly + * brute-force approach to disabling all non-virtual drivers. + * + * Note that this must be called very early to have any effect. + */ +int paravirt_disable_iospace(void) +{ + int ret; + + ret = request_resource(&ioport_resource, &reserve_ioports); + if (ret == 0) { + ret = request_resource(&iomem_resource, &reserve_iomem); + if (ret) + release_resource(&reserve_ioports); + } + + return ret; +} + struct paravirt_ops paravirt_ops = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 786856950b1..690ada22437 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -262,6 +262,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) unsigned paravirt_patch_insns(void *site, unsigned len, const char *start, const char *end); +int paravirt_disable_iospace(void); /* * This generates an indirect call based on the operation type number. -- cgit v1.2.3-70-g09d2 From 688340ea34c61ad12473ccd837325b59aada9a93 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:04 -0700 Subject: Add a sched_clock paravirt_op The tsc-based get_scheduled_cycles interface is not a good match for Xen's runstate accounting, which reports everything in nanoseconds. This patch replaces this interface with a sched_clock interface, which matches both Xen and VMI's requirements. In order to do this, we: 1. replace get_scheduled_cycles with sched_clock 2. hoist cycles_2_ns into a common header 3. update vmi accordingly One thing to note: because sched_clock is implemented as a weak function in kernel/sched.c, we must define a real function in order to override this weak binding. This means the usual paravirt_ops technique of using an inline function won't work in this case. 
Signed-off-by: Jeremy Fitzhardinge Cc: Zachary Amsden Cc: Dan Hecht Cc: john stultz --- arch/i386/kernel/paravirt.c | 2 +- arch/i386/kernel/tsc.c | 23 +++++++++++++++-------- arch/i386/kernel/vmi.c | 2 +- arch/i386/kernel/vmiclock.c | 6 +++--- include/asm-i386/paravirt.h | 7 +++++-- include/asm-i386/timer.h | 32 +++++++++++++++++++++++++++++++- include/asm-i386/vmi_time.h | 2 +- 7 files changed, 57 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 60e08b9b50a..53f07a8275e 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -302,7 +302,7 @@ struct paravirt_ops paravirt_ops = { .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .get_scheduled_cycles = native_read_tsc, + .sched_clock = native_sched_clock, .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index ea63a30ca3e..252f9010f28 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -84,7 +84,7 @@ static inline int check_tsc_unstable(void) * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale __read_mostly; +unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ @@ -93,15 +93,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz) cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - /* * Scheduler clock - returns current time in nanosec units. 
*/ -unsigned long long sched_clock(void) +unsigned long long native_sched_clock(void) { unsigned long long this_offset; @@ -118,12 +113,24 @@ unsigned long long sched_clock(void) return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); /* read the Time Stamp Counter: */ - get_scheduled_cycles(this_offset); + rdtscll(this_offset); /* return the value in ns */ return cycles_2_ns(this_offset); } +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long sched_clock(void) + __attribute__((alias("native_sched_clock"))); +#endif + unsigned long native_calculate_cpu_khz(void) { unsigned long long start, end; diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 234bd6ff518..72042bb7ec9 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c @@ -891,7 +891,7 @@ static inline int __init activate_vmi(void) paravirt_ops.setup_boot_clock = vmi_time_bsp_init; paravirt_ops.setup_secondary_clock = vmi_time_ap_init; #endif - paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; + paravirt_ops.sched_clock = vmi_sched_clock; paravirt_ops.get_cpu_khz = vmi_cpu_khz; /* We have true wallclock functions; disable CMOS clock sync */ diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c index 26a37f8a876..f9b845f4e69 100644 --- a/arch/i386/kernel/vmiclock.c +++ b/arch/i386/kernel/vmiclock.c @@ -64,10 +64,10 @@ int vmi_set_wallclock(unsigned long now) return 0; } -/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ -unsigned long long vmi_get_sched_cycles(void) +/* paravirt_ops.sched_clock = vmi_sched_clock */ +unsigned long long vmi_sched_clock(void) { - return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); + return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); } /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ diff --git 
a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 690ada22437..7df88be2dd9 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -118,7 +118,7 @@ struct paravirt_ops u64 (*read_tsc)(void); u64 (*read_pmc)(void); - u64 (*get_scheduled_cycles)(void); + unsigned long long (*sched_clock)(void); unsigned long (*get_cpu_khz)(void); /* Segment descriptor handling */ @@ -566,7 +566,10 @@ static inline u64 paravirt_read_tsc(void) #define rdtscll(val) (val = paravirt_read_tsc()) -#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles()) +static inline unsigned long long paravirt_sched_clock(void) +{ + return PVOP_CALL0(unsigned long long, sched_clock); +} #define calculate_cpu_khz() (paravirt_ops.get_cpu_khz()) #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 153770e25fa..51a713e33a9 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -15,8 +15,38 @@ extern int no_sync_cmos_clock; extern int recalibrate_cpu_khz(void); #ifndef CONFIG_PARAVIRT -#define get_scheduled_cycles(val) rdtscll(val) #define calculate_cpu_khz() native_calculate_cpu_khz() #endif +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * + * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */ +extern unsigned long cyc2ns_scale __read_mostly; + +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + + #endif diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h index 213930b995c..47818813032 100644 --- a/include/asm-i386/vmi_time.h +++ b/include/asm-i386/vmi_time.h @@ -49,7 +49,7 @@ extern struct vmi_timer_ops { extern void __init vmi_time_init(void); extern unsigned long vmi_get_wallclock(void); extern int vmi_set_wallclock(unsigned long now); -extern unsigned long long vmi_get_sched_cycles(void); +extern unsigned long long vmi_sched_clock(void); extern unsigned long vmi_cpu_khz(void); #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3-70-g09d2 From a42089dd358a7673a0a23126589a9029e57c2049 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:04 -0700 Subject: xen: Add Xen interface header files Add Xen interface header files. These are taken fairly directly from the Xen tree, but somewhat rearranged to suit the kernel's conventions. Define macros and inline functions for doing hypercalls into the hypervisor. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright --- include/asm-i386/xen/hypercall.h | 395 ++++++++++++++++++++++++++++++ include/asm-i386/xen/hypervisor.h | 72 ++++++ include/asm-i386/xen/interface.h | 188 ++++++++++++++ include/xen/interface/elfnote.h | 133 ++++++++++ include/xen/interface/event_channel.h | 195 +++++++++++++++ include/xen/interface/features.h | 43 ++++ include/xen/interface/grant_table.h | 301 +++++++++++++++++++++++ include/xen/interface/io/blkif.h | 94 +++++++ include/xen/interface/io/console.h | 23 ++ include/xen/interface/io/netif.h | 158 ++++++++++++ include/xen/interface/io/ring.h | 260 ++++++++++++++++++++ include/xen/interface/io/xenbus.h | 44 ++++ include/xen/interface/io/xs_wire.h | 87 +++++++ include/xen/interface/memory.h | 145 +++++++++++ include/xen/interface/physdev.h | 145 +++++++++++ include/xen/interface/sched.h | 77 ++++++ include/xen/interface/vcpu.h | 154 ++++++++++++ include/xen/interface/version.h | 60 +++++ include/xen/interface/xen.h | 447 ++++++++++++++++++++++++++++++++++ 19 files changed, 3021 insertions(+) create mode 100644 include/asm-i386/xen/hypercall.h create mode 100644 include/asm-i386/xen/hypervisor.h create mode 100644 include/asm-i386/xen/interface.h create mode 100644 include/xen/interface/elfnote.h create mode 100644 include/xen/interface/event_channel.h create mode 100644 include/xen/interface/features.h create mode 100644 include/xen/interface/grant_table.h create mode 100644 include/xen/interface/io/blkif.h create mode 100644 include/xen/interface/io/console.h create mode 100644 include/xen/interface/io/netif.h create mode 100644 include/xen/interface/io/ring.h create mode 100644 include/xen/interface/io/xenbus.h create mode 100644 include/xen/interface/io/xs_wire.h create mode 100644 include/xen/interface/memory.h create mode 100644 include/xen/interface/physdev.h create mode 100644 include/xen/interface/sched.h create mode 100644 
include/xen/interface/vcpu.h create mode 100644 include/xen/interface/version.h create mode 100644 include/xen/interface/xen.h (limited to 'include') diff --git a/include/asm-i386/xen/hypercall.h b/include/asm-i386/xen/hypercall.h new file mode 100644 index 00000000000..53912859708 --- /dev/null +++ b/include/asm-i386/xen/hypercall.h @@ -0,0 +1,395 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_H__ +#define __HYPERCALL_H__ + +#include +#include + +#include +#include +#include + +extern struct { char _entry[32]; } hypercall_page[]; + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + "call %[call]" \ + : "=a" (__res) \ + : [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + "call %[call]" \ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)), \ + [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + "call %[call]" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + "call %[call]" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), \ + [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + "call %[call]" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + "call %[call]" \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + 
"3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)), \ + [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table(struct trap_info *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update(struct mmu_update *req, int count, + int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt(unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks(unsigned long event_selector, + unsigned long event_address, + unsigned long failsafe_selector, + unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch(int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op(int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op(u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +HYPERVISOR_set_debugreg(int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg(int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static 
inline int +HYPERVISOR_update_descriptor(u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op(unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall(void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, + unsigned long flags) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = new_val.pte_high; +#endif + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); +} + +static inline int +HYPERVISOR_event_channel_op(int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + if (unlikely(rc == -ENOSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } + return rc; +} + +static inline int +HYPERVISOR_xen_version(int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io(int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op(int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); + if (unlikely(rc == -ENOSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } + return rc; +} + +static inline int +HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val, + unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; +#ifdef CONFIG_X86_PAE + pte_hi = 
new_val.pte_high; +#endif + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend(unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +static inline int +HYPERVISOR_nmi_op(unsigned long op, unsigned long arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} + +static inline void +MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; +#ifdef CONFIG_X86_PAE + mcl->args[1] = new_val.pte_low; + mcl->args[2] = new_val.pte_high; +#else + mcl->args[1] = new_val.pte_low; + mcl->args[2] = 0; +#endif + mcl->args[3] = flags; +} + +static inline void +MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, + void *uop, unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +static inline void +MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags, + domid_t domid) +{ + mcl->op = __HYPERVISOR_update_va_mapping_otherdomain; + mcl->args[0] = va; +#ifdef CONFIG_X86_PAE + mcl->args[1] = new_val.pte_low; + mcl->args[2] = new_val.pte_high; +#else + mcl->args[1] = new_val.pte_low; + mcl->args[2] = 0; +#endif + mcl->args[3] = flags; + mcl->args[4] = domid; +} + +static inline void +MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, + struct desc_struct desc) +{ + mcl->op = __HYPERVISOR_update_descriptor; + mcl->args[0] = maddr; + mcl->args[1] = maddr >> 32; 
+ mcl->args[2] = desc.a; + mcl->args[3] = desc.b; +} + +static inline void +MULTI_memory_op(struct multicall_entry *mcl, unsigned int cmd, void *arg) +{ + mcl->op = __HYPERVISOR_memory_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)arg; +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; +} + +static inline void +MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, + int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmuext_op; + mcl->args[0] = (unsigned long)op; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; +} +#endif /* __HYPERCALL_H__ */ diff --git a/include/asm-i386/xen/hypervisor.h b/include/asm-i386/xen/hypervisor.h new file mode 100644 index 00000000000..ebfa7e06308 --- /dev/null +++ b/include/asm-i386/xen/hypervisor.h @@ -0,0 +1,72 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. 
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERVISOR_H__ +#define __HYPERVISOR_H__ + +#include +#include +#include + +#include +#include + +#include +#include +#if defined(__i386__) +# ifdef CONFIG_X86_PAE +# include +# else +# include +# endif +#endif +#include + +/* arch/i386/kernel/setup.c */ +extern struct shared_info *HYPERVISOR_shared_info; +extern struct start_info *xen_start_info; +#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) + +/* arch/i386/mach-xen/evtchn.c */ +/* Force a proper event-channel callback from Xen. 
*/ +extern void force_evtchn_callback(void); + +/* Turn jiffies into Xen system time. */ +u64 jiffies_to_st(unsigned long jiffies); + + +#define MULTI_UVMFLAGS_INDEX 3 +#define MULTI_UVMDOMID_INDEX 4 + +#define is_running_on_xen() (xen_start_info ? 1 : 0) + +#endif /* __HYPERVISOR_H__ */ diff --git a/include/asm-i386/xen/interface.h b/include/asm-i386/xen/interface.h new file mode 100644 index 00000000000..165c3968e13 --- /dev/null +++ b/include/asm-i386/xen/interface.h @@ -0,0 +1,188 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_32_H__ + +#ifdef __XEN__ +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +__DEFINE_GUEST_HANDLE(ulong, unsigned long); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(long); +DEFINE_GUEST_HANDLE(void); +#endif + +/* + * SEGMENT DESCRIPTOR TABLES + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. 
+ */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +/* And the trap vector is... */ +#define TRAP_INSTR "int $0x82" + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#ifdef CONFIG_X86_PAE +#define __HYPERVISOR_VIRT_START 0xF5800000 +#else +#define __HYPERVISOR_VIRT_START 0xFC000000 +#endif + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 32 + +#ifndef __ASSEMBLY__ + +/* + * Send an array of these to HYPERVISOR_set_trap_table() + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) + +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? 
*/ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +DEFINE_GUEST_HANDLE_STRUCT(trap_info); + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_HVM_GUEST (1<<1) +#define VGCF_IN_KERNEL (1<<2) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +}; +DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); + +struct arch_shared_info { + unsigned long 
max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. */ + unsigned long pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; +}; + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ +}; + +#endif /* !__ASSEMBLY__ */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h new file mode 100644 index 00000000000..a64d3df5bd9 --- /dev/null +++ b/include/xen/interface/elfnote.h @@ -0,0 +1,133 @@ +/****************************************************************************** + * elfnote.h + * + * Definitions used for the Xen ELF notes. + * + * Copyright (c) 2006, Ian Campbell, XenSource Ltd. + */ + +#ifndef __XEN_PUBLIC_ELFNOTE_H__ +#define __XEN_PUBLIC_ELFNOTE_H__ + +/* + * The notes should live in a SHT_NOTE segment and have "Xen" in the + * name field. + * + * Numeric types are either 4 or 8 bytes depending on the content of + * the desc field. + * + * LEGACY indicated the fields in the legacy __xen_guest string which + * this a note type replaces. + */ + +/* + * NAME=VALUE pair (string). + * + * LEGACY: FEATURES and PAE + */ +#define XEN_ELFNOTE_INFO 0 + +/* + * The virtual address of the entry point (numeric). + * + * LEGACY: VIRT_ENTRY + */ +#define XEN_ELFNOTE_ENTRY 1 + +/* The virtual address of the hypercall transfer page (numeric). + * + * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page + * number not a virtual address) + */ +#define XEN_ELFNOTE_HYPERCALL_PAGE 2 + +/* The virtual address where the kernel image should be mapped (numeric). 
+ * + * Defaults to 0. + * + * LEGACY: VIRT_BASE + */ +#define XEN_ELFNOTE_VIRT_BASE 3 + +/* + * The offset of the ELF paddr field from the actual required + * pseudo-physical address (numeric). + * + * This is used to maintain backwards compatibility with older kernels + * which wrote __PAGE_OFFSET into that field. This field defaults to 0 + * if not present. + * + * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE) + */ +#define XEN_ELFNOTE_PADDR_OFFSET 4 + +/* + * The version of Xen that we work with (string). + * + * LEGACY: XEN_VER + */ +#define XEN_ELFNOTE_XEN_VERSION 5 + +/* + * The name of the guest operating system (string). + * + * LEGACY: GUEST_OS + */ +#define XEN_ELFNOTE_GUEST_OS 6 + +/* + * The version of the guest operating system (string). + * + * LEGACY: GUEST_VER + */ +#define XEN_ELFNOTE_GUEST_VERSION 7 + +/* + * The loader type (string). + * + * LEGACY: LOADER + */ +#define XEN_ELFNOTE_LOADER 8 + +/* + * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * + * LEGACY: PAE (n.b. The legacy interface included a provision to + * indicate 'extended-cr3' support allowing L3 page tables to be + * placed above 4G. It is assumed that any kernel new enough to use + * these ELF notes will include this and therefore "yes" here is + * equivalent to "yes[extended-cr3]" in the __xen_guest interface. + */ +#define XEN_ELFNOTE_PAE_MODE 9 + +/* + * The features supported/required by this kernel (string). + * + * The string must consist of a list of feature names (as given in + * features.h, without the "XENFEAT_" prefix) separated by '|' + * characters. If a feature is required for the kernel to function + * then the feature name must be preceded by a '!' character. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_FEATURES 10 + +/* + * The kernel requires the symbol table to be loaded (string = "yes" or "no") + * LEGACY: BSD_SYMTAB (n.b.
The legacy treated the presence or absence + * of this string as a boolean flag rather than requiring "yes" or + * "no". + */ +#define XEN_ELFNOTE_BSD_SYMTAB 11 + +#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h new file mode 100644 index 00000000000..919b5bdcb2b --- /dev/null +++ b/include/xen/interface/event_channel.h @@ -0,0 +1,195 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +typedef uint32_t evtchn_port_t; +DEFINE_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_alloc_unbound 6 +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * NOTES: + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_bind_interdomain 0 +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters.
*/ + evtchn_port_t local_port; +}; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <virq> on specified + * vcpu. + * NOTES: + * 1. A virtual IRQ may be bound to at most one event channel per vcpu. + * 2. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_virq 1 +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <pirq>. + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +#define EVTCHNOP_bind_pirq 2 +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_ipi 7 +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +#define EVTCHNOP_close 3 +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +#define EVTCHNOP_send 4 +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <dom, port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2.
Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. + */ +#define EVTCHNOP_status 5 +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised + * the binding. This binding cannot be changed. + * 2. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +#define EVTCHNOP_bind_vcpu 8 +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +#define EVTCHNOP_unmask 9 +struct evtchn_unmask { + /* IN parameters.
*/ + evtchn_port_t port; +}; + +struct evtchn_op { + uint32_t cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h new file mode 100644 index 00000000000..d73228d1648 --- /dev/null +++ b/include/xen/interface/features.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * features.h + * + * Feature flags, reported by XENVER_get_features. + * + * Copyright (c) 2006, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_FEATURES_H__ +#define __XEN_PUBLIC_FEATURES_H__ + +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. + */ +#define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ +#define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space are handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ +#define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. 
+ */ +#define XENFEAT_pae_pgdir_above_4gb 4 + +#define XENFEAT_NR_SUBMAPS 1 + +#endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h new file mode 100644 index 00000000000..e9e06695ed5 --- /dev/null +++ b/include/xen/interface/grant_table.h @@ -0,0 +1,301 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (arch/i386/mach-xen/grant_table.c). + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. 
(NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ +struct grant_entry { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. 
+ */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access + * by devices and/or host CPUs. If successful, <handle> is a tracking number + * that must be presented later to destroy the mapping(s). On error, + * <status> is a negative status code. + * NOTES: + * 1. If GNTPIN_map_for_dev is specified then <dev_bus_addr> is the address + * via which I/O devices may access the granted frame. + * 2.
If GNTPIN_map_for_host is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in <host_addr>. + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +#define GNTTABOP_map_grant_ref 0 +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by <handle>. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by <handle>. + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_grant_ref 1 +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); + +/* + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least + * <nr_frames> pages. The frame addresses are written to the <frame_list>. + * Only <nr_frames> addresses are written, even if the table is larger. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain.
+ */ +#define GNTTABOP_setup_table 2 +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + GUEST_HANDLE(ulong) frame_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +#define GNTTABOP_dump_table 3 +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); + +/* + * GNTTABOP_transfer_grant_ref: Transfer <mfn> to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * <domid, ref>. + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +#define GNTTABOP_transfer 4 +struct gnttab_transfer { + /* IN parameters. */ + unsigned long mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); + +/* + * Bitfield values for update_pin_status.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address.
+ * 1 => This map request contains the machine address of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +/* + * Values for error status returns. All errors are -ve. + */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognised domain id. */ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h new file mode 100644 index 00000000000..c2d1fa4dc1e --- /dev/null +++ b/include/xen/interface/io/blkif.h @@ -0,0 +1,94 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes.
+ * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +typedef uint16_t blkif_vdev_t; +typedef uint64_t blkif_sector_t; + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * Recognised only if "feature-barrier" is present in backend xenbus info. + * The "feature_barrier" node contains a boolean indicating whether barrier + * requests are likely to succeed or fail. Either way, a barrier request + * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by + * the underlying block-device hardware. The boolean simply indicates whether + * or not it is worthwhile for the frontend to attempt barrier requests. + * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not* + * create the "feature-barrier" node! + */ +#define BLKIF_OP_WRITE_BARRIER 2 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? 
*/ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ + +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h new file mode 100644 index 00000000000..e563de70f78 --- /dev/null +++ b/include/xen/interface/io/console.h @@ -0,0 +1,23 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. 
+ * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h new file mode 100644 index 00000000000..518481c95f1 --- /dev/null +++ b/include/xen/interface/io/netif.h @@ -0,0 +1,158 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * This is the 'wire' format for packets: + * Request 1: netif_tx_request -- NETTXF_* (any flags) + * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) + * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) + * Request 4: netif_tx_request -- NETTXF_more_data + * Request 5: netif_tx_request -- NETTXF_more_data + * ... + * Request N: netif_tx_request -- 0 + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. 
*/ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +struct xen_netif_tx_request { + grant_ref_t gref; /* Reference to buffer page */ + uint16_t offset; /* Offset within buffer page */ + uint16_t flags; /* NETTXF_* */ + uint16_t id; /* Echoed in response message. */ + uint16_t size; /* Packet size in bytes. */ +}; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MAX (2) + +/* netif_extra_info flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types - only TCPv4 currently supported. */ +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) + +/* + * This structure needs to fit within both netif_tx_request and + * netif_rx_response for compatibility. + */ +struct xen_netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + struct { + /* + * Maximum payload size of each segment. For + * example, for TCP this is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of + * the packet and any extra features required + * to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. This specifies any extra GSO + * features required to process this packet, + * such as ECN support for TCPv4. 
+ */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + uint16_t pad[3]; + } u; +}; + +struct xen_netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ +}; + +struct xen_netif_rx_request { + uint16_t id; /* Echoed in response message. */ + grant_ref_t gref; /* Reference to incoming granted frame */ +}; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). */ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +struct xen_netif_rx_response { + uint16_t id; + uint16_t offset; /* Offset in page of start of received packet */ + uint16_t flags; /* NETRXF_* */ + int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ +}; + +/* + * Generate netif ring structures and types. + */ + +DEFINE_RING_TYPES(xen_netif_tx, + struct xen_netif_tx_request, + struct xen_netif_tx_response); +DEFINE_RING_TYPES(xen_netif_rx, + struct xen_netif_rx_request, + struct xen_netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ +#define NETIF_RSP_NULL 1 + +#endif diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h new file mode 100644 index 00000000000..e8cbf431c8c --- /dev/null +++ b/include/xen/interface/io/ring.h @@ -0,0 +1,260 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Tim Deegan and Andrew Warfield November 2004.
+ */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say struct request, and struct response already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, struct request, struct response); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * struct mytag_sring - The shared ring. + * struct mytag_front_ring - The 'front' half of the ring. + * struct mytag_back_ring - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). 
To initialise + * the front half: + * + * struct mytag_front_ring front_ring; + * SHARED_RING_INIT((struct mytag_sring *)shared_page); + * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, + * PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * struct mytag_back_ring back_ring; + * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, + * PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. 
+ */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + memset((_s)->pad, 0, sizeof((_s)->pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* Initialize to existing shared indexes -- for recovery */ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ + }) + +/* Direct access to individual ring elements, by index. 
*/ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + wmb(); /* front sees responses /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. 
If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + wmb(); /* front sees responses /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + mb(); /* front sees new responses /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h new file mode 100644 index 00000000000..46508c7fa39 --- /dev/null +++ b/include/xen/interface/io/xenbus.h @@ -0,0 +1,44 @@ 
+/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* The state of either end of the Xenbus, i.e. the current communication + status of initialisation across the bus. States here imply nothing about + the state of the connection between the driver and the kernel's device + layers. */ +enum xenbus_state +{ + XenbusStateUnknown = 0, + XenbusStateInitialising = 1, + XenbusStateInitWait = 2, /* Finished early + initialisation, but waiting + for information from the peer + or hotplug scripts. */ + XenbusStateInitialised = 3, /* Initialised and waiting for a + connection from the peer. */ + XenbusStateConnected = 4, + XenbusStateClosing = 5, /* The device is being closed + due to an error or an unplug + event. */ + XenbusStateClosed = 6 + +}; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h new file mode 100644 index 00000000000..99fcffb372d --- /dev/null +++ b/include/xen/interface/io/xs_wire.h @@ -0,0 +1,87 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. 
+ * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#define XSD_ERROR(x) { x, #x } +static struct xsd_errors xsd_errors[] __attribute__((unused)) = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN) +}; + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* Inter-domain shared memory communications. */ +#define XENSTORE_RING_SIZE 1024 +typedef uint32_t XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. 
*/ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; +}; + +#endif /* _XS_WIRE_H */ diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h new file mode 100644 index 00000000000..af36ead1681 --- /dev/null +++ b/include/xen/interface/memory.h @@ -0,0 +1,145 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +/* + * Increase or decrease the specified domain's memory reservation. Returns a + * -ve errcode on failure, or the # extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. + */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 +struct xen_memory_reservation { + + /* + * XENMEM_increase_reservation: + * OUT: MFN (*not* GMFN) bases of extents that were allocated + * XENMEM_decrease_reservation: + * IN: GMFN bases of extents to free + * XENMEM_populate_physmap: + * IN: GPFN bases of extents to populate with memory + * OUT: GMFN bases of extents that were allocated + * (NB. This command also updates the mach_to_phys translation table) + */ + GUEST_HANDLE(ulong) extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). */ + unsigned long nr_extents; + unsigned int extent_order; + + /* + * Maximum # bits addressable by the user of the allocated region (e.g., + * I/O devices often have a 32-bit limitation even in 64-bit systems). If + * zero then the user has no addressing restriction. + * This field is not used by XENMEM_decrease_reservation. + */ + unsigned int address_bits; + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. 
+ */ + domid_t domid; + +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); + +/* + * Returns the maximum machine frame number of mapped RAM in this system. + * This command always succeeds (it never returns an error code). + * arg == NULL. + */ +#define XENMEM_maximum_ram_page 2 + +/* + * Returns the current or maximum memory reservation, in pages, of the + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. + * arg == addr of domid_t. + */ +#define XENMEM_current_reservation 3 +#define XENMEM_maximum_reservation 4 + +/* + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys + * mapping table. Architectures which do not have a m2p table do not implement + * this command. + * arg == addr of xen_machphys_mfn_list_t. + */ +#define XENMEM_machphys_mfn_list 5 +struct xen_machphys_mfn_list { + /* + * Size of the 'extent_start' array. Fewer entries will be filled if the + * machphys table is smaller than max_extents * 2MB. + */ + unsigned int max_extents; + + /* + * Pointer to buffer to fill with list of extent starts. If there are + * any large discontiguities in the machine address space, 2MB gaps in + * the machphys table will be represented by an MFN base of zero. + */ + GUEST_HANDLE(ulong) extent_start; + + /* + * Number of extents written to the above array. This will be smaller + * than 'max_extents' if the machphys table is smaller than + * max_extents * 2MB. + */ + unsigned int nr_extents; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); + +/* + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudophysical address space. + * arg == addr of xen_add_to_physmap_t. + */ +#define XENMEM_add_to_physmap 7 +struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* Source mapping space.
*/ +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ + unsigned int space; + + /* Index into source mapping space. */ + unsigned long idx; + + /* GPFN where the source mapping page should appear. */ + unsigned long gpfn; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); + +/* + * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error + * code on failure. This call only works for auto-translated guests. + */ +#define XENMEM_translate_gpfn_list 8 +struct xen_translate_gpfn_list { + /* Which domain to translate for? */ + domid_t domid; + + /* Length of list. */ + unsigned long nr_gpfns; + + /* List of GPFNs to translate. */ + GUEST_HANDLE(ulong) gpfn_list; + + /* + * Output list to contain MFN translations. May be the same as the input + * list (in which case each input GPFN is overwritten with the output MFN). + */ + GUEST_HANDLE(ulong) mfn_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h new file mode 100644 index 00000000000..cd6939147cb --- /dev/null +++ b/include/xen/interface/physdev.h @@ -0,0 +1,145 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. + */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. + */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ + uint8_t * bitmap; + uint32_t nr_ports; +}; + +/* + * Read or write an IO-APIC register. 
+ * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. + * @arg == pointer to physdev_irq structure. + */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; + +/* + * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op() + * hypercall since 0x00030202. + */ +struct physdev_op { + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +/* + * These all-capitals physdev operation names are superseded by the new names + * (defined above) since interface version 0x00030202.
+ */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h new file mode 100644 index 00000000000..5fec575a800 --- /dev/null +++ b/include/xen/interface/sched.h @@ -0,0 +1,77 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include "event_channel.h" + +/* + * The prototype for this hypercall is: + * long sched_op_new(int cmd, void *arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * + * **NOTE**: + * Versions of Xen prior to 3.0.2 provide only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + */ + +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. 
+ */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown structure. + */ +#define SCHEDOP_shutdown 2 +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll structure. + */ +#define SCHEDOP_poll 3 +struct sched_poll { + GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +DEFINE_GUEST_HANDLE_STRUCT(sched_poll); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h new file mode 100644 index 00000000000..c6218f1ad3c --- /dev/null +++ b/include/xen/interface/vcpu.h @@ -0,0 +1,154 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +/* + * Prototype for this hypercall is: + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). + */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). 
+ * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPUOP_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practice to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +/* + * Return information about the state and running time of a VCPU. + * @extra_arg == pointer to vcpu_runstate_info structure. + */ +#define VCPUOP_get_runstate_info 4 +struct vcpu_runstate_info { + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; +}; + +/* VCPU is currently running on a physical CPU. */ +#define RUNSTATE_running 0 + +/* VCPU is runnable, but not currently scheduled on any physical CPU. */ +#define RUNSTATE_runnable 1 + +/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ +#define RUNSTATE_blocked 2 + +/* + * VCPU is not runnable, but it is not blocked. + * This is a 'catch all' state for things like hotplug and pauses by the + * system administrator (or for critical sections in the hypervisor). + * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */ +#define RUNSTATE_offline 3 + +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. + * Notes: + * 1. The registered address may be virtual or physical, depending on the + * platform. The virtual address should be registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_memory_area 5 +struct vcpu_register_runstate_memory_area { + union { + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +}; + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. + */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). 
*/ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +#endif /* __XEN_PUBLIC_VCPU_H__ */ diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h new file mode 100644 index 00000000000..453235e923f --- /dev/null +++ b/include/xen/interface/version.h @@ -0,0 +1,60 @@ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Copyright (c) 2005, Nguyen Anh Quynh + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +/* NB. All ops return zero on success, except XENVER_version. */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +struct xen_extraversion { + char extraversion[16]; +}; +#define XEN_EXTRAVERSION_LEN (sizeof(struct xen_extraversion)) + +/* arg == xen_compile_info_t. */ +#define XENVER_compile_info 2 +struct xen_compile_info { + char compiler[64]; + char compile_by[16]; + char compile_domain[32]; + char compile_date[32]; +}; + +#define XENVER_capabilities 3 +struct xen_capabilities_info { + char info[1024]; +}; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(struct xen_capabilities_info)) + +#define XENVER_changeset 4 +struct xen_changeset_info { + char info[64]; +}; +#define XEN_CHANGESET_INFO_LEN (sizeof(struct xen_changeset_info)) + +#define XENVER_platform_parameters 5 +struct xen_platform_parameters { + unsigned long virt_start; +}; + +#define XENVER_get_features 6 +struct xen_feature_info { + unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap; /* OUT: 32-bit submap */ +}; + +/* Declares the features reported by XENVER_get_features. 
*/ +#include "features.h" + +#endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h new file mode 100644 index 00000000000..518a5bf79ed --- /dev/null +++ b/include/xen/interface/xen.h @@ -0,0 +1,447 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include + +/* + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). + */ + +/* + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. + * EAX = return value + * (argument registers may be clobbered on return) + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6. + * RAX = return value + * (argument registers not clobbered on return; RCX, R11 are) + */ +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op 6 +#define __HYPERVISOR_dom0_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op_new 29 
+#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + */ +#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ +#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ +#define NR_VIRQS 8 + +/* + * MMU-UPDATE REQUESTS + * + * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * ptr[1:0] specifies the appropriate MMU_* command. + * + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table. If updating an L1 table, and the new + * table entry is valid/present, the mapped frame must belong to the FD, if + * an FD has been specified. If attempting to map an I/O page then the + * caller assumes the privilege of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. 
*/ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ + +/* + * MMU EXTENDED OPERATIONS + * + * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). + * nr_ents: Number of entries in LDT. 
+ */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ + unsigned long mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ + void *vcpumask; + } arg2; +}; +DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); +#endif + +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 +#define VMASST_TYPE_4gb_segments 0 +#define VMASST_TYPE_4gb_segments_notify 1 +#define VMASST_TYPE_writable_pagetables 2 +#define VMASST_TYPE_pae_extended_cr3 3 +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. 
*/ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(mmu_update); + +/* + * Send an array of these to HYPERVISOR_multicall(). + * NB. The fields are natural register size for this architecture. + */ +struct multicall_entry { + unsigned long op; + long result; + unsigned long args[6]; +}; +DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); + +/* + * Event channel endpoints per domain: + * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + */ +#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed + * by an increment of 'version'. The guest can therefore + * detect updates by looking for changes to 'version'. 
If the + * least-significant bit of the version number is set then an + * update is in progress and the guest must wait to read a + * consistent set of values. The correct way to interact with + * the version number is similar to Linux's seqlock: see the + * implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. 
+ * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ + +/* + * Xen/kernel shared data -- pointer provided in start_info. + * NB. We expect that this struct is smaller than a page. + */ +struct shared_info { + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. 
If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + unsigned long evtchn_pending[sizeof(unsigned long) * 8]; + unsigned long evtchn_mask[sizeof(unsigned long) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + + struct arch_shared_info arch; + +}; + +/* + * Start-of-day memory layout for the initial domain (DOM0): + * 1. The domain is started within a contiguous virtual-memory region. + * 2. The contiguous region begins and ends on an aligned 4MB boundary. + * 3. The region start corresponds to the load address of the OS image. + * If the load address is not 4MB aligned then the address is rounded down. + * 4. This is the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base, CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] + * 5. Bootstrap elements are packed together, but each is 4kB-aligned. + * 6. The initial ram disk may be omitted. + * 7. The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain.
In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 8. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 9. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + */ + +#define MAX_GUEST_CMDLINE 1024 +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen--". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + unsigned long store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + unsigned long mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; +}; + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? 
*/ + +typedef uint64_t cpumap_t; + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_XEN_H__ */ -- cgit v1.2.3-70-g09d2 From 5ead97c84fa7d63a6a7a2f4e9f18f452bd109045 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:04 -0700 Subject: xen: Core Xen implementation This patch is a rollup of all the core pieces of the Xen implementation, including: - booting and setup - pagetable setup - privileged instructions - segmentation - interrupt flags - upcalls - multicall batching BOOTING AND SETUP The vmlinux image is decorated with ELF notes which tell the Xen domain builder what the kernel's requirements are; the domain builder then constructs the address space accordingly and starts the kernel. Xen has its own entrypoint for the kernel (contained in an ELF note). The ELF notes are set up by xen-head.S, which is included into head.S. In principle it could be linked separately, but it seems to provoke lots of binutils bugs. Because the domain builder starts the kernel in a fairly sane state (32-bit protected mode, paging enabled, flat segments set up), there's not a lot of setup needed before starting the kernel proper. The main steps are: 1. Install the Xen paravirt_ops, which is simply a matter of a structure assignment. 2. Set init_mm to use the Xen-supplied pagetables (analogous to the head.S generated pagetables in a native boot). 3. Reserve address space for Xen, since it takes a chunk at the top of the address space for its own use. 4. Call start_kernel() PAGETABLE SETUP Once we hit the main kernel boot sequence, it will end up calling back via paravirt_ops to set up various pieces of Xen specific state. 
One of the critical things which requires a bit of extra care is the construction of the initial init_mm pagetable. Because Xen places tight constraints on pagetables (an active pagetable must always be valid, and must always be mapped read-only to the guest domain), we need to be careful when constructing the new pagetable to keep these constraints in mind. It turns out that the easiest way to do this is to use the initial Xen-provided pagetable as a template, and then just insert new mappings for memory where a mapping doesn't already exist. This means that during pagetable setup, it uses a special version of xen_set_pte which ignores any attempt to remap a read-only page as read-write (since Xen will map its own initial pagetable as RO), but lets other changes to the ptes happen, so that things like NX are set properly. PRIVILEGED INSTRUCTIONS AND SEGMENTATION When the kernel runs under Xen, it runs in ring 1 rather than ring 0. This means that it is more privileged than user-mode in ring 3, but it still can't run privileged instructions directly. Non-performance-critical instructions are dealt with by taking a privilege exception and trapping into the hypervisor and emulating the instruction, but more performance-critical instructions have their own specific paravirt_ops. In many cases we can avoid having to do any hypercalls for these instructions, or the Xen implementation is quite different from the normal native version. The privileged instructions fall into the broad classes of: Segmentation: setting up the GDT and the GDT entries, LDT, TLS and so on. Xen doesn't allow the GDT to be directly modified; all GDT updates are done via hypercalls where the new entries can be validated. This is important because Xen uses segment limits to prevent the guest kernel from damaging the hypervisor itself. Traps and exceptions: Xen uses a special format for trap entrypoints, so when the kernel wants to set an IDT entry, it needs to be converted to the form Xen expects.
Xen sets int 0x80 up specially so that the trap goes straight from userspace into the guest kernel without going via the hypervisor. sysenter isn't supported. Kernel stack: The esp0 entry is extracted from the tss and provided to Xen. TLB operations: the various TLB calls are mapped into corresponding Xen hypercalls. Control registers: all the control registers are privileged. The most important is cr3, which points to the base of the current pagetable, and we handle it specially. Another instruction we treat specially is CPUID, even though it's not privileged. We want to control what CPU features are visible to the rest of the kernel, and so CPUID ends up going into a paravirt_op. Xen implements this mainly to disable the ACPI and APIC subsystems. INTERRUPT FLAGS Xen maintains its own separate flag for masking events, which is contained within the per-cpu vcpu_info structure. Because the guest kernel runs in ring 1 and not 0, the IF flag in EFLAGS is completely ignored (and must be, because even if a guest domain disables interrupts for itself, it can't disable them overall). (A note on terminology: "events" and interrupts are effectively synonymous. However, rather than using an "enable flag", Xen uses a "mask flag", which blocks event delivery when it is non-zero.) There are paravirt_ops for each of cli/sti/save_fl/restore_fl, which are implemented to manage the Xen event mask state. The only thing worth noting is that when events are unmasked, we need to explicitly see if there's a pending event and call into the hypervisor to make sure it gets delivered. UPCALLS Xen needs a couple of upcall (or callback) functions to be implemented by each guest. One is the event upcall, which is how events (interrupts, effectively) are delivered to the guests. The other is the failsafe callback, which is used to report errors in either reloading a segment register, or caused by iret.
These are implemented in i386/kernel/entry.S so they can jump into the normal iret_exc path when necessary. MULTICALL BATCHING Xen provides a multicall mechanism, which allows multiple hypercalls to be issued at once in order to mitigate the cost of trapping into the hypervisor. This is particularly useful for context switches, since the 4-5 hypercalls they would normally need (reload cr3, update TLS, maybe update LDT) can be reduced to one. This patch implements a generic batching mechanism for hypercalls, which gets used in many places in the Xen code. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Ian Pratt Cc: Christian Limpach Cc: Adrian Bunk --- arch/i386/Makefile | 3 + arch/i386/kernel/entry.S | 71 ++++ arch/i386/kernel/head.S | 5 +- arch/i386/kernel/vmlinux.lds.S | 1 + arch/i386/xen/Makefile | 1 + arch/i386/xen/enlighten.c | 745 +++++++++++++++++++++++++++++++++++++++ arch/i386/xen/features.c | 29 ++ arch/i386/xen/multicalls.c | 89 +++++ arch/i386/xen/multicalls.h | 45 +++ arch/i386/xen/setup.c | 97 +++++ arch/i386/xen/xen-head.S | 36 ++ arch/i386/xen/xen-ops.h | 31 ++ include/asm-i386/irq.h | 1 + include/asm-i386/xen/hypercall.h | 18 + include/xen/features.h | 23 ++ include/xen/page.h | 179 ++++++++++ 16 files changed, 1373 insertions(+), 1 deletion(-) create mode 100644 arch/i386/xen/Makefile create mode 100644 arch/i386/xen/enlighten.c create mode 100644 arch/i386/xen/features.c create mode 100644 arch/i386/xen/multicalls.c create mode 100644 arch/i386/xen/multicalls.h create mode 100644 arch/i386/xen/setup.c create mode 100644 arch/i386/xen/xen-head.S create mode 100644 arch/i386/xen/xen-ops.h create mode 100644 include/xen/features.h create mode 100644 include/xen/page.h (limited to 'include') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 181cc29a7c4..01f0ff0daaf 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -93,6 +93,9 @@ mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000 
mcore-$(CONFIG_X86_ES7000) := mach-default core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/ +# Xen paravirtualization support +core-$(CONFIG_XEN) += arch/i386/xen/ + # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 3c3c220488c..ffb23654427 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -1023,6 +1023,77 @@ ENTRY(kernel_thread_helper) CFI_ENDPROC ENDPROC(kernel_thread_helper) +#ifdef CONFIG_XEN +ENTRY(xen_hypervisor_callback) + CFI_STARTPROC + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + mov %esp, %eax + call xen_evtchn_do_upcall + jmp ret_from_intr + CFI_ENDPROC +ENDPROC(xen_hypervisor_callback) + +# Hypervisor uses this for application faults while it executes. +# We get here for two reasons: +# 1. Fault while reloading DS, ES, FS or GS +# 2. Fault while executing IRET +# Category 1 we fix up by reattempting the load, and zeroing the segment +# register if the load fails. +# Category 2 we fix up by jumping to do_iret_error. We cannot use the +# normal Linux return path in this case because if we use the IRET hypercall +# to pop the stack frame we end up in an infinite loop of failsafe callbacks. +# We distinguish between categories by maintaining a status value in EAX. 
+ENTRY(xen_failsafe_callback) + CFI_STARTPROC + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl $1,%eax +1: mov 4(%esp),%ds +2: mov 8(%esp),%es +3: mov 12(%esp),%fs +4: mov 16(%esp),%gs + testl %eax,%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + lea 16(%esp),%esp + CFI_ADJUST_CFA_OFFSET -16 + jz 5f + addl $16,%esp + jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) +5: pushl $0 # EAX == 0 => Category 1 (Bad segment) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + jmp ret_from_exception + CFI_ENDPROC + +.section .fixup,"ax" +6: xorl %eax,%eax + movl %eax,4(%esp) + jmp 1b +7: xorl %eax,%eax + movl %eax,8(%esp) + jmp 2b +8: xorl %eax,%eax + movl %eax,12(%esp) + jmp 3b +9: xorl %eax,%eax + movl %eax,16(%esp) + jmp 4b +.previous +.section __ex_table,"a" + .align 4 + .long 1b,6b + .long 2b,7b + .long 3b,8b + .long 4b,9b +.previous +ENDPROC(xen_failsafe_callback) + +#endif /* CONFIG_XEN */ + .section .rodata,"a" #include "syscall_table.S" diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 82714668d43..7c52b222207 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -510,7 +510,8 @@ ENTRY(_stext) /* * BSS section */ -.section ".bss.page_aligned","w" +.section ".bss.page_aligned","wa" + .align PAGE_SIZE_asm ENTRY(swapper_pg_dir) .fill 1024,4,0 ENTRY(swapper_pg_pmd) @@ -538,6 +539,8 @@ fault_msg: .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" .asciz "Stack: %p %p %p %p %p %p %p %p\n" +#include "../xen/xen-head.S" + /* * The IDT and GDT 'descriptors' are a strange 48-bit object * only used by the lidt and lgdt instructions. They are not diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index aa87b06c7c8..00f1bc47d3a 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -88,6 +88,7 @@ SECTIONS . 
= ALIGN(4096); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) *(.data.idt) } diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile new file mode 100644 index 00000000000..60bc1cfb101 --- /dev/null +++ b/arch/i386/xen/Makefile @@ -0,0 +1 @@ +obj-y := enlighten.o setup.o features.o multicalls.o diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c new file mode 100644 index 00000000000..2d484f9320d --- /dev/null +++ b/arch/i386/xen/enlighten.c @@ -0,0 +1,745 @@ +/* + * Core of Xen paravirt_ops implementation. + * + * This file contains the xen_paravirt_ops structure itself, and the + * implementations for: + * - privileged instructions + * - interrupt flags + * - segment operations + * - booting and setup + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xen-ops.h" +#include "multicalls.h" + +EXPORT_SYMBOL_GPL(hypercall_page); + +DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); + +DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); +DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU(unsigned long, xen_cr3); + +struct start_info *xen_start_info; +EXPORT_SYMBOL_GPL(xen_start_info); + +static void xen_vcpu_setup(int cpu) +{ + per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; +} + +static void __init xen_banner(void) +{ + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + paravirt_ops.name); + printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); +} + +static void xen_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + unsigned maskedx = ~0; + + /* + * Mask out inconvenient features, to try and disable as many + * unsupported kernel subsystems as possible. 
+ */ + if (*eax == 1) + maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ + (1 << X86_FEATURE_ACPI) | /* disable ACPI */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + asm(XEN_EMULATE_PREFIX "cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); + *edx &= maskedx; +} + +static void xen_set_debugreg(int reg, unsigned long val) +{ + HYPERVISOR_set_debugreg(reg, val); +} + +static unsigned long xen_get_debugreg(int reg) +{ + return HYPERVISOR_get_debugreg(reg); +} + +static unsigned long xen_save_fl(void) +{ + struct vcpu_info *vcpu; + unsigned long flags; + + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + /* flag has opposite sense of mask */ + flags = !vcpu->evtchn_upcall_mask; + preempt_enable(); + + /* convert to IF type flag + -0 -> 0x00000000 + -1 -> 0xffffffff + */ + return (-flags) & X86_EFLAGS_IF; +} + +static void xen_restore_fl(unsigned long flags) +{ + struct vcpu_info *vcpu; + + preempt_disable(); + + /* convert from IF type flag */ + flags = !(flags & X86_EFLAGS_IF); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = flags; + + if (flags == 0) { + /* Unmask then check (avoid races). We're only protecting + against updates by this CPU, so there's no need for + anything stronger. */ + barrier(); + + if (unlikely(vcpu->evtchn_upcall_pending)) + force_evtchn_callback(); + preempt_enable(); + } else + preempt_enable_no_resched(); +} + +static void xen_irq_disable(void) +{ + struct vcpu_info *vcpu; + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = 1; + preempt_enable_no_resched(); +} + +static void xen_irq_enable(void) +{ + struct vcpu_info *vcpu; + + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = 0; + + /* Unmask then check (avoid races). We're only protecting + against updates by this CPU, so there's no need for + anything stronger. 
*/ + barrier(); + + if (unlikely(vcpu->evtchn_upcall_pending)) + force_evtchn_callback(); + preempt_enable(); +} + +static void xen_safe_halt(void) +{ + /* Blocking includes an implicit local_irq_enable(). */ + if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) + BUG(); +} + +static void xen_halt(void) +{ + if (irqs_disabled()) + HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + else + xen_safe_halt(); +} + +static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) +{ + switch (mode) { + case PARAVIRT_LAZY_NONE: + BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); + break; + + case PARAVIRT_LAZY_MMU: + case PARAVIRT_LAZY_CPU: + BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); + break; + + case PARAVIRT_LAZY_FLUSH: + /* flush if necessary, but don't change state */ + if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) + xen_mc_flush(); + return; + } + + xen_mc_flush(); + x86_write_percpu(xen_lazy_mode, mode); +} + +static unsigned long xen_store_tr(void) +{ + return 0; +} + +static void xen_set_ldt(const void *addr, unsigned entries) +{ + unsigned long linear_addr = (unsigned long)addr; + struct mmuext_op *op; + struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_SET_LDT; + if (linear_addr) { + /* ldt my be vmalloced, use arbitrary_virt_to_machine */ + xmaddr_t maddr; + maddr = arbitrary_virt_to_machine((unsigned long)addr); + linear_addr = (unsigned long)maddr.maddr; + } + op->arg1.linear_addr = linear_addr; + op->arg2.nr_ents = entries; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_load_gdt(const struct Xgt_desc_struct *dtr) +{ + unsigned long *frames; + unsigned long va = dtr->address; + unsigned int size = dtr->size + 1; + unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + int f; + struct multicall_space mcs; + + /* A GDT can be up to 64k in size, which corresponds to 8192 + 8-byte entries, or 16 4k pages.. 
*/ + + BUG_ON(size > 65536); + BUG_ON(va & ~PAGE_MASK); + + mcs = xen_mc_entry(sizeof(*frames) * pages); + frames = mcs.args; + + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { + frames[f] = virt_to_mfn(va); + make_lowmem_page_readonly((void *)va); + } + + MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void load_TLS_descriptor(struct thread_struct *t, + unsigned int cpu, unsigned int i) +{ + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + struct multicall_space mc = __xen_mc_entry(0); + + MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); +} + +static void xen_load_tls(struct thread_struct *t, unsigned int cpu) +{ + xen_mc_batch(); + + load_TLS_descriptor(t, cpu, 0); + load_TLS_descriptor(t, cpu, 1); + load_TLS_descriptor(t, cpu, 2); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, + u32 low, u32 high) +{ + unsigned long lp = (unsigned long)&dt[entrynum]; + xmaddr_t mach_lp = virt_to_machine(lp); + u64 entry = (u64)high << 32 | low; + + xen_mc_flush(); + if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) + BUG(); +} + +static int cvt_gate_to_trap(int vector, u32 low, u32 high, + struct trap_info *info) +{ + u8 type, dpl; + + type = (high >> 8) & 0x1f; + dpl = (high >> 13) & 3; + + if (type != 0xf && type != 0xe) + return 0; + + info->vector = vector; + info->address = (high & 0xffff0000) | (low & 0x0000ffff); + info->cs = low >> 16; + info->flags = dpl; + /* interrupt gates clear IF */ + if (type == 0xe) + info->flags |= 4; + + return 1; +} + +/* Locations of each CPU's IDT */ +static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc); + +/* Set an IDT entry. If the entry is part of the current IDT, then + also update Xen. 
*/ +static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, + u32 low, u32 high) +{ + + int cpu = smp_processor_id(); + unsigned long p = (unsigned long)&dt[entrynum]; + unsigned long start = per_cpu(idt_desc, cpu).address; + unsigned long end = start + per_cpu(idt_desc, cpu).size + 1; + + xen_mc_flush(); + + write_dt_entry(dt, entrynum, low, high); + + if (p >= start && (p + 8) <= end) { + struct trap_info info[2]; + + info[1].address = 0; + + if (cvt_gate_to_trap(entrynum, low, high, &info[0])) + if (HYPERVISOR_set_trap_table(info)) + BUG(); + } +} + +/* Load a new IDT into Xen. In principle this can be per-CPU, so we + hold a spinlock to protect the static traps[] array (static because + it avoids allocation, and saves stack space). */ +static void xen_load_idt(const struct Xgt_desc_struct *desc) +{ + static DEFINE_SPINLOCK(lock); + static struct trap_info traps[257]; + + int cpu = smp_processor_id(); + unsigned in, out, count; + + per_cpu(idt_desc, cpu) = *desc; + + count = (desc->size+1) / 8; + BUG_ON(count > 256); + + spin_lock(&lock); + for (in = out = 0; in < count; in++) { + const u32 *entry = (u32 *)(desc->address + in * 8); + + if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) + out++; + } + traps[out].address = 0; + + xen_mc_flush(); + if (HYPERVISOR_set_trap_table(traps)) + BUG(); + + spin_unlock(&lock); +} + +/* Write a GDT descriptor entry. Ignore LDT descriptors, since + they're handled differently. 
*/ +static void xen_write_gdt_entry(struct desc_struct *dt, int entry, + u32 low, u32 high) +{ + switch ((high >> 8) & 0xff) { + case DESCTYPE_LDT: + case DESCTYPE_TSS: + /* ignore */ + break; + + default: { + xmaddr_t maddr = virt_to_machine(&dt[entry]); + u64 desc = (u64)high << 32 | low; + + xen_mc_flush(); + if (HYPERVISOR_update_descriptor(maddr.maddr, desc)) + BUG(); + } + + } +} + +static void xen_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + struct multicall_space mcs = xen_mc_entry(0); + MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0); + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_set_iopl_mask(unsigned mask) +{ + struct physdev_set_iopl set_iopl; + + /* Force the change at ring 0. */ + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; + HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); +} + +static void xen_io_delay(void) +{ +} + +#ifdef CONFIG_X86_LOCAL_APIC +static unsigned long xen_apic_read(unsigned long reg) +{ + return 0; +} +#endif + +static void xen_flush_tlb(void) +{ + struct mmuext_op op; + + op.cmd = MMUEXT_TLB_FLUSH_LOCAL; + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + +static void xen_flush_tlb_single(unsigned long addr) +{ + struct mmuext_op op; + + op.cmd = MMUEXT_INVLPG_LOCAL; + op.arg1.linear_addr = addr & PAGE_MASK; + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + +static unsigned long xen_read_cr2(void) +{ + return x86_read_percpu(xen_vcpu)->arch.cr2; +} + +static void xen_write_cr4(unsigned long cr4) +{ + /* never allow TSC to be disabled */ + native_write_cr4(cr4 & ~X86_CR4_TSD); +} + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. 
+ * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +static unsigned long xen_read_cr3(void) +{ + return x86_read_percpu(xen_cr3); +} + +static void xen_write_cr3(unsigned long cr3) +{ + if (cr3 == x86_read_percpu(xen_cr3)) { + /* just a simple tlb flush */ + xen_flush_tlb(); + return; + } + + x86_write_percpu(xen_cr3, cr3); + + + { + struct mmuext_op *op; + struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); + + op = mcs.args; + op->cmd = MMUEXT_NEW_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_CPU); + } +} + +static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) +{ + /* XXX pfn isn't necessarily a lowmem page */ + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +static void xen_alloc_pd(u32 pfn) +{ + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +static void xen_release_pd(u32 pfn) +{ + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +} + +static void xen_release_pt(u32 pfn) +{ + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +} + +static void xen_alloc_pd_clone(u32 pfn, u32 clonepfn, + u32 start, u32 count) +{ + xen_alloc_pd(pfn); +} + +static __init void xen_pagetable_setup_start(pgd_t *base) +{ + pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; + + init_mm.pgd = base; + /* + * copy top-level of Xen-supplied pagetable into place. For + * !PAE we can use this as-is, but for PAE it is a stand-in + * while we copy the pmd pages. 
+ */ + memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); + + if (PTRS_PER_PMD > 1) { + int i; + /* + * For PAE, need to allocate new pmds, rather than + * share Xen's, since Xen doesn't like pmd's being + * shared between address spaces. + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { + pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + + memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), + PAGE_SIZE); + + xen_alloc_pd(PFN_DOWN(__pa(pmd))); + + set_pgd(&base[i], __pgd(1 + __pa(pmd))); + } else + pgd_clear(&base[i]); + } + } + + /* make sure zero_page is mapped RO so we can use it in pagetables */ + make_lowmem_page_readonly(empty_zero_page); + make_lowmem_page_readonly(base); + /* + * Switch to new pagetable. This is done before + * pagetable_init has done anything so that the new pages + * added to the table can be prepared properly for Xen. + */ + xen_write_cr3(__pa(base)); +} + +static __init void xen_pagetable_setup_done(pgd_t *base) +{ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* + * Create a mapping for the shared info page. + * Should be set_fixmap(), but shared_info is a machine + * address with no corresponding pseudo-phys address. 
+ */ +#if 0 + set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + PFN_DOWN(xen_start_info->shared_info), + PAGE_KERNEL); +#endif + + HYPERVISOR_shared_info = + (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); + + } else + HYPERVISOR_shared_info = + (struct shared_info *)__va(xen_start_info->shared_info); + +#if 0 + xen_pgd_pin(base); +#endif + + xen_vcpu_setup(smp_processor_id()); +} + +static const struct paravirt_ops xen_paravirt_ops __initdata = { + .paravirt_enabled = 1, + .shared_kernel_pmd = 0, + + .name = "Xen", + .banner = xen_banner, + + .patch = paravirt_patch_default, + + .memory_setup = xen_memory_setup, + .arch_setup = xen_arch_setup, + + .cpuid = xen_cpuid, + + .set_debugreg = xen_set_debugreg, + .get_debugreg = xen_get_debugreg, + + .clts = native_clts, + + .read_cr0 = native_read_cr0, + .write_cr0 = native_write_cr0, + + .read_cr2 = xen_read_cr2, + .write_cr2 = native_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, + + .read_cr4 = native_read_cr4, + .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = xen_write_cr4, + + .save_fl = xen_save_fl, + .restore_fl = xen_restore_fl, + .irq_disable = xen_irq_disable, + .irq_enable = xen_irq_enable, + .safe_halt = xen_safe_halt, + .halt = xen_halt, + .wbinvd = native_wbinvd, + + .read_msr = native_read_msr_safe, + .write_msr = native_write_msr_safe, + .read_tsc = native_read_tsc, + .read_pmc = native_read_pmc, + + .iret = (void *)&hypercall_page[__HYPERVISOR_iret], + .irq_enable_sysexit = NULL, /* never called */ + + .load_tr_desc = paravirt_nop, + .set_ldt = xen_set_ldt, + .load_gdt = xen_load_gdt, + .load_idt = xen_load_idt, + .load_tls = xen_load_tls, + + .store_gdt = native_store_gdt, + .store_idt = native_store_idt, + .store_tr = xen_store_tr, + + .write_ldt_entry = xen_write_ldt_entry, + .write_gdt_entry = xen_write_gdt_entry, + .write_idt_entry = xen_write_idt_entry, + .load_esp0 = xen_load_esp0, + + .set_iopl_mask = xen_set_iopl_mask, + .io_delay = xen_io_delay, + +#ifdef 
CONFIG_X86_LOCAL_APIC + .apic_write = paravirt_nop, + .apic_write_atomic = paravirt_nop, + .apic_read = xen_apic_read, + .setup_boot_clock = paravirt_nop, + .setup_secondary_clock = paravirt_nop, + .startup_ipi_hook = paravirt_nop, +#endif + + .flush_tlb_user = xen_flush_tlb, + .flush_tlb_kernel = xen_flush_tlb, + .flush_tlb_single = xen_flush_tlb_single, + + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .alloc_pt = xen_alloc_pt, + .alloc_pd = xen_alloc_pd, + .alloc_pd_clone = xen_alloc_pd_clone, + .release_pd = xen_release_pd, + .release_pt = xen_release_pt, + + .set_lazy_mode = xen_set_lazy_mode, +}; + +/* First C function to be called on Xen boot */ +asmlinkage void __init xen_start_kernel(void) +{ + pgd_t *pgd; + + if (!xen_start_info) + return; + + BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); + + /* Install Xen paravirt ops */ + paravirt_ops = xen_paravirt_ops; + + xen_setup_features(); + + /* Get mfn list */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + + pgd = (pgd_t *)xen_start_info->pt_base; + + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + + init_mm.pgd = pgd; /* use the Xen pagetables to start */ + + /* keep using Xen gdt for now; no urgent need to change it */ + + x86_write_percpu(xen_cr3, __pa(pgd)); + xen_vcpu_setup(0); + + paravirt_ops.kernel_rpl = 1; + if (xen_feature(XENFEAT_supervisor_mode_kernel)) + paravirt_ops.kernel_rpl = 0; + + /* set the limit of our address space */ + reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); + + /* set up basic CPUID stuff */ + cpu_detect(&new_cpu_data); + new_cpu_data.hard_math = 1; + new_cpu_data.x86_capability[0] = cpuid_edx(1); + + /* Poke various useful things into boot_params */ + LOADER_TYPE = (9 << 4) | 0; + INITRD_START = xen_start_info->mod_start 
? __pa(xen_start_info->mod_start) : 0; + INITRD_SIZE = xen_start_info->mod_len; + + /* Start the world */ + start_kernel(); +} diff --git a/arch/i386/xen/features.c b/arch/i386/xen/features.c new file mode 100644 index 00000000000..0707714e40d --- /dev/null +++ b/arch/i386/xen/features.c @@ -0,0 +1,29 @@ +/****************************************************************************** + * features.c + * + * Xen feature flags. + * + * Copyright (c) 2006, Ian Campbell, XenSource Inc. + */ +#include +#include +#include +#include +#include + +u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; +EXPORT_SYMBOL_GPL(xen_features); + +void xen_setup_features(void) +{ + struct xen_feature_info fi; + int i, j; + + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { + fi.submap_idx = i; + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) + break; + for (j = 0; j < 32; j++) + xen_features[i * 32 + j] = !!(fi.submap & 1<, XenSource Inc, 2007 + */ +#include + +#include + +#include "multicalls.h" + +#define MC_BATCH 8 +#define MC_ARGS (MC_BATCH * 32 / sizeof(u64)) + +struct mc_buffer { + struct multicall_entry entries[MC_BATCH]; + u64 args[MC_ARGS]; + unsigned mcidx, argidx; +}; + +static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); +DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); + +void xen_mc_flush(void) +{ + struct mc_buffer *b = &get_cpu_var(mc_buffer); + int ret = 0; + unsigned long flags; + + /* Disable interrupts in case someone comes in and queues + something in the middle */ + local_irq_save(flags); + + if (b->mcidx) { + int i; + + if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) + BUG(); + for (i = 0; i < b->mcidx; i++) + if (b->entries[i].result < 0) + ret++; + b->mcidx = 0; + b->argidx = 0; + } else + BUG_ON(b->argidx != 0); + + put_cpu_var(mc_buffer); + local_irq_restore(flags); + + BUG_ON(ret); +} + +struct multicall_space __xen_mc_entry(size_t args) +{ + struct mc_buffer *b = &get_cpu_var(mc_buffer); + struct multicall_space ret; + unsigned argspace = (args 
+ sizeof(u64) - 1) / sizeof(u64); + + BUG_ON(argspace > MC_ARGS); + + if (b->mcidx == MC_BATCH || + (b->argidx + argspace) > MC_ARGS) + xen_mc_flush(); + + ret.mc = &b->entries[b->mcidx]; + b->mcidx++; + ret.args = &b->args[b->argidx]; + b->argidx += argspace; + + put_cpu_var(mc_buffer); + + return ret; +} diff --git a/arch/i386/xen/multicalls.h b/arch/i386/xen/multicalls.h new file mode 100644 index 00000000000..e6f7530b156 --- /dev/null +++ b/arch/i386/xen/multicalls.h @@ -0,0 +1,45 @@ +#ifndef _XEN_MULTICALLS_H +#define _XEN_MULTICALLS_H + +#include "xen-ops.h" + +/* Multicalls */ +struct multicall_space +{ + struct multicall_entry *mc; + void *args; +}; + +/* Allocate room for a multicall and its args */ +struct multicall_space __xen_mc_entry(size_t args); + +DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); + +/* Call to start a batch of multiple __xen_mc_entry()s. Must be + paired with xen_mc_issue() */ +static inline void xen_mc_batch(void) +{ + /* need to disable interrupts until this entry is complete */ + local_irq_save(__get_cpu_var(xen_mc_irq_flags)); +} + +static inline struct multicall_space xen_mc_entry(size_t args) +{ + xen_mc_batch(); + return __xen_mc_entry(args); +} + +/* Flush all pending multicalls */ +void xen_mc_flush(void); + +/* Issue a multicall if we're not in a lazy mode */ +static inline void xen_mc_issue(unsigned mode) +{ + if ((xen_get_lazy_mode() & mode) == 0) + xen_mc_flush(); + + /* restore flags saved in xen_mc_batch */ + local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); +} + +#endif /* _XEN_MULTICALLS_H */ diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c new file mode 100644 index 00000000000..7da93ee612f --- /dev/null +++ b/arch/i386/xen/setup.c @@ -0,0 +1,97 @@ +/* + * Machine specific setup for xen + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "xen-ops.h" + +/* These are code, 
but not functions. Defined in entry.S */ +extern const char xen_hypervisor_callback[]; +extern const char xen_failsafe_callback[]; + +static __initdata struct shared_info init_shared; + +/* + * Point at some empty memory to start with. We map the real shared_info + * page as soon as fixmap is up and running. + */ +struct shared_info *HYPERVISOR_shared_info = &init_shared; + +unsigned long *phys_to_machine_mapping; +EXPORT_SYMBOL(phys_to_machine_mapping); + +/** + * machine_specific_memory_setup - Hook for machine specific memory setup. + **/ + +char * __init xen_memory_setup(void) +{ + unsigned long max_pfn = xen_start_info->nr_pages; + + e820.nr_map = 0; + add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); + + return "Xen"; +} + +static void xen_idle(void) +{ + local_irq_disable(); + + if (need_resched()) + local_irq_enable(); + else { + current_thread_info()->status &= ~TS_POLLING; + smp_mb__after_clear_bit(); + safe_halt(); + current_thread_info()->status |= TS_POLLING; + } +} + +void __init xen_arch_setup(void) +{ + struct physdev_set_iopl set_iopl; + int rc; + + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); + + HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, + __KERNEL_CS, (unsigned long)xen_failsafe_callback); + + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + printk(KERN_INFO "physdev_op failed %d\n", rc); + +#ifdef CONFIG_ACPI + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { + printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); + disable_acpi(); + } +#endif + + memcpy(boot_command_line, xen_start_info->cmd_line, + MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? 
+ COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); + + pm_idle = xen_idle; +} diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S new file mode 100644 index 00000000000..2998d55a001 --- /dev/null +++ b/arch/i386/xen/xen-head.S @@ -0,0 +1,36 @@ +/* Xen-specific pieces of head.S, intended to be included in the right + place in head.S */ + +#ifdef CONFIG_XEN + +#include +#include +#include + +ENTRY(startup_xen) + movl %esi,xen_start_info + cld + movl $(init_thread_union+THREAD_SIZE),%esp + jmp xen_start_kernel + +.pushsection ".bss.page_aligned" + .align PAGE_SIZE_asm +ENTRY(hypercall_page) + .skip 0x1000 +.popsection + + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") +#ifdef CONFIG_X86_PAE + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") +#else + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") +#endif + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + +#endif /*CONFIG_XEN */ diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h new file mode 100644 index 00000000000..79648fe1ab7 --- /dev/null +++ b/arch/i386/xen/xen-ops.h @@ -0,0 +1,31 @@ +#ifndef XEN_OPS_H +#define XEN_OPS_H + +#include +#include + +DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +DECLARE_PER_CPU(unsigned long, xen_cr3); + +extern struct start_info *xen_start_info; +extern struct shared_info *HYPERVISOR_shared_info; + +char * __init xen_memory_setup(void); +void __init xen_arch_setup(void); +void __init xen_init_IRQ(void); + +unsigned long xen_cpu_khz(void); +void __init xen_time_init(void); +unsigned long xen_get_wallclock(void); +int xen_set_wallclock(unsigned long time); +cycle_t 
xen_clocksource_read(void); + +DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); + +static inline unsigned xen_get_lazy_mode(void) +{ + return x86_read_percpu(xen_lazy_mode); +} + + +#endif /* XEN_OPS_H */ diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 9e15ce0006e..36f310632c4 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -41,6 +41,7 @@ extern int irqbalance_disable(char *str); extern void fixup_irqs(cpumask_t map); #endif +unsigned int do_IRQ(struct pt_regs *regs); void init_IRQ(void); void __init native_init_IRQ(void); diff --git a/include/asm-i386/xen/hypercall.h b/include/asm-i386/xen/hypercall.h index 53912859708..bc0ee7d961c 100644 --- a/include/asm-i386/xen/hypercall.h +++ b/include/asm-i386/xen/hypercall.h @@ -392,4 +392,22 @@ MULTI_mmuext_op(struct multicall_entry *mcl, struct mmuext_op *op, int count, mcl->args[2] = (unsigned long)success_count; mcl->args[3] = domid; } + +static inline void +MULTI_set_gdt(struct multicall_entry *mcl, unsigned long *frames, int entries) +{ + mcl->op = __HYPERVISOR_set_gdt; + mcl->args[0] = (unsigned long)frames; + mcl->args[1] = entries; +} + +static inline void +MULTI_stack_switch(struct multicall_entry *mcl, + unsigned long ss, unsigned long esp) +{ + mcl->op = __HYPERVISOR_stack_switch; + mcl->args[0] = ss; + mcl->args[1] = esp; +} + #endif /* __HYPERCALL_H__ */ diff --git a/include/xen/features.h b/include/xen/features.h new file mode 100644 index 00000000000..27292d4d2a6 --- /dev/null +++ b/include/xen/features.h @@ -0,0 +1,23 @@ +/****************************************************************************** + * features.h + * + * Query the features reported by Xen. 
+ * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_FEATURES_H__ +#define __XEN_FEATURES_H__ + +#include + +void xen_setup_features(void); + +extern u8 xen_features[XENFEAT_NR_SUBMAPS * 32]; + +static inline int xen_feature(int flag) +{ + return xen_features[flag]; +} + +#endif /* __XEN_FEATURES_H__ */ diff --git a/include/xen/page.h b/include/xen/page.h new file mode 100644 index 00000000000..1df6c193057 --- /dev/null +++ b/include/xen/page.h @@ -0,0 +1,179 @@ +#ifndef __XEN_PAGE_H +#define __XEN_PAGE_H + +#include + +#include + +#include + +#ifdef CONFIG_X86_PAE +/* Xen machine address */ +typedef struct xmaddr { + unsigned long long maddr; +} xmaddr_t; + +/* Xen pseudo-physical address */ +typedef struct xpaddr { + unsigned long long paddr; +} xpaddr_t; +#else +/* Xen machine address */ +typedef struct xmaddr { + unsigned long maddr; +} xmaddr_t; + +/* Xen pseudo-physical address */ +typedef struct xpaddr { + unsigned long paddr; +} xpaddr_t; +#endif + +#define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) +#define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) + +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME_BIT (1UL<<31) +#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) + +extern unsigned long *phys_to_machine_mapping; + +static inline unsigned long pfn_to_mfn(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return pfn; + + return phys_to_machine_mapping[(unsigned int)(pfn)] & + ~FOREIGN_FRAME_BIT; +} + +static inline int phys_to_machine_mapping_valid(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 1; + + return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + +#if 0 + if (unlikely((mfn >> machine_to_phys_order) != 0)) + return max_mapnr; +#endif + + pfn = 0; + /* + * The
array access can fail (e.g., device space beyond end of RAM). + * In such cases it doesn't matter what we return (we return garbage), + * but we must handle the fault without crashing! + */ + __get_user(pfn, &machine_to_phys_mapping[mfn]); + + return pfn; +} + +static inline xmaddr_t phys_to_machine(xpaddr_t phys) +{ + unsigned offset = phys.paddr & ~PAGE_MASK; + return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); +} + +static inline xpaddr_t machine_to_phys(xmaddr_t machine) +{ + unsigned offset = machine.maddr & ~PAGE_MASK; + return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); +} + +/* + * We detect special mappings in one of two ways: + * 1. If the MFN is an I/O page then Xen will set the m2p entry + * to be outside our maximum possible pseudophys range. + * 2. If the MFN belongs to a different domain then we will certainly + * not have MFN in our p2m table. Conversely, if the page is ours, + * then we'll have p2m(m2p(MFN))==MFN. + * If we detect a special mapping then it doesn't have a 'struct page'. + * We force !pfn_valid() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct.
+ */ +static inline unsigned long mfn_to_local_pfn(unsigned long mfn) +{ + extern unsigned long max_mapnr; + unsigned long pfn = mfn_to_pfn(mfn); + if ((pfn < max_mapnr) + && !xen_feature(XENFEAT_auto_translated_physmap) + && (phys_to_machine_mapping[pfn] != mfn)) + return max_mapnr; /* force !pfn_valid() */ + return pfn; +} + +static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + phys_to_machine_mapping[pfn] = mfn; +} + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) +#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) + +#ifdef CONFIG_X86_PAE +#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ + (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) + +static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + + pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | + (pgprot_val(pgprot) >> 32); + pte.pte_high &= (__supported_pte_mask >> 32); + pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); + pte.pte_low &= __supported_pte_mask; + + return pte; +} + +static inline unsigned long long pte_val_ma(pte_t x) +{ + return ((unsigned long long)x.pte_high << 32) | x.pte_low; +} +#define pmd_val_ma(v) ((v).pmd) +#define pud_val_ma(v) ((v).pgd.pgd) +#define __pte_ma(x) ((pte_t) { .pte_low = (x), .pte_high = (x)>>32 } ) +#define __pmd_ma(x) ((pmd_t) { (x) } ) +#else /* !X86_PAE */ +#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) +#define mfn_pte(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pte_val_ma(x) ((x).pte_low) +#define pmd_val_ma(v) ((v).pud.pgd.pgd) +#define __pte_ma(x) ((pte_t) { (x) } ) +#endif /* CONFIG_X86_PAE */ + +#define pgd_val_ma(x) ((x).pgd) + + +xmaddr_t arbitrary_virt_to_machine(unsigned long address); +void 
make_lowmem_page_readonly(void *vaddr); +void make_lowmem_page_readwrite(void *vaddr); + +#endif /* __XEN_PAGE_H */ -- cgit v1.2.3-70-g09d2 From e46cdb66c8fc1c8d61cfae0f219ff47ac4b9d531 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:05 -0700 Subject: xen: event channels Xen implements interrupts in terms of event channels. Each guest domain gets 1024 event channels which can be used for a variety of purposes, such as Xen timer events, inter-domain events, inter-processor events (IPI) or for real hardware IRQs. Within the kernel, we map the event channels to IRQs, and implement the whole interrupt handling using a Xen irq_chip. Rather than setting NR_IRQ to 1024 under PARAVIRT in order to accommodate Xen, we create a dynamic mapping between event channels and IRQs. Ideally, Linux will eventually move towards dynamically allocating per-irq structures, and we can use a 1:1 mapping between event channels and irqs. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Ingo Molnar Cc: Eric W.
Biederman --- arch/i386/xen/Makefile | 3 +- arch/i386/xen/enlighten.c | 1 + arch/i386/xen/events.c | 511 ++++++++++++++++++++++++++++++++++++++++++++++ include/xen/events.h | 28 +++ 4 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 arch/i386/xen/events.c create mode 100644 include/xen/events.h (limited to 'include') diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile index 803c1ee2b76..7a78f27bfb1 100644 --- a/arch/i386/xen/Makefile +++ b/arch/i386/xen/Makefile @@ -1 +1,2 @@ -obj-y := enlighten.o setup.o features.o multicalls.o mmu.o +obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ + events.o diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index c0b0aa7af14..6417dfdccb4 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c @@ -607,6 +607,7 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, + .init_IRQ = xen_init_IRQ, .cpuid = xen_cpuid, diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c new file mode 100644 index 00000000000..e7c5d00ab4f --- /dev/null +++ b/arch/i386/xen/events.c @@ -0,0 +1,511 @@ +/* + * Xen event channels + * + * Xen models interrupts with abstract event channels. Because each + * domain gets 1024 event channels, but NR_IRQ is not that large, we + * must dynamically map irqs<->event channels. The event channels + * interface with the rest of the kernel by defining a xen interrupt + * chip. When an event is received, it is mapped to an irq and sent + * through the normal interrupt processing path. + * + * There are four kinds of events which can be mapped to an event + * channel: + * + * 1. Inter-domain notifications. This includes all the virtual + * device events, since they're driven by front-ends in another domain + * (typically dom0). + * 2. VIRQs, typically used for timers. These are per-cpu events. + * 3. IPIs. + * 4. Hardware interrupts. Not supported at present.
+ * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include "xen-ops.h" + +/* + * This lock protects updates to the following mapping and reference-count + * arrays. The lock does not need to be acquired to read the mapping tables. + */ +static DEFINE_SPINLOCK(irq_mapping_update_lock); + +/* IRQ <-> VIRQ mapping. */ +static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; + +/* Packed IRQ information: binding type, sub-type index, and event channel. */ +struct packed_irq +{ + unsigned short evtchn; + unsigned char index; + unsigned char type; +}; + +static struct packed_irq irq_info[NR_IRQS]; + +/* Binding types. */ +enum { IRQT_UNBOUND, IRQT_PIRQ, IRQT_VIRQ, IRQT_IPI, IRQT_EVTCHN }; + +/* Convenient shorthand for packed representation of an unbound IRQ. */ +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) + +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 +}; +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +/* Xen will never allocate port zero for any purpose. */ +#define VALID_EVTCHN(chn) ((chn) != 0) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} +EXPORT_SYMBOL_GPL(force_evtchn_callback); + +static struct irq_chip xen_dynamic_chip; + +/* Constructor for packed IRQ information. */ +static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) +{ + return (struct packed_irq) { evtchn, index, type }; +} + +/* + * Accessors for packed IRQ information. 
+ */ +static inline unsigned int evtchn_from_irq(int irq) +{ + return irq_info[irq].evtchn; +} + +static inline unsigned int index_from_irq(int irq) +{ + return irq_info[irq].index; +} + +static inline unsigned int type_from_irq(int irq) +{ + return irq_info[irq].type; +} + +static inline unsigned long active_evtchns(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return (sh->evtchn_pending[idx] & + cpu_evtchn_mask[cpu][idx] & + ~sh->evtchn_mask[idx]); +} + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + int irq = evtchn_to_irq[chn]; + + BUG_ON(irq == -1); +#ifdef CONFIG_SMP + irq_desc[irq].affinity = cpumask_of_cpu(cpu); +#endif + + __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); + __set_bit(chn, cpu_evtchn_mask[cpu]); + + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ +#ifdef CONFIG_SMP + int i; + /* By default all event channels notify CPU#0. */ + for (i = 0; i < NR_IRQS; i++) + irq_desc[i].affinity = cpumask_of_cpu(0); +#endif + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +} + +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + return cpu_evtchn[evtchn]; +} + +static inline void clear_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void set_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_pending[0]); +} + + +/** + * notify_remote_via_irq - send event to remote end of event channel via irq + * @irq: irq of event channel to send event to + * + * Unlike notify_remote_via_evtchn(), this is safe to use across + * save/restore. Notifications on a broken connection are silently + * dropped. 
+ */ +void notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} +EXPORT_SYMBOL_GPL(notify_remote_via_irq); + +static void mask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_mask[0]); +} + +static void unmask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + unsigned int cpu = get_cpu(); + + BUG_ON(!irqs_disabled()); + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + + sync_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. + */ + if (sync_test_bit(port, &s->evtchn_pending[0]) && + !sync_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + +static int find_unbound_irq(void) +{ + int irq; + + /* Only allocate from dynirq range */ + for (irq = 0; irq < NR_IRQS; irq++) + if (irq_bindcount[irq] == 0) + break; + + if (irq == NR_IRQS) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return irq; +} + +static int bind_evtchn_to_irq(unsigned int evtchn) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + irq = evtchn_to_irq[evtchn]; + + if (irq == -1) { + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "event"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +static int bind_virq_to_irq(unsigned int virq, unsigned 
int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(virq_to_irq, cpu)[virq]; + + if (irq == -1) { + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "virq"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +static void unbind_from_irq(unsigned int irq) +{ + struct evtchn_close close; + int evtchn = evtchn_from_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. 
*/ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + + dynamic_irq_init(irq); + } + + spin_unlock(&irq_mapping_update_lock); +} + +int bind_evtchn_to_irqhandler(unsigned int evtchn, + irqreturn_t (*handler)(int, void *), + unsigned long irqflags, + const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_evtchn_to_irq(evtchn); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); + +int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irqreturn_t (*handler)(int, void *), + unsigned long irqflags, const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_virq_to_irq(virq, cpu); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); + +void unbind_from_irqhandler(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(unbind_from_irqhandler); + +/* + * Search the CPUs pending events bitmasks. For each one found, map + * the event number to an irq, and feed it into do_IRQ() for + * handling. + * + * Xen uses a two-level bitmap to speed searching. The first level is + * a bitset of words which contain pending event bits. The second + * level is a bitset of pending events themselves. + */ +fastcall void xen_evtchn_do_upcall(struct pt_regs *regs) +{ + int cpu = get_cpu(); + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + unsigned long pending_words; + + vcpu_info->evtchn_upcall_pending = 0; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. 
*/ + pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); + while (pending_words != 0) { + unsigned long pending_bits; + int word_idx = __ffs(pending_words); + pending_words &= ~(1UL << word_idx); + + while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { + int bit_idx = __ffs(pending_bits); + int port = (word_idx * BITS_PER_LONG) + bit_idx; + int irq = evtchn_to_irq[port]; + + if (irq != -1) { + regs->orig_eax = ~irq; + do_IRQ(regs); + } + } + } + + put_cpu(); +} + +/* Rebind an evtchn so that it gets delivered to a specific cpu */ +static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +{ + struct evtchn_bind_vcpu bind_vcpu; + int evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + return; + + /* Send future instances of this interrupt to other vcpu. */ + bind_vcpu.port = evtchn; + bind_vcpu.vcpu = tcpu; + + /* + * If this fails, it usually just indicates that we're dealing with a + * virq or IPI channel, which don't actually need to be rebound. Ignore + * it, but don't do the xenlinux-level rebind in that case. 
+ */ + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) + bind_evtchn_to_cpu(evtchn, tcpu); +} + + +static void set_affinity_irq(unsigned irq, cpumask_t dest) +{ + unsigned tcpu = first_cpu(dest); + rebind_irq_to_cpu(irq, tcpu); +} + +static void enable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + unmask_evtchn(evtchn); +} + +static void disable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + mask_evtchn(evtchn); +} + +static void ack_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +static int retrigger_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + int ret = 0; + + if (VALID_EVTCHN(evtchn)) { + set_evtchn(evtchn); + ret = 1; + } + + return ret; +} + +static struct irq_chip xen_dynamic_chip __read_mostly = { + .name = "xen-dyn", + .mask = disable_dynirq, + .unmask = enable_dynirq, + .ack = ack_dynirq, + .set_affinity = set_affinity_irq, + .retrigger = retrigger_dynirq, +}; + +void __init xen_init_IRQ(void) +{ + int i; + + init_evtchn_cpu_bindings(); + + /* No event channels are 'live' right now. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + + /* Dynamic IRQ space is currently unbound. Zero the refcnts. 
*/ + for (i = 0; i < NR_IRQS; i++) + irq_bindcount[i] = 0; + + irq_ctx_init(smp_processor_id()); +} diff --git a/include/xen/events.h b/include/xen/events.h new file mode 100644 index 00000000000..77f71c90682 --- /dev/null +++ b/include/xen/events.h @@ -0,0 +1,28 @@ +#ifndef _XEN_EVENTS_H +#define _XEN_EVENTS_H + +#include + +int bind_evtchn_to_irqhandler(unsigned int evtchn, + irqreturn_t (*handler)(int, void *), + unsigned long irqflags, const char *devname, + void *dev_id); +int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irqreturn_t (*handler)(int, void *), + unsigned long irqflags, const char *devname, void *dev_id); + +/* + * Common unbind function for all event sources. Takes IRQ to unbind from. + * Automatically closes the underlying event channel (even for bindings + * made with bind_evtchn_to_irqhandler()). + */ +void unbind_from_irqhandler(unsigned int irq, void *dev_id); + +static inline void notify_remote_via_evtchn(int port) +{ + struct evtchn_send send = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); +} + +extern void notify_remote_via_irq(int irq); +#endif /* _XEN_EVENTS_H */ -- cgit v1.2.3-70-g09d2 From c85b04c3749507546f6d5868976e4793e35c2ec0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:05 -0700 Subject: xen: add pinned page flag Add a new definition for PG_owner_priv_1 to define PG_pinned on Xen pagetable pages. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright --- include/linux/page-flags.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ae2d79f2107..731cd2ac322 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -92,6 +92,7 @@ /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ +#define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */ #if (BITS_PER_LONG > 32) /* @@ -170,6 +171,10 @@ static inline void SetPageUptodate(struct page *page) #define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) #define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) +#define PagePinned(page) test_bit(PG_pinned, &(page)->flags) +#define SetPagePinned(page) set_bit(PG_pinned, &(page)->flags) +#define ClearPagePinned(page) clear_bit(PG_pinned, &(page)->flags) + #define PageReserved(page) test_bit(PG_reserved, &(page)->flags) #define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) -- cgit v1.2.3-70-g09d2 From f87e4cac4f4e940b328d3deb5b53e642e3881f43 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:06 -0700 Subject: xen: SMP guest support This is a fairly straightforward Xen implementation of smp_ops. Xen has its own IPI mechanisms, and has no dependency on any APIC-based IPI. The smp_ops hooks and the flush_tlb_others pv_op allow a Xen guest to avoid all APIC code in arch/i386 (the only apic operation is a single apic_read for the apic version number). One subtle point which needs to be addressed is unpinning pagetables when another cpu may have a lazy tlb reference to the pagetable. Xen will not allow an in-use pagetable to be unpinned, so we must find any other cpus with a reference to the pagetable and get them to shoot down their references. 
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Benjamin LaHaise Cc: Ingo Molnar Cc: Andi Kleen --- arch/i386/xen/Kconfig | 2 +- arch/i386/xen/Makefile | 2 + arch/i386/xen/enlighten.c | 115 ++++++++++--- arch/i386/xen/events.c | 80 ++++++++- arch/i386/xen/mmu.c | 69 ++++++-- arch/i386/xen/mmu.h | 13 ++ arch/i386/xen/setup.c | 5 + arch/i386/xen/smp.c | 407 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/xen/time.c | 13 +- arch/i386/xen/xen-ops.h | 25 +++ include/xen/events.h | 27 ++- 11 files changed, 705 insertions(+), 53 deletions(-) create mode 100644 arch/i386/xen/smp.c (limited to 'include') diff --git a/arch/i386/xen/Kconfig b/arch/i386/xen/Kconfig index 7c5550058c1..b7697ff2236 100644 --- a/arch/i386/xen/Kconfig +++ b/arch/i386/xen/Kconfig @@ -4,7 +4,7 @@ config XEN bool "Enable support for Xen hypervisor" - depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || SMP || NEED_MULTIPLE_NODES) + depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || NEED_MULTIPLE_NODES) help This is the Linux Xen port. 
Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile index bf51cabed0d..fd05f243a3f 100644 --- a/arch/i386/xen/Makefile +++ b/arch/i386/xen/Makefile @@ -1,2 +1,4 @@ obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ events.o time.o + +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index a9ba834295a..de62d66e089 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include "xen-ops.h" #include "mmu.h" @@ -56,7 +58,7 @@ DEFINE_PER_CPU(unsigned long, xen_cr3); struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); -static void xen_vcpu_setup(int cpu) +void xen_vcpu_setup(int cpu) { per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; } @@ -347,23 +349,14 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, } } -/* Load a new IDT into Xen. In principle this can be per-CPU, so we - hold a spinlock to protect the static traps[] array (static because - it avoids allocation, and saves stack space). 
*/ -static void xen_load_idt(const struct Xgt_desc_struct *desc) +static void xen_convert_trap_info(const struct Xgt_desc_struct *desc, + struct trap_info *traps) { - static DEFINE_SPINLOCK(lock); - static struct trap_info traps[257]; - - int cpu = smp_processor_id(); unsigned in, out, count; - per_cpu(idt_desc, cpu) = *desc; - count = (desc->size+1) / 8; BUG_ON(count > 256); - spin_lock(&lock); for (in = out = 0; in < count; in++) { const u32 *entry = (u32 *)(desc->address + in * 8); @@ -371,6 +364,31 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc) out++; } traps[out].address = 0; +} + +void xen_copy_trap_info(struct trap_info *traps) +{ + const struct Xgt_desc_struct *desc = &get_cpu_var(idt_desc); + + xen_convert_trap_info(desc, traps); + + put_cpu_var(idt_desc); +} + +/* Load a new IDT into Xen. In principle this can be per-CPU, so we + hold a spinlock to protect the static traps[] array (static because + it avoids allocation, and saves stack space). */ +static void xen_load_idt(const struct Xgt_desc_struct *desc) +{ + static DEFINE_SPINLOCK(lock); + static struct trap_info traps[257]; + int cpu = smp_processor_id(); + + per_cpu(idt_desc, cpu) = *desc; + + spin_lock(&lock); + + xen_convert_trap_info(desc, traps); xen_mc_flush(); if (HYPERVISOR_set_trap_table(traps)) @@ -428,6 +446,12 @@ static unsigned long xen_apic_read(unsigned long reg) { return 0; } + +static void xen_apic_write(unsigned long reg, unsigned long val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} #endif static void xen_flush_tlb(void) @@ -449,6 +473,40 @@ static void xen_flush_tlb_single(unsigned long addr) BUG(); } +static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, + unsigned long va) +{ + struct mmuext_op op; + cpumask_t cpumask = *cpus; + + /* + * A couple of (to be removed) sanity checks: + * + * - current CPU must not be in mask + * - mask must exist :) + */ + BUG_ON(cpus_empty(cpumask)); + 
BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(!mm); + + /* If a CPU which we ran on has gone down, OK. */ + cpus_and(cpumask, cpumask, cpu_online_map); + if (cpus_empty(cpumask)) + return; + + if (va == TLB_FLUSH_ALL) { + op.cmd = MMUEXT_TLB_FLUSH_MULTI; + op.arg2.vcpumask = (void *)&cpumask; + } else { + op.cmd = MMUEXT_INVLPG_MULTI; + op.arg1.linear_addr = va; + op.arg2.vcpumask = (void *)&cpumask; + } + + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + static unsigned long xen_read_cr2(void) { return x86_read_percpu(xen_vcpu)->arch.cr2; @@ -460,18 +518,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4 & ~X86_CR4_TSD); } -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. - * - * Note that Xen is using the fact that the pagetable base is always - * page-aligned, and putting the 12 MSB of the address into the 12 LSB - * of cr3.
- */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - static unsigned long xen_read_cr3(void) { return x86_read_percpu(xen_cr3); @@ -740,8 +786,8 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .io_delay = xen_io_delay, #ifdef CONFIG_X86_LOCAL_APIC - .apic_write = paravirt_nop, - .apic_write_atomic = paravirt_nop, + .apic_write = xen_apic_write, + .apic_write_atomic = xen_apic_write, .apic_read = xen_apic_read, .setup_boot_clock = paravirt_nop, .setup_secondary_clock = paravirt_nop, @@ -751,6 +797,7 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_others = xen_flush_tlb_others, .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, @@ -796,6 +843,19 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .set_lazy_mode = xen_set_lazy_mode, }; +#ifdef CONFIG_SMP +static const struct smp_ops xen_smp_ops __initdata = { + .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, + .smp_prepare_cpus = xen_smp_prepare_cpus, + .cpu_up = xen_cpu_up, + .smp_cpus_done = xen_smp_cpus_done, + + .smp_send_stop = xen_smp_send_stop, + .smp_send_reschedule = xen_smp_send_reschedule, + .smp_call_function_mask = xen_smp_call_function_mask, +}; +#endif /* CONFIG_SMP */ + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -808,6 +868,9 @@ asmlinkage void __init xen_start_kernel(void) /* Install Xen paravirt ops */ paravirt_ops = xen_paravirt_ops; +#ifdef CONFIG_SMP + smp_ops = xen_smp_ops; +#endif xen_setup_features(); diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c index e7c5d00ab4f..4103b8bf22f 100644 --- a/arch/i386/xen/events.c +++ b/arch/i386/xen/events.c @@ -47,6 +47,9 @@ static DEFINE_SPINLOCK(irq_mapping_update_lock); /* IRQ <-> VIRQ 
mapping. */ static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; +/* IRQ <-> IPI mapping */ +static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; + /* Packed IRQ information: binding type, sub-type index, and event channel. */ struct packed_irq { @@ -58,7 +61,13 @@ struct packed_irq static struct packed_irq irq_info[NR_IRQS]; /* Binding types. */ -enum { IRQT_UNBOUND, IRQT_PIRQ, IRQT_VIRQ, IRQT_IPI, IRQT_EVTCHN }; +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_EVTCHN +}; /* Convenient shorthand for packed representation of an unbound IRQ. */ #define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) @@ -261,6 +270,45 @@ static int bind_evtchn_to_irq(unsigned int evtchn) return irq; } +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(ipi_to_irq, cpu)[ipi]; + if (irq == -1) { + irq = find_unbound_irq(); + if (irq < 0) + goto out; + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "ipi"); + + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + + static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; @@ -369,6 +417,28 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, } EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); +int bind_ipi_to_irqhandler(enum ipi_vector ipi, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_ipi_to_irq(ipi, 
cpu); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} + void unbind_from_irqhandler(unsigned int irq, void *dev_id) { free_irq(irq, dev_id); @@ -376,6 +446,14 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id) } EXPORT_SYMBOL_GPL(unbind_from_irqhandler); +void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) +{ + int irq = per_cpu(ipi_to_irq, cpu)[vector]; + BUG_ON(irq < 0); + notify_remote_via_irq(irq); +} + + /* * Search the CPUs pending events bitmasks. For each one found, map * the event number to an irq, and feed it into do_IRQ() for diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c index 53501ce2d15..bc49ef84620 100644 --- a/arch/i386/xen/mmu.c +++ b/arch/i386/xen/mmu.c @@ -391,8 +391,12 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_batch(); - if (pgd_walk(pgd, pin_page, TASK_SIZE)) + if (pgd_walk(pgd, pin_page, TASK_SIZE)) { + /* re-enable interrupts for kmap_flush_unused */ + xen_mc_issue(0); kmap_flush_unused(); + xen_mc_batch(); + } mcs = __xen_mc_entry(sizeof(*op)); op = mcs.args; @@ -474,27 +478,58 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -void xen_exit_mmap(struct mm_struct *mm) -{ - struct task_struct *tsk = current; - - task_lock(tsk); - /* - * We aggressively remove defunct pgd from cr3. We execute unmap_vmas() - * *much* faster this way, as no tlb flushes means bigger wrpt batches. - */ - if (tsk->active_mm == mm) { - tsk->active_mm = &init_mm; - atomic_inc(&init_mm.mm_count); +#ifdef CONFIG_SMP +/* Another cpu may still have their %cr3 pointing at the pagetable, so + we need to repoint it somewhere else before we can unpin it. 
*/ +static void drop_other_mm_ref(void *info) +{ + struct mm_struct *mm = info; - switch_mm(mm, &init_mm, tsk); + if (__get_cpu_var(cpu_tlbstate).active_mm == mm) + leave_mm(smp_processor_id()); +} - atomic_dec(&mm->mm_count); - BUG_ON(atomic_read(&mm->mm_count) == 0); +static void drop_mm_ref(struct mm_struct *mm) +{ + if (current->active_mm == mm) { + if (current->mm == mm) + load_cr3(swapper_pg_dir); + else + leave_mm(smp_processor_id()); } - task_unlock(tsk); + if (!cpus_empty(mm->cpu_vm_mask)) + xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, + mm, 1); +} +#else +static void drop_mm_ref(struct mm_struct *mm) +{ + if (current->active_mm == mm) + load_cr3(swapper_pg_dir); +} +#endif + +/* + * While a process runs, Xen pins its pagetables, which means that the + * hypervisor forces it to be read-only, and it controls all updates + * to it. This means that all pagetable updates have to go via the + * hypervisor, which is moderately expensive. + * + * Since we're pulling the pagetable down, we switch to use init_mm, + * unpin old process pagetable and mark it all read-write, which + * allows further operations on it to be simple memory accesses. + * + * The only subtle point is that another CPU may be still using the + * pagetable because of lazy tlb flushing. This means we need to + * switch all CPUs off this pagetable before we can unpin it. + */ +void xen_exit_mmap(struct mm_struct *mm) +{ + get_cpu(); /* make sure we don't move around */ + drop_mm_ref(mm); + put_cpu(); xen_pgd_unpin(mm->pgd); } diff --git a/arch/i386/xen/mmu.h b/arch/i386/xen/mmu.h index 49776fe9f02..c9ff27f3ac3 100644 --- a/arch/i386/xen/mmu.h +++ b/arch/i386/xen/mmu.h @@ -3,6 +3,19 @@ #include #include +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs.
+ * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + + void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); void xen_set_pte(pte_t *ptep, pte_t pteval); diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c index 7da93ee612f..18a994d5a4c 100644 --- a/arch/i386/xen/setup.c +++ b/arch/i386/xen/setup.c @@ -94,4 +94,9 @@ void __init xen_arch_setup(void) COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); pm_idle = xen_idle; + +#ifdef CONFIG_SMP + /* fill cpus_possible with all available cpus */ + xen_fill_possible_map(); +#endif } diff --git a/arch/i386/xen/smp.c b/arch/i386/xen/smp.c new file mode 100644 index 00000000000..a91587fbf5c --- /dev/null +++ b/arch/i386/xen/smp.c @@ -0,0 +1,407 @@ +/* + * Xen SMP support + * + * This file implements the Xen versions of smp_ops. SMP under Xen is + * very straightforward. Bringing a CPU up is simply a matter of + * loading its initial context and setting it running. + * + * IPIs are handled through the Xen event mechanism. + * + * Because virtual CPUs can be scheduled onto any real CPU, there's no + * useful topology information for the kernel to make use of. As a + * result, all CPUs are treated as if they're single-core and + * single-threaded. + * + * This does not handle HOTPLUG_CPU yet. + */ +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "mmu.h" + +static cpumask_t cpu_initialized_map; +static DEFINE_PER_CPU(int, resched_irq); +static DEFINE_PER_CPU(int, callfunc_irq); + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. 
+ */ +static DEFINE_SPINLOCK(call_lock); + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); + +static struct call_data_struct *call_data; + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. + */ +static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + +static __cpuinit void cpu_bringup_and_idle(void) +{ + int cpu = smp_processor_id(); + + cpu_init(); + + preempt_disable(); + per_cpu(cpu_state, cpu) = CPU_ONLINE; + + xen_setup_cpu_clockevents(); + + /* We can take interrupts now: we're officially "up". */ + local_irq_enable(); + + wmb(); /* make sure everything is out */ + cpu_idle(); +} + +static int xen_smp_intr_init(unsigned int cpu) +{ + int rc; + const char *resched_name, *callfunc_name; + + per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; + + resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); + rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, + cpu, + xen_reschedule_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + resched_name, + NULL); + if (rc < 0) + goto fail; + per_cpu(resched_irq, cpu) = rc; + + callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); + rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, + cpu, + xen_call_function_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + callfunc_name, + NULL); + if (rc < 0) + goto fail; + per_cpu(callfunc_irq, cpu) = rc; + + return 0; + + fail: + if (per_cpu(resched_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); + if (per_cpu(callfunc_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); + return rc; +} + +void __init xen_fill_possible_map(void) +{ + int i, rc; + + for (i = 0; i < NR_CPUS; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) + 
cpu_set(i, cpu_possible_map); + } +} + +void __init xen_smp_prepare_boot_cpu(void) +{ + int cpu; + + BUG_ON(smp_processor_id() != 0); + native_smp_prepare_boot_cpu(); + + xen_vcpu_setup(0); + + /* We've switched to the "real" per-cpu gdt, so make sure the + old memory can be recycled */ + make_lowmem_page_readwrite(&per_cpu__gdt_page); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + } +} + +void __init xen_smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned cpu; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + } + + smp_store_cpu_info(0); + set_cpu_sibling_map(0); + + if (xen_smp_intr_init(0)) + BUG(); + + cpu_initialized_map = cpumask_of_cpu(0); + + /* Restrict the possible_map according to max_cpus. */ + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { + for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--) + continue; + cpu_clear(cpu, cpu_possible_map); + } + + for_each_possible_cpu (cpu) { + struct task_struct *idle; + + if (cpu == 0) + continue; + + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + + cpu_set(cpu, cpu_present_map); + } + + //init_xenbus_allowed_cpumask(); +} + +static __cpuinit int +cpu_initialize_context(unsigned int cpu, struct task_struct *idle) +{ + struct vcpu_guest_context *ctxt; + struct gdt_page *gdt = &per_cpu(gdt_page, cpu); + + if (cpu_test_and_set(cpu, cpu_initialized_map)) + return 0; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (ctxt == NULL) + return -ENOMEM; + + ctxt->flags = VGCF_IN_KERNEL; + ctxt->user_regs.ds = __USER_DS; + ctxt->user_regs.es = __USER_DS; + ctxt->user_regs.fs = __KERNEL_PERCPU; + ctxt->user_regs.gs = 0; + ctxt->user_regs.ss = __KERNEL_DS; + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ + + memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); + 
+ xen_copy_trap_info(ctxt->trap_ctxt); + + ctxt->ldt_ents = 0; + + BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK); + make_lowmem_page_readonly(gdt->gdt); + + ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt); + ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt); + + ctxt->user_regs.cs = __KERNEL_CS; + ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); + + ctxt->kernel_ss = __KERNEL_DS; + ctxt->kernel_sp = idle->thread.esp0; + + ctxt->event_callback_cs = __KERNEL_CS; + ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; + ctxt->failsafe_callback_cs = __KERNEL_CS; + ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; + + per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); + + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + BUG(); + + kfree(ctxt); + return 0; +} + +int __cpuinit xen_cpu_up(unsigned int cpu) +{ + struct task_struct *idle = idle_task(cpu); + int rc; + +#if 0 + rc = cpu_up_check(cpu); + if (rc) + return rc; +#endif + + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; + xen_vcpu_setup(cpu); + irq_ctx_init(cpu); + xen_setup_timer(cpu); + + /* make sure interrupts start blocked */ + per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; + + rc = cpu_initialize_context(cpu, idle); + if (rc) + return rc; + + if (num_online_cpus() == 1) + alternatives_smp_switch(1); + + rc = xen_smp_intr_init(cpu); + if (rc) + return rc; + + smp_store_cpu_info(cpu); + set_cpu_sibling_map(cpu); + /* This must be done before setting cpu_online_map */ + wmb(); + + cpu_set(cpu, cpu_online_map); + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + BUG_ON(rc); + + return 0; +} + +void xen_smp_cpus_done(unsigned int max_cpus) +{ +} + +static void stop_self(void *v) +{ + int cpu = smp_processor_id(); + + /* make sure we're not pinning something down */ + load_cr3(swapper_pg_dir); + /* should set up a minimal gdt */ + + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); + BUG(); +} + +void 
xen_smp_send_stop(void) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + xen_smp_call_function_mask(mask, stop_self, NULL, 0); +} + +void xen_smp_send_reschedule(int cpu) +{ + xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); +} + + +static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) +{ + unsigned cpu; + + cpus_and(mask, mask, cpu_online_map); + + for_each_cpu_mask(cpu, mask) + xen_send_IPI_one(cpu, vector); +} + +static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + irq_enter(); + (*func)(info); + irq_exit(); + + if (wait) { + mb(); /* commit everything before setting finished */ + atomic_inc(&call_data->finished); + } + + return IRQ_HANDLED; +} + +int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait) +{ + struct call_data_struct data; + int cpus; + + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + + cpu_clear(smp_processor_id(), mask); + + cpus = cpus_weight(mask); + if (!cpus) { + spin_unlock(&call_lock); + return 0; + } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); /* write everything before IPI */ + + /* Send a message to other CPUs and wait for them to respond */ + xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); + + /* Make sure other vcpus get a chance to run. + XXX too severe? Maybe we should check the other CPU's states? 
*/ + HYPERVISOR_sched_op(SCHEDOP_yield, 0); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus || + (wait && atomic_read(&data.finished) != cpus)) + cpu_relax(); + + spin_unlock(&call_lock); + + return 0; +} diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c index 2aab44bec2a..aeb04cf5dbf 100644 --- a/arch/i386/xen/time.c +++ b/arch/i386/xen/time.c @@ -519,7 +519,7 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) return ret; } -static void xen_setup_timer(int cpu) +void xen_setup_timer(int cpu) { const char *name; struct clock_event_device *evt; @@ -535,16 +535,20 @@ static void xen_setup_timer(int cpu) IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, name, NULL); - evt = &get_cpu_var(xen_clock_events); + evt = &per_cpu(xen_clock_events, cpu); memcpy(evt, xen_clockevent, sizeof(*evt)); evt->cpumask = cpumask_of_cpu(cpu); evt->irq = irq; - clockevents_register_device(evt); setup_runstate_info(cpu); +} + +void xen_setup_cpu_clockevents(void) +{ + BUG_ON(preemptible()); - put_cpu_var(xen_clock_events); + clockevents_register_device(&__get_cpu_var(xen_clock_events)); } __init void xen_time_init(void) @@ -570,4 +574,5 @@ __init void xen_time_init(void) tsc_disable = 0; xen_setup_timer(cpu); + xen_setup_cpu_clockevents(); } diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h index 7667abd390e..4069be8ba31 100644 --- a/arch/i386/xen/xen-ops.h +++ b/arch/i386/xen/xen-ops.h @@ -3,6 +3,12 @@ #include +/* These are code, but not functions. 
Defined in entry.S */ +extern const char xen_hypervisor_callback[]; +extern const char xen_failsafe_callback[]; + +void xen_copy_trap_info(struct trap_info *traps); + DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); DECLARE_PER_CPU(unsigned long, xen_cr3); @@ -13,6 +19,8 @@ char * __init xen_memory_setup(void); void __init xen_arch_setup(void); void __init xen_init_IRQ(void); +void xen_setup_timer(int cpu); +void xen_setup_cpu_clockevents(void); unsigned long xen_cpu_khz(void); void __init xen_time_init(void); unsigned long xen_get_wallclock(void); @@ -28,5 +36,22 @@ static inline unsigned xen_get_lazy_mode(void) return x86_read_percpu(xen_lazy_mode); } +void __init xen_fill_possible_map(void); + +void xen_vcpu_setup(int cpu); +void xen_smp_prepare_boot_cpu(void); +void xen_smp_prepare_cpus(unsigned int max_cpus); +int xen_cpu_up(unsigned int cpu); +void xen_smp_cpus_done(unsigned int max_cpus); + +void xen_smp_send_stop(void); +void xen_smp_send_reschedule(int cpu); +int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait); +int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait); + +int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait); #endif /* XEN_OPS_H */ diff --git a/include/xen/events.h b/include/xen/events.h index 77f71c90682..7abe4ddfac5 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -1,15 +1,32 @@ #ifndef _XEN_EVENTS_H #define _XEN_EVENTS_H -#include +#include + +#include +#include + +enum ipi_vector { + XEN_RESCHEDULE_VECTOR, + XEN_CALL_FUNCTION_VECTOR, + + XEN_NR_IPIS, +}; int bind_evtchn_to_irqhandler(unsigned int evtchn, - irqreturn_t (*handler)(int, void *), + irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irqreturn_t (*handler)(int, void *), - unsigned long irqflags, const char *devname, void *dev_id); + 
irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id); +int bind_ipi_to_irqhandler(enum ipi_vector ipi, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); /* * Common unbind function for all event sources. Takes IRQ to unbind from. @@ -18,6 +35,8 @@ int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, */ void unbind_from_irqhandler(unsigned int irq, void *dev_id); +void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); + static inline void notify_remote_via_evtchn(int port) { struct evtchn_send send = { .port = port }; -- cgit v1.2.3-70-g09d2 From b536b4b9623084d86f2b1f19cb44a2d6d74f00bf Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:06 -0700 Subject: xen: use the hvc console infrastructure for Xen console Implement a Xen back-end for hvc console. * * * Add early printk support via hvc console, enable using "earlyprintk=xen" on the kernel command line. 
From: Gerd Hoffmann Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Acked-by: Ingo Molnar Acked-by: Olof Johansson --- arch/i386/xen/events.c | 3 +- arch/x86_64/kernel/early_printk.c | 5 ++ drivers/char/Kconfig | 8 ++ drivers/char/Makefile | 1 + drivers/char/hvc_xen.c | 159 ++++++++++++++++++++++++++++++++++++++ include/xen/events.h | 1 + include/xen/hvc-console.h | 6 ++ 7 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 drivers/char/hvc_xen.c create mode 100644 include/xen/hvc-console.h (limited to 'include') diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c index 4103b8bf22f..8904acc20f8 100644 --- a/arch/i386/xen/events.c +++ b/arch/i386/xen/events.c @@ -244,7 +244,7 @@ static int find_unbound_irq(void) return irq; } -static int bind_evtchn_to_irq(unsigned int evtchn) +int bind_evtchn_to_irq(unsigned int evtchn) { int irq; @@ -269,6 +269,7 @@ static int bind_evtchn_to_irq(unsigned int evtchn) return irq; } +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 296d2b0c5d8..fd9aff3f389 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -6,6 +6,7 @@ #include #include #include +#include /* Simple VGA output */ @@ -242,6 +243,10 @@ static int __init setup_early_printk(char *buf) simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; +#ifdef CONFIG_HVC_XEN + } else if (!strncmp(buf, "xen", 3)) { + early_console = &xenboot_console; +#endif } if (keep_early) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 97bd71bc3ae..9e8f21410d2 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -604,6 +604,14 @@ config HVC_BEAT help Toshiba's Cell Reference Set Beat Console device driver +config HVC_XEN + bool "Xen Hypervisor Console support" + depends on XEN + select HVC_DRIVER + default y + help + Xen virtual console 
device driver + config HVCS tristate "IBM Hypervisor Virtual Console Server support" depends on PPC_PSERIES diff --git a/drivers/char/Makefile b/drivers/char/Makefile index f2996a95eb0..8852b8d643c 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_BEAT) += hvc_beat.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o +obj-$(CONFIG_HVC_XEN) += hvc_xen.o obj-$(CONFIG_RAW_DRIVER) += raw.o obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o obj-$(CONFIG_MSPEC) += mspec.o diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c new file mode 100644 index 00000000000..dd68f8541c2 --- /dev/null +++ b/drivers/char/hvc_xen.c @@ -0,0 +1,159 @@ +/* + * xen console driver interface to hvc_console.c + * + * (c) 2007 Gerd Hoffmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "hvc_console.h" + +#define HVC_COOKIE 0x58656e /* "Xen" in hex */ + +static struct hvc_struct *hvc; +static int xencons_irq; + +/* ------------------------------------------------------------------ */ + +static inline struct xencons_interface *xencons_interface(void) +{ + return mfn_to_virt(xen_start_info->console.domU.mfn); +} + +static inline void notify_daemon(void) +{ + /* Use evtchn: this is called early, before irq is set up. */ + notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); +} + +static int write_console(uint32_t vtermno, const char *data, int len) +{ + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; + int sent = 0; + + cons = intf->out_cons; + prod = intf->out_prod; + mb(); /* update queue values before going on */ + BUG_ON((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); /* write ring before updating pointer */ + intf->out_prod = prod; + + notify_daemon(); + return sent; +} + +static int read_console(uint32_t vtermno, char *buf, int len) +{ + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; + int recv = 0; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); /* get pointers before reading ring */ + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (cons != prod && recv < len) + buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; + + mb(); /* read ring before consuming */ + intf->in_cons = cons; + + notify_daemon(); + return recv; +} + +static struct hv_ops hvc_ops = { + .get_chars = read_console, + 
.put_chars = write_console, +}; + +static int __init xen_init(void) +{ + struct hvc_struct *hp; + + if (!is_running_on_xen()) + return 0; + + xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + if (xencons_irq < 0) + xencons_irq = 0 /* NO_IRQ */; + hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); + if (IS_ERR(hp)) + return PTR_ERR(hp); + + hvc = hp; + return 0; +} + +static void __exit xen_fini(void) +{ + if (hvc) + hvc_remove(hvc); +} + +static int xen_cons_init(void) +{ + if (!is_running_on_xen()) + return 0; + + hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); + return 0; +} + +module_init(xen_init); +module_exit(xen_fini); +console_initcall(xen_cons_init); + +static void xenboot_write_console(struct console *console, const char *string, + unsigned len) +{ + unsigned int linelen, off = 0; + const char *pos; + + while (off < len && NULL != (pos = strchr(string+off, '\n'))) { + linelen = pos-string+off; + if (off + linelen > len) + break; + write_console(0, string+off, linelen); + write_console(0, "\r\n", 2); + off += linelen + 1; + } + if (off < len) + write_console(0, string+off, len-off); +} + +struct console xenboot_console = { + .name = "xenboot", + .write = xenboot_write_console, + .flags = CON_PRINTBUFFER | CON_BOOT, +}; diff --git a/include/xen/events.h b/include/xen/events.h index 7abe4ddfac5..2bde54d29be 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -13,6 +13,7 @@ enum ipi_vector { XEN_NR_IPIS, }; +int bind_evtchn_to_irq(unsigned int evtchn); int bind_evtchn_to_irqhandler(unsigned int evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h new file mode 100644 index 00000000000..21c0ecfd786 --- /dev/null +++ b/include/xen/hvc-console.h @@ -0,0 +1,6 @@ +#ifndef XEN_HVC_CONSOLE_H +#define XEN_HVC_CONSOLE_H + +extern struct console xenboot_console; + +#endif /* XEN_HVC_CONSOLE_H */ -- cgit v1.2.3-70-g09d2 From 
ad9a86121f5a374b48ce2924f8a9d7e94a04db27 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:06 -0700 Subject: xen: Add grant table support Add Xen 'grant table' driver which allows granting of access to selected local memory pages by other virtual machines and, symmetrically, the mapping of remote memory pages which other virtual machines have granted access to. This driver is a prerequisite for many of the Xen virtual device drivers, which grant the 'device driver domain' restricted and temporary access to only those memory pages that are currently involved in I/O operations. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright --- drivers/Makefile | 2 + drivers/xen/Makefile | 1 + drivers/xen/grant-table.c | 582 ++++++++++++++++++++++++++++++++++++ include/xen/grant_table.h | 107 +++++++ include/xen/interface/grant_table.h | 94 +++++- 5 files changed, 776 insertions(+), 10 deletions(-) create mode 100644 drivers/xen/Makefile create mode 100644 drivers/xen/grant-table.c create mode 100644 include/xen/grant_table.h (limited to 'include') diff --git a/drivers/Makefile b/drivers/Makefile index 503d8256944..6d9d7fab77f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_PNP) += pnp/ obj-$(CONFIG_ARM_AMBA) += amba/ +obj-$(CONFIG_XEN) += xen/ + # char/ comes before serial/ etc so that the VT console is the boot-time # default. 
obj-y += char/ diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile new file mode 100644 index 00000000000..eb42b521eef --- /dev/null +++ b/drivers/xen/Makefile @@ -0,0 +1 @@ +obj-y += grant-table.o diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c new file mode 100644 index 00000000000..ea94dbabf9a --- /dev/null +++ b/drivers/xen/grant-table.c @@ -0,0 +1,582 @@ +/****************************************************************************** + * grant_table.c + * + * Granting foreign access to our memory reservation. + * + * Copyright (c) 2005-2006, Christopher Clark + * Copyright (c) 2004-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + + +/* External tools reserve first few grant table entries. */ +#define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) + +static grant_ref_t **gnttab_list; +static unsigned int nr_grant_frames; +static unsigned int boot_max_nr_grant_frames; +static int gnttab_free_count; +static grant_ref_t gnttab_free_head; +static DEFINE_SPINLOCK(gnttab_list_lock); + +static struct grant_entry *shared; + +static struct gnttab_free_callback *gnttab_free_callback_list; + +static int gnttab_expand(unsigned int req_entries); + +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) + +static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) +{ + return &gnttab_list[(entry) / RPP][(entry) % RPP]; +} +/* This can be used as an l-value */ +#define gnttab_entry(entry) (*__gnttab_entry(entry)) + +static int get_free_entries(unsigned count) +{ + unsigned long flags; + int ref, rc; + grant_ref_t head; + + spin_lock_irqsave(&gnttab_list_lock, flags); + + if ((gnttab_free_count < count) && + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return rc; + } + + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = gnttab_entry(head); + gnttab_free_head = gnttab_entry(head); + gnttab_entry(head) = GNTTAB_LIST_END; + + spin_unlock_irqrestore(&gnttab_list_lock, flags); + + return ref; +} + +static void do_free_callbacks(void) +{ + struct gnttab_free_callback *callback, *next; + + callback = gnttab_free_callback_list; + 
gnttab_free_callback_list = NULL; + + while (callback != NULL) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->fn(callback->arg); + } else { + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } +} + +static inline void check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list)) + do_free_callbacks(); +} + +static void put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +static void update_grant_entry(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags) +{ + /* + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + */ + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = flags; +} + +/* + * Public grant-issuing interface functions + */ +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly) +{ + update_grant_entry(ref, domid, frame, + GTF_permit_access | (readonly ? 
GTF_readonly : 0)); +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); + +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int readonly) +{ + int ref; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + + gnttab_grant_foreign_access_ref(ref, domid, frame, readonly); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); + +int gnttab_query_foreign_access(grant_ref_t ref) +{ + u16 nflags; + + nflags = shared[ref].flags; + + return (nflags & (GTF_reading|GTF_writing)); +} +EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); + +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + u16 flags, nflags; + + nflags = shared[ref].flags; + do { + flags = nflags; + if (flags & (GTF_reading|GTF_writing)) { + printk(KERN_ALERT "WARNING: g.e. still in use!\n"); + return 0; + } + } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); + + return 1; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); + +void gnttab_end_foreign_access(grant_ref_t ref, int readonly, + unsigned long page) +{ + if (gnttab_end_foreign_access_ref(ref, readonly)) { + put_free_entry(ref); + if (page != 0) + free_page(page); + } else { + /* XXX This needs to be fixed so that the ref and page are + placed on a list to be freed up later. */ + printk(KERN_WARNING + "WARNING: leaking g.e. 
and page still in use!\n"); + } +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); + +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) +{ + int ref; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) +{ + update_grant_entry(ref, domid, pfn, GTF_accept_transfer); +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + unsigned long frame; + u16 flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { + if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) + return 0; + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = shared[ref].flags; + cpu_relax(); + } + + rmb(); /* Read the frame number /after/ reading completion status. 
*/ + frame = shared[ref].frame; + BUG_ON(frame == 0); + + return frame; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); + +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) +{ + unsigned long frame = gnttab_end_foreign_transfer_ref(ref); + put_free_entry(ref); + return frame; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); + +void gnttab_free_grant_reference(grant_ref_t ref) +{ + put_free_entry(ref); +} +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); + +void gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + unsigned long flags; + int count = 1; + if (head == GNTTAB_LIST_END) + return; + spin_lock_irqsave(&gnttab_list_lock, flags); + ref = head; + while (gnttab_entry(ref) != GNTTAB_LIST_END) { + ref = gnttab_entry(ref); + count++; + } + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_free_grant_references); + +int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) +{ + int h = get_free_entries(count); + + if (h < 0) + return -ENOSPC; + + *head = h; + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); + +int gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); +} +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); + +int gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + if (unlikely(g == GNTTAB_LIST_END)) + return -ENOSPC; + *private_head = gnttab_entry(g); + return g; +} +EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); + +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release) +{ + gnttab_entry(release) = *private_head; + *private_head = release; +} +EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); + +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count) 
+{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (callback->next) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); +out: + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_request_free_callback); + +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) +{ + struct gnttab_free_callback **pcb; + unsigned long flags; + + spin_lock_irqsave(&gnttab_list_lock, flags); + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { + if (*pcb == callback) { + *pcb = callback->next; + break; + } + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); + +static int grow_gnttab_list(unsigned int more_frames) +{ + unsigned int new_nr_grant_frames, extra_entries, i; + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + + for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); + if (!gnttab_list[i]) + goto grow_nomem; + } + + + for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; + i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(i) = gnttab_free_head; + gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + check_free_callbacks(); + + return 0; + +grow_nomem: + for ( ; i >= nr_grant_frames; i--) + free_page((unsigned long) gnttab_list[i]); + return -ENOMEM; +} + +static unsigned int __max_nr_grant_frames(void) +{ + struct gnttab_query_size query; + int rc; + + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return 4; /* Legacy max supported number 
of frames */ + + return query.max_nr_frames; +} + +static inline unsigned int max_nr_grant_frames(void) +{ + unsigned int xen_max = __max_nr_grant_frames(); + + if (xen_max > boot_max_nr_grant_frames) + return boot_max_nr_grant_frames; + return xen_max; +} + +static int map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} + +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct gnttab_setup_table setup; + unsigned long *frames; + unsigned int nr_gframes = end_idx + 1; + int rc; + + frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); + if (!frames) + return -ENOMEM; + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_gframes; + setup.frame_list = frames; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (rc == -ENOSYS) { + kfree(frames); + return -ENOSYS; + } + + BUG_ON(rc || setup.status); + + if (shared == NULL) { + struct vm_struct *area; + area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); + BUG_ON(area == NULL); + shared = area->addr; + } + rc = apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, + map_pte_fn, &frames); + BUG_ON(rc); + frames -= nr_gframes; /* adjust after map_pte_fn() */ + + kfree(frames); + + return 0; +} + +static int gnttab_resume(void) +{ + if (max_nr_grant_frames() < nr_grant_frames) + return -ENOSYS; + return gnttab_map(0, nr_grant_frames - 1); +} + +static int gnttab_suspend(void) +{ + apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_grant_frames, + unmap_pte_fn, NULL); + + return 0; +} + +static int gnttab_expand(unsigned int req_entries) +{ + int rc; + unsigned int cur, extra; + + 
cur = nr_grant_frames; + extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / + GREFS_PER_GRANT_FRAME); + if (cur + extra > max_nr_grant_frames()) + return -ENOSPC; + + rc = gnttab_map(cur, cur + extra - 1); + if (rc == 0) + rc = grow_gnttab_list(extra); + + return rc; +} + +static int __devinit gnttab_init(void) +{ + int i; + unsigned int max_nr_glist_frames; + unsigned int nr_init_grefs; + + if (!is_running_on_xen()) + return -ENODEV; + + nr_grant_frames = 1; + boot_max_nr_grant_frames = __max_nr_grant_frames(); + + /* Determine the maximum number of frames required for the + * grant reference free list on the current hypervisor. + */ + max_nr_glist_frames = (boot_max_nr_grant_frames * + GREFS_PER_GRANT_FRAME / + (PAGE_SIZE / sizeof(grant_ref_t))); + + gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), + GFP_KERNEL); + if (gnttab_list == NULL) + return -ENOMEM; + + for (i = 0; i < nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); + if (gnttab_list[i] == NULL) + goto ini_nomem; + } + + if (gnttab_resume() < 0) + return -ENODEV; + + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; + gnttab_free_head = NR_RESERVED_ENTRIES; + + printk("Grant table initialized\n"); + return 0; + + ini_nomem: + for (i--; i >= 0; i--) + free_page((unsigned long)gnttab_list[i]); + kfree(gnttab_list); + return -ENOMEM; +} + +core_initcall(gnttab_init); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h new file mode 100644 index 00000000000..761c83498e0 --- /dev/null +++ b/include/xen/grant_table.h @@ -0,0 +1,107 @@ +/****************************************************************************** + * grant_table.h + * + * Two sets of functionality: + * 1. Granting foreign access to our memory reservation. + * 2. 
Accessing others' memory reservations via grant references. + * (i.e., mechanisms for both sender and recipient of grant references) + * + * Copyright (c) 2004-2005, K A Fraser + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __ASM_GNTTAB_H__ +#define __ASM_GNTTAB_H__ + +#include +#include + +/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ +#define NR_GRANT_FRAMES 4 + +struct gnttab_free_callback { + struct gnttab_free_callback *next; + void (*fn)(void *); + void *arg; + u16 count; +}; + +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int readonly); + +/* + * End access through the given grant reference, iff the grant entry is no + * longer in use. Return 1 if the grant entry was freed, 0 if it is still in + * use. + */ +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); + +/* + * Eventually end access through the given grant reference, and once that + * access has been ended, free the given page too. Access will be ended + * immediately iff the grant entry is not in use, otherwise it will happen + * some time later. page may be 0, in which case no freeing will occur. + */ +void gnttab_end_foreign_access(grant_ref_t ref, int readonly, + unsigned long page); + +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); + +int gnttab_query_foreign_access(grant_ref_t ref); + +/* + * operations on reserved batches of grant references + */ +int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head); + +void gnttab_free_grant_reference(grant_ref_t ref); + +void gnttab_free_grant_references(grant_ref_t head); + +int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); + +int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); + +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release); + +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count); +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); + +void 
gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); + +#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) + +#endif /* __ASM_GNTTAB_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index e9e06695ed5..219049802cf 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -4,6 +4,24 @@ * Interface for granting foreign access to page frames, and receiving * page-ownership transfers. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2004, K A Fraser */ @@ -17,7 +35,7 @@ /* Some rough guidelines on accessing and updating grant-table entries * in a concurrency-safe manner. For more information, Linux contains a - * reference implementation for guest OSes (arch/i386/mach-xen/grant_table.c). 
+ * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). * * NB. WMB is a no-op on current-generation x86 processors. However, a * compiler barrier will still be required. @@ -144,9 +162,9 @@ typedef uint32_t grant_handle_t; * that must be presented later to destroy the mapping(s). On error, * is a negative status code. * NOTES: - * 1. If GNTPIN_map_for_dev is specified then is the address + * 1. If GNTMAP_device_map is specified then is the address * via which I/O devices may access the granted frame. - * 2. If GNTPIN_map_for_host is specified then a mapping will be added at + * 2. If GNTMAP_host_map is specified then a mapping will be added at * either a host virtual address in the current address space, or at * a PTE at the specified machine address. The type of mapping to * perform is selected through the GNTMAP_contains_pte flag, and the @@ -167,7 +185,6 @@ struct gnttab_map_grant_ref { grant_handle_t handle; uint64_t dev_bus_addr; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); /* * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings @@ -189,7 +206,6 @@ struct gnttab_unmap_grant_ref { /* OUT parameters. */ int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); /* * GNTTABOP_setup_table: Set up a grant table for comprising at least @@ -207,9 +223,8 @@ struct gnttab_setup_table { uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* GNTST_* */ - GUEST_HANDLE(ulong) frame_list; + ulong *frame_list; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); /* * GNTTABOP_dump_table: Dump the contents of the grant table to the @@ -222,7 +237,6 @@ struct gnttab_dump_table { /* OUT parameters. */ int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); /* * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The @@ -241,7 +255,65 @@ struct gnttab_transfer { /* OUT parameters. 
*/ int16_t status; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); + + +/* + * GNTTABOP_copy: Hypervisor based copy + * source and destinations can be either MFNs or, for foreign domains, + * grant references. The foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). + * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * granted appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +#define GNTTABOP_copy 5 +struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + unsigned long gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +}; + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_query_size 6 +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* GNTST_* */ +}; + + /* * Bitfield values for update_pin_status.flags. @@ -284,6 +356,7 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ #define GNTST_bad_page (-9) /* Specified page was invalid for op. 
*/ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */ #define GNTTABOP_error_msgs { \ "okay", \ @@ -295,7 +368,8 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); "invalid device address", \ "no spare translation slot in the I/O MMU", \ "permission denied", \ - "bad page" \ + "bad page", \ + "copy arguments cross page boundary" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ -- cgit v1.2.3-70-g09d2 From 4bac07c993d03434ea902d3d4290d9e45944b66c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:06 -0700 Subject: xen: add the Xenbus sysfs and virtual device hotplug driver This communicates with the machine control software via a registry residing in a controlling virtual machine. This allows dynamic creation, destruction and modification of virtual device configurations (network devices, block devices and CPUS, to name some examples). [ Greg, would you mind giving this a review? Thanks -J ] Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Cc: Greg KH --- drivers/xen/Makefile | 1 + drivers/xen/xenbus/Makefile | 7 + drivers/xen/xenbus/xenbus_client.c | 569 ++++++++++++++++++++++ drivers/xen/xenbus/xenbus_comms.c | 233 +++++++++ drivers/xen/xenbus/xenbus_comms.h | 46 ++ drivers/xen/xenbus/xenbus_probe.c | 935 +++++++++++++++++++++++++++++++++++++ drivers/xen/xenbus/xenbus_probe.h | 74 +++ drivers/xen/xenbus/xenbus_xs.c | 861 ++++++++++++++++++++++++++++++++++ include/asm-i386/xen/hypervisor.h | 1 + include/xen/xenbus.h | 234 ++++++++++ 10 files changed, 2961 insertions(+) create mode 100644 drivers/xen/xenbus/Makefile create mode 100644 drivers/xen/xenbus/xenbus_client.c create mode 100644 drivers/xen/xenbus/xenbus_comms.c create mode 100644 drivers/xen/xenbus/xenbus_comms.h create mode 100644 drivers/xen/xenbus/xenbus_probe.c create mode 100644 drivers/xen/xenbus/xenbus_probe.h create mode 100644 drivers/xen/xenbus/xenbus_xs.c create mode 100644 
include/xen/xenbus.h (limited to 'include') diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index eb42b521eef..56592f0d6ce 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1 +1,2 @@ obj-y += grant-table.o +obj-y += xenbus/ diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile new file mode 100644 index 00000000000..5571f5b8422 --- /dev/null +++ b/drivers/xen/xenbus/Makefile @@ -0,0 +1,7 @@ +obj-y += xenbus.o + +xenbus-objs = +xenbus-objs += xenbus_client.o +xenbus-objs += xenbus_comms.o +xenbus-objs += xenbus_xs.o +xenbus-objs += xenbus_probe.o diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c new file mode 100644 index 00000000000..9fd2f70ab46 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_client.c @@ -0,0 +1,569 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. 
+ * + * Copyright (C) 2005 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +const char *xenbus_strstate(enum xenbus_state state) +{ + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? 
name[state] : "INVALID"; +} +EXPORT_SYMBOL_GPL(xenbus_strstate); + +/** + * xenbus_watch_path - register a watch + * @dev: xenbus device + * @path: path to watch + * @watch: watch to register + * @callback: callback to register + * + * Register a @watch on the given path, using the given xenbus_watch structure + * for storage, and the given @callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given @path will be saved as + * @watch->node, and remains the caller's to free. On error, @watch->node will + * be NULL, the device will switch to %XenbusStateClosing, and the error will + * be saved in the store. + */ +int xenbus_watch_path(struct xenbus_device *dev, const char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + + watch->node = path; + watch->callback = callback; + + err = register_xenbus_watch(watch); + + if (err) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, err, "adding watch on %s", path); + } + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_path); + + +/** + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path + * @dev: xenbus device + * @watch: watch to register + * @callback: callback to register + * @pathfmt: format of path to watch + * + * Register a watch on the given @path, using the given xenbus_watch + * structure for storage, and the given @callback function as the callback. + * Return 0 on success, or -errno on error. On success, the watched path + * (@path/@path2) will be saved as @watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to %XenbusStateClosing, and the error will be + * saved in the store. 
+ */ +int xenbus_watch_pathfmt(struct xenbus_device *dev, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int), + const char *pathfmt, ...) +{ + int err; + va_list ap; + char *path; + + va_start(ap, pathfmt); + path = kvasprintf(GFP_KERNEL, pathfmt, ap); + va_end(ap); + + if (!path) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + err = xenbus_watch_path(dev, path, watch, callback); + + if (err) + kfree(path); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); + + +/** + * xenbus_switch_state + * @dev: xenbus device + * @xbt: transaction handle + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +{ + /* We check whether the state is currently set to the given value, and + if not, then the state is set. We don't want to unconditionally + write the given state, because we don't want to fire watches + unnecessarily. Furthermore, if the node has gone, we don't write + to it, as the device will be tearing down, and we don't want to + resurrect that directory. + + Note that, because of this cached value of our state, this function + will not work inside a Xenstore transaction (something it was + trying to in the past) because dev->state would not get reset if + the transaction was aborted. 
+ + */ + + int current_state; + int err; + + if (state == dev->state) + return 0; + + err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", + &current_state); + if (err != 1) + return 0; + + err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); + if (err) { + if (state != XenbusStateClosing) /* Avoid looping */ + xenbus_dev_fatal(dev, err, "writing new state"); + return err; + } + + dev->state = state; + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_switch_state); + +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); + complete(&dev->down); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_frontend_closed); + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. + */ +static char *error_path(struct xenbus_device *dev) +{ + return kasprintf(GFP_KERNEL, "error/%s", dev->nodename); +} + + +static void xenbus_va_dev_error(struct xenbus_device *dev, int err, + const char *fmt, va_list ap) +{ + int ret; + unsigned int len; + char *printf_buffer = NULL; + char *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + goto fail; + + len = sprintf(printf_buffer, "%i ", -err); + ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1); + + dev_err(&dev->dev, "%s\n", printf_buffer); + + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + + if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { + dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + +fail: + kfree(printf_buffer); + kfree(path_buffer); +} + + +/** + * xenbus_dev_error + * @dev: xenbus device + * @err: error to report 
+ * @fmt: error message format + * + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL_GPL(xenbus_dev_error); + +/** + * xenbus_dev_fatal + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ + +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_switch_state(dev, XenbusStateClosing); +} +EXPORT_SYMBOL_GPL(xenbus_dev_fatal); + +/** + * xenbus_grant_ring + * @dev: xenbus device + * @ring_mfn: mfn of ring to grant + * + * Grant access to the given @ring_mfn to the peer of the given device. Return + * 0 on success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +{ + int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); + if (err < 0) + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_grant_ring); + + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. 
+ */ +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = dev->otherend_id; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err) + xenbus_dev_fatal(dev, err, "allocating event channel"); + else + *port = alloc_unbound.port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); + + +/** + * Bind to an existing interdomain event channel in another domain. Returns 0 + * on success and stores the local port in *port. On error, returns -errno, + * switches the device to XenbusStateClosing, and saves the error in XenStore. + */ +int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = dev->otherend_id; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (err) + xenbus_dev_fatal(dev, err, + "binding to event channel %d from domain %d", + remote_port, dev->otherend_id); + else + *port = bind_interdomain.local_port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ +int xenbus_free_evtchn(struct xenbus_device *dev, int port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) + xenbus_dev_error(dev, err, "freeing event channel %d", port); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_free_evtchn); + + +/** + * xenbus_map_ring_valloc + * @dev: xenbus device + * @gnt_ref: grant reference + * @vaddr: pointer to address to be filled out by mapping + * + * Based on Rusty Russell's skeleton driver's map_page. + * Map a page of memory into this domain from another domain's grant table. 
+ * xenbus_map_ring_valloc allocates a page of virtual address space, maps the + * page to that address, and sets *vaddr to that address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +{ + struct gnttab_map_grant_ref op = { + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + struct vm_struct *area; + + *vaddr = NULL; + + area = alloc_vm_area(PAGE_SIZE); + if (!area) + return -ENOMEM; + + op.host_addr = (unsigned long)area->addr; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + free_vm_area(area); + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + return op.status; + } + + /* Stuff the handle in an unused field */ + area->phys_addr = (unsigned long)op.handle; + + *vaddr = area->addr; + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + + +/** + * xenbus_map_ring + * @dev: xenbus device + * @gnt_ref: grant reference + * @handle: pointer to grant handle to be filled + * @vaddr: address to be mapped to + * + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring does not allocate the virtual address space (you must do + * this yourself!). It only maps in the page to the specified address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. 
+ */ +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr) +{ + struct gnttab_map_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + } else + *handle = op.handle; + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring); + + +/** + * xenbus_unmap_ring_vfree + * @dev: xenbus device + * @vaddr: addr to unmap + * + * Based on Rusty Russell's skeleton driver's unmap_page. + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) +{ + struct vm_struct *area; + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + }; + + /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) + * method so that we don't have to muck with vmalloc internals here. + * We could force the user to hang on to their struct vm_struct from + * xenbus_map_ring_valloc, but these 6 lines considerably simplify + * this API. 
+ */ + read_lock(&vmlist_lock); + for (area = vmlist; area != NULL; area = area->next) { + if (area->addr == vaddr) + break; + } + read_unlock(&vmlist_lock); + + if (!area) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + op.handle = (grant_handle_t)area->phys_addr; + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status == GNTST_okay) + free_vm_area(area); + else + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + (int16_t)area->phys_addr, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + + +/** + * xenbus_unmap_ring + * @dev: xenbus device + * @handle: grant handle + * @vaddr: addr to unmap + * + * Unmap a page of memory in this domain that was imported from another domain. + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t handle, void *vaddr) +{ + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .handle = handle, + }; + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + handle, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); + + +/** + * xenbus_read_driver_state + * @path: path for driver + * + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. 
+ */ +enum xenbus_state xenbus_read_driver_state(const char *path) +{ + enum xenbus_state result; + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (err) + result = XenbusStateUnknown; + + return result; +} +EXPORT_SYMBOL_GPL(xenbus_read_driver_state); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c new file mode 100644 index 00000000000..6efbe3f29ca --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -0,0 +1,233 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talk to Xen Store: ringbuffer and event channel. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "xenbus_comms.h" + +static int xenbus_irq; + +static DECLARE_WORK(probe_work, xenbus_probe); + +static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); + +static irqreturn_t wake_waiting(int irq, void *unused) +{ + if (unlikely(xenstored_ready == 0)) { + xenstored_ready = 1; + schedule_work(&probe_work); + } + + wake_up(&xb_waitq); + return IRQ_HANDLED; +} + +static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) +{ + return ((prod - cons) <= XENSTORE_RING_SIZE); +} + +static void *get_output_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return buf + MASK_XENSTORE_IDX(prod); +} + +static const void *get_input_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return buf + MASK_XENSTORE_IDX(cons); +} + +/** + * xb_write - low level write + * @data: buffer to send + * @len: length of buffer + * + * Returns 0 on success, error otherwise. + */ +int xb_write(const void *data, unsigned len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + int rc; + + while (len != 0) { + void *dst; + unsigned int avail; + + rc = wait_event_interruptible( + xb_waitq, + (intf->req_prod - intf->req_cons) != + XENSTORE_RING_SIZE); + if (rc < 0) + return rc; + + /* Read indexes, then verify. 
*/ + cons = intf->req_cons; + prod = intf->req_prod; + if (!check_indexes(cons, prod)) { + intf->req_cons = intf->req_prod = 0; + return -EIO; + } + + dst = get_output_chunk(cons, prod, intf->req, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must write data /after/ reading the consumer index. */ + mb(); + + memcpy(dst, data, avail); + data += avail; + len -= avail; + + /* Other side must not see new producer until data is there. */ + wmb(); + intf->req_prod += avail; + + /* Implies mb(): other side will see the updated producer. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return 0; +} + +int xb_data_to_read(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + return (intf->rsp_cons != intf->rsp_prod); +} + +int xb_wait_for_data_to_read(void) +{ + return wait_event_interruptible(xb_waitq, xb_data_to_read()); +} + +int xb_read(void *data, unsigned len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + int rc; + + while (len != 0) { + unsigned int avail; + const char *src; + + rc = xb_wait_for_data_to_read(); + if (rc < 0) + return rc; + + /* Read indexes, then verify. */ + cons = intf->rsp_cons; + prod = intf->rsp_prod; + if (!check_indexes(cons, prod)) { + intf->rsp_cons = intf->rsp_prod = 0; + return -EIO; + } + + src = get_input_chunk(cons, prod, intf->rsp, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must read data /after/ reading the producer index. */ + rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + + /* Other side must not see free space until we've copied out */ + mb(); + intf->rsp_cons += avail; + + pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); + + /* Implies mb(): other side will see the updated consumer. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return 0; +} + +/** + * xb_init_comms - Set up interrupt handler off store event channel. 
+ */ +int xb_init_comms(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + int err; + + if (intf->req_prod != intf->req_cons) + printk(KERN_ERR "XENBUS request ring is not quiescent " + "(%08x:%08x)!\n", intf->req_cons, intf->req_prod); + + if (intf->rsp_prod != intf->rsp_cons) { + printk(KERN_WARNING "XENBUS response ring is not quiescent " + "(%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + intf->rsp_cons = intf->rsp_prod; + } + + if (xenbus_irq) + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + + err = bind_evtchn_to_irqhandler( + xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + + return 0; +} diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h new file mode 100644 index 00000000000..c21db751373 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -0,0 +1,46 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +int xs_init(void); +int xb_init_comms(void); + +/* Low level routines. */ +int xb_write(const void *data, unsigned len); +int xb_read(void *data, unsigned len); +int xb_data_to_read(void); +int xb_wait_for_data_to_read(void); +int xs_input_avail(void); +extern struct xenstore_domain_interface *xen_store_interface; +extern int xen_store_evtchn; + +#endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c new file mode 100644 index 00000000000..0b769f7c4a4 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -0,0 +1,935 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define DPRINTK(fmt, args...) 
\ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +int xen_store_evtchn; +struct xenstore_domain_interface *xen_store_interface; +static unsigned long xen_store_mfn; + +static BLOCKING_NOTIFIER_HEAD(xenstore_chain); + +static void wait_for_devices(struct xenbus_driver *xendrv); + +static int xenbus_probe_frontend(const char *type, const char *name); + +static void xenbus_dev_shutdown(struct device *_dev); + +/* If something in array of ids matches this device, return it. */ +static const struct xenbus_device_id * +match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) +{ + for (; *arr->devicetype != '\0'; arr++) { + if (!strcmp(arr->devicetype, dev->devicetype)) + return arr; + } + return NULL; +} + +int xenbus_match(struct device *_dev, struct device_driver *_drv) +{ + struct xenbus_driver *drv = to_xenbus_driver(_drv); + + if (!drv->ids) + return 0; + + return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; +} + +/* device// => - */ +static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) { + printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + + +static void free_otherend_details(struct xenbus_device *dev) +{ + kfree(dev->otherend); + dev->otherend = NULL; +} + + +static void free_otherend_watch(struct xenbus_device *dev) +{ + if (dev->otherend_watch.node) { + unregister_xenbus_watch(&dev->otherend_watch); + kfree(dev->otherend_watch.node); 
+ dev->otherend_watch.node = NULL; + } +} + + +int read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node) +{ + int err = xenbus_gather(XBT_NIL, xendev->nodename, + id_node, "%i", &xendev->otherend_id, + path_node, NULL, &xendev->otherend, + NULL); + if (err) { + xenbus_dev_fatal(xendev, err, + "reading other end details from %s", + xendev->nodename); + return err; + } + if (strlen(xendev->otherend) == 0 || + !xenbus_exists(XBT_NIL, xendev->otherend, "")) { + xenbus_dev_fatal(xendev, -ENOENT, + "unable to read other end from %s. " + "missing or inaccessible.", + xendev->nodename); + free_otherend_details(xendev); + return -ENOENT; + } + + return 0; +} + + +static int read_backend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "backend-id", "backend"); +} + + +/* Bus type for frontend drivers. */ +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/ */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .bus = { + .name = "xen", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + }, +}; + +static void otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenbus_device *dev = + container_of(watch, struct xenbus_device, otherend_watch); + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + enum xenbus_state state; + + /* Protect us against watches firing on old details when the otherend + details change, say immediately after a resume. 
*/ + if (!dev->otherend || + strncmp(dev->otherend, vec[XS_WATCH_PATH], + strlen(dev->otherend))) { + dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]); + return; + } + + state = xenbus_read_driver_state(dev->otherend); + + dev_dbg(&dev->dev, "state is %d, (%s), %s, %s", + state, xenbus_strstate(state), dev->otherend_watch.node, + vec[XS_WATCH_PATH]); + + /* + * Ignore xenbus transitions during shutdown. This prevents us doing + * work that can fail e.g., when the rootfs is gone. + */ + if (system_state > SYSTEM_RUNNING) { + struct xen_bus_type *bus = bus; + bus = container_of(dev->dev.bus, struct xen_bus_type, bus); + /* If we're frontend, drive the state machine to Closed. */ + /* This should cause the backend to release our resources. */ + if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) + xenbus_frontend_closed(dev); + return; + } + + if (drv->otherend_changed) + drv->otherend_changed(dev, state); +} + + +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + + +static int watch_otherend(struct xenbus_device *dev) +{ + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, + "%s/%s", dev->otherend, "state"); +} + + +int xenbus_dev_probe(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + const struct xenbus_device_id *id; + int err; + + DPRINTK("%s", dev->nodename); + + if (!drv->probe) { + err = -ENODEV; + goto fail; + } + + id = match_device(drv->ids, dev); + if (!id) { + err = -ENODEV; + goto fail; + } + + err = talk_to_otherend(dev); + if (err) { + dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", + dev->nodename); + return err; + } + + err = drv->probe(dev, id); + if (err) + goto fail; + + err = watch_otherend(dev); + if (err) { + 
dev_warn(&dev->dev, "watch_otherend on %s failed.\n", + dev->nodename); + return err; + } + + return 0; +fail: + xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); + xenbus_switch_state(dev, XenbusStateClosed); + return -ENODEV; +} + +int xenbus_dev_remove(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + + DPRINTK("%s", dev->nodename); + + free_otherend_watch(dev); + free_otherend_details(dev); + + if (drv->remove) + drv->remove(dev); + + xenbus_switch_state(dev, XenbusStateClosed); + return 0; +} + +static void xenbus_dev_shutdown(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned long timeout = 5*HZ; + + DPRINTK("%s", dev->nodename); + + get_device(&dev->dev); + if (dev->state != XenbusStateConnected) { + printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__, + dev->nodename, xenbus_strstate(dev->state)); + goto out; + } + xenbus_switch_state(dev, XenbusStateClosing); + timeout = wait_for_completion_timeout(&dev->down, timeout); + if (!timeout) + printk(KERN_INFO "%s: %s timeout closing device\n", + __func__, dev->nodename); + out: + put_device(&dev->dev); +} + +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name) +{ + drv->driver.name = drv->name; + drv->driver.bus = &bus->bus; + drv->driver.owner = owner; + drv->driver.mod_name = mod_name; + + return driver_register(&drv->driver); +} + +int __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. 
*/ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + +void xenbus_unregister_driver(struct xenbus_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(xenbus_unregister_driver); + +struct xb_find_info +{ + struct xenbus_device *dev; + const char *nodename; +}; + +static int cmp_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + + if (!strcmp(xendev->nodename, info->nodename)) { + info->dev = xendev; + get_device(dev); + return 1; + } + return 0; +} + +struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) +{ + struct xb_find_info info = { .dev = NULL, .nodename = nodename }; + + bus_for_each_dev(bus, NULL, &info, cmp_dev); + return info.dev; +} + +static int cleanup_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + int len = strlen(info->nodename); + + DPRINTK("%s", info->nodename); + + /* Match the info->nodename path, or any subdirectory of that path. */ + if (strncmp(xendev->nodename, info->nodename, len)) + return 0; + + /* If the node name is longer, ensure it really is a subdirectory. 
*/
+	if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
+		return 0;
+
+	info->dev = xendev;
+	get_device(dev);
+	return 1;
+}
+
+/*
+ * Unregister every device on bus whose nodename is path or lies below it.
+ * The bus is rescanned from the start after each removal, because
+ * cleanup_dev() stops at the first match.
+ */
+static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
+{
+	struct xb_find_info info = { .nodename = path };
+
+	do {
+		info.dev = NULL;
+		bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+		if (info.dev) {
+			device_unregister(&info.dev->dev);
+			/* Drop the reference taken by cleanup_dev(). */
+			put_device(&info.dev->dev);
+		}
+	} while (info.dev);
+}
+
+/* Device release callback: frees the xenbus_device and its trailing strings. */
+static void xenbus_dev_release(struct device *dev)
+{
+	if (dev)
+		kfree(to_xenbus_device(dev));
+}
+
+/* sysfs "nodename" attribute: the device's xenstore path. */
+static ssize_t xendev_show_nodename(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
+}
+DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
+
+/* sysfs "devtype" attribute: the device's type string. */
+static ssize_t xendev_show_devtype(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
+}
+DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+
+
+/*
+ * Create and register a xenbus device for the xenstore node nodename of the
+ * given type on bus.
+ *
+ * Nodes whose state is not XenbusStateInitialising are silently ignored
+ * (returns 0).  nodename and type are copied into the same allocation as
+ * the device, directly after the structure.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * bus->get_bus_id(), device_register() or device_create_file().
+ */
+int xenbus_probe_node(struct xen_bus_type *bus,
+		      const char *type,
+		      const char *nodename)
+{
+	int err;
+	struct xenbus_device *xendev;
+	size_t stringlen;
+	char *tmpstring;
+
+	enum xenbus_state state = xenbus_read_driver_state(nodename);
+
+	if (state != XenbusStateInitialising) {
+		/* Device is not new, so ignore it.  This can happen if a
+		   device is going away after switching to Closed.  */
+		return 0;
+	}
+
+	stringlen = strlen(nodename) + 1 + strlen(type) + 1;
+	xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
+	if (!xendev)
+		return -ENOMEM;
+
+	xendev->state = XenbusStateInitialising;
+
+	/* Copy the strings into the extra space. */
+
+	tmpstring = (char *)(xendev + 1);
+	strcpy(tmpstring, nodename);
+	xendev->nodename = tmpstring;
+
+	tmpstring += strlen(tmpstring) + 1;
+	strcpy(tmpstring, type);
+	xendev->devicetype = tmpstring;
+	init_completion(&xendev->down);
+
+	xendev->dev.bus = &bus->bus;
+	xendev->dev.release = xenbus_dev_release;
+
+	err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
+	if (err)
+		goto fail_free;
+
+	/* Register with generic device framework. */
+	err = device_register(&xendev->dev);
+	if (err)
+		goto fail_put;
+
+	err = device_create_file(&xendev->dev, &dev_attr_nodename);
+	if (err)
+		goto fail_unregister;
+
+	err = device_create_file(&xendev->dev, &dev_attr_devtype);
+	if (err)
+		goto fail_remove_file;
+
+	return 0;
+fail_remove_file:
+	device_remove_file(&xendev->dev, &dev_attr_nodename);
+fail_unregister:
+	/*
+	 * device_unregister() drops the device's reference and the memory is
+	 * freed by xenbus_dev_release(); freeing it here again would be a
+	 * double free.
+	 */
+	device_unregister(&xendev->dev);
+	return err;
+fail_put:
+	/*
+	 * Once device_register() has been called the structure is reference
+	 * counted, even when registration failed: release it with
+	 * put_device() rather than kfree().
+	 */
+	put_device(&xendev->dev);
+	return err;
+fail_free:
+	/* Never handed to the driver core, so a plain kfree() is correct. */
+	kfree(xendev);
+	return err;
+}
+
+/*
+ * Probe one frontend device: device/<type>/<name>.
+ * Returns -ENOMEM if the path cannot be built, else xenbus_probe_node()'s
+ * result.
+ */
+static int xenbus_probe_frontend(const char *type, const char *name)
+{
+	char *nodename;
+	int err;
+
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
+			     xenbus_frontend.root, type, name);
+	if (!nodename)
+		return -ENOMEM;
+
+	DPRINTK("%s", nodename);
+
+	err = xenbus_probe_node(&xenbus_frontend, type, nodename);
+	kfree(nodename);
+	return err;
+}
+
+/*
+ * Probe every entry listed under <bus->root>/<type>, stopping at the first
+ * error.  Returns 0 or that error (or the xenbus_directory() error).
+ */
+static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
+{
+	int err = 0;
+	char **dir;
+	unsigned int dir_n = 0;
+	unsigned int i;	/* unsigned to match dir_n */
+
+	dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	for (i = 0; i < dir_n; i++) {
+		err = bus->probe(type, dir[i]);
+		if (err)
+			break;
+	}
+	kfree(dir);
+	return err;
+}
+
+/*
+ * Probe every device type listed under bus->root, stopping at the first
+ * error.  Returns 0 or that error (or the xenbus_directory() error).
+ */
+int xenbus_probe_devices(struct xen_bus_type *bus)
+{
+	int err = 0;
+	char **dir;
+	unsigned int i, dir_n;
+
+	dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	for (i = 0; i < dir_n; i++) {
+		err = xenbus_probe_device_type(bus, dir[i]);
+		if (err)
+			break;
+	}
+	kfree(dir);
+	return err;
+}
+
+static unsigned int char_count(const char *str, char c)
+{
+	unsigned int i, ret = 0;
+	/* Count how many characters of the NUL-terminated str equal c. */
+	for (i = 0; str[i]; i++)
+		if (str[i] == c)
+			ret++;
+	return ret;
+}
+/*
+ * Return the index in str of the occurrence of c that has exactly len
+ * earlier occurrences before it.  If str ends after exactly len
+ * occurrences, return the length of str; if it holds fewer, return -ERANGE.
+ */
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; str[i]; i++)
+		if (str[i] == c) {
+			if (len == 0)
+				return i;
+			len--;
+		}
+	return (len == 0) ? i : -ERANGE;
+}
+/*
+ * Handle a change reported for xenstore path node on the given bus.
+ *
+ * Paths with fewer than two '/' are ignored.  If the node no longer exists,
+ * the devices at or below it are unregistered.  Otherwise the device type is
+ * taken from the second path component, the device root is the prefix of
+ * node selected by strsep_len() with bus->levels, and that root is probed
+ * unless a device with that nodename is already registered.
+ */
+void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+{
+	int exists, rootlen;
+	struct xenbus_device *dev;
+	char type[BUS_ID_SIZE];
+	const char *p, *root;
+
+	if (char_count(node, '/') < 2)
+		return;
+
+	exists = xenbus_exists(XBT_NIL, node, "");
+	if (!exists) {
+		xenbus_cleanup_devices(node, &bus->bus);
+		return;
+	}
+
+	/* backend/<type>/... or device/<type>/... */
+	p = strchr(node, '/') + 1;
+	snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
+	type[BUS_ID_SIZE-1] = '\0';
+
+	rootlen = strsep_len(node, '/', bus->levels);
+	if (rootlen < 0)
+		return;
+	root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
+	if (!root)
+		return;
+
+	dev = xenbus_device_find(root, &bus->bus);
+	if (!dev)
+		xenbus_probe_node(bus, type, root);
+	else
+		put_device(&dev->dev); /* drop the reference taken by the lookup */
+
+	kfree(root);
+}
+/*
+ * Watch callback for the frontend bus: forward the changed path to
+ * xenbus_dev_changed().  len is unused.
+ */
+static void frontend_changed(struct xenbus_watch *watch,
+			     const char **vec, unsigned int len)
+{
+	DPRINTK("");
+
+	xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
+}
+
+/* We watch for devices appearing and vanishing.
*/
+static struct xenbus_watch fe_watch = {
+	.node = "device",
+	.callback = frontend_changed,
+};
+/*
+ * bus_for_each_dev() callback: call the bound driver's suspend hook, if any.
+ * A failure is only logged; 0 is always returned so the walk continues.
+ */
+static int suspend_dev(struct device *dev, void *data)
+{
+	int err = 0;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+
+	DPRINTK("");
+
+	if (dev->driver == NULL)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+	if (drv->suspend)
+		err = drv->suspend(xdev);
+	if (err)
+		printk(KERN_WARNING
+		       "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
+	return 0;
+}
+/*
+ * bus_for_each_dev() callback: call the bound driver's suspend_cancel hook,
+ * if any.  A failure is only logged; 0 is always returned.
+ */
+static int suspend_cancel_dev(struct device *dev, void *data)
+{
+	int err = 0;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+
+	DPRINTK("");
+
+	if (dev->driver == NULL)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+	if (drv->suspend_cancel)
+		err = drv->suspend_cancel(xdev);
+	if (err)
+		printk(KERN_WARNING
+		       "xenbus: suspend_cancel %s failed: %i\n",
+		       dev->bus_id, err);
+	return 0;
+}
+/*
+ * bus_for_each_dev() callback: re-read the other end's details, reset the
+ * device state to Initialising, call the driver's resume hook if any, and
+ * re-register the other-end watch.  Unlike the suspend callbacks, the first
+ * error is returned after being logged.  Devices without a driver are
+ * skipped.
+ */
+static int resume_dev(struct device *dev, void *data)
+{
+	int err;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+
+	DPRINTK("");
+
+	if (dev->driver == NULL)
+		return 0;
+
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+
+	err = talk_to_otherend(xdev);
+	if (err) {
+		printk(KERN_WARNING
+		       "xenbus: resume (talk_to_otherend) %s failed: %i\n",
+		       dev->bus_id, err);
+		return err;
+	}
+
+	xdev->state = XenbusStateInitialising;
+
+	if (drv->resume) {
+		err = drv->resume(xdev);
+		if (err) {
+			printk(KERN_WARNING
+			       "xenbus: resume %s failed: %i\n",
+			       dev->bus_id, err);
+			return err;
+		}
+	}
+
+	err = watch_otherend(xdev);
+	if (err) {
+		printk(KERN_WARNING
+		       "xenbus_probe: resume (watch_otherend) %s failed: "
+		       "%d.\n", dev->bus_id, err);
+		return err;
+	}
+
+	return 0;
+}
+/*
+ * Suspend all frontend devices, then backend devices, then the xenstore
+ * layer (xs_suspend()).
+ */
+void xenbus_suspend(void)
+{
+	DPRINTK("");
+
+	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+	xenbus_backend_suspend(suspend_dev);
+	xs_suspend();
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend);
+/*
+ * Resume in the reverse order of xenbus_suspend(): communications and the
+ * xenstore layer first, then frontend and backend devices.
+ */
+void xenbus_resume(void)
+{
+	xb_init_comms();
+	xs_resume();
+	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+	xenbus_backend_resume(resume_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_resume);
+/*
+ * Undo a suspend that did not go ahead: xs_suspend_cancel(), then the
+ * suspend_cancel callback on frontend and backend devices.
+ */
+void xenbus_suspend_cancel(void)
+{
+	xs_suspend_cancel();
+	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
+	xenbus_backend_resume(suspend_cancel_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
+
+/* A flag to determine if xenstored is 'ready' (i.e. has started) */
+int xenstored_ready = 0;
+
+/*
+ * Register nb to be told when xenstore is up.  If xenstored is already
+ * ready the callback is invoked immediately (and nb is not added to the
+ * chain); its return value is returned.  Otherwise nb is queued and 0 is
+ * returned.
+ */
+int register_xenstore_notifier(struct notifier_block *nb)
+{
+	int ret = 0;
+
+	if (xenstored_ready > 0)
+		ret = nb->notifier_call(nb, 0, NULL);
+	else
+		blocking_notifier_chain_register(&xenstore_chain, nb);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_xenstore_notifier);
+/* Remove nb from the xenstore notifier chain. */
+void unregister_xenstore_notifier(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&xenstore_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
+/*
+ * Enumerate the devices currently in xenstore, start watching for changes,
+ * and notify the xenstore notifier chain.  Must only run once xenstored is
+ * ready (enforced by the BUG_ON).  The work_struct argument is unused.
+ */
+void xenbus_probe(struct work_struct *unused)
+{
+	BUG_ON((xenstored_ready <= 0));
+
+	/* Enumerate devices in xenstore and watch for changes. */
+	xenbus_probe_devices(&xenbus_frontend);
+	register_xenbus_watch(&fe_watch);
+	xenbus_backend_probe_and_watch();
+
+	/* Notify others that xenstore is up */
+	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
+}
+/*
+ * Initcall: register the frontend and backend buses, set up the xenstore
+ * interface and, outside the initial domain, probe for devices.
+ * Returns -ENODEV when not running on Xen.
+ */
+static int __init xenbus_probe_init(void)
+{
+	int err = 0;
+
+	DPRINTK("");
+
+	err = -ENODEV;
+	if (!is_running_on_xen())
+		goto out_error;
+
+	/* Register ourselves with the kernel bus subsystem */
+	err = bus_register(&xenbus_frontend.bus);
+	if (err)
+		goto out_error;
+
+	err = xenbus_backend_bus_register();
+	if (err)
+		goto out_unreg_front;
+
+	/*
+	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
+	 */
+	if (is_initial_xendomain()) {
+		/* dom0 not yet supported */
+	} else {
+		xenstored_ready = 1;
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+	}
+	xen_store_interface = mfn_to_virt(xen_store_mfn);
+
+	/* Initialize the interface to xenstore. */
+	err = xs_init();
+	if (err) {
+		printk(KERN_WARNING
+		       "XENBUS: Error initializing xenstore comms: %i\n", err);
+		goto out_unreg_back;
+	}
+
+	if (!is_initial_xendomain())
+		xenbus_probe(NULL);
+
+	return 0;
+	/* Error unwinding: undo the registrations in reverse order. */
+  out_unreg_back:
+	xenbus_backend_bus_unregister();
+
+  out_unreg_front:
+	bus_unregister(&xenbus_frontend.bus);
+
+  out_error:
+	return err;
+}
+
+postcore_initcall(xenbus_probe_init);
+
+MODULE_LICENSE("GPL");
+/*
+ * bus_for_each_dev() callback: return non-zero for a device that has a
+ * driver bound (optionally restricted to the driver passed in data) but is
+ * not in XenbusStateConnected.  A non-zero return stops the walk.
+ */
+static int is_disconnected_device(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct device_driver *drv = data;
+
+	/*
+	 * A device with no driver will never connect. We care only about
+	 * devices which should currently be in the process of connecting.
+	 */
+	if (!dev->driver)
+		return 0;
+
+	/* Is this search limited to a particular driver? */
+	if (drv && (dev->driver != drv))
+		return 0;
+
+	return (xendev->state != XenbusStateConnected);
+}
+/*
+ * Return non-zero if any frontend device (of driver drv, or of any driver
+ * when drv is NULL) is bound but not yet connected.
+ */
+static int exists_disconnected_device(struct device_driver *drv)
+{
+	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+				is_disconnected_device);
+}
+/*
+ * bus_for_each_dev() callback: log devices that have no driver or are not
+ * connected.  Always returns 0 so every device is visited.
+ */
+static int print_device_status(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct device_driver *drv = data;
+
+	/* Is this operation limited to a particular driver? */
+	if (drv && (dev->driver != drv))
+		return 0;
+
+	if (!dev->driver) {
+		/* Information only: is this too noisy?
*/ + printk(KERN_INFO "XENBUS: Device with no driver: %s\n", + xendev->nodename); + } else if (xendev->state != XenbusStateConnected) { + printk(KERN_WARNING "XENBUS: Timeout connecting " + "to device: %s (state %d)\n", + xendev->nodename, xendev->state); + } + + return 0; +} + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; + +/* + * On a 10 second timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. + */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long timeout = jiffies + 10*HZ; + struct device_driver *drv = xendrv ? 
&xendrv->driver : NULL; + + if (!ready_to_wait_for_devices || !is_running_on_xen()) + return; + + while (exists_disconnected_device(drv)) { + if (time_after(jiffies, timeout)) + break; + schedule_timeout_interruptible(HZ/10); + } + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); +} + +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); +#endif diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h new file mode 100644 index 00000000000..e09b19415a4 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -0,0 +1,74 @@ +/****************************************************************************** + * xenbus_probe.h + * + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_PROBE_H +#define _XENBUS_PROBE_H + +#ifdef CONFIG_XEN_BACKEND +extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); +extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); +extern void xenbus_backend_probe_and_watch(void); +extern int xenbus_backend_bus_register(void); +extern void xenbus_backend_bus_unregister(void); +#else +static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} +static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} +static inline void xenbus_backend_probe_and_watch(void) {} +static inline int xenbus_backend_bus_register(void) { return 0; } +static inline void xenbus_backend_bus_unregister(void) {} +#endif + +struct xen_bus_type +{ + char *root; + unsigned int levels; + int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename); + int (*probe)(const char *type, const char *dir); + struct bus_type bus; +}; + +extern int xenbus_match(struct device *_dev, struct device_driver *_drv); +extern int xenbus_dev_probe(struct device *_dev); +extern int xenbus_dev_remove(struct device *_dev); +extern int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name); +extern int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename); +extern int xenbus_probe_devices(struct xen_bus_type *bus); + +extern void xenbus_dev_changed(const char *node, struct 
xen_bus_type *bus); + +#endif diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c new file mode 100644 index 00000000000..9e943fbce81 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -0,0 +1,861 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xenbus_comms.h" + +struct xs_stored_msg { + struct list_head list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + struct list_head reply_list; + spinlock_t reply_lock; + wait_queue_head_t reply_waitq; + + /* + * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. + * response_mutex is never taken simultaneously with the other three. + */ + + /* One request at a time. */ + struct mutex request_mutex; + + /* Protect xenbus reader thread against save/restore. */ + struct mutex response_mutex; + + /* Protect transactions against save/restore. */ + struct rw_semaphore transaction_mutex; + + /* Protect watch (de)register against save/restore. */ + struct rw_semaphore watch_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watches); +static DEFINE_SPINLOCK(watches_lock); + +/* List of pending watch callback events, and a lock to protect it. */ +static LIST_HEAD(watch_events); +static DEFINE_SPINLOCK(watch_events_lock); + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. 
+ */ +static pid_t xenwatch_pid; +static DEFINE_MUTEX(xenwatch_mutex); +static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + printk(KERN_WARNING + "XENBUS xen store gave: unknown error %s", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xs_stored_msg *msg; + char *body; + + spin_lock(&xs_state.reply_lock); + + while (list_empty(&xs_state.reply_list)) { + spin_unlock(&xs_state.reply_lock); + /* XXX FIXME: Avoid synchronous wait for response here. */ + wait_event(xs_state.reply_waitq, + !list_empty(&xs_state.reply_list)); + spin_lock(&xs_state.reply_lock); + } + + msg = list_entry(xs_state.reply_list.next, + struct xs_stored_msg, list); + list_del(&msg->list); + + spin_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + kfree(msg); + + return body; +} + +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) +{ + void *ret; + struct xsd_sockmsg req_msg = *msg; + int err; + + if (req_msg.type == XS_TRANSACTION_START) + down_read(&xs_state.transaction_mutex); + + mutex_lock(&xs_state.request_mutex); + + err = xb_write(msg, sizeof(*msg) + msg->len); + if (err) { + msg->type = XS_ERROR; + ret = ERR_PTR(err); + } else + ret = read_reply(&msg->type, &msg->len); + + mutex_unlock(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + up_read(&xs_state.transaction_mutex); + + return ret; +} + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. 
*/ +static void *xs_talkv(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int err; + + msg.tx_id = t.id; + msg.req_id = 0; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + mutex_lock(&xs_state.request_mutex); + + err = xb_write(&msg, sizeof(msg)); + if (err) { + mutex_unlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + + for (i = 0; i < num_vecs; i++) { + err = xb_write(iovec[i].iov_base, iovec[i].iov_len); + if (err) { + mutex_unlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + } + + ret = read_reply(&msg.type, len); + + mutex_unlock(&xs_state.request_mutex); + + if (IS_ERR(ret)) + return ret; + + if (msg.type == XS_ERROR) { + err = get_error(ret); + kfree(ret); + return ERR_PTR(-err); + } + + if (msg.type != type) { + if (printk_ratelimit()) + printk(KERN_WARNING + "XENBUS unexpected type [%d], expected [%d]\n", + msg.type, type); + kfree(ret); + return ERR_PTR(-EINVAL); + } + return ret; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct kvec iovec; + + iovec.iov_base = (void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(t, type, &iovec, 1, len); +} + +/* Many commands only need an ack, don't care what it says. */ +static int xs_error(char *reply) +{ + if (IS_ERR(reply)) + return PTR_ERR(reply); + kfree(reply); + return 0; +} + +static unsigned int count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. 
*/ +static char *join(const char *dir, const char *name) +{ + char *buffer; + + if (strlen(name) == 0) + buffer = kasprintf(GFP_KERNEL, "%s", dir); + else + buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; +} + +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. */ + ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL); + if (!ret) { + kfree(strings); + return ERR_PTR(-ENOMEM); + } + memcpy(&ret[*num], strings, len); + kfree(strings); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + return ret; +} + +char **xenbus_directory(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *num) +{ + char *strings, *path; + unsigned int len; + + path = join(dir, node); + if (IS_ERR(path)) + return (char **)path; + + strings = xs_single(t, XS_DIRECTORY, path, &len); + kfree(path); + if (IS_ERR(strings)) + return (char **)strings; + + return split(strings, len, num); +} +EXPORT_SYMBOL_GPL(xenbus_directory); + +/* Check if a path exists. Return 1 if it does. */ +int xenbus_exists(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char **d; + unsigned int dir_n; /* type matches xenbus_directory()'s unsigned int *num */ + + d = xenbus_directory(t, dir, node, &dir_n); + if (IS_ERR(d)) + return 0; + kfree(d); + return 1; +} +EXPORT_SYMBOL_GPL(xenbus_exists); + +/* Get the value of a single file. + * Returns a kmalloced value: call kfree() on it after use. + * len indicates length in bytes. 
+ */ +void *xenbus_read(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *len) +{ + char *path; + void *ret; + + path = join(dir, node); + if (IS_ERR(path)) + return (void *)path; + + ret = xs_single(t, XS_READ, path, len); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_read); + +/* Write the value of a single file. + * Returns -err on failure. + */ +int xenbus_write(struct xenbus_transaction t, + const char *dir, const char *node, const char *string) +{ + const char *path; + struct kvec iovec[2]; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + iovec[0].iov_base = (void *)path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)string; + iovec[1].iov_len = strlen(string); + + ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_write); + +/* Create a new directory. */ +int xenbus_mkdir(struct xenbus_transaction t, + const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_MKDIR, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_mkdir); + +/* Destroy a file or directory (directories must be empty). */ +int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = xs_error(xs_single(t, XS_RM, path, NULL)); + kfree(path); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_rm); + +/* Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. 
+ */ +int xenbus_transaction_start(struct xenbus_transaction *t) +{ + char *id_str; + + down_read(&xs_state.transaction_mutex); + + id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); + if (IS_ERR(id_str)) { + up_read(&xs_state.transaction_mutex); + return PTR_ERR(id_str); + } + + t->id = simple_strtoul(id_str, NULL, 0); + kfree(id_str); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_transaction_start); + +/* End a transaction. + * If abort is true, transaction is discarded instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + int err; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); + + up_read(&xs_state.transaction_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_transaction_end); + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(t, dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return -ERANGE; + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_scanf); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) 
+{ + va_list ap; + int ret; +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + return -ENOMEM; + + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + va_end(ap); + + BUG_ON(ret > PRINTF_BUFFER_SIZE-1); + ret = xenbus_write(t, dir, node, printf_buffer); + + kfree(printf_buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_printf); + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(t, dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + kfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_gather); + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (char *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (char *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + list_for_each_entry(i, &watches, list) + if (i == cmp) + return i; + + return NULL; +} + +/* Register 
callback to watch this node. */ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.watch_mutex); + + spin_lock(&watches_lock); + BUG_ON(find_watch(token)); + list_add(&watch->list, &watches); + spin_unlock(&watches_lock); + + err = xs_watch(watch->node, token); + + /* Ignore errors due to multiple registration. */ + if ((err != 0) && (err != -EEXIST)) { + spin_lock(&watches_lock); + list_del(&watch->list); + spin_unlock(&watches_lock); + } + + up_read(&xs_state.watch_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(register_xenbus_watch); + +void unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_stored_msg *msg, *tmp; + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.watch_mutex); + + spin_lock(&watches_lock); + BUG_ON(!find_watch(token)); + list_del(&watch->list); + spin_unlock(&watches_lock); + + err = xs_unwatch(watch->node, token); + if (err) + printk(KERN_WARNING + "XENBUS Failed to release watch %s: %i\n", + watch->node, err); + + up_read(&xs_state.watch_mutex); + + /* Make sure there are no callbacks running currently (unless + it's us) */ + if (current->pid != xenwatch_pid) + mutex_lock(&xenwatch_mutex); + + /* Cancel pending watch events. 
*/ + spin_lock(&watch_events_lock); + list_for_each_entry_safe(msg, tmp, &watch_events, list) { + if (msg->u.watch.handle != watch) + continue; + list_del(&msg->list); + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watch_events_lock); + + if (current->pid != xenwatch_pid) + mutex_unlock(&xenwatch_mutex); +} +EXPORT_SYMBOL_GPL(unregister_xenbus_watch); + +void xs_suspend(void) +{ + down_write(&xs_state.transaction_mutex); + down_write(&xs_state.watch_mutex); + mutex_lock(&xs_state.request_mutex); + mutex_lock(&xs_state.response_mutex); +} + +void xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + mutex_unlock(&xs_state.response_mutex); + mutex_unlock(&xs_state.request_mutex); + up_write(&xs_state.transaction_mutex); + + /* No need for watches_lock: the watch_mutex is sufficient. */ + list_for_each_entry(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + up_write(&xs_state.watch_mutex); +} + +void xs_suspend_cancel(void) +{ + mutex_unlock(&xs_state.response_mutex); + mutex_unlock(&xs_state.request_mutex); + up_write(&xs_state.watch_mutex); + up_write(&xs_state.transaction_mutex); +} + +static int xenwatch_thread(void *unused) +{ + struct list_head *ent; + struct xs_stored_msg *msg; + + for (;;) { + wait_event_interruptible(watch_events_waitq, + !list_empty(&watch_events)); + + if (kthread_should_stop()) + break; + + mutex_lock(&xenwatch_mutex); + + spin_lock(&watch_events_lock); + ent = watch_events.next; + if (ent != &watch_events) + list_del(ent); + spin_unlock(&watch_events_lock); + + if (ent != &watch_events) { + msg = list_entry(ent, struct xs_stored_msg, list); + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + kfree(msg->u.watch.vec); + kfree(msg); + } + + mutex_unlock(&xenwatch_mutex); + } + + return 0; +} + +static int process_msg(void) +{ + struct xs_stored_msg *msg; + char *body; + int 
err; + + /* + * We must disallow save/restore while reading a xenstore message. + * A partial read across s/r leaves us out of sync with xenstored. + */ + for (;;) { + err = xb_wait_for_data_to_read(); + if (err) + return err; + mutex_lock(&xs_state.response_mutex); + if (xb_data_to_read()) + break; + /* We raced with save/restore: pending data 'disappeared'. */ + mutex_unlock(&xs_state.response_mutex); + } + + + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (msg == NULL) { + err = -ENOMEM; + goto out; + } + + err = xb_read(&msg->hdr, sizeof(msg->hdr)); + if (err) { + kfree(msg); + goto out; + } + + body = kmalloc(msg->hdr.len + 1, GFP_KERNEL); + if (body == NULL) { + kfree(msg); + err = -ENOMEM; + goto out; + } + + err = xb_read(body, msg->hdr.len); + if (err) { + kfree(body); + kfree(msg); + goto out; + } + body[msg->hdr.len] = '\0'; + + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + if (IS_ERR(msg->u.watch.vec)) { + kfree(msg); + err = PTR_ERR(msg->u.watch.vec); + goto out; + } + + spin_lock(&watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + spin_lock(&watch_events_lock); + list_add_tail(&msg->list, &watch_events); + wake_up(&watch_events_waitq); + spin_unlock(&watch_events_lock); + } else { + kfree(msg->u.watch.vec); + kfree(msg); + } + spin_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + spin_lock(&xs_state.reply_lock); + list_add_tail(&msg->list, &xs_state.reply_list); + spin_unlock(&xs_state.reply_lock); + wake_up(&xs_state.reply_waitq); + } + + out: + mutex_unlock(&xs_state.response_mutex); + return err; +} + +static int xenbus_thread(void *unused) +{ + int err; + + for (;;) { + err = process_msg(); + if (err) + printk(KERN_WARNING "XENBUS error %d while reading " + "message\n", err); + if (kthread_should_stop()) + break; + } + + return 0; +} + +int xs_init(void) +{ + int err; + struct task_struct 
*task; + + INIT_LIST_HEAD(&xs_state.reply_list); + spin_lock_init(&xs_state.reply_lock); + init_waitqueue_head(&xs_state.reply_waitq); + + mutex_init(&xs_state.request_mutex); + mutex_init(&xs_state.response_mutex); + init_rwsem(&xs_state.transaction_mutex); + init_rwsem(&xs_state.watch_mutex); + + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + return err; + + task = kthread_run(xenwatch_thread, NULL, "xenwatch"); + if (IS_ERR(task)) + return PTR_ERR(task); + xenwatch_pid = task->pid; + + task = kthread_run(xenbus_thread, NULL, "xenbus"); + if (IS_ERR(task)) + return PTR_ERR(task); + + return 0; +} diff --git a/include/asm-i386/xen/hypervisor.h b/include/asm-i386/xen/hypervisor.h index ebfa7e06308..8e15dd28c91 100644 --- a/include/asm-i386/xen/hypervisor.h +++ b/include/asm-i386/xen/hypervisor.h @@ -42,6 +42,7 @@ #include #include +#include #if defined(__i386__) # ifdef CONFIG_X86_PAE # include diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h new file mode 100644 index 00000000000..6f7c290651a --- /dev/null +++ b/include/xen/xenbus.h @@ -0,0 +1,234 @@ +/****************************************************************************** + * xenbus.h + * + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XEN_XENBUS_H +#define _XEN_XENBUS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Register callback to watch this node. */ +struct xenbus_watch +{ + struct list_head list; + + /* Path being watched. */ + const char *node; + + /* Callback (executed in a process context with no locks held). */ + void (*callback)(struct xenbus_watch *, + const char **vec, unsigned int len); +}; + + +/* A xenbus device. 
*/ +struct xenbus_device { + const char *devicetype; + const char *nodename; + const char *otherend; + int otherend_id; + struct xenbus_watch otherend_watch; + struct device dev; + enum xenbus_state state; + struct completion down; +}; + +static inline struct xenbus_device *to_xenbus_device(struct device *dev) +{ + return container_of(dev, struct xenbus_device, dev); +} + +struct xenbus_device_id +{ + /* .../device// */ + char devicetype[32]; /* General class of device. */ +}; + +/* A xenbus driver. */ +struct xenbus_driver { + char *name; + struct module *owner; + const struct xenbus_device_id *ids; + int (*probe)(struct xenbus_device *dev, + const struct xenbus_device_id *id); + void (*otherend_changed)(struct xenbus_device *dev, + enum xenbus_state backend_state); + int (*remove)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev); + int (*suspend_cancel)(struct xenbus_device *dev); + int (*resume)(struct xenbus_device *dev); + int (*uevent)(struct xenbus_device *, char **, int, char *, int); + struct device_driver driver; + int (*read_otherend_details)(struct xenbus_device *dev); +}; + +static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) +{ + return container_of(drv, struct xenbus_driver, driver); +} + +int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, + const char *mod_name); + +static inline int __must_check +xenbus_register_frontend(struct xenbus_driver *drv) +{ + WARN_ON(drv->owner != THIS_MODULE); + return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); +} + +int __must_check __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, + const char *mod_name); +static inline int __must_check +xenbus_register_backend(struct xenbus_driver *drv) +{ + WARN_ON(drv->owner != THIS_MODULE); + return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); +} + +void xenbus_unregister_driver(struct xenbus_driver *drv); + +struct 
xenbus_transaction +{ + u32 id; +}; + +/* Nil transaction ID. */ +#define XBT_NIL ((struct xenbus_transaction) { 0 }) + +int __init xenbus_dev_init(void); + +char **xenbus_directory(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *num); +void *xenbus_read(struct xenbus_transaction t, + const char *dir, const char *node, unsigned int *len); +int xenbus_write(struct xenbus_transaction t, + const char *dir, const char *node, const char *string); +int xenbus_mkdir(struct xenbus_transaction t, + const char *dir, const char *node); +int xenbus_exists(struct xenbus_transaction t, + const char *dir, const char *node); +int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node); +int xenbus_transaction_start(struct xenbus_transaction *t); +int xenbus_transaction_end(struct xenbus_transaction t, int abort); + +/* Single read and scanf: returns -errno or num scanned if > 0. */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(scanf, 4, 5))); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(printf, 4, 5))); + +/* Generic read function: NULL-terminated triples of name, + * scanf-style type string, and pointer. Returns 0 or -errno. */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); + +/* notifier routines for when the xenstore comes up */ +extern int xenstored_ready; +int register_xenstore_notifier(struct notifier_block *nb); +void unregister_xenstore_notifier(struct notifier_block *nb); + +int register_xenbus_watch(struct xenbus_watch *watch); +void unregister_xenbus_watch(struct xenbus_watch *watch); +void xs_suspend(void); +void xs_resume(void); +void xs_suspend_cancel(void); + +/* Used by xenbus_dev to borrow kernel's store connection. 
*/ +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); + +struct work_struct; + +/* Prepare for domain suspend: then resume or cancel the suspend. */ +void xenbus_suspend(void); +void xenbus_resume(void); +void xenbus_probe(struct work_struct *); +void xenbus_suspend_cancel(void); + +#define XENBUS_IS_ERR_READ(str) ({ \ + if (!IS_ERR(str) && strlen(str) == 0) { \ + kfree(str); \ + str = ERR_PTR(-ERANGE); \ + } \ + IS_ERR(str); \ +}) + +#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) + +int xenbus_watch_path(struct xenbus_device *dev, const char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); +int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int), + const char *pathfmt, ...) + __attribute__ ((format (printf, 4, 5))); + +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); +int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); +int xenbus_map_ring_valloc(struct xenbus_device *dev, + int gnt_ref, void **vaddr); +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr); + +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t handle, void *vaddr); + +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); +int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); +int xenbus_free_evtchn(struct xenbus_device *dev, int port); + +enum xenbus_state xenbus_read_driver_state(const char *path); + +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); + +const char *xenbus_strstate(enum xenbus_state state); +int xenbus_dev_is_online(struct xenbus_device *dev); +int 
xenbus_frontend_closed(struct xenbus_device *dev); + +#endif /* _XEN_XENBUS_H */ -- cgit v1.2.3-70-g09d2 From 9f27ee595038653ddf8bca871200d39247d6f4fc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:06 -0700 Subject: xen: add virtual block device driver. The block device frontend driver allows the kernel to access block devices exported by a virtual machine containing a physical block device driver. Signed-off-by: Ian Pratt Signed-off-by: Christian Limpach Signed-off-by: Chris Wright Cc: Arjan van de Ven Cc: Greg KH Cc: Jens Axboe --- drivers/block/Kconfig | 9 + drivers/block/Makefile | 1 + drivers/block/xen-blkfront.c | 988 +++++++++++++++++++++++++++++++++++++++++++ include/linux/major.h | 2 + 4 files changed, 1000 insertions(+) create mode 100644 drivers/block/xen-blkfront.c (limited to 'include') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 8f65b88cf71..a4a31199240 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -427,4 +427,13 @@ config XILINX_SYSACE help Include support for the Xilinx SystemACE CompactFlash interface +config XEN_BLKDEV_FRONTEND + tristate "Xen virtual block device support" + depends on XEN + default y + help + This driver implements the front-end of the Xen virtual + block device driver. It communicates with a back-end driver + in another domain which drives the actual block device. 
+ endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 9ee08ab4ffa..3e31532df0e 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c new file mode 100644 index 00000000000..6746c29181f --- /dev/null +++ b/drivers/block/xen-blkfront.c @@ -0,0 +1,988 @@ +/* + * blkfront.c + * + * XenLinux virtual block device driver. + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge + * Copyright (c) 2004, Christian Limpach + * Copyright (c) 2004, Andrew Warfield + * Copyright (c) 2005, Christopher Clark + * Copyright (c) 2005, XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include + +enum blkif_state { + BLKIF_STATE_DISCONNECTED, + BLKIF_STATE_CONNECTED, + BLKIF_STATE_SUSPENDED, +}; + +struct blk_shadow { + struct blkif_request req; + unsigned long request; + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +static struct block_device_operations xlvbd_block_fops; + +#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. They + * hang in private_data off the gendisk structure. We may end up + * putting all kinds of interesting stuff here :-) + */ +struct blkfront_info +{ + struct xenbus_device *xbdev; + dev_t dev; + struct gendisk *gd; + int vdevice; + blkif_vdev_t handle; + enum blkif_state connected; + int ring_ref; + struct blkif_front_ring ring; + unsigned int evtchn, irq; + struct request_queue *rq; + struct work_struct work; + struct gnttab_free_callback callback; + struct blk_shadow shadow[BLK_RING_SIZE]; + unsigned long shadow_free; + int feature_barrier; + + /** + * The number of people holding this device open. We won't allow a + * hot-unplug unless this is 0. 
+ */ + int users; +}; + +static DEFINE_SPINLOCK(blkif_io_lock); + +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) +#define GRANT_INVALID_REF 0 + +#define PARTS_PER_DISK 16 + +#define BLKIF_MAJOR(dev) ((dev)>>8) +#define BLKIF_MINOR(dev) ((dev) & 0xff) + +#define DEV_NAME "xvd" /* name in /dev */ + +/* Information about our VBDs. */ +#define MAX_VBDS 64 +static LIST_HEAD(vbds_list); + +static int get_id_from_freelist(struct blkfront_info *info) +{ + unsigned long free = info->shadow_free; + BUG_ON(free >= BLK_RING_SIZE); /* shadow[] has BLK_RING_SIZE entries */ + info->shadow_free = info->shadow[free].req.id; + info->shadow[free].req.id = 0x0fffffee; /* debug */ + return free; +} + +static void add_id_to_freelist(struct blkfront_info *info, + unsigned long id) +{ + info->shadow[id].req.id = info->shadow_free; + info->shadow[id].request = 0; + info->shadow_free = id; +} + +static void blkif_restart_queue_callback(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + schedule_work(&info->work); +} + +/* + * blkif_queue_request + * + * request block io + * + * id: for guest use only. + * operation: BLKIF_OP_{READ,WRITE,PROBE} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + */ +static int blkif_queue_request(struct request *req) +{ + struct blkfront_info *info = req->rq_disk->private_data; + unsigned long buffer_mfn; + struct blkif_request *ring_req; + struct bio *bio; + struct bio_vec *bvec; + int idx; + unsigned long id; + unsigned int fsect, lsect; + int ref; + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Fill out a communications ring structure. 
*/ + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + id = get_id_from_freelist(info); + info->shadow[id].request = (unsigned long)req; + + ring_req->id = id; + ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->handle = info->handle; + + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + if (blk_barrier_rq(req)) + ring_req->operation = BLKIF_OP_WRITE_BARRIER; + + ring_req->nr_segments = 0; + rq_for_each_bio (bio, req) { + bio_for_each_segment (bvec, bio, idx) { + BUG_ON(ring_req->nr_segments + == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnttab_grant_foreign_access_ref( + ref, + info->xbdev->otherend_id, + buffer_mfn, + rq_data_dir(req) ); + + info->shadow[id].frame[ring_req->nr_segments] = + mfn_to_pfn(buffer_mfn); + + ring_req->seg[ring_req->nr_segments] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + + ring_req->nr_segments++; + } + } + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. 
*/ + info->shadow[id].req = *ring_req; + + gnttab_free_grant_references(gref_head); + + return 0; +} + + +static inline void flush_requests(struct blkfront_info *info) +{ + int notify; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); + + if (notify) + notify_remote_via_irq(info->irq); +} + +/* + * do_blkif_request + * read a block; request is in a request queue + */ +static void do_blkif_request(request_queue_t *rq) +{ + struct blkfront_info *info = NULL; + struct request *req; + int queued; + + pr_debug("Entered do_blkif_request\n"); + + queued = 0; + + while ((req = elv_next_request(rq)) != NULL) { + info = req->rq_disk->private_data; + if (!blk_fs_request(req)) { + end_request(req, 0); + continue; + } + + if (RING_FULL(&info->ring)) + goto wait; + + pr_debug("do_blk_req %p: cmd %p, sec %lx, " + "(%u/%li) buffer:%p [%s]\n", + req, req->cmd, (unsigned long)req->sector, + req->current_nr_sectors, + req->nr_sectors, req->buffer, + rq_data_dir(req) ? "write" : "read"); + + + blkdev_dequeue_request(req); + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); +wait: + /* Avoid pointless unplugs. */ + blk_stop_queue(rq); + break; + } + + queued++; + } + + if (queued != 0) + flush_requests(info); +} + +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +{ + request_queue_t *rq; + + rq = blk_init_queue(do_blkif_request, &blkif_io_lock); + if (rq == NULL) + return -1; + + elevator_init(rq, "noop"); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_hardsect_size(rq, sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. 
*/ + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + gd->queue = rq; + + return 0; +} + + +static int xlvbd_barrier(struct blkfront_info *info) +{ + int err; + + err = blk_queue_ordered(info->rq, + info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, + NULL); + + if (err) + return err; + + printk(KERN_INFO "blkfront: %s: barriers %s\n", + info->gd->disk_name, + info->feature_barrier ? "enabled" : "disabled"); + return 0; +} + + +static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, + int vdevice, u16 vdisk_info, u16 sector_size, + struct blkfront_info *info) +{ + struct gendisk *gd; + int nr_minors = 1; + int err = -ENODEV; + + BUG_ON(info->gd != NULL); + BUG_ON(info->rq != NULL); + + if ((minor % PARTS_PER_DISK) == 0) + nr_minors = PARTS_PER_DISK; + + gd = alloc_disk(nr_minors); + if (gd == NULL) + goto out; + + if (nr_minors > 1) + sprintf(gd->disk_name, "%s%c", DEV_NAME, + 'a' + minor / PARTS_PER_DISK); + else + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, + 'a' + minor / PARTS_PER_DISK, + minor % PARTS_PER_DISK); + + gd->major = XENVBD_MAJOR; + gd->first_minor = minor; + gd->fops = &xlvbd_block_fops; + gd->private_data = info; + gd->driverfs_dev = &(info->xbdev->dev); + set_capacity(gd, capacity); + + if (xlvbd_init_blk_queue(gd, sector_size)) { + del_gendisk(gd); + goto out; + } + + info->rq = gd->queue; + info->gd = gd; + + if (info->feature_barrier) + xlvbd_barrier(info); + + if (vdisk_info & VDISK_READONLY) + set_disk_ro(gd, 1); + + if (vdisk_info & VDISK_REMOVABLE) + gd->flags |= GENHD_FL_REMOVABLE; + + if (vdisk_info & VDISK_CDROM) + gd->flags |= GENHD_FL_CD; + + return 0; + + out: + return err; +} + +static void kick_pending_request_queues(struct blkfront_info *info) +{ + if (!RING_FULL(&info->ring)) { + /* Re-enable calldowns. 
*/ + blk_start_queue(info->rq); + /* Kick things off immediately. */ + do_blkif_request(info->rq); + } +} + +static void blkif_restart_queue(struct work_struct *work) +{ + struct blkfront_info *info = container_of(work, struct blkfront_info, work); + + spin_lock_irq(&blkif_io_lock); + if (info->connected == BLKIF_STATE_CONNECTED) + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); +} + +static void blkif_free(struct blkfront_info *info, int suspend) +{ + /* Prevent new requests being issued until we fix things up. */ + spin_lock_irq(&blkif_io_lock); + info->connected = suspend ? + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; + /* No more blkif_request(). */ + if (info->rq) + blk_stop_queue(info->rq); + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&info->callback); + spin_unlock_irq(&blkif_io_lock); + + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_scheduled_work(); + + /* Free resources associated with old device channel. */ + if (info->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); + info->ring_ref = GRANT_INVALID_REF; + info->ring.sring = NULL; + } + if (info->irq) + unbind_from_irqhandler(info->irq, info); + info->evtchn = info->irq = 0; + +} + +static void blkif_completion(struct blk_shadow *s) +{ + int i; + for (i = 0; i < s->req.nr_segments; i++) + gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); +} + +static irqreturn_t blkif_interrupt(int irq, void *dev_id) +{ + struct request *req; + struct blkif_response *bret; + RING_IDX i, rp; + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + int uptodate; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + spin_unlock_irqrestore(&blkif_io_lock, flags); + return IRQ_HANDLED; + } + + again: + rp = info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + int ret; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + req = (struct request *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + add_id_to_freelist(info, id); + + uptodate = (bret->status == BLKIF_RSP_OKAY); + switch (bret->operation) { + case BLKIF_OP_WRITE_BARRIER: + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", + info->gd->disk_name); + uptodate = -EOPNOTSUPP; + info->feature_barrier = 0; + xlvbd_barrier(info); + } + /* fall through */ + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(bret->status != BLKIF_RSP_OKAY)) + dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " + "request: %x\n", bret->status); + + ret = end_that_request_first(req, uptodate, + req->hard_nr_sectors); + BUG_ON(ret); + end_that_request_last(req, uptodate); + break; + default: + BUG(); + } + } + + info->ring.rsp_cons = i; + + if (i != info->ring.req_prod_pvt) { + int more_to_do; + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + if (more_to_do) + goto again; + } else + info->ring.sring->rsp_event = i + 1; + + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} + + +static int setup_blkring(struct xenbus_device *dev, + struct blkfront_info *info) +{ + struct blkif_sring *sring; + int err; + + info->ring_ref = GRANT_INVALID_REF; + + sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + if (err < 0) { + free_page((unsigned long)sring); + info->ring.sring = NULL; + goto fail; + } + info->ring_ref = err; + + err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (err) + goto fail; + + 
err = bind_evtchn_to_irqhandler(info->evtchn, + blkif_interrupt, + IRQF_SAMPLE_RANDOM, "blkif", info); + if (err <= 0) { + xenbus_dev_fatal(dev, err, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + info->irq = err; + + return 0; +fail: + blkif_free(info, 0); + return err; +} + + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct blkfront_info *info) +{ + const char *message = NULL; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. */ + err = setup_blkring(dev, info); + if (err) + goto out; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_blkring; + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", info->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_blkring; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_fatal(dev, err, "%s", message); + destroy_blkring: + blkif_free(info, 0); + out: + return err; +} + + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffer for communication with the backend, and + * inform the backend of the appropriate details for those. Switch to + * Initialised state. + */ +static int blkfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err, vdevice, i; + struct blkfront_info *info; + + /* FIXME: Use dynamic device id if this is not set. 
*/ + err = xenbus_scanf(XBT_NIL, dev->nodename, + "virtual-device", "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + + info->xbdev = dev; + info->vdevice = vdevice; + info->connected = BLKIF_STATE_DISCONNECTED; + INIT_WORK(&info->work, blkif_restart_queue); + + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Front end dir is a number, which is used as the id. */ + info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); + dev->dev.driver_data = info; + + err = talk_to_backend(dev, info); + if (err) { + kfree(info); + dev->dev.driver_data = NULL; + return err; + } + + return 0; +} + + +static int blkif_recover(struct blkfront_info *info) +{ + int i; + struct blkif_request *req; + struct blk_shadow *copy; + int j; + + /* Stage 1: Make a safe copy of the shadow state. */ + copy = kmalloc(sizeof(info->shadow), GFP_KERNEL); + if (!copy) + return -ENOMEM; + memcpy(copy, info->shadow, sizeof(info->shadow)); + + /* Stage 2: Set up free list. */ + memset(&info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow_free = info->ring.req_prod_pvt; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + /* Not in use? */ + if (copy[i].request == 0) + continue; + + /* Grab a request slot and copy shadow state into it. */ + req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + *req = copy[i].req; + + /* We get a new request id, and must reset the shadow state. 
*/ + req->id = get_id_from_freelist(info); + memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); + + /* Rewrite any grant references invalidated by susp/resume. */ + for (j = 0; j < req->nr_segments; j++) + gnttab_grant_foreign_access_ref( + req->seg[j].gref, + info->xbdev->otherend_id, + pfn_to_mfn(info->shadow[req->id].frame[j]), + rq_data_dir( + (struct request *) + info->shadow[req->id].request)); + info->shadow[req->id].req = *req; + + info->ring.req_prod_pvt++; + } + + kfree(copy); + + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + spin_lock_irq(&blkif_io_lock); + + /* Now safe for us to use the shared ring */ + info->connected = BLKIF_STATE_CONNECTED; + + /* Send off requeued requests */ + flush_requests(info); + + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); + + spin_unlock_irq(&blkif_io_lock); + + return 0; +} + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our blkif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int blkfront_resume(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + int err; + + dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); + + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); + + err = talk_to_backend(dev, info); + if (info->connected == BLKIF_STATE_SUSPENDED && !err) + err = blkif_recover(info); + + return err; +} + + +/* + * Invoked when the backend is finally 'ready' (and has told produced + * the details about the physical device - #sectors, size, etc). 
+ */ +static void blkfront_connect(struct blkfront_info *info) +{ + unsigned long long sectors; + unsigned long sector_size; + unsigned int binfo; + int err; + + if ((info->connected == BLKIF_STATE_CONNECTED) || + (info->connected == BLKIF_STATE_SUSPENDED) ) + return; + + dev_dbg(&info->xbdev->dev, "%s:%s.\n", + __func__, info->xbdev->otherend); + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "sectors", "%llu", &sectors, + "info", "%u", &binfo, + "sector-size", "%lu", &sector_size, + NULL); + if (err) { + xenbus_dev_fatal(info->xbdev, err, + "reading backend fields at %s", + info->xbdev->otherend); + return; + } + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%lu", &info->feature_barrier, + NULL); + if (err) + info->feature_barrier = 0; + + err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), + sectors, info->vdevice, + binfo, sector_size, info); + if (err) { + xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", + info->xbdev->otherend); + return; + } + + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + /* Kick pending requests. */ + spin_lock_irq(&blkif_io_lock); + info->connected = BLKIF_STATE_CONNECTED; + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); + + add_disk(info->gd); +} + +/** + * Handle the change of state of the backend to Closing. We must delete our + * device-layer structures now, to ensure that writes are flushed through to + * the backend. Once is this done, we can switch to Closed in + * acknowledgement. + */ +static void blkfront_closing(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + unsigned long flags; + + dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename); + + if (info->rq == NULL) + goto out; + + spin_lock_irqsave(&blkif_io_lock, flags); + + del_gendisk(info->gd); + + /* No more blkif_request(). */ + blk_stop_queue(info->rq); + + /* No more gnttab callback work. 
*/ + gnttab_cancel_free_callback(&info->callback); + spin_unlock_irqrestore(&blkif_io_lock, flags); + + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_scheduled_work(); + + blk_cleanup_queue(info->rq); + info->rq = NULL; + + out: + xenbus_frontend_closed(dev); +} + +/** + * Callback received when the backend's state changes. + */ +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct blkfront_info *info = dev->dev.driver_data; + struct block_device *bd; + + dev_dbg(&dev->dev, "blkfront:backend_changed.\n"); + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + blkfront_connect(info); + break; + + case XenbusStateClosing: + bd = bdget(info->dev); + if (bd == NULL) + xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); + + mutex_lock(&bd->bd_mutex); + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); + mutex_unlock(&bd->bd_mutex); + bdput(bd); + break; + } +} + +static int blkfront_remove(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + + dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename); + + blkif_free(info, 0); + + kfree(info); + + return 0; +} + +static int blkif_open(struct inode *inode, struct file *filep) +{ + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; + info->users++; + return 0; +} + +static int blkif_release(struct inode *inode, struct file *filep) +{ + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; + info->users--; + if (info->users == 0) { + /* Check whether we have been instructed to close. We will + have ignored this request initially, as the device was + still mounted. 
*/ + struct xenbus_device *dev = info->xbdev; + enum xenbus_state state = xenbus_read_driver_state(dev->otherend); + + if (state == XenbusStateClosing) + blkfront_closing(dev); + } + return 0; +} + +static struct block_device_operations xlvbd_block_fops = +{ + .owner = THIS_MODULE, + .open = blkif_open, + .release = blkif_release, +}; + + +static struct xenbus_device_id blkfront_ids[] = { + { "vbd" }, + { "" } +}; + +static struct xenbus_driver blkfront = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkfront_ids, + .probe = blkfront_probe, + .remove = blkfront_remove, + .resume = blkfront_resume, + .otherend_changed = backend_changed, +}; + +static int __init xlblk_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { + printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", + XENVBD_MAJOR, DEV_NAME); + return -ENODEV; + } + + return xenbus_register_frontend(&blkfront); +} +module_init(xlblk_init); + + +static void xlblk_exit(void) +{ + return xenbus_unregister_driver(&blkfront); +} +module_exit(xlblk_exit); + +MODULE_DESCRIPTION("Xen virtual block device frontend"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); diff --git a/include/linux/major.h b/include/linux/major.h index 7e7c9093919..0cb98053537 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -158,6 +158,8 @@ #define VXSPEC_MAJOR 200 /* VERITAS volume config driver */ #define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */ +#define XENVBD_MAJOR 202 /* Xen virtual block device */ + #define MSR_MAJOR 202 #define CPUID_MAJOR 203 -- cgit v1.2.3-70-g09d2 From 60223a326fc8fa6e90e2c3fd28ae6de4a311d731 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Jul 2007 18:37:07 -0700 Subject: xen: Place vcpu_info structure into per-cpu memory An experimental patch for Xen allows guests to place their vcpu_info structs anywhere. 
We try to use this to place the vcpu_info into the PDA, which allows direct access. If this works, then switch to using direct access operations for irq_enable, disable, save_fl and restore_fl. Signed-off-by: Jeremy Fitzhardinge Cc: Chris Wright Cc: Keir Fraser --- arch/i386/xen/enlighten.c | 152 +++++++++++++++++++++++++++++++++++++++++-- arch/i386/xen/setup.c | 8 --- arch/i386/xen/smp.c | 5 +- arch/i386/xen/xen-ops.h | 2 +- include/xen/interface/vcpu.h | 13 ++++ 5 files changed, 164 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c index 142e7489134..e33fa0990ed 100644 --- a/arch/i386/xen/enlighten.c +++ b/arch/i386/xen/enlighten.c @@ -61,9 +61,63 @@ DEFINE_PER_CPU(unsigned long, xen_cr3); struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); -void xen_vcpu_setup(int cpu) +static /* __initdata */ struct shared_info dummy_shared_info; + +/* + * Point at some empty memory to start with. We map the real shared_info + * page as soon as fixmap is up and running. + */ +struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; + +/* + * Flag to determine whether vcpu info placement is available on all + * VCPUs. We assume it is to start with, and then set it to zero on + * the first failure. This is because it can succeed on some VCPUs + * and not others, since it can involve hypervisor memory allocation, + * or because the guest failed to guarantee all the appropriate + * constraints on all VCPUs (ie buffer can't cross a page boundary). + * + * Note that any particular CPU may be using a placed vcpu structure, + * but we can only optimise if the all are. 
+ * + * 0: not available, 1: available + */ +static int have_vcpu_info_placement = 1; + +static void __init xen_vcpu_setup(int cpu) { + struct vcpu_register_vcpu_info info; + int err; + struct vcpu_info *vcpup; + per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + + if (!have_vcpu_info_placement) + return; /* already tested, not available */ + + vcpup = &per_cpu(xen_vcpu_info, cpu); + + info.mfn = virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); + + printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", + cpu, vcpup, info.mfn, info.offset); + + /* Check to see if the hypervisor will put the vcpu_info + structure where we want it, which allows direct access via + a percpu-variable. */ + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + + if (err) { + printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); + have_vcpu_info_placement = 0; + } else { + /* This cpu is using the registered vcpu info, even if + later ones fail to. */ + per_cpu(xen_vcpu, cpu) = vcpup; + printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", + cpu, vcpup); + } } static void __init xen_banner(void) @@ -123,6 +177,20 @@ static unsigned long xen_save_fl(void) return (-flags) & X86_EFLAGS_IF; } +static unsigned long xen_save_fl_direct(void) +{ + unsigned long flags; + + /* flag has opposite sense of mask */ + flags = !x86_read_percpu(xen_vcpu_info.evtchn_upcall_mask); + + /* convert to IF type flag + -0 -> 0x00000000 + -1 -> 0xffffffff + */ + return (-flags) & X86_EFLAGS_IF; +} + static void xen_restore_fl(unsigned long flags) { struct vcpu_info *vcpu; @@ -149,6 +217,25 @@ static void xen_restore_fl(unsigned long flags) } } +static void xen_restore_fl_direct(unsigned long flags) +{ + /* convert from IF type flag */ + flags = !(flags & X86_EFLAGS_IF); + + /* This is an atomic update, so no need to worry about + preemption. 
*/ + x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, flags); + + /* If we get preempted here, then any pending event will be + handled anyway. */ + + if (flags == 0) { + barrier(); /* unmask then check (avoid races) */ + if (unlikely(x86_read_percpu(xen_vcpu_info.evtchn_upcall_pending))) + force_evtchn_callback(); + } +} + static void xen_irq_disable(void) { /* There's a one instruction preempt window here. We need to @@ -159,6 +246,12 @@ static void xen_irq_disable(void) preempt_enable_no_resched(); } +static void xen_irq_disable_direct(void) +{ + /* Atomic update, so preemption not a concern. */ + x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, 1); +} + static void xen_irq_enable(void) { struct vcpu_info *vcpu; @@ -179,6 +272,19 @@ static void xen_irq_enable(void) force_evtchn_callback(); } +static void xen_irq_enable_direct(void) +{ + /* Atomic update, so preemption not a concern. */ + x86_write_percpu(xen_vcpu_info.evtchn_upcall_mask, 0); + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + barrier(); /* unmask then check (avoid races) */ + if (unlikely(x86_read_percpu(xen_vcpu_info.evtchn_upcall_pending))) + force_evtchn_callback(); +} + static void xen_safe_halt(void) { /* Blocking includes an implicit local_irq_enable(). 
*/ @@ -551,11 +657,21 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, xen_mc_issue(PARAVIRT_LAZY_MMU); } +static void xen_write_cr2(unsigned long cr2) +{ + x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; +} + static unsigned long xen_read_cr2(void) { return x86_read_percpu(xen_vcpu)->arch.cr2; } +static unsigned long xen_read_cr2_direct(void) +{ + return x86_read_percpu(xen_vcpu_info.arch.cr2); +} + static void xen_write_cr4(unsigned long cr4) { /* never allow TSC to be disabled */ @@ -753,8 +869,27 @@ static __init void xen_pagetable_setup_done(pgd_t *base) if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) BUG(); } +} - xen_vcpu_setup(smp_processor_id()); +/* This is called once we have the cpu_possible_map */ +void __init xen_setup_vcpu_info_placement(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + xen_vcpu_setup(cpu); + + /* xen_vcpu_setup managed to place the vcpu_info within the + percpu area for all cpus, so make use of it */ + if (have_vcpu_info_placement) { + printk(KERN_INFO "Xen: using vcpu_info placement\n"); + + paravirt_ops.save_fl = xen_save_fl_direct; + paravirt_ops.restore_fl = xen_restore_fl_direct; + paravirt_ops.irq_disable = xen_irq_disable_direct; + paravirt_ops.irq_enable = xen_irq_enable_direct; + paravirt_ops.read_cr2 = xen_read_cr2_direct; + } } static const struct paravirt_ops xen_paravirt_ops __initdata = { @@ -788,7 +923,7 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .write_cr0 = native_write_cr0, .read_cr2 = xen_read_cr2, - .write_cr2 = native_write_cr2, + .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, .write_cr3 = xen_write_cr3, @@ -974,7 +1109,16 @@ asmlinkage void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ x86_write_percpu(xen_cr3, __pa(pgd)); - xen_vcpu_setup(0); + +#ifdef CONFIG_SMP + /* Don't do the full vcpu_info placement stuff until we have a + possible map. 
*/ + per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; +#else + /* May as well do it now, since there's no good time to call + it later on UP. */ + xen_setup_vcpu_info_placement(); +#endif paravirt_ops.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c index 18a994d5a4c..3f8684eba62 100644 --- a/arch/i386/xen/setup.c +++ b/arch/i386/xen/setup.c @@ -24,14 +24,6 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; -static __initdata struct shared_info init_shared; - -/* - * Point at some empty memory to start with. We map the real shared_info - * page as soon as fixmap is up and running. - */ -struct shared_info *HYPERVISOR_shared_info = &init_shared; - unsigned long *phys_to_machine_mapping; EXPORT_SYMBOL(phys_to_machine_mapping); diff --git a/arch/i386/xen/smp.c b/arch/i386/xen/smp.c index a620918f87e..557b8e24706 100644 --- a/arch/i386/xen/smp.c +++ b/arch/i386/xen/smp.c @@ -142,8 +142,6 @@ void __init xen_smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); native_smp_prepare_boot_cpu(); - xen_vcpu_setup(0); - /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ make_lowmem_page_readwrite(&per_cpu__gdt_page); @@ -152,6 +150,8 @@ void __init xen_smp_prepare_boot_cpu(void) cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); } + + xen_setup_vcpu_info_placement(); } void __init xen_smp_prepare_cpus(unsigned int max_cpus) @@ -262,7 +262,6 @@ int __cpuinit xen_cpu_up(unsigned int cpu) init_gdt(cpu); per_cpu(current_task, cpu) = idle; - xen_vcpu_setup(cpu); irq_ctx_init(cpu); xen_setup_timer(cpu); diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h index 4069be8ba31..5b56f7fecd1 100644 --- a/arch/i386/xen/xen-ops.h +++ b/arch/i386/xen/xen-ops.h @@ -38,7 +38,7 @@ static inline unsigned xen_get_lazy_mode(void) void __init xen_fill_possible_map(void); -void xen_vcpu_setup(int cpu); 
+void __init xen_setup_vcpu_info_placement(void); void xen_smp_prepare_boot_cpu(void); void xen_smp_prepare_cpus(unsigned int max_cpus); int xen_cpu_up(unsigned int cpu); diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index c6218f1ad3c..ff61ea36599 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -151,4 +151,17 @@ struct vcpu_set_singleshot_timer { #define _VCPU_SSHOTTMR_future (0) #define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + */ +#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */ +struct vcpu_register_vcpu_info { + uint32_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ +}; + #endif /* __XEN_PUBLIC_VCPU_H__ */ -- cgit v1.2.3-70-g09d2 From fa1c1e8f1ece48c7baa3ba529bfd0d10a0bdf4eb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2006 19:19:47 -0700 Subject: [SCSI] Add SATA support to libsas Hook the scsi_host_template functions in libsas to delegate functionality to libata when appropriate. Signed-off-by: Darrick J. 
Wong Misc code changes and merge fixes and update for libata->drivers/ata move Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_init.c | 3 + drivers/scsi/libsas/sas_discover.c | 11 +- drivers/scsi/libsas/sas_scsi_host.c | 342 ++++++++++++++++++++++++++++++++++++ include/scsi/libsas.h | 12 ++ 4 files changed, 366 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 1c0d7578e79..b9cf46078fc 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -81,6 +81,9 @@ static struct scsi_host_template aic94xx_sht = { .use_clustering = ENABLE_CLUSTERING, .eh_device_reset_handler = sas_eh_device_reset_handler, .eh_bus_reset_handler = sas_eh_bus_reset_handler, + .slave_alloc = sas_slave_alloc, + .target_destroy = sas_target_destroy, + .ioctl = sas_ioctl, }; static int __devinit asd_map_memio(struct asd_ha_struct *asd_ha) diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index a65598b1e53..5252143b629 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -255,6 +255,7 @@ static int sas_get_port_device(struct asd_sas_port *port) switch (dev->dev_type) { case SAS_END_DEV: + case SATA_DEV: rphy = sas_end_device_alloc(port->port); break; case EDGE_DEV: @@ -265,7 +266,6 @@ static int sas_get_port_device(struct asd_sas_port *port) rphy = sas_expander_alloc(port->port, SAS_FANOUT_EXPANDER_DEVICE); break; - case SATA_DEV: default: printk("ERROR: Unidentified device type %d\n", dev->dev_type); rphy = NULL; @@ -480,7 +480,14 @@ cont1: present. 
sas_satl_register_dev(dev); */ - return 0; + + sas_fill_in_rphy(dev, dev->rphy); + + res = sas_rphy_add(dev->rphy); + if (res) + goto out_err; + + return res; out_err: dev->sata_dev.identify_packet_device = NULL; dev->sata_dev.identify_device = NULL; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 9c5342e7a69..3220b3fc6b2 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -42,6 +42,7 @@ #include #include #include +#include /* ---------- SCSI Host glue ---------- */ @@ -192,6 +193,11 @@ static int sas_queue_up(struct sas_task *task) return 0; } +static inline int dev_is_sata(struct domain_device *dev) +{ + return (dev->rphy->identify.target_port_protocols & SAS_PROTOCOL_SATA); +} + /** * sas_queuecommand -- Enqueue a command for processing * @parameters: See SCSI Core documentation @@ -213,6 +219,12 @@ int sas_queuecommand(struct scsi_cmnd *cmd, struct sas_ha_struct *sas_ha = dev->port->ha; struct sas_task *task; + if (dev_is_sata(dev)) { + res = ata_sas_queuecmd(cmd, scsi_done, + dev->sata_dev.ap); + goto out; + } + res = -ENOMEM; task = sas_create_task(cmd, dev, GFP_ATOMIC); if (!task) @@ -684,6 +696,279 @@ enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) return EH_NOT_HANDLED; } + +static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts) +{ + /* Cheesy attempt to translate SAS errors into ATA. Hah! */ + + /* transport error */ + if (ts->resp == SAS_TASK_UNDELIVERED) + return AC_ERR_ATA_BUS; + + /* ts->resp == SAS_TASK_COMPLETE */ + /* task delivered, what happened afterwards? */ + switch (ts->stat) { + case SAS_DEV_NO_RESPONSE: + return AC_ERR_TIMEOUT; + + case SAS_INTERRUPTED: + case SAS_PHY_DOWN: + case SAS_NAK_R_ERR: + return AC_ERR_ATA_BUS; + + + case SAS_DATA_UNDERRUN: + /* + * Some programs that use the taskfile interface + * (smartctl in particular) can cause underrun + * problems. Ignore these errors, perhaps at our + * peril. 
+ */ + return 0; + + case SAS_DATA_OVERRUN: + case SAS_QUEUE_FULL: + case SAS_DEVICE_UNKNOWN: + case SAS_SG_ERR: + return AC_ERR_INVALID; + + case SAM_CHECK_COND: + case SAS_OPEN_TO: + case SAS_OPEN_REJECT: + case SAS_PROTO_RESPONSE: + SAS_DPRINTK("%s: Saw error %d. What to do?\n", + __FUNCTION__, ts->stat); + return AC_ERR_OTHER; + + case SAS_ABORTED_TASK: + return AC_ERR_DEV; + + default: + return 0; + } +} + +static void sas_ata_task_done(struct sas_task *task) +{ + struct ata_queued_cmd *qc = task->uldd_task; + struct domain_device *dev = qc->ap->private_data; + struct task_status_struct *stat = &task->task_status; + struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf; + enum ata_completion_errors ac; + + ac = sas_to_ata_err(stat); + if (ac) { + SAS_DPRINTK("%s: SAS error %x\n", __FUNCTION__, stat->stat); + /* We saw a SAS error. Send a vague error. */ + qc->err_mask = ac; + dev->sata_dev.tf.feature = 0x04; /* status err */ + dev->sata_dev.tf.command = ATA_ERR; + goto end; + } + + ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf); + qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command); + dev->sata_dev.sstatus = resp->sstatus; + dev->sata_dev.serror = resp->serror; + dev->sata_dev.scontrol = resp->scontrol; + dev->sata_dev.ap->sactive = resp->sactive; +end: + ata_qc_complete(qc); + list_del_init(&task->list); + sas_free_task(task); +} + +int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) +{ + struct domain_device *dev = sdev_to_domain_dev(sdev); + + if (dev_is_sata(dev)) + return ata_scsi_ioctl(sdev, cmd, arg); + + return -EINVAL; +} + +static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) +{ + int res = -ENOMEM; + struct sas_task *task; + struct domain_device *dev = qc->ap->private_data; + struct sas_ha_struct *sas_ha = dev->port->ha; + struct Scsi_Host *host = sas_ha->core.shost; + struct sas_internal *i = to_sas_internal(host->transportt); + struct scatterlist *sg; + unsigned int num = 0; + unsigned int xfer = 0; + + 
task = sas_alloc_task(GFP_ATOMIC); + if (!task) + goto out; + task->dev = dev; + task->task_proto = SAS_PROTOCOL_STP; + task->task_done = sas_ata_task_done; + + ata_tf_to_fis(&qc->tf, (u8*)&task->ata_task.fis, 0); + task->uldd_task = qc; + if (is_atapi_taskfile(&qc->tf)) { + memcpy(task->ata_task.atapi_packet, qc->cdb, ATAPI_CDB_LEN); + task->total_xfer_len = qc->nbytes + qc->pad_len; + task->num_scatter = qc->pad_len ? qc->n_elem + 1 : qc->n_elem; + } else { + ata_for_each_sg(sg, qc) { + num++; + xfer += sg->length; + } + + task->total_xfer_len = xfer; + task->num_scatter = num; + } + + task->data_dir = qc->dma_dir; + task->scatter = qc->__sg; + task->ata_task.retry_count = 1; + task->task_state_flags = SAS_TASK_STATE_PENDING; + + if (qc->tf.protocol == ATA_PROT_DMA) + task->ata_task.dma_xfer = 1; + + if (sas_ha->lldd_max_execute_num < 2) + res = i->dft->lldd_execute_task(task, 1, GFP_ATOMIC); + else + res = sas_queue_up(task); + + /* Examine */ + if (res) { + SAS_DPRINTK("lldd_execute_task returned: %d\n", res); + + sas_free_task(task); + if (res == -SAS_QUEUE_FULL) + return -ENOMEM; + } + +out: + return res; +} + +static u8 sas_ata_check_status(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + return dev->sata_dev.tf.command; +} + +static void sas_ata_phy_reset(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + struct sas_internal *i = + to_sas_internal(dev->port->ha->core.shost->transportt); + int res = 0; + + if (i->dft->lldd_I_T_nexus_reset) + res = i->dft->lldd_I_T_nexus_reset(dev); + + if (res) + SAS_DPRINTK("%s: Unable to reset I T nexus?\n", __FUNCTION__); + + switch (dev->sata_dev.command_set) { + case ATA_COMMAND_SET: + SAS_DPRINTK("%s: Found ATA device.\n", __FUNCTION__); + ap->device[0].class = ATA_DEV_ATA; + break; + case ATAPI_COMMAND_SET: + SAS_DPRINTK("%s: Found ATAPI device.\n", __FUNCTION__); + ap->device[0].class = ATA_DEV_ATAPI; + break; + default: + SAS_DPRINTK("%s: Unknown SATA command set: 
%d.\n", + __FUNCTION__, + dev->sata_dev.command_set); + ap->device[0].class = ATA_DEV_ATA; + break; + } + + ap->cbl = ATA_CBL_SATA; +} + +static void sas_ata_post_internal(struct ata_queued_cmd *qc) +{ + if (qc->flags & ATA_QCFLAG_FAILED) + qc->err_mask |= AC_ERR_OTHER; + + if (qc->err_mask) + SAS_DPRINTK("%s: Failure; reset phy!\n", __FUNCTION__); +} + +static void sas_ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) +{ + struct domain_device *dev = ap->private_data; + memcpy(tf, &dev->sata_dev.tf, sizeof (*tf)); +} + +static void sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in, + u32 val) +{ + struct domain_device *dev = ap->private_data; + + SAS_DPRINTK("STUB %s\n", __FUNCTION__); + switch (sc_reg_in) { + case SCR_STATUS: + dev->sata_dev.sstatus = val; + break; + case SCR_CONTROL: + dev->sata_dev.scontrol = val; + break; + case SCR_ERROR: + dev->sata_dev.serror = val; + break; + case SCR_ACTIVE: + dev->sata_dev.ap->sactive = val; + break; + } +} + +static u32 sas_ata_scr_read(struct ata_port *ap, unsigned int sc_reg_in) +{ + struct domain_device *dev = ap->private_data; + + SAS_DPRINTK("STUB %s\n", __FUNCTION__); + switch (sc_reg_in) { + case SCR_STATUS: + return dev->sata_dev.sstatus; + case SCR_CONTROL: + return dev->sata_dev.scontrol; + case SCR_ERROR: + return dev->sata_dev.serror; + case SCR_ACTIVE: + return dev->sata_dev.ap->sactive; + default: + return 0xffffffffU; + } +} + +static struct ata_port_operations sas_sata_ops = { + .port_disable = ata_port_disable, + .check_status = sas_ata_check_status, + .check_altstatus = sas_ata_check_status, + .dev_select = ata_noop_dev_select, + .phy_reset = sas_ata_phy_reset, + .post_internal_cmd = sas_ata_post_internal, + .tf_read = sas_ata_tf_read, + .qc_prep = ata_noop_qc_prep, + .qc_issue = sas_ata_qc_issue, + .port_start = ata_sas_port_start, + .port_stop = ata_sas_port_stop, + .scr_read = sas_ata_scr_read, + .scr_write = sas_ata_scr_write +}; + +static struct ata_port_info sata_port_info = { 
+ .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET | + ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA, + .pio_mask = 0x1f, /* PIO0-4 */ + .mwdma_mask = 0x07, /* MWDMA0-2 */ + .udma_mask = ATA_UDMA6, + .port_ops = &sas_sata_ops +}; + struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy) { struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent); @@ -722,11 +1007,33 @@ static inline struct domain_device *sas_find_target(struct scsi_target *starget) int sas_target_alloc(struct scsi_target *starget) { + struct Scsi_Host *shost = dev_to_shost(&starget->dev); + struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); struct domain_device *found_dev = sas_find_target(starget); if (!found_dev) return -ENODEV; + if (dev_is_sata(found_dev)) { + struct ata_port *ap; + + ata_host_init(&found_dev->sata_dev.ata_host, + &ha->pcidev->dev, + sata_port_info.flags, + &sas_sata_ops); + ap = ata_sas_port_alloc(&found_dev->sata_dev.ata_host, + &sata_port_info, + shost); + if (!ap) { + SAS_DPRINTK("ata_sas_port_alloc failed.\n"); + return -ENODEV; + } + + ap->private_data = found_dev; + ap->cbl = ATA_CBL_SATA; + found_dev->sata_dev.ap = ap; + } + starget->hostdata = found_dev; return 0; } @@ -741,6 +1048,11 @@ int sas_slave_configure(struct scsi_device *scsi_dev) BUG_ON(dev->rphy->identify.device_type != SAS_END_DEVICE); + if (dev_is_sata(dev)) { + ata_sas_slave_configure(scsi_dev, dev->sata_dev.ap); + return 0; + } + sas_ha = dev->port->ha; sas_read_port_mode_page(scsi_dev); @@ -764,6 +1076,10 @@ int sas_slave_configure(struct scsi_device *scsi_dev) void sas_slave_destroy(struct scsi_device *scsi_dev) { + struct domain_device *dev = sdev_to_domain_dev(scsi_dev); + + if (dev_is_sata(dev)) + ata_port_disable(dev->sata_dev.ap); } int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth) @@ -984,6 +1300,29 @@ void sas_task_abort(struct sas_task *task) scsi_schedule_eh(sc->device->host); } +int sas_slave_alloc(struct scsi_device *scsi_dev) +{ + struct domain_device 
*dev = sdev_to_domain_dev(scsi_dev); + + if (dev_is_sata(dev)) + return ata_sas_port_init(dev->sata_dev.ap); + + return 0; +} + +void sas_target_destroy(struct scsi_target *starget) +{ + struct domain_device *found_dev = sas_find_target(starget); + + if (!found_dev) + return; + + if (dev_is_sata(found_dev)) + ata_sas_port_destroy(found_dev->sata_dev.ap); + + return; +} + EXPORT_SYMBOL_GPL(sas_queuecommand); EXPORT_SYMBOL_GPL(sas_target_alloc); EXPORT_SYMBOL_GPL(sas_slave_configure); @@ -997,3 +1336,6 @@ EXPORT_SYMBOL_GPL(sas_phy_reset); EXPORT_SYMBOL_GPL(sas_phy_enable); EXPORT_SYMBOL_GPL(sas_eh_device_reset_handler); EXPORT_SYMBOL_GPL(sas_eh_bus_reset_handler); +EXPORT_SYMBOL_GPL(sas_slave_alloc); +EXPORT_SYMBOL_GPL(sas_target_destroy); +EXPORT_SYMBOL_GPL(sas_ioctl); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 2e6bdc4e7a0..ce20177069a 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -165,6 +166,13 @@ struct sata_device { u8 port_no; /* port number, if this is a PM (Port) */ struct list_head children; /* PM Ports if this is a PM */ + + struct ata_port *ap; + struct ata_host ata_host; + struct ata_taskfile tf; + u32 sstatus; + u32 serror; + u32 scontrol; }; /* ---------- Domain device ---------- */ @@ -661,4 +669,8 @@ int __sas_task_abort(struct sas_task *); int sas_eh_device_reset_handler(struct scsi_cmnd *cmd); int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd); +extern void sas_target_destroy(struct scsi_target *); +extern int sas_slave_alloc(struct scsi_device *); +extern int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg); + #endif /* _SASLIB_H_ */ -- cgit v1.2.3-70-g09d2 From 338ec57003ff9d7bc1471677e61872455977a5de Mon Sep 17 00:00:00 2001 From: "Darrick J. 
Wong" Date: Wed, 18 Oct 2006 14:43:37 -0700 Subject: [SCSI] Migrate libsas ATA code into a separate file This is a respin of my earlier patch that migrates the ATA support code into a separate file. For now, the controversial linking bits have been removed per James Bottomley's request for a patch that contains only the migration diffs, which means that libsas continues to require libata. I intend to address that problem in a separate patch. This patch is against the aic94xx-sas-2.6 git tree, and it has been sanity tested on my x206m with Seagate SATA and SAS disks without uncovering any new problems. Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/libsas/Makefile | 3 +- drivers/scsi/libsas/sas_ata.c | 339 ++++++++++++++++++++++++++++++++++++ drivers/scsi/libsas/sas_scsi_host.c | 313 +-------------------------------- include/scsi/libsas.h | 1 + include/scsi/sas_ata.h | 39 +++++ 5 files changed, 387 insertions(+), 308 deletions(-) create mode 100644 drivers/scsi/libsas/sas_ata.c create mode 100644 include/scsi/sas_ata.h (limited to 'include') diff --git a/drivers/scsi/libsas/Makefile b/drivers/scsi/libsas/Makefile index 44d972a3b4b..6383eb58d89 100644 --- a/drivers/scsi/libsas/Makefile +++ b/drivers/scsi/libsas/Makefile @@ -33,4 +33,5 @@ libsas-y += sas_init.o \ sas_dump.o \ sas_discover.o \ sas_expander.o \ - sas_scsi_host.o + sas_scsi_host.o \ + sas_ata.o diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c new file mode 100644 index 00000000000..de42b5b801c --- /dev/null +++ b/drivers/scsi/libsas/sas_ata.c @@ -0,0 +1,339 @@ +/* + * Support for SATA devices on Serial Attached SCSI (SAS) controllers + * + * Copyright (C) 2006 IBM Corporation + * + * Written by: Darrick J. 
Wong , IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + */ + +#include +#include "sas_internal.h" +#include +#include +#include +#include +#include +#include +#include "../scsi_sas_internal.h" + +static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts) +{ + /* Cheesy attempt to translate SAS errors into ATA. Hah! */ + + /* transport error */ + if (ts->resp == SAS_TASK_UNDELIVERED) + return AC_ERR_ATA_BUS; + + /* ts->resp == SAS_TASK_COMPLETE */ + /* task delivered, what happened afterwards? */ + switch (ts->stat) { + case SAS_DEV_NO_RESPONSE: + return AC_ERR_TIMEOUT; + + case SAS_INTERRUPTED: + case SAS_PHY_DOWN: + case SAS_NAK_R_ERR: + return AC_ERR_ATA_BUS; + + + case SAS_DATA_UNDERRUN: + /* + * Some programs that use the taskfile interface + * (smartctl in particular) can cause underrun + * problems. Ignore these errors, perhaps at our + * peril. + */ + return 0; + + case SAS_DATA_OVERRUN: + case SAS_QUEUE_FULL: + case SAS_DEVICE_UNKNOWN: + case SAS_SG_ERR: + return AC_ERR_INVALID; + + case SAM_CHECK_COND: + case SAS_OPEN_TO: + case SAS_OPEN_REJECT: + SAS_DPRINTK("%s: Saw error %d. 
What to do?\n", + __FUNCTION__, ts->stat); + return AC_ERR_OTHER; + + case SAS_ABORTED_TASK: + return AC_ERR_DEV; + + case SAS_PROTO_RESPONSE: + /* This means the ending_fis has the error + * value; return 0 here to collect it */ + return 0; + default: + return 0; + } +} + +static void sas_ata_task_done(struct sas_task *task) +{ + struct ata_queued_cmd *qc = task->uldd_task; + struct domain_device *dev = qc->ap->private_data; + struct task_status_struct *stat = &task->task_status; + struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf; + enum ata_completion_errors ac; + + if (stat->stat == SAS_PROTO_RESPONSE) { + ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf); + qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command); + dev->sata_dev.sstatus = resp->sstatus; + dev->sata_dev.serror = resp->serror; + dev->sata_dev.scontrol = resp->scontrol; + dev->sata_dev.ap->sactive = resp->sactive; + } else if (stat->stat != SAM_STAT_GOOD) { + ac = sas_to_ata_err(stat); + if (ac) { + SAS_DPRINTK("%s: SAS error %x\n", __FUNCTION__, + stat->stat); + /* We saw a SAS error. Send a vague error. 
*/ + qc->err_mask = ac; + dev->sata_dev.tf.feature = 0x04; /* status err */ + dev->sata_dev.tf.command = ATA_ERR; + } + } + + ata_qc_complete(qc); + list_del_init(&task->list); + sas_free_task(task); +} + +static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) +{ + int res = -ENOMEM; + struct sas_task *task; + struct domain_device *dev = qc->ap->private_data; + struct sas_ha_struct *sas_ha = dev->port->ha; + struct Scsi_Host *host = sas_ha->core.shost; + struct sas_internal *i = to_sas_internal(host->transportt); + struct scatterlist *sg; + unsigned int num = 0; + unsigned int xfer = 0; + + task = sas_alloc_task(GFP_ATOMIC); + if (!task) + goto out; + task->dev = dev; + task->task_proto = SAS_PROTOCOL_STP; + task->task_done = sas_ata_task_done; + + if (qc->tf.command == ATA_CMD_FPDMA_WRITE || + qc->tf.command == ATA_CMD_FPDMA_READ) { + /* Need to zero out the tag libata assigned us */ + qc->tf.nsect = 0; + } + + ata_tf_to_fis(&qc->tf, (u8*)&task->ata_task.fis, 0); + task->uldd_task = qc; + if (is_atapi_taskfile(&qc->tf)) { + memcpy(task->ata_task.atapi_packet, qc->cdb, qc->dev->cdb_len); + task->total_xfer_len = qc->nbytes + qc->pad_len; + task->num_scatter = qc->pad_len ? 
qc->n_elem + 1 : qc->n_elem; + } else { + ata_for_each_sg(sg, qc) { + num++; + xfer += sg->length; + } + + task->total_xfer_len = xfer; + task->num_scatter = num; + } + + task->data_dir = qc->dma_dir; + task->scatter = qc->__sg; + task->ata_task.retry_count = 1; + task->task_state_flags = SAS_TASK_STATE_PENDING; + + switch (qc->tf.protocol) { + case ATA_PROT_NCQ: + task->ata_task.use_ncq = 1; + /* fall through */ + case ATA_PROT_ATAPI_DMA: + case ATA_PROT_DMA: + task->ata_task.dma_xfer = 1; + break; + } + + if (sas_ha->lldd_max_execute_num < 2) + res = i->dft->lldd_execute_task(task, 1, GFP_ATOMIC); + else + res = sas_queue_up(task); + + /* Examine */ + if (res) { + SAS_DPRINTK("lldd_execute_task returned: %d\n", res); + + sas_free_task(task); + if (res == -SAS_QUEUE_FULL) + return -ENOMEM; + } + +out: + return res; +} + +static u8 sas_ata_check_status(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + return dev->sata_dev.tf.command; +} + +static void sas_ata_phy_reset(struct ata_port *ap) +{ + struct domain_device *dev = ap->private_data; + struct sas_internal *i = + to_sas_internal(dev->port->ha->core.shost->transportt); + int res = 0; + + if (i->dft->lldd_I_T_nexus_reset) + res = i->dft->lldd_I_T_nexus_reset(dev); + + if (res) + SAS_DPRINTK("%s: Unable to reset I T nexus?\n", __FUNCTION__); + + switch (dev->sata_dev.command_set) { + case ATA_COMMAND_SET: + SAS_DPRINTK("%s: Found ATA device.\n", __FUNCTION__); + ap->device[0].class = ATA_DEV_ATA; + break; + case ATAPI_COMMAND_SET: + SAS_DPRINTK("%s: Found ATAPI device.\n", __FUNCTION__); + ap->device[0].class = ATA_DEV_ATAPI; + break; + default: + SAS_DPRINTK("%s: Unknown SATA command set: %d.\n", + __FUNCTION__, + dev->sata_dev.command_set); + ap->device[0].class = ATA_DEV_ATA; + break; + } + + ap->cbl = ATA_CBL_SATA; +} + +static void sas_ata_post_internal(struct ata_queued_cmd *qc) +{ + if (qc->flags & ATA_QCFLAG_FAILED) + qc->err_mask |= AC_ERR_OTHER; + + if (qc->err_mask) + 
SAS_DPRINTK("%s: Failure; reset phy!\n", __FUNCTION__); +} + +static void sas_ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) +{ + struct domain_device *dev = ap->private_data; + memcpy(tf, &dev->sata_dev.tf, sizeof (*tf)); +} + +static void sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in, + u32 val) +{ + struct domain_device *dev = ap->private_data; + + SAS_DPRINTK("STUB %s\n", __FUNCTION__); + switch (sc_reg_in) { + case SCR_STATUS: + dev->sata_dev.sstatus = val; + break; + case SCR_CONTROL: + dev->sata_dev.scontrol = val; + break; + case SCR_ERROR: + dev->sata_dev.serror = val; + break; + case SCR_ACTIVE: + dev->sata_dev.ap->sactive = val; + break; + } +} + +static u32 sas_ata_scr_read(struct ata_port *ap, unsigned int sc_reg_in) +{ + struct domain_device *dev = ap->private_data; + + SAS_DPRINTK("STUB %s\n", __FUNCTION__); + switch (sc_reg_in) { + case SCR_STATUS: + return dev->sata_dev.sstatus; + case SCR_CONTROL: + return dev->sata_dev.scontrol; + case SCR_ERROR: + return dev->sata_dev.serror; + case SCR_ACTIVE: + return dev->sata_dev.ap->sactive; + default: + return 0xffffffffU; + } +} + +static struct ata_port_operations sas_sata_ops = { + .port_disable = ata_port_disable, + .check_status = sas_ata_check_status, + .check_altstatus = sas_ata_check_status, + .dev_select = ata_noop_dev_select, + .phy_reset = sas_ata_phy_reset, + .post_internal_cmd = sas_ata_post_internal, + .tf_read = sas_ata_tf_read, + .qc_prep = ata_noop_qc_prep, + .qc_issue = sas_ata_qc_issue, + .port_start = ata_sas_port_start, + .port_stop = ata_sas_port_stop, + .scr_read = sas_ata_scr_read, + .scr_write = sas_ata_scr_write +}; + +static struct ata_port_info sata_port_info = { + .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET | + ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, + .pio_mask = 0x1f, /* PIO0-4 */ + .mwdma_mask = 0x07, /* MWDMA0-2 */ + .udma_mask = ATA_UDMA6, + .port_ops = &sas_sata_ops +}; + +int sas_ata_init_host_and_port(struct 
domain_device *found_dev, + struct scsi_target *starget) +{ + struct Scsi_Host *shost = dev_to_shost(&starget->dev); + struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); + struct ata_port *ap; + + ata_host_init(&found_dev->sata_dev.ata_host, + &ha->pcidev->dev, + sata_port_info.flags, + &sas_sata_ops); + ap = ata_sas_port_alloc(&found_dev->sata_dev.ata_host, + &sata_port_info, + shost); + if (!ap) { + SAS_DPRINTK("ata_sas_port_alloc failed.\n"); + return -ENODEV; + } + + ap->private_data = found_dev; + ap->cbl = ATA_CBL_SATA; + ap->scsi_host = shost; + found_dev->sata_dev.ap = ap; + + return 0; +} diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 0dc7c02b383..dbc2a912114 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "../scsi_sas_internal.h" #include "../scsi_transport_api.h" #include "../scsi_priv.h" @@ -173,7 +174,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, return task; } -static int sas_queue_up(struct sas_task *task) +int sas_queue_up(struct sas_task *task) { struct sas_ha_struct *sas_ha = task->dev->port->ha; struct scsi_core *core = &sas_ha->core; @@ -193,11 +194,6 @@ static int sas_queue_up(struct sas_task *task) return 0; } -static inline int dev_is_sata(struct domain_device *dev) -{ - return (dev->rphy->identify.target_port_protocols & SAS_PROTOCOL_SATA); -} - /** * sas_queuecommand -- Enqueue a command for processing * @parameters: See SCSI Core documentation @@ -696,93 +692,6 @@ enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) return EH_NOT_HANDLED; } - -static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts) -{ - /* Cheesy attempt to translate SAS errors into ATA. Hah! 
*/ - - /* transport error */ - if (ts->resp == SAS_TASK_UNDELIVERED) - return AC_ERR_ATA_BUS; - - /* ts->resp == SAS_TASK_COMPLETE */ - /* task delivered, what happened afterwards? */ - switch (ts->stat) { - case SAS_DEV_NO_RESPONSE: - return AC_ERR_TIMEOUT; - - case SAS_INTERRUPTED: - case SAS_PHY_DOWN: - case SAS_NAK_R_ERR: - return AC_ERR_ATA_BUS; - - - case SAS_DATA_UNDERRUN: - /* - * Some programs that use the taskfile interface - * (smartctl in particular) can cause underrun - * problems. Ignore these errors, perhaps at our - * peril. - */ - return 0; - - case SAS_DATA_OVERRUN: - case SAS_QUEUE_FULL: - case SAS_DEVICE_UNKNOWN: - case SAS_SG_ERR: - return AC_ERR_INVALID; - - case SAM_CHECK_COND: - case SAS_OPEN_TO: - case SAS_OPEN_REJECT: - SAS_DPRINTK("%s: Saw error %d. What to do?\n", - __FUNCTION__, ts->stat); - return AC_ERR_OTHER; - - case SAS_ABORTED_TASK: - return AC_ERR_DEV; - - case SAS_PROTO_RESPONSE: - /* This means the ending_fis has the error - * value; return 0 here to collect it */ - return 0; - default: - return 0; - } -} - -static void sas_ata_task_done(struct sas_task *task) -{ - struct ata_queued_cmd *qc = task->uldd_task; - struct domain_device *dev = qc->ap->private_data; - struct task_status_struct *stat = &task->task_status; - struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf; - enum ata_completion_errors ac; - - if (stat->stat == SAS_PROTO_RESPONSE) { - ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf); - qc->err_mask |= ac_err_mask(dev->sata_dev.tf.command); - dev->sata_dev.sstatus = resp->sstatus; - dev->sata_dev.serror = resp->serror; - dev->sata_dev.scontrol = resp->scontrol; - dev->sata_dev.ap->sactive = resp->sactive; - } else if (stat->stat != SAM_STAT_GOOD) { - ac = sas_to_ata_err(stat); - if (ac) { - SAS_DPRINTK("%s: SAS error %x\n", __FUNCTION__, - stat->stat); - /* We saw a SAS error. Send a vague error. 
*/ - qc->err_mask = ac; - dev->sata_dev.tf.feature = 0x04; /* status err */ - dev->sata_dev.tf.command = ATA_ERR; - } - } - - ata_qc_complete(qc); - list_del_init(&task->list); - sas_free_task(task); -} - int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) { struct domain_device *dev = sdev_to_domain_dev(sdev); @@ -793,200 +702,6 @@ int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) return -EINVAL; } -static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) -{ - int res = -ENOMEM; - struct sas_task *task; - struct domain_device *dev = qc->ap->private_data; - struct sas_ha_struct *sas_ha = dev->port->ha; - struct Scsi_Host *host = sas_ha->core.shost; - struct sas_internal *i = to_sas_internal(host->transportt); - struct scatterlist *sg; - unsigned int num = 0; - unsigned int xfer = 0; - - task = sas_alloc_task(GFP_ATOMIC); - if (!task) - goto out; - task->dev = dev; - task->task_proto = SAS_PROTOCOL_STP; - task->task_done = sas_ata_task_done; - - if (qc->tf.command == ATA_CMD_FPDMA_WRITE || - qc->tf.command == ATA_CMD_FPDMA_READ) { - /* Need to zero out the tag libata assigned us */ - qc->tf.nsect = 0; - } - - ata_tf_to_fis(&qc->tf, (u8*)&task->ata_task.fis, 0); - task->uldd_task = qc; - if (is_atapi_taskfile(&qc->tf)) { - memcpy(task->ata_task.atapi_packet, qc->cdb, qc->dev->cdb_len); - task->total_xfer_len = qc->nbytes + qc->pad_len; - task->num_scatter = qc->pad_len ? 
qc->n_elem + 1 : qc->n_elem; - } else { - ata_for_each_sg(sg, qc) { - num++; - xfer += sg->length; - } - - task->total_xfer_len = xfer; - task->num_scatter = num; - } - - task->data_dir = qc->dma_dir; - task->scatter = qc->__sg; - task->ata_task.retry_count = 1; - task->task_state_flags = SAS_TASK_STATE_PENDING; - - switch (qc->tf.protocol) { - case ATA_PROT_NCQ: - task->ata_task.use_ncq = 1; - /* fall through */ - case ATA_PROT_ATAPI_DMA: - case ATA_PROT_DMA: - task->ata_task.dma_xfer = 1; - break; - } - - if (sas_ha->lldd_max_execute_num < 2) - res = i->dft->lldd_execute_task(task, 1, GFP_ATOMIC); - else - res = sas_queue_up(task); - - /* Examine */ - if (res) { - SAS_DPRINTK("lldd_execute_task returned: %d\n", res); - - sas_free_task(task); - if (res == -SAS_QUEUE_FULL) - return -ENOMEM; - } - -out: - return res; -} - -static u8 sas_ata_check_status(struct ata_port *ap) -{ - struct domain_device *dev = ap->private_data; - return dev->sata_dev.tf.command; -} - -static void sas_ata_phy_reset(struct ata_port *ap) -{ - struct domain_device *dev = ap->private_data; - struct sas_internal *i = - to_sas_internal(dev->port->ha->core.shost->transportt); - int res = 0; - - if (i->dft->lldd_I_T_nexus_reset) - res = i->dft->lldd_I_T_nexus_reset(dev); - - if (res) - SAS_DPRINTK("%s: Unable to reset I T nexus?\n", __FUNCTION__); - - switch (dev->sata_dev.command_set) { - case ATA_COMMAND_SET: - SAS_DPRINTK("%s: Found ATA device.\n", __FUNCTION__); - ap->device[0].class = ATA_DEV_ATA; - break; - case ATAPI_COMMAND_SET: - SAS_DPRINTK("%s: Found ATAPI device.\n", __FUNCTION__); - ap->device[0].class = ATA_DEV_ATAPI; - break; - default: - SAS_DPRINTK("%s: Unknown SATA command set: %d.\n", - __FUNCTION__, - dev->sata_dev.command_set); - ap->device[0].class = ATA_DEV_ATA; - break; - } - - ap->cbl = ATA_CBL_SATA; -} - -static void sas_ata_post_internal(struct ata_queued_cmd *qc) -{ - if (qc->flags & ATA_QCFLAG_FAILED) - qc->err_mask |= AC_ERR_OTHER; - - if (qc->err_mask) - 
SAS_DPRINTK("%s: Failure; reset phy!\n", __FUNCTION__); -} - -static void sas_ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) -{ - struct domain_device *dev = ap->private_data; - memcpy(tf, &dev->sata_dev.tf, sizeof (*tf)); -} - -static void sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in, - u32 val) -{ - struct domain_device *dev = ap->private_data; - - SAS_DPRINTK("STUB %s\n", __FUNCTION__); - switch (sc_reg_in) { - case SCR_STATUS: - dev->sata_dev.sstatus = val; - break; - case SCR_CONTROL: - dev->sata_dev.scontrol = val; - break; - case SCR_ERROR: - dev->sata_dev.serror = val; - break; - case SCR_ACTIVE: - dev->sata_dev.ap->sactive = val; - break; - } -} - -static u32 sas_ata_scr_read(struct ata_port *ap, unsigned int sc_reg_in) -{ - struct domain_device *dev = ap->private_data; - - SAS_DPRINTK("STUB %s\n", __FUNCTION__); - switch (sc_reg_in) { - case SCR_STATUS: - return dev->sata_dev.sstatus; - case SCR_CONTROL: - return dev->sata_dev.scontrol; - case SCR_ERROR: - return dev->sata_dev.serror; - case SCR_ACTIVE: - return dev->sata_dev.ap->sactive; - default: - return 0xffffffffU; - } -} - -static struct ata_port_operations sas_sata_ops = { - .port_disable = ata_port_disable, - .check_status = sas_ata_check_status, - .check_altstatus = sas_ata_check_status, - .dev_select = ata_noop_dev_select, - .phy_reset = sas_ata_phy_reset, - .post_internal_cmd = sas_ata_post_internal, - .tf_read = sas_ata_tf_read, - .qc_prep = ata_noop_qc_prep, - .qc_issue = sas_ata_qc_issue, - .port_start = ata_sas_port_start, - .port_stop = ata_sas_port_stop, - .scr_read = sas_ata_scr_read, - .scr_write = sas_ata_scr_write -}; - -static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET | - ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, - .pio_mask = 0x1f, /* PIO0-4 */ - .mwdma_mask = 0x07, /* MWDMA0-2 */ - .udma_mask = ATA_UDMA6, - .port_ops = &sas_sata_ops -}; - struct domain_device 
*sas_find_dev_by_rphy(struct sas_rphy *rphy) { struct Scsi_Host *shost = dev_to_shost(rphy->dev.parent); @@ -1025,32 +740,16 @@ static inline struct domain_device *sas_find_target(struct scsi_target *starget) int sas_target_alloc(struct scsi_target *starget) { - struct Scsi_Host *shost = dev_to_shost(&starget->dev); - struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost); struct domain_device *found_dev = sas_find_target(starget); + int res; if (!found_dev) return -ENODEV; if (dev_is_sata(found_dev)) { - struct ata_port *ap; - - ata_host_init(&found_dev->sata_dev.ata_host, - &ha->pcidev->dev, - sata_port_info.flags, - &sas_sata_ops); - ap = ata_sas_port_alloc(&found_dev->sata_dev.ata_host, - &sata_port_info, - shost); - if (!ap) { - SAS_DPRINTK("ata_sas_port_alloc failed.\n"); - return -ENODEV; - } - - ap->private_data = found_dev; - ap->cbl = ATA_CBL_SATA; - ap->scsi_host = shost; - found_dev->sata_dev.ap = ap; + res = sas_ata_init_host_and_port(found_dev, starget); + if (res) + return res; } starget->hostdata = found_dev; diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index ce20177069a..9275a46bf2e 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -632,6 +632,7 @@ int sas_set_phy_speed(struct sas_phy *phy, struct sas_phy_linkrates *rates); int sas_phy_enable(struct sas_phy *phy, int enabled); int sas_phy_reset(struct sas_phy *phy, int hard_reset); +int sas_queue_up(struct sas_task *task); extern int sas_queuecommand(struct scsi_cmnd *, void (*scsi_done)(struct scsi_cmnd *)); extern int sas_target_alloc(struct scsi_target *); diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h new file mode 100644 index 00000000000..72a1904169a --- /dev/null +++ b/include/scsi/sas_ata.h @@ -0,0 +1,39 @@ +/* + * Support for SATA devices on Serial Attached SCSI (SAS) controllers + * + * Copyright (C) 2006 IBM Corporation + * + * Written by: Darrick J. 
Wong , IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#ifndef _SAS_ATA_H_ +#define _SAS_ATA_H_ + +#include +#include + +static inline int dev_is_sata(struct domain_device *dev) +{ + return (dev->rphy->identify.target_port_protocols & SAS_PROTOCOL_SATA); +} + +int sas_ata_init_host_and_port(struct domain_device *found_dev, + struct scsi_target *starget); + +#endif /* _SAS_ATA_H_ */ -- cgit v1.2.3-70-g09d2 From 1c50dc83f9ca752b1e1b985f1ce33d2695103ffa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Jan 2007 01:18:41 -0800 Subject: [SCSI] sas_ata: ata_post_internal should abort the sas_task This patch adds a new field, lldd_task, to ata_queued_cmd so that libata users such as libsas can associate some data with a qc. The particular ambition with this patch is to associate a sas_task with a qc; that way, if libata decides to timeout a command, we can come back (in sas_ata_post_internal) and abort the sas task. One question remains: Is it necessary to reset the phy on error, or will the libata error handler take care of it? (Assuming that one is written, of course.) This patch, as it is today, works well enough to clean things up when an ATA device probe attempt fails halfway through the probe, though I'm not sure this is always the right thing to do. 
Signed-off-by: Darrick J. Wong Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_ata.c | 30 +++++++++++++++++++++++++++--- include/linux/libata.h | 1 + 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 46e1dbe1b84..c8af884abe1 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -88,12 +88,17 @@ static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts) static void sas_ata_task_done(struct sas_task *task) { struct ata_queued_cmd *qc = task->uldd_task; - struct domain_device *dev = qc->ap->private_data; + struct domain_device *dev; struct task_status_struct *stat = &task->task_status; struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf; enum ata_completion_errors ac; unsigned long flags; + if (!qc) + goto qc_already_gone; + + dev = qc->ap->private_data; + spin_lock_irqsave(dev->sata_dev.ap->lock, flags); if (stat->stat == SAS_PROTO_RESPONSE) { ata_tf_from_fis(resp->ending_fis, &dev->sata_dev.tf); @@ -114,9 +119,11 @@ static void sas_ata_task_done(struct sas_task *task) } } + qc->lldd_task = NULL; ata_qc_complete(qc); spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags); +qc_already_gone: list_del_init(&task->list); sas_free_task(task); } @@ -166,6 +173,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->scatter = qc->__sg; task->ata_task.retry_count = 1; task->task_state_flags = SAS_TASK_STATE_PENDING; + qc->lldd_task = task; switch (qc->tf.protocol) { case ATA_PROT_NCQ: @@ -237,8 +245,24 @@ static void sas_ata_post_internal(struct ata_queued_cmd *qc) if (qc->flags & ATA_QCFLAG_FAILED) qc->err_mask |= AC_ERR_OTHER; - if (qc->err_mask) - SAS_DPRINTK("%s: Failure; reset phy!\n", __FUNCTION__); + if (qc->err_mask) { + /* + * Find the sas_task and kill it. By this point, + * libata has decided to kill the qc, so we needn't + * bother with sas_ata_task_done. 
But we still + * ought to abort the task. + */ + struct sas_task *task = qc->lldd_task; + struct domain_device *dev = qc->ap->private_data; + + qc->lldd_task = NULL; + if (task) { + task->uldd_task = NULL; + __sas_task_abort(task); + } + + sas_phy_reset(dev->port->phy, 1); + } } static void sas_ata_tf_read(struct ata_port *ap, struct ata_taskfile *tf) diff --git a/include/linux/libata.h b/include/linux/libata.h index 47cd2a1c554..4abb758a045 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -411,6 +411,7 @@ struct ata_queued_cmd { ata_qc_cb_t complete_fn; void *private_data; + void *lldd_task; }; struct ata_port_stats { -- cgit v1.2.3-70-g09d2 From 3a2755af37b317d47fdc3dd15178adaf5d47263e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 30 Jan 2007 01:18:58 -0800 Subject: [SCSI] sas_ata: Implement sas_task_abort for ATA devices ATA devices need special handling for sas_task_abort. If the ATA command came from SCSI, then we merely need to tell SCSI to abort the scsi_cmnd. However, internal commands require a bit more work--we need to fill the qc with the appropriate error status and complete the command, and eventually post_internal will issue the actual ABORT TASK. 
Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_ata.c | 47 ++++++++++++++++++++++++++++++++++--- drivers/scsi/libsas/sas_internal.h | 3 +++ drivers/scsi/libsas/sas_scsi_host.c | 8 ++++--- include/scsi/sas_ata.h | 2 ++ 4 files changed, 54 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index b6535b073bf..2db25899675 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -30,6 +30,8 @@ #include #include #include "../scsi_sas_internal.h" +#include "../scsi_transport_api.h" +#include static enum ata_completion_errors sas_to_ata_err(struct task_status_struct *ts) { @@ -91,6 +93,7 @@ static void sas_ata_task_done(struct sas_task *task) struct domain_device *dev; struct task_status_struct *stat = &task->task_status; struct ata_task_resp *resp = (struct ata_task_resp *)stat->buf; + struct sas_ha_struct *sas_ha; enum ata_completion_errors ac; unsigned long flags; @@ -98,6 +101,7 @@ static void sas_ata_task_done(struct sas_task *task) goto qc_already_gone; dev = qc->ap->private_data; + sas_ha = dev->port->ha; spin_lock_irqsave(dev->sata_dev.ap->lock, flags); if (stat->stat == SAS_PROTO_RESPONSE || stat->stat == SAM_GOOD) { @@ -124,6 +128,20 @@ static void sas_ata_task_done(struct sas_task *task) ata_qc_complete(qc); spin_unlock_irqrestore(dev->sata_dev.ap->lock, flags); + /* + * If the sas_task has an ata qc, a scsi_cmnd and the aborted + * flag is set, then we must have come in via the libsas EH + * functions. When we exit this function, we need to put the + * scsi_cmnd on the list of finished errors. The ata_qc_complete + * call cleans up the libata side of things but we're protected + * from the scsi_cmnd going away because the scsi_cmnd is owned + * by the EH, making libata's call to scsi_done a NOP. 
+ */ + spin_lock_irqsave(&task->task_state_lock, flags); + if (qc->scsicmd && task->task_state_flags & SAS_TASK_STATE_ABORTED) + scsi_eh_finish_cmd(qc->scsicmd, &sas_ha->eh_done_q); + spin_unlock_irqrestore(&task->task_state_lock, flags); + qc_already_gone: list_del_init(&task->list); sas_free_task(task); @@ -259,15 +277,18 @@ static void sas_ata_post_internal(struct ata_queued_cmd *qc) * ought to abort the task. */ struct sas_task *task = qc->lldd_task; - struct domain_device *dev = qc->ap->private_data; + unsigned long flags; qc->lldd_task = NULL; if (task) { + /* Should this be a AT(API) device reset? */ + spin_lock_irqsave(&task->task_state_lock, flags); + task->task_state_flags |= SAS_TASK_NEED_DEV_RESET; + spin_unlock_irqrestore(&task->task_state_lock, flags); + task->uldd_task = NULL; __sas_task_abort(task); } - - sas_phy_reset(dev->port->phy, 1); } } @@ -369,3 +390,23 @@ int sas_ata_init_host_and_port(struct domain_device *found_dev, return 0; } + +void sas_ata_task_abort(struct sas_task *task) +{ + struct ata_queued_cmd *qc = task->uldd_task; + struct completion *waiting; + + /* Bounce SCSI-initiated commands to the SCSI EH */ + if (qc->scsicmd) { + scsi_req_abort_cmd(qc->scsicmd); + scsi_schedule_eh(qc->scsicmd->device->host); + return; + } + + /* Internal command, fake a timeout and complete. */ + qc->flags &= ~ATA_QCFLAG_ACTIVE; + qc->flags |= ATA_QCFLAG_FAILED; + qc->err_mask |= AC_ERR_TIMEOUT; + waiting = qc->private_data; + complete(waiting); +} diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index a78638df201..2b8213b1832 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -39,6 +39,9 @@ #define SAS_DPRINTK(fmt, ...) 
#endif +#define TO_SAS_TASK(_scsi_cmd) ((void *)(_scsi_cmd)->host_scribble) +#define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0) + void sas_scsi_recover_host(struct Scsi_Host *shost); int sas_show_class(enum sas_class class, char *buf); diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index ba5c91b8131..7663841eb4c 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -47,9 +47,6 @@ /* ---------- SCSI Host glue ---------- */ -#define TO_SAS_TASK(_scsi_cmd) ((void *)(_scsi_cmd)->host_scribble) -#define ASSIGN_SAS_TASK(_sc, _t) do { (_sc)->host_scribble = (void *) _t; } while (0) - static void sas_scsi_task_done(struct sas_task *task) { struct task_status_struct *ts = &task->task_status; @@ -1018,6 +1015,11 @@ void sas_task_abort(struct sas_task *task) return; } + if (dev_is_sata(task->dev)) { + sas_ata_task_abort(task); + return; + } + scsi_req_abort_cmd(sc); scsi_schedule_eh(sc->device->host); } diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h index 72a1904169a..3407c819522 100644 --- a/include/scsi/sas_ata.h +++ b/include/scsi/sas_ata.h @@ -36,4 +36,6 @@ static inline int dev_is_sata(struct domain_device *dev) int sas_ata_init_host_and_port(struct domain_device *found_dev, struct scsi_target *starget); +void sas_ata_task_abort(struct sas_task *task); + #endif /* _SAS_ATA_H_ */ -- cgit v1.2.3-70-g09d2 From 0f05df8b3b41bc258bdf520b72e8cf7c524048b7 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 16 Jul 2007 13:41:04 -0500 Subject: [SCSI] libsas, aic94xx: fix dma mapping cockups with ATA This one was noticed by Gilbert Wu of Adaptec: The libata core actually does the DMA mapping for you, so there has to be an exception in the device drivers that *don't* do dma mapping for ATA commands. However, since we've already done this, libsas must now dma map any ATA commands that it wishes to issue ... and yes, this is a horrible mess. 
Additionally, the test in aic94xx for ATA protocols isn't quite right. Signed-off-by: James Bottomley --- drivers/scsi/aic94xx/aic94xx_task.c | 4 ++-- drivers/scsi/libsas/sas_discover.c | 14 +++++++++++++- include/scsi/scsi_transport_sas.h | 6 ++++++ 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c index 6c12c0f19f3..d5d8caba356 100644 --- a/drivers/scsi/aic94xx/aic94xx_task.c +++ b/drivers/scsi/aic94xx/aic94xx_task.c @@ -76,7 +76,7 @@ static inline int asd_map_scatterlist(struct sas_task *task, /* STP tasks come from libata which has already mapped * the SG list */ - if (task->task_proto == SAS_PROTOCOL_STP) + if (sas_protocol_ata(task->task_proto)) num_sg = task->num_scatter; else num_sg = pci_map_sg(asd_ha->pcidev, task->scatter, @@ -125,7 +125,7 @@ static inline int asd_map_scatterlist(struct sas_task *task, return 0; err_unmap: - if (task->task_proto != SAS_PROTOCOL_STP) + if (sas_protocol_ata(task->task_proto)) pci_unmap_sg(asd_ha->pcidev, task->scatter, task->num_scatter, task->data_dir); return res; diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index a18c0f6d666..4d768db1b56 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -110,6 +110,13 @@ static int sas_execute_task(struct sas_task *task, void *buffer, int size, task->total_xfer_len = size; task->data_dir = pci_dma_dir; task->task_done = sas_disc_task_done; + if (pci_dma_dir != PCI_DMA_NONE && + sas_protocol_ata(task->task_proto)) { + task->num_scatter = pci_map_sg(task->dev->port->ha->pcidev, + task->scatter, + task->num_scatter, + task->data_dir); + } for (retries = 0; retries < 5; retries++) { task->task_state_flags = SAS_TASK_STATE_PENDING; @@ -192,8 +199,13 @@ static int sas_execute_task(struct sas_task *task, void *buffer, int size, } } ex_err: - if (pci_dma_dir != PCI_DMA_NONE) + if (pci_dma_dir != 
PCI_DMA_NONE) { + if (sas_protocol_ata(task->task_proto)) + pci_unmap_sg(task->dev->port->ha->pcidev, + task->scatter, task->num_scatter, + task->data_dir); kfree(scatter); + } out: return res; } diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 9aedc19820b..97eeb5b59ea 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -23,6 +23,12 @@ enum sas_protocol { SAS_PROTOCOL_SSP = 0x08, }; +static inline int sas_protocol_ata(enum sas_protocol proto) +{ + return ((proto & SAS_PROTOCOL_SATA) || + (proto & SAS_PROTOCOL_STP))? 1 : 0; +} + enum sas_linkrate { /* These Values are defined in the SAS standard */ SAS_LINK_RATE_UNKNOWN = 0, -- cgit v1.2.3-70-g09d2 From 0c8db6beb81a07147f64cffd33bd43b9e96f4f40 Mon Sep 17 00:00:00 2001 From: "Prakash, Sathya" Date: Tue, 17 Jul 2007 13:40:10 +0530 Subject: [SCSI] add PCI_VENDOR_ID macro for Brocade in pci_ids.h Adds the PCI_VENDOR_ID_BROCADE macro to include/linux/pci_ids.h. This macro is used in the MPT Fusion FC drivers to support Brocade-branded FC controllers. Signed-off-by: Sathya Prakash Signed-off-by: James Bottomley --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2c7add16953..13d36bb01a4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2017,6 +2017,8 @@ #define PCI_VENDOR_ID_ARIMA 0x161f +#define PCI_VENDOR_ID_BROCADE 0x1657 + #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 #define PCI_DEVICE_ID_BCM1250_HT 0x0002 -- cgit v1.2.3-70-g09d2 From 7aa68e80bd481faae1234bc2a7e4bcc9348f98b4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 9 Jul 2007 12:52:06 +0900 Subject: [SCSI] transport_sas: add SAS management protocol support The sas transport class attaches one bsg device to every SAS object (host, device, expander, etc).
LLDs can define a function to handle SMP requests via sas_function_template::smp_handler. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 2 +- drivers/scsi/scsi_transport_sas.c | 85 +++++++++++++++++++++++++++++++++++++++ include/scsi/scsi_transport_sas.h | 3 +- 3 files changed, 88 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 07a69117cae..bebe43e2cc3 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -282,7 +282,7 @@ config SCSI_ISCSI_ATTRS config SCSI_SAS_ATTRS tristate "SAS Transport Attributes" - depends on SCSI + depends on SCSI && BLK_DEV_BSG help If you wish to export transport-specific information about each attached SAS device to sysfs, say Y. diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index b2ef71a8629..2871fd05fcf 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include @@ -152,6 +154,76 @@ static struct { sas_bitfield_name_search(linkspeed, sas_linkspeed_names) sas_bitfield_name_set(linkspeed, sas_linkspeed_names) +static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost, + struct sas_rphy *rphy) +{ + struct request *req; + int ret; + int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); + + while (!blk_queue_plugged(q)) { + req = elv_next_request(q); + if (!req) + break; + + blkdev_dequeue_request(req); + + spin_unlock_irq(q->queue_lock); + + handler = to_sas_internal(shost->transportt)->f->smp_handler; + ret = handler(shost, rphy, req); + + spin_lock_irq(q->queue_lock); + + req->end_io(req, ret); + } +} + +static void sas_host_smp_request(struct request_queue *q) +{ + sas_smp_request(q, (struct Scsi_Host *)q->queuedata, NULL); +} + +static void sas_non_host_smp_request(struct request_queue *q) +{ + struct sas_rphy *rphy = q->queuedata; + 
sas_smp_request(q, rphy_to_shost(rphy), rphy); +} + +static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy, + char *name) +{ + struct request_queue *q; + int error; + + if (!to_sas_internal(shost->transportt)->f->smp_handler) { + printk("%s can't handle SMP requests\n", shost->hostt->name); + return 0; + } + + if (rphy) + q = blk_init_queue(sas_non_host_smp_request, NULL); + else + q = blk_init_queue(sas_host_smp_request, NULL); + if (!q) + return -ENOMEM; + + error = bsg_register_queue(q, name); + if (error) { + blk_cleanup_queue(q); + return -ENOMEM; + } + + if (rphy) + q->queuedata = rphy; + else + q->queuedata = shost; + + set_bit(QUEUE_FLAG_BIDI, &q->queue_flags); + + return 0; +} + /* * SAS host attributes */ @@ -161,12 +233,19 @@ static int sas_host_setup(struct transport_container *tc, struct device *dev, { struct Scsi_Host *shost = dev_to_shost(dev); struct sas_host_attrs *sas_host = to_sas_host_attrs(shost); + char name[BUS_ID_SIZE]; INIT_LIST_HEAD(&sas_host->rphy_list); mutex_init(&sas_host->lock); sas_host->next_target_id = 0; sas_host->next_expander_id = 0; sas_host->next_port_id = 0; + + snprintf(name, sizeof(name), "sas_host%d", shost->host_no); + if (sas_bsg_initialize(shost, NULL, name)) + dev_printk(KERN_ERR, dev, "fail to a bsg device %d\n", + shost->host_no); + return 0; } @@ -1221,6 +1300,9 @@ struct sas_rphy *sas_end_device_alloc(struct sas_port *parent) sas_rphy_initialize(&rdev->rphy); transport_setup_device(&rdev->rphy.dev); + if (sas_bsg_initialize(shost, &rdev->rphy, rdev->rphy.dev.bus_id)) + printk("fail to a bsg device %s\n", rdev->rphy.dev.bus_id); + return &rdev->rphy; } EXPORT_SYMBOL(sas_end_device_alloc); @@ -1260,6 +1342,9 @@ struct sas_rphy *sas_expander_alloc(struct sas_port *parent, sas_rphy_initialize(&rdev->rphy); transport_setup_device(&rdev->rphy.dev); + if (sas_bsg_initialize(shost, &rdev->rphy, rdev->rphy.dev.bus_id)) + printk("fail to a bsg device %s\n", rdev->rphy.dev.bus_id); + return &rdev->rphy; 
} EXPORT_SYMBOL(sas_expander_alloc); diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 97eeb5b59ea..af304fb9d97 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -7,7 +7,7 @@ struct scsi_transport_template; struct sas_rphy; - +struct request; enum sas_device_type { SAS_PHY_UNUSED, @@ -172,6 +172,7 @@ struct sas_function_template { int (*phy_reset)(struct sas_phy *, int); int (*phy_enable)(struct sas_phy *, int); int (*set_phy_speed)(struct sas_phy *, struct sas_phy_linkrates *); + int (*smp_handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); }; -- cgit v1.2.3-70-g09d2 From ba1fc175cc6c0af7e78241e50160344f0f198282 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 9 Jul 2007 12:52:08 +0900 Subject: [SCSI] libsas: add SAS management protocol handler This patch adds support for SAS Management Protocol (SMP) passthrough support via bsg. aic94xx can use this. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_expander.c | 48 ++++++++++++++++++++++++++++++++++++++ drivers/scsi/libsas/sas_init.c | 1 + include/scsi/libsas.h | 2 ++ 3 files changed, 51 insertions(+) (limited to 'include') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 969fd3eb494..a81195354b9 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -23,6 +23,7 @@ */ #include +#include #include "sas_internal.h" @@ -1972,3 +1973,50 @@ out: return res; } #endif + +int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, + struct request *req) +{ + struct domain_device *dev; + int ret, type = rphy->identify.device_type; + struct request *rsp = req->next_rq; + + if (!rsp) { + printk("%s: space for a smp response is missing\n", + __FUNCTION__); + return -EINVAL; + } + + /* seems aic94xx doesn't support */ + if (!rphy) { + printk("%s: can we send a smp request to a host?\n", + 
__FUNCTION__); + return -EINVAL; + } + + if (type != SAS_EDGE_EXPANDER_DEVICE && + type != SAS_FANOUT_EXPANDER_DEVICE) { + printk("%s: can we send a smp request to a device?\n", + __FUNCTION__); + return -EINVAL; + } + + dev = sas_find_dev_by_rphy(rphy); + if (!dev) { + printk("%s: fail to find a domain_device?\n", __FUNCTION__); + return -EINVAL; + } + + /* do we need to support multiple segments? */ + if (req->bio->bi_vcnt > 1 || rsp->bio->bi_vcnt > 1) { + printk("%s: multiple segments req %u %u, rsp %u %u\n", + __FUNCTION__, req->bio->bi_vcnt, req->data_len, + rsp->bio->bi_vcnt, rsp->data_len); + return -EINVAL; + } + + ret = smp_execute_task(dev, bio_data(req->bio), req->data_len, + bio_data(rsp->bio), rsp->data_len); + + return ret; +} diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index 965698c8b7b..98360272f40 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -259,6 +259,7 @@ static struct sas_function_template sft = { .phy_reset = sas_phy_reset, .set_phy_speed = sas_set_phy_speed, .get_linkerrors = sas_get_linkerrors, + .smp_handler = sas_smp_handler, }; struct scsi_transport_template * diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 9275a46bf2e..df36461fe88 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -674,4 +674,6 @@ extern void sas_target_destroy(struct scsi_target *); extern int sas_slave_alloc(struct scsi_device *); extern int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg); +extern int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, + struct request *req); #endif /* _SASLIB_H_ */ -- cgit v1.2.3-70-g09d2 From 8573a9e6a8ed724b7e3074dc8762d4117ed0b3aa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 8 Apr 2007 01:09:11 -0300 Subject: V4L/DVB (5563a): Add experimental support for tea5761 tuner This driver was made based on the TEA5761 specs.
Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/CARDLIST.tuner | 1 + drivers/media/video/Kconfig | 8 ++ drivers/media/video/Makefile | 4 + drivers/media/video/tea5761.c | 239 +++++++++++++++++++++++++++++++ drivers/media/video/tuner-core.c | 26 ++++ drivers/media/video/tuner-types.c | 4 + include/media/tuner.h | 8 +- 7 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 drivers/media/video/tea5761.c (limited to 'include') diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner index 44134f04b82..9b02dbb2a75 100644 --- a/Documentation/video4linux/CARDLIST.tuner +++ b/Documentation/video4linux/CARDLIST.tuner @@ -72,3 +72,4 @@ tuner=70 - Samsung TCPN 2121P30A tuner=71 - Xceive xc3028 tuner=72 - Thomson FE6600 tuner=73 - Samsung TCPG 6121P30A +tuner=75 - Philips TEA5761 FM Radio diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 4d45a40016d..bb072ab5f09 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -489,6 +489,14 @@ config TUNER_3036 Say Y here to include support for Philips SAB3036 compatible tuners. If in doubt, say N. +config TUNER_TEA5761 + tristate "TEA 5761 radio tuner (EXPERIMENTAL)" + depends on I2C + select VIDEO_TUNER + help + Say Y here to include support for Philips TEA5761 radio tuner. + If in doubt, say N. 
+ config VIDEO_VINO tristate "SGI Vino Video For Linux (EXPERIMENTAL)" depends on I2C && SGI_IP22 && EXPERIMENTAL && VIDEO_V4L2 diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index 9c2de501612..3202e872914 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -7,6 +7,10 @@ zr36067-objs := zoran_procfs.o zoran_device.o \ tuner-objs := tuner-core.o tuner-types.o tuner-simple.o \ mt20xx.o tda8290.o tea5767.o tda9887.o +ifneq ($(CONFIG_TUNER_TEA5761),n) + tuner-objs += tea5761.o +endif + msp3400-objs := msp3400-driver.o msp3400-kthreads.o obj-$(CONFIG_VIDEO_DEV) += videodev.o v4l2-common.o compat_ioctl32.o diff --git a/drivers/media/video/tea5761.c b/drivers/media/video/tea5761.c new file mode 100644 index 00000000000..4d2d3ef0860 --- /dev/null +++ b/drivers/media/video/tea5761.c @@ -0,0 +1,239 @@ +/* + * For Philips TEA5761 FM Chip + * I2C address is allways 0x20 (0x10 at 7-bit mode). + * + * Copyright (c) 2005-2007 Mauro Carvalho Chehab (mchehab@infradead.org) + * This code is placed under the terms of the GNUv2 General Public License + * + */ + +#include +#include +#include +#include +#include + +#define PREFIX "TEA5761 " + +/* from tuner-core.c */ +extern int tuner_debug; + +/*****************************************************************************/ + +/*************************** + * TEA5761HN I2C registers * + ***************************/ + +/* INTREG - Read: bytes 0 and 1 / Write: byte 0 */ + + /* first byte for reading */ +#define TEA5761_INTREG_IFFLAG 0x10 +#define TEA5761_INTREG_LEVFLAG 0x8 +#define TEA5761_INTREG_FRRFLAG 0x2 +#define TEA5761_INTREG_BLFLAG 0x1 + + /* second byte for reading / byte for writing */ +#define TEA5761_INTREG_IFMSK 0x10 +#define TEA5761_INTREG_LEVMSK 0x8 +#define TEA5761_INTREG_FRMSK 0x2 +#define TEA5761_INTREG_BLMSK 0x1 + +/* FRQSET - Read: bytes 2 and 3 / Write: byte 1 and 2 */ + + /* First byte */ +#define TEA5761_FRQSET_SEARCH_UP 0x80 /* 1=Station search from botton to up 
*/ +#define TEA5761_FRQSET_SEARCH_MODE 0x40 /* 1=Search mode */ + + /* Bits 0-5 for divider MSB */ + + /* Second byte */ + /* Bits 0-7 for divider LSB */ + +/* TNCTRL - Read: bytes 4 and 5 / Write: Bytes 3 and 4 */ + + /* first byte */ + +#define TEA5761_TNCTRL_PUPD_0 0x40 /* Power UP/Power Down MSB */ +#define TEA5761_TNCTRL_BLIM 0X20 /* 1= Japan Frequencies, 0= European frequencies */ +#define TEA5761_TNCTRL_SWPM 0x10 /* 1= software port is FRRFLAG */ +#define TEA5761_TNCTRL_IFCTC 0x08 /* 1= IF count time 15.02 ms, 0= IF count time 2.02 ms */ +#define TEA5761_TNCTRL_AFM 0x04 +#define TEA5761_TNCTRL_SMUTE 0x02 /* 1= Soft mute */ +#define TEA5761_TNCTRL_SNC 0x01 + + /* second byte */ + +#define TEA5761_TNCTRL_MU 0x80 /* 1=Hard mute */ +#define TEA5761_TNCTRL_SSL_1 0x40 +#define TEA5761_TNCTRL_SSL_0 0x20 +#define TEA5761_TNCTRL_HLSI 0x10 +#define TEA5761_TNCTRL_MST 0x08 /* 1 = mono */ +#define TEA5761_TNCTRL_SWP 0x04 +#define TEA5761_TNCTRL_DTC 0x02 /* 1 = deemphasis 50 us, 0 = deemphasis 75 us */ +#define TEA5761_TNCTRL_AHLSI 0x01 + +/* FRQCHECK - Read: bytes 6 and 7 */ + /* First byte */ + + /* Bits 0-5 for divider MSB */ + + /* Second byte */ + /* Bits 0-7 for divider LSB */ + +/* TUNCHECK - Read: bytes 8 and 9 */ + + /* First byte */ +#define TEA5761_TUNCHECK_IF_MASK 0x7e /* IF count */ +#define TEA5761_TUNCHECK_TUNTO 0x01 + + /* Second byte */ +#define TEA5761_TUNCHECK_LEV_MASK 0xf0 /* Level Count */ +#define TEA5761_TUNCHECK_LD 0x08 +#define TEA5761_TUNCHECK_STEREO 0x04 + +/* TESTREG - Read: bytes 10 and 11 / Write: bytes 5 and 6 */ + + /* All zero = no test mode */ + +/* MANID - Read: bytes 12 and 13 */ + + /* First byte - should be 0x10 */ +#define TEA5767_MANID_VERSION_MASK 0xf0 /* Version = 1 */ +#define TEA5767_MANID_ID_MSB_MASK 0x0f /* Manufacurer ID - should be 0 */ + + /* Second byte - Should be 0x2b */ + +#define TEA5767_MANID_ID_LSB_MASK 0xfe /* Manufacturer ID - should be 0x15 */ +#define TEA5767_MANID_IDAV 0x01 /* 1 = Chip has ID, 0 = Chip has no 
ID */ + +/* Chip ID - Read: bytes 14 and 15 */ + + /* First byte - should be 0x57 */ + + /* Second byte - should be 0x61 */ + +/*****************************************************************************/ + +static void set_tv_freq(struct i2c_client *c, unsigned int freq) +{ + struct tuner *t = i2c_get_clientdata(c); + + tuner_warn("This tuner doesn't support TV freq.\n"); +} + +#define FREQ_OFFSET 0 /* for TEA5767, it is 700 to give the right freq */ +static void tea5761_status_dump(unsigned char *buffer) +{ + unsigned int div, frq; + + div = ((buffer[2] & 0x3f) << 8) | buffer[3]; + + frq = 1000 * (div * 32768 / 1000 + FREQ_OFFSET + 225) / 4; /* Freq in KHz */ + + printk(PREFIX "Frequency %d.%03d KHz (divider = 0x%04x)\n", + frq / 1000, frq % 1000, div); +} + +/* Freq should be specifyed at 62.5 Hz */ +static void set_radio_freq(struct i2c_client *c, unsigned int frq) +{ + struct tuner *t = i2c_get_clientdata(c); + unsigned char buffer[7] = {0, 0, 0, 0, 0, 0, 0 }; + unsigned div; + int rc; + + tuner_dbg (PREFIX "radio freq counter %d\n", frq); + + if (t->mode == T_STANDBY) { + tuner_dbg("TEA5761 set to standby mode\n"); + buffer[5] |= TEA5761_TNCTRL_MU; + } else { + buffer[4] |= TEA5761_TNCTRL_PUPD_0; + } + + + if (t->audmode == V4L2_TUNER_MODE_MONO) { + tuner_dbg("TEA5761 set to mono\n"); + buffer[5] |= TEA5761_TNCTRL_MST; +; + } else { + tuner_dbg("TEA5761 set to stereo\n"); + } + + div = (1000 * (frq * 4 / 16 + 700 + 225) ) >> 15; + buffer[1] = (div >> 8) & 0x3f; + buffer[2] = div & 0xff; + + if (tuner_debug) + tea5761_status_dump(buffer); + + if (7 != (rc = i2c_master_send(c, buffer, 7))) + tuner_warn("i2c i/o error: rc == %d (should be 5)\n", rc); +} + +static int tea5761_signal(struct i2c_client *c) +{ + unsigned char buffer[16]; + int rc; + struct tuner *t = i2c_get_clientdata(c); + + memset(buffer, 0, sizeof(buffer)); + if (16 != (rc = i2c_master_recv(c, buffer, 16))) + tuner_warn("i2c i/o error: rc == %d (should be 5)\n", rc); + + return ((buffer[9] & 
TEA5761_TUNCHECK_LEV_MASK) << (13 - 4)); +} + +static int tea5761_stereo(struct i2c_client *c) +{ + unsigned char buffer[16]; + int rc; + struct tuner *t = i2c_get_clientdata(c); + + memset(buffer, 0, sizeof(buffer)); + if (16 != (rc = i2c_master_recv(c, buffer, 16))) + tuner_warn("i2c i/o error: rc == %d (should be 5)\n", rc); + + rc = buffer[9] & TEA5761_TUNCHECK_STEREO; + + tuner_dbg("TEA5761 radio ST GET = %02x\n", rc); + + return (rc ? V4L2_TUNER_SUB_STEREO : 0); +} + +int tea5761_autodetection(struct i2c_client *c) +{ + unsigned char buffer[16]; + int rc; + struct tuner *t = i2c_get_clientdata(c); + + if (16 != (rc = i2c_master_recv(c, buffer, 16))) { + tuner_warn("it is not a TEA5761. Received %i chars.\n", rc); + return EINVAL; + } + + if (!((buffer[13] != 0x2b) || (buffer[14] != 0x57) || (buffer[15] != 0x061))) { + tuner_warn("Manufacturer ID= 0x%02x, Chip ID = %02x%02x. It is not a TEA5761\n",buffer[13],buffer[14],buffer[15]); + return EINVAL; + } + tuner_warn("TEA5761 detected.\n"); + return 0; +} + +int tea5761_tuner_init(struct i2c_client *c) +{ + struct tuner *t = i2c_get_clientdata(c); + + if (tea5761_autodetection(c) == EINVAL) + return EINVAL; + + tuner_info("type set to %d (%s)\n", t->type, "Philips TEA5761HN FM Radio"); + strlcpy(c->name, "tea5761", sizeof(c->name)); + + t->set_tv_freq = set_tv_freq; + t->set_radio_freq = set_radio_freq; + t->has_signal = tea5761_signal; + t->is_stereo = tea5761_stereo; + + return (0); +} diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 505591a7abe..591ca9ce044 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -25,6 +25,9 @@ /* standard i2c insmod options */ static unsigned short normal_i2c[] = { +#ifdef CONFIG_TUNER_5761 + 0x10, +#endif 0x42, 0x43, 0x4a, 0x4b, /* tda8290 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, @@ -189,6 +192,16 @@ static void set_type(struct i2c_client *c, 
unsigned int type, } t->mode_mask = T_RADIO; break; +#ifdef CONFIG_TUNER_5761 + case TUNER_TEA5761: + if (tea5761_tuner_init(c) == EINVAL) { + t->type = TUNER_ABSENT; + t->mode_mask = T_UNINITIALIZED; + return -ENODEV; + } + t->mode_mask = T_RADIO; + break; +#endif case TUNER_PHILIPS_FMD1216ME_MK3: buffer[0] = 0x0b; buffer[1] = 0xdc; @@ -460,6 +473,19 @@ static int tuner_attach(struct i2c_adapter *adap, int addr, int kind) /* autodetection code based on the i2c addr */ if (!no_autodetect) { switch (addr) { +#ifdef CONFIG_TUNER_5761 + case 0x10: + if (tea5761_autodetection(&t->i2c) != EINVAL) { + t->type = TUNER_TEA5761; + t->mode_mask = T_RADIO; + t->mode = T_STANDBY; + t->radio_freq = 87.5 * 16000; /* Sets freq to FM range */ + default_mode_mask &= ~T_RADIO; + + goto register_client; + } + break; +#endif case 0x42: case 0x43: case 0x4a: diff --git a/drivers/media/video/tuner-types.c b/drivers/media/video/tuner-types.c index 74c3e6f96f1..03849a166e5 100644 --- a/drivers/media/video/tuner-types.c +++ b/drivers/media/video/tuner-types.c @@ -1463,6 +1463,10 @@ struct tunertype tuners[] = { .name = "Philips TDA988[5,6,7] IF PLL Demodulator", /* see tda9887.c for details */ }, + [TUNER_TEA5761] = { /* Philips RADIO */ + .name = "Philips TEA5761 FM Radio", + /* see tea5767.c for details */ + }, }; unsigned const int tuner_count = ARRAY_SIZE(tuners); diff --git a/include/media/tuner.h b/include/media/tuner.h index 6dcf3c45707..7861babd4c9 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -124,6 +124,7 @@ extern int tuner_debug; #define TUNER_THOMSON_FE6600 72 /* DViCO FusionHDTV DVB-T Hybrid */ #define TUNER_SAMSUNG_TCPG_6121P30A 73 /* Hauppauge PVR-500 PAL */ #define TUNER_TDA9887 74 /* This tuner should be used only internally */ +#define TUNER_TEA5761 75 /* Only FM Radio Tuner */ /* tv card specific */ #define TDA9887_PRESENT (1<<0) @@ -233,11 +234,16 @@ extern int microtune_init(struct i2c_client *c); extern int xc3028_init(struct i2c_client *c); 
extern int tda8290_init(struct i2c_client *c); extern int tda8290_probe(struct i2c_client *c); -extern int tea5767_tuner_init(struct i2c_client *c); extern int default_tuner_init(struct i2c_client *c); + extern int tea5767_autodetection(struct i2c_client *c); +extern int tea5767_tuner_init(struct i2c_client *c); + extern int tda9887_tuner_init(struct i2c_client *c); +extern int tea5761_tuner_init(struct i2c_client *c); +extern int tea5761_autodetection(struct i2c_client *c); + #define tuner_warn(fmt, arg...) do {\ printk(KERN_WARNING "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) -- cgit v1.2.3-70-g09d2 From b208319993ceff7ebfcc6bb914fe94d29e48a891 Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Tue, 29 May 2007 22:54:06 -0300 Subject: V4L/DVB (5719): Tuner: Move device-specific private data out of tuner struct Create private data struct for device specific private data. Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/mt20xx.c | 33 +++++++++--- drivers/media/video/tda8290.c | 105 ++++++++++++++++++++++++--------------- drivers/media/video/tda9887.c | 33 ++++++++---- drivers/media/video/tuner-core.c | 5 +- include/media/tuner.h | 13 +---- 5 files changed, 117 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/drivers/media/video/mt20xx.c b/drivers/media/video/mt20xx.c index 2987c1685da..d7e68a639cd 100644 --- a/drivers/media/video/mt20xx.c +++ b/drivers/media/video/mt20xx.c @@ -37,6 +37,11 @@ static char *microtune_part[] = { [ MT2050 ] = "MT2050", }; +struct microtune_priv { + unsigned int xogc; + unsigned int radio_if2; +}; + // IsSpurInBand()? 
static int mt2032_spurcheck(struct i2c_client *c, int f1, int f2, int spectrum_from,int spectrum_to) @@ -218,6 +223,7 @@ static void mt2032_set_if_freq(struct i2c_client *c, unsigned int rfin, unsigned char buf[21]; int lint_try,ret,sel,lock=0; struct tuner *t = i2c_get_clientdata(c); + struct microtune_priv *priv = t->priv; tuner_dbg("mt2032_set_if_freq rfin=%d if1=%d if2=%d from=%d to=%d\n", rfin,if1,if2,from,to); @@ -227,7 +233,7 @@ static void mt2032_set_if_freq(struct i2c_client *c, unsigned int rfin, i2c_master_recv(c,buf,21); buf[0]=0; - ret=mt2032_compute_freq(c,rfin,if1,if2,from,to,&buf[1],&sel,t->xogc); + ret=mt2032_compute_freq(c,rfin,if1,if2,from,to,&buf[1],&sel,priv->xogc); if (ret<0) return; @@ -251,10 +257,10 @@ static void mt2032_set_if_freq(struct i2c_client *c, unsigned int rfin, tuner_dbg("mt2032: re-init PLLs by LINT\n"); buf[0]=7; - buf[1]=0x80 +8+t->xogc; // set LINT to re-init PLLs + buf[1]=0x80 +8+priv->xogc; // set LINT to re-init PLLs i2c_master_send(c,buf,2); mdelay(10); - buf[1]=8+t->xogc; + buf[1]=8+priv->xogc; i2c_master_send(c,buf,2); } @@ -294,7 +300,8 @@ static void mt2032_set_tv_freq(struct i2c_client *c, unsigned int freq) static void mt2032_set_radio_freq(struct i2c_client *c, unsigned int freq) { struct tuner *t = i2c_get_clientdata(c); - int if2 = t->radio_if2; + struct microtune_priv *priv = t->priv; + int if2 = priv->radio_if2; // per Manual for FM tuning: first if center freq. 
1085 MHz mt2032_set_if_freq(c, freq * 1000 / 16, @@ -305,6 +312,7 @@ static void mt2032_set_radio_freq(struct i2c_client *c, unsigned int freq) static int mt2032_init(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); + struct microtune_priv *priv = t->priv; unsigned char buf[21]; int ret,xogc,xok=0; @@ -351,7 +359,7 @@ static int mt2032_init(struct i2c_client *c) if (ret!=2) tuner_warn("i2c i/o error: rc == %d (should be 2)\n",ret); } while (xok != 1 ); - t->xogc=xogc; + priv->xogc=xogc; t->set_tv_freq = mt2032_set_tv_freq; t->set_radio_freq = mt2032_set_radio_freq; @@ -456,7 +464,8 @@ static void mt2050_set_tv_freq(struct i2c_client *c, unsigned int freq) static void mt2050_set_radio_freq(struct i2c_client *c, unsigned int freq) { struct tuner *t = i2c_get_clientdata(c); - int if2 = t->radio_if2; + struct microtune_priv *priv = t->priv; + int if2 = priv->radio_if2; mt2050_set_if_freq(c, freq * 1000 / 16, if2); mt2050_set_antenna(c, radio_antenna); @@ -488,21 +497,29 @@ static int mt2050_init(struct i2c_client *c) int microtune_init(struct i2c_client *c) { + struct microtune_priv *priv = NULL; struct tuner *t = i2c_get_clientdata(c); char *name; unsigned char buf[21]; int company_code; + priv = kzalloc(sizeof(struct microtune_priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + t->priv = priv; + + priv->radio_if2 = 10700 * 1000; /* 10.7MHz - FM radio */ + memset(buf,0,sizeof(buf)); t->set_tv_freq = NULL; t->set_radio_freq = NULL; t->standby = NULL; if (t->std & V4L2_STD_525_60) { tuner_dbg("pinnacle ntsc\n"); - t->radio_if2 = 41300 * 1000; + priv->radio_if2 = 41300 * 1000; } else { tuner_dbg("pinnacle pal\n"); - t->radio_if2 = 33300 * 1000; + priv->radio_if2 = 33300 * 1000; } name = "unknown"; diff --git a/drivers/media/video/tda8290.c b/drivers/media/video/tda8290.c index 1a1bef0e9c3..7bdf968bf6e 100644 --- a/drivers/media/video/tda8290.c +++ b/drivers/media/video/tda8290.c @@ -25,6 +25,16 @@ /* 
---------------------------------------------------------------------- */ +struct tda8290_priv { + unsigned char tda8290_easy_mode; + unsigned char tda827x_lpsel; + unsigned char tda827x_addr; + unsigned char tda827x_ver; + unsigned int sgIF; +}; + +/* ---------------------------------------------------------------------- */ + struct tda827x_data { u32 lomax; u8 spd; @@ -76,7 +86,8 @@ static void tda827x_tune(struct i2c_client *c, u16 ifc, unsigned int freq) u32 N; int i; struct tuner *t = i2c_get_clientdata(c); - struct i2c_msg msg = {.addr = t->tda827x_addr, .flags = 0}; + struct tda8290_priv *priv = t->priv; + struct i2c_msg msg = {.addr = priv->tda827x_addr, .flags = 0}; if (t->mode == V4L2_TUNER_RADIO) freq = freq / 1000; @@ -95,7 +106,7 @@ static void tda827x_tune(struct i2c_client *c, u16 ifc, unsigned int freq) tuner_reg[1] = (unsigned char)(N>>8); tuner_reg[2] = (unsigned char) N; tuner_reg[3] = 0x40; - tuner_reg[4] = 0x52 + (t->tda827x_lpsel << 5); + tuner_reg[4] = 0x52 + (priv->tda827x_lpsel << 5); tuner_reg[5] = (tda827x_analog[i].spd << 6) + (tda827x_analog[i].div1p5 <<5) + (tda827x_analog[i].bs <<3) + tda827x_analog[i].bp; tuner_reg[6] = 0x8f + (tda827x_analog[i].gc3 << 4); @@ -146,8 +157,9 @@ static void tda827x_tune(struct i2c_client *c, u16 ifc, unsigned int freq) static void tda827x_agcf(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); + struct tda8290_priv *priv = t->priv; unsigned char data[] = {0x80, 0x0c}; - struct i2c_msg msg = {.addr = t->tda827x_addr, .buf = data, + struct i2c_msg msg = {.addr = priv->tda827x_addr, .buf = data, .flags = 0, .len = 2}; i2c_transfer(c->adapter, &msg, 1); } @@ -234,7 +246,8 @@ static void tda827xa_tune(struct i2c_client *c, u16 ifc, unsigned int freq) u32 N; int i; struct tuner *t = i2c_get_clientdata(c); - struct i2c_msg msg = {.addr = t->tda827x_addr, .flags = 0, .buf = tuner_reg}; + struct tda8290_priv *priv = t->priv; + struct i2c_msg msg = {.addr = priv->tda827x_addr, .flags = 0, .buf = 
tuner_reg}; tda827xa_lna_gain( c, 1); msleep(10); @@ -271,7 +284,7 @@ static void tda827xa_tune(struct i2c_client *c, u16 ifc, unsigned int freq) tuner_reg[1] = 0xff; tuner_reg[2] = 0xe0; tuner_reg[3] = 0; - tuner_reg[4] = 0x99 + (t->tda827x_lpsel << 1); + tuner_reg[4] = 0x99 + (priv->tda827x_lpsel << 1); msg.len = 5; i2c_transfer(c->adapter, &msg, 1); @@ -311,15 +324,16 @@ static void tda827xa_tune(struct i2c_client *c, u16 ifc, unsigned int freq) i2c_transfer(c->adapter, &msg, 1); tuner_reg[0] = 0xc0; - tuner_reg[1] = 0x19 + (t->tda827x_lpsel << 1); + tuner_reg[1] = 0x19 + (priv->tda827x_lpsel << 1); i2c_transfer(c->adapter, &msg, 1); } static void tda827xa_agcf(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); + struct tda8290_priv *priv = t->priv; unsigned char data[] = {0x80, 0x2c}; - struct i2c_msg msg = {.addr = t->tda827x_addr, .buf = data, + struct i2c_msg msg = {.addr = priv->tda827x_addr, .buf = data, .flags = 0, .len = 2}; i2c_transfer(c->adapter, &msg, 1); } @@ -347,8 +361,9 @@ static void tda8290_i2c_bridge(struct i2c_client *c, int close) static int tda8290_tune(struct i2c_client *c, u16 ifc, unsigned int freq) { struct tuner *t = i2c_get_clientdata(c); + struct tda8290_priv *priv = t->priv; unsigned char soft_reset[] = { 0x00, 0x00 }; - unsigned char easy_mode[] = { 0x01, t->tda8290_easy_mode }; + unsigned char easy_mode[] = { 0x01, priv->tda8290_easy_mode }; unsigned char expert_mode[] = { 0x01, 0x80 }; unsigned char agc_out_on[] = { 0x02, 0x00 }; unsigned char gainset_off[] = { 0x28, 0x14 }; @@ -375,18 +390,18 @@ static int tda8290_tune(struct i2c_client *c, u16 ifc, unsigned int freq) i2c_master_send(c, soft_reset, 2); msleep(1); - expert_mode[1] = t->tda8290_easy_mode + 0x80; + expert_mode[1] = priv->tda8290_easy_mode + 0x80; i2c_master_send(c, expert_mode, 2); i2c_master_send(c, gainset_off, 2); i2c_master_send(c, if_agc_spd, 2); - if (t->tda8290_easy_mode & 0x60) + if (priv->tda8290_easy_mode & 0x60) i2c_master_send(c, 
adc_head_9, 2); else i2c_master_send(c, adc_head_6, 2); i2c_master_send(c, pll_bw_nom, 2); tda8290_i2c_bridge(c, 1); - if (t->tda827x_ver != 0) + if (priv->tda827x_ver != 0) tda827xa_tune(c, ifc, freq); else tda827x_tune(c, ifc, freq); @@ -418,7 +433,7 @@ static int tda8290_tune(struct i2c_client *c, u16 ifc, unsigned int freq) if ((agc_stat > 115) || !(pll_stat & 0x80)) { tuner_dbg("adjust gain, step 2. Agc: %d, lock: %d\n", agc_stat, pll_stat & 0x80); - if (t->tda827x_ver != 0) + if (priv->tda827x_ver != 0) tda827xa_agcf(c); else tda827x_agcf(c); @@ -437,7 +452,7 @@ static int tda8290_tune(struct i2c_client *c, u16 ifc, unsigned int freq) } /* l/ l' deadlock? */ - if(t->tda8290_easy_mode & 0x60) { + if(priv->tda8290_easy_mode & 0x60) { i2c_master_send(c, &addr_adc_sat, 1); i2c_master_recv(c, &adc_sat, 1); i2c_master_send(c, &addr_pll_stat, 1); @@ -459,41 +474,42 @@ static int tda8290_tune(struct i2c_client *c, u16 ifc, unsigned int freq) static void set_audio(struct tuner *t) { + struct tda8290_priv *priv = t->priv; char* mode; - t->tda827x_lpsel = 0; + priv->tda827x_lpsel = 0; if (t->std & V4L2_STD_MN) { - t->sgIF = 92; - t->tda8290_easy_mode = 0x01; - t->tda827x_lpsel = 1; + priv->sgIF = 92; + priv->tda8290_easy_mode = 0x01; + priv->tda827x_lpsel = 1; mode = "MN"; } else if (t->std & V4L2_STD_B) { - t->sgIF = 108; - t->tda8290_easy_mode = 0x02; + priv->sgIF = 108; + priv->tda8290_easy_mode = 0x02; mode = "B"; } else if (t->std & V4L2_STD_GH) { - t->sgIF = 124; - t->tda8290_easy_mode = 0x04; + priv->sgIF = 124; + priv->tda8290_easy_mode = 0x04; mode = "GH"; } else if (t->std & V4L2_STD_PAL_I) { - t->sgIF = 124; - t->tda8290_easy_mode = 0x08; + priv->sgIF = 124; + priv->tda8290_easy_mode = 0x08; mode = "I"; } else if (t->std & V4L2_STD_DK) { - t->sgIF = 124; - t->tda8290_easy_mode = 0x10; + priv->sgIF = 124; + priv->tda8290_easy_mode = 0x10; mode = "DK"; } else if (t->std & V4L2_STD_SECAM_L) { - t->sgIF = 124; - t->tda8290_easy_mode = 0x20; + priv->sgIF = 124; + 
priv->tda8290_easy_mode = 0x20; mode = "L"; } else if (t->std & V4L2_STD_SECAM_LC) { - t->sgIF = 20; - t->tda8290_easy_mode = 0x40; + priv->sgIF = 20; + priv->tda8290_easy_mode = 0x40; mode = "LC"; } else { - t->sgIF = 124; - t->tda8290_easy_mode = 0x10; + priv->sgIF = 124; + priv->tda8290_easy_mode = 0x10; mode = "xx"; } tuner_dbg("setting tda8290 to system %s\n", mode); @@ -502,9 +518,10 @@ static void set_audio(struct tuner *t) static void set_tv_freq(struct i2c_client *c, unsigned int freq) { struct tuner *t = i2c_get_clientdata(c); + struct tda8290_priv *priv = t->priv; set_audio(t); - tda8290_tune(c, t->sgIF, freq); + tda8290_tune(c, priv->sgIF, freq); } static void set_radio_freq(struct i2c_client *c, unsigned int freq) @@ -528,13 +545,14 @@ static int has_signal(struct i2c_client *c) static void standby(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); + struct tda8290_priv *priv = t->priv; unsigned char cb1[] = { 0x30, 0xD0 }; unsigned char tda8290_standby[] = { 0x00, 0x02 }; unsigned char tda8290_agc_tri[] = { 0x02, 0x20 }; - struct i2c_msg msg = {.addr = t->tda827x_addr, .flags=0, .buf=cb1, .len = 2}; + struct i2c_msg msg = {.addr = priv->tda827x_addr, .flags=0, .buf=cb1, .len = 2}; tda8290_i2c_bridge(c, 1); - if (t->tda827x_ver != 0) + if (priv->tda827x_ver != 0) cb1[1] = 0x90; i2c_transfer(c->adapter, &msg, 1); tda8290_i2c_bridge(c, 0); @@ -560,13 +578,14 @@ static void tda8290_init_if(struct i2c_client *c) static void tda8290_init_tuner(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); + struct tda8290_priv *priv = t->priv; unsigned char tda8275_init[] = { 0x00, 0x00, 0x00, 0x40, 0xdC, 0x04, 0xAf, 0x3F, 0x2A, 0x04, 0xFF, 0x00, 0x00, 0x40 }; unsigned char tda8275a_init[] = { 0x00, 0x00, 0x00, 0x00, 0xdC, 0x05, 0x8b, 0x0c, 0x04, 0x20, 0xFF, 0x00, 0x00, 0x4b }; - struct i2c_msg msg = {.addr = t->tda827x_addr, .flags=0, + struct i2c_msg msg = {.addr = priv->tda827x_addr, .flags=0, .buf=tda8275_init, .len = 14}; - if 
(t->tda827x_ver != 0) + if (priv->tda827x_ver != 0) msg.buf = tda8275a_init; tda8290_i2c_bridge(c, 1); @@ -578,12 +597,18 @@ static void tda8290_init_tuner(struct i2c_client *c) int tda8290_init(struct i2c_client *c) { + struct tda8290_priv *priv = NULL; struct tuner *t = i2c_get_clientdata(c); u8 data; int i, ret, tuners_found; u32 tuner_addrs; struct i2c_msg msg = {.flags=I2C_M_RD, .buf=&data, .len = 1}; + priv = kzalloc(sizeof(struct tda8290_priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + t->priv = priv; + tda8290_i2c_bridge(c, 1); /* probe for tuner chip */ tuners_found = 0; @@ -618,7 +643,7 @@ int tda8290_init(struct i2c_client *c) tuner_addrs = tuner_addrs & 0xff; tuner_info ("setting tuner address to %x\n", tuner_addrs); } - t->tda827x_addr = tuner_addrs; + priv->tda827x_addr = tuner_addrs; msg.addr = tuner_addrs; tda8290_i2c_bridge(c, 1); @@ -627,10 +652,10 @@ int tda8290_init(struct i2c_client *c) tuner_warn ("TDA827x access failed!\n"); if ((data & 0x3c) == 0) { strlcpy(c->name, "tda8290+75", sizeof(c->name)); - t->tda827x_ver = 0; + priv->tda827x_ver = 0; } else { strlcpy(c->name, "tda8290+75a", sizeof(c->name)); - t->tda827x_ver = 2; + priv->tda827x_ver = 2; } tuner_info("type set to %s\n", c->name); @@ -638,7 +663,7 @@ int tda8290_init(struct i2c_client *c) t->set_radio_freq = set_radio_freq; t->has_signal = has_signal; t->standby = standby; - t->tda827x_lpsel = 0; + priv->tda827x_lpsel = 0; t->mode = V4L2_TUNER_ANALOG_TV; tda8290_init_tuner(c); diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index fde576f1101..01f18b07d01 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -29,6 +29,9 @@ printk(KERN_INFO "%s %d-%04x: " fmt, t->i2c.name, \ i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) +struct tda9887_priv { + unsigned char data[4]; +}; /* ---------------------------------------------------------------------- */ @@ -508,10 +511,11 @@ static int tda9887_status(struct 
tuner *t) static void tda9887_configure(struct i2c_client *client) { struct tuner *t = i2c_get_clientdata(client); + struct tda9887_priv *priv = t->priv; int rc; - memset(t->tda9887_data,0,sizeof(t->tda9887_data)); - tda9887_set_tvnorm(t,t->tda9887_data); + memset(priv->data,0,sizeof(priv->data)); + tda9887_set_tvnorm(t,priv->data); /* A note on the port settings: These settings tend to depend on the specifics of the board. @@ -526,22 +530,22 @@ static void tda9887_configure(struct i2c_client *client) the ports should be set to active (0), but, again, that may differ depending on the precise hardware configuration. */ - t->tda9887_data[1] |= cOutputPort1Inactive; - t->tda9887_data[1] |= cOutputPort2Inactive; + priv->data[1] |= cOutputPort1Inactive; + priv->data[1] |= cOutputPort2Inactive; - tda9887_set_config(t,t->tda9887_data); - tda9887_set_insmod(t,t->tda9887_data); + tda9887_set_config(t,priv->data); + tda9887_set_insmod(t,priv->data); if (t->mode == T_STANDBY) { - t->tda9887_data[1] |= cForcedMuteAudioON; + priv->data[1] |= cForcedMuteAudioON; } tda9887_dbg("writing: b=0x%02x c=0x%02x e=0x%02x\n", - t->tda9887_data[1],t->tda9887_data[2],t->tda9887_data[3]); + priv->data[1],priv->data[2],priv->data[3]); if (tuner_debug > 1) - dump_write_message(t, t->tda9887_data); + dump_write_message(t, priv->data); - if (4 != (rc = i2c_master_send(&t->i2c,t->tda9887_data,4))) + if (4 != (rc = i2c_master_send(&t->i2c,priv->data,4))) tda9887_info("i2c i/o error: rc == %d (should be 4)\n",rc); if (tuner_debug > 2) { @@ -555,7 +559,8 @@ static void tda9887_configure(struct i2c_client *client) static void tda9887_tuner_status(struct i2c_client *client) { struct tuner *t = i2c_get_clientdata(client); - tda9887_info("Data bytes: b=0x%02x c=0x%02x e=0x%02x\n", t->tda9887_data[1], t->tda9887_data[2], t->tda9887_data[3]); + struct tda9887_priv *priv = t->priv; + tda9887_info("Data bytes: b=0x%02x c=0x%02x e=0x%02x\n", priv->data[1], priv->data[2], priv->data[3]); } static int 
tda9887_get_afc(struct i2c_client *client) @@ -588,8 +593,14 @@ static void tda9887_set_freq(struct i2c_client *client, unsigned int freq) int tda9887_tuner_init(struct i2c_client *c) { + struct tda9887_priv *priv = NULL; struct tuner *t = i2c_get_clientdata(c); + priv = kzalloc(sizeof(struct tda9887_priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + t->priv = priv; + strlcpy(c->name, "tda9887", sizeof(c->name)); tda9887_info("tda988[5/6/7] found @ 0x%x (%s)\n", t->i2c.addr, diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 591ca9ce044..406b85cd606 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -177,6 +177,9 @@ static void set_type(struct i2c_client *c, unsigned int type, return; } + /* discard private data, in case set_type() was previously called */ + kfree(t->priv); + t->priv = NULL; switch (t->type) { case TUNER_MT2032: microtune_init(c); @@ -450,7 +453,6 @@ static int tuner_attach(struct i2c_adapter *adap, int addr, int kind) memcpy(&t->i2c, &client_template, sizeof(struct i2c_client)); i2c_set_clientdata(&t->i2c, t); t->type = UNSET; - t->radio_if2 = 10700 * 1000; /* 10.7MHz - FM radio */ t->audmode = V4L2_TUNER_MODE_STEREO; t->mode_mask = T_UNINITIALIZED; t->tuner_status = tuner_status; @@ -559,6 +561,7 @@ static int tuner_detach(struct i2c_client *client) return err; } + kfree(t->priv); kfree(t); return 0; } diff --git a/include/media/tuner.h b/include/media/tuner.h index 7861babd4c9..b901373c6e1 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -199,21 +199,10 @@ struct tuner { v4l2_std_id std; int using_v4l2; + void *priv; /* used by tda9887 */ unsigned int tda9887_config; - unsigned char tda9887_data[4]; - - /* used by MT2032 */ - unsigned int xogc; - unsigned int radio_if2; - - /* used by tda8290 */ - unsigned char tda8290_easy_mode; - unsigned char tda827x_lpsel; - unsigned char tda827x_addr; - unsigned char tda827x_ver; - unsigned int sgIF; unsigned 
int config; int (*tuner_callback) (void *dev, int command,int arg); -- cgit v1.2.3-70-g09d2 From be2b85a13543bbaf1a141b3a54f84c1e3b059e69 Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Mon, 4 Jun 2007 14:40:27 -0300 Subject: V4L/DVB (5741): Tuner: add release callback Individual tuner drivers are now allocating memory themselves for their own private data structures. This changeset adds a release callback to the tuner operations, so that newer drivers that may require more complex data structures may release this private data themselves. Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/tuner-core.c | 5 +++++ include/media/tuner.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 4369f6d1249..0e71a22f1d4 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -178,8 +178,11 @@ static void set_type(struct i2c_client *c, unsigned int type, } /* discard private data, in case set_type() was previously called */ + if (t->release) + t->release(c); kfree(t->priv); t->priv = NULL; + switch (t->type) { case TUNER_MT2032: microtune_init(c); @@ -561,6 +564,8 @@ static int tuner_detach(struct i2c_client *client) return err; } + if (t->release) + t->release(client); kfree(t->priv); kfree(t); return 0; diff --git a/include/media/tuner.h b/include/media/tuner.h index b901373c6e1..da821a027e7 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -215,6 +215,7 @@ struct tuner { int (*get_afc)(struct i2c_client *c); void (*tuner_status)(struct i2c_client *c); void (*standby)(struct i2c_client *c); + void (*release)(struct i2c_client *c); }; extern unsigned const int tuner_count; -- cgit v1.2.3-70-g09d2 From 7a91a80a0d1a0a83a94e773ec6245b31b7c4ceed Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Wed, 6 Jun 2007 16:10:39 -0300 Subject: V4L/DVB (5753): Tuner: create struct tuner_operations Move 
tuner callback function pointers out of struct tuner, into struct tuner_operations. Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/mt20xx.c | 16 +++++----- drivers/media/video/tda8290.c | 10 +++--- drivers/media/video/tda9887.c | 12 ++++---- drivers/media/video/tea5761.c | 8 ++--- drivers/media/video/tea5767.c | 10 +++--- drivers/media/video/tuner-core.c | 62 +++++++++++++++++++------------------- drivers/media/video/tuner-simple.c | 10 +++--- include/media/tuner.h | 21 +++++++------ 8 files changed, 76 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/drivers/media/video/mt20xx.c b/drivers/media/video/mt20xx.c index 5b33be8a49c..846c6233fa3 100644 --- a/drivers/media/video/mt20xx.c +++ b/drivers/media/video/mt20xx.c @@ -361,8 +361,8 @@ static int mt2032_init(struct i2c_client *c) } while (xok != 1 ); priv->xogc=xogc; - t->set_tv_freq = mt2032_set_tv_freq; - t->set_radio_freq = mt2032_set_radio_freq; + t->ops.set_tv_freq = mt2032_set_tv_freq; + t->ops.set_radio_freq = mt2032_set_radio_freq; return(1); } @@ -490,8 +490,8 @@ static int mt2050_init(struct i2c_client *c) i2c_master_recv(c,buf,1); tuner_dbg("mt2050: sro is %x\n",buf[0]); - t->set_tv_freq = mt2050_set_tv_freq; - t->set_radio_freq = mt2050_set_radio_freq; + t->ops.set_tv_freq = mt2050_set_tv_freq; + t->ops.set_radio_freq = mt2050_set_radio_freq; return 0; } @@ -519,10 +519,10 @@ int microtune_init(struct i2c_client *c) priv->radio_if2 = 10700 * 1000; /* 10.7MHz - FM radio */ memset(buf,0,sizeof(buf)); - t->set_tv_freq = NULL; - t->set_radio_freq = NULL; - t->standby = NULL; - t->release = microtune_release; + t->ops.set_tv_freq = NULL; + t->ops.set_radio_freq = NULL; + t->ops.standby = NULL; + t->ops.release = microtune_release; if (t->std & V4L2_STD_525_60) { tuner_dbg("pinnacle ntsc\n"); priv->radio_if2 = 41300 * 1000; diff --git a/drivers/media/video/tda8290.c b/drivers/media/video/tda8290.c index 2614ea99a9c..99122ff4e81 100644 --- 
a/drivers/media/video/tda8290.c +++ b/drivers/media/video/tda8290.c @@ -667,11 +667,11 @@ int tda8290_init(struct i2c_client *c) } tuner_info("type set to %s\n", c->name); - t->set_tv_freq = set_tv_freq; - t->set_radio_freq = set_radio_freq; - t->has_signal = has_signal; - t->standby = standby; - t->release = tda8290_release; + t->ops.set_tv_freq = set_tv_freq; + t->ops.set_radio_freq = set_radio_freq; + t->ops.has_signal = has_signal; + t->ops.standby = standby; + t->ops.release = tda8290_release; priv->tda827x_lpsel = 0; t->mode = V4L2_TUNER_ANALOG_TV; diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index f0443cc02e5..5bb7d19edfb 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -614,12 +614,12 @@ int tda9887_tuner_init(struct i2c_client *c) tda9887_info("tda988[5/6/7] found @ 0x%x (%s)\n", t->i2c.addr, t->i2c.driver->driver.name); - t->set_tv_freq = tda9887_set_freq; - t->set_radio_freq = tda9887_set_freq; - t->standby = tda9887_standby; - t->tuner_status = tda9887_tuner_status; - t->get_afc = tda9887_get_afc; - t->release = tda9887_release; + t->ops.set_tv_freq = tda9887_set_freq; + t->ops.set_radio_freq = tda9887_set_freq; + t->ops.standby = tda9887_standby; + t->ops.tuner_status = tda9887_tuner_status; + t->ops.get_afc = tda9887_get_afc; + t->ops.release = tda9887_release; return 0; } diff --git a/drivers/media/video/tea5761.c b/drivers/media/video/tea5761.c index 1479f16f9ab..858f2d11496 100644 --- a/drivers/media/video/tea5761.c +++ b/drivers/media/video/tea5761.c @@ -229,10 +229,10 @@ int tea5761_tuner_init(struct i2c_client *c) tuner_info("type set to %d (%s)\n", t->type, "Philips TEA5761HN FM Radio"); strlcpy(c->name, "tea5761", sizeof(c->name)); - t->set_tv_freq = set_tv_freq; - t->set_radio_freq = set_radio_freq; - t->has_signal = tea5761_signal; - t->is_stereo = tea5761_stereo; + t->ops.set_tv_freq = set_tv_freq; + t->ops.set_radio_freq = set_radio_freq; + t->ops.has_signal = tea5761_signal; + 
t->ops.is_stereo = tea5761_stereo; return (0); } diff --git a/drivers/media/video/tea5767.c b/drivers/media/video/tea5767.c index d1c41781ccc..c5510422aa3 100644 --- a/drivers/media/video/tea5767.c +++ b/drivers/media/video/tea5767.c @@ -350,11 +350,11 @@ int tea5767_tuner_init(struct i2c_client *c) tuner_info("type set to %d (%s)\n", t->type, "Philips TEA5767HN FM Radio"); strlcpy(c->name, "tea5767", sizeof(c->name)); - t->set_tv_freq = set_tv_freq; - t->set_radio_freq = set_radio_freq; - t->has_signal = tea5767_signal; - t->is_stereo = tea5767_stereo; - t->standby = tea5767_standby; + t->ops.set_tv_freq = set_tv_freq; + t->ops.set_radio_freq = set_radio_freq; + t->ops.has_signal = tea5767_signal; + t->ops.is_stereo = tea5767_stereo; + t->ops.standby = tea5767_standby; return (0); } diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index acbffbfdb50..603476532f5 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -80,7 +80,7 @@ static void set_tv_freq(struct i2c_client *c, unsigned int freq) tuner_warn ("tuner type not set\n"); return; } - if (NULL == t->set_tv_freq) { + if (NULL == t->ops.set_tv_freq) { tuner_warn ("Tuner has no way to set tv freq\n"); return; } @@ -95,7 +95,7 @@ static void set_tv_freq(struct i2c_client *c, unsigned int freq) else freq = tv_range[1] * 16; } - t->set_tv_freq(c, freq); + t->ops.set_tv_freq(c, freq); } static void set_radio_freq(struct i2c_client *c, unsigned int freq) @@ -106,7 +106,7 @@ static void set_radio_freq(struct i2c_client *c, unsigned int freq) tuner_warn ("tuner type not set\n"); return; } - if (NULL == t->set_radio_freq) { + if (NULL == t->ops.set_radio_freq) { tuner_warn ("tuner has no way to set radio frequency\n"); return; } @@ -122,7 +122,7 @@ static void set_radio_freq(struct i2c_client *c, unsigned int freq) freq = radio_range[1] * 16000; } - t->set_radio_freq(c, freq); + t->ops.set_radio_freq(c, freq); } static void set_freq(struct i2c_client *c, 
unsigned long freq) @@ -178,8 +178,8 @@ static void set_type(struct i2c_client *c, unsigned int type, } /* discard private data, in case set_type() was previously called */ - if (t->release) - t->release(c); + if (t->ops.release) + t->ops.release(c); else { kfree(t->priv); t->priv = NULL; @@ -429,11 +429,11 @@ static void tuner_status(struct i2c_client *client) tuner_info("Standard: 0x%08lx\n", (unsigned long)t->std); if (t->mode != V4L2_TUNER_RADIO) return; - if (t->has_signal) { - tuner_info("Signal strength: %d\n", t->has_signal(client)); + if (t->ops.has_signal) { + tuner_info("Signal strength: %d\n", t->ops.has_signal(client)); } - if (t->is_stereo) { - tuner_info("Stereo: %s\n", t->is_stereo(client) ? "yes" : "no"); + if (t->ops.is_stereo) { + tuner_info("Stereo: %s\n", t->ops.is_stereo(client) ? "yes" : "no"); } } @@ -460,7 +460,7 @@ static int tuner_attach(struct i2c_adapter *adap, int addr, int kind) t->type = UNSET; t->audmode = V4L2_TUNER_MODE_STEREO; t->mode_mask = T_UNINITIALIZED; - t->tuner_status = tuner_status; + t->ops.tuner_status = tuner_status; if (show_i2c) { unsigned char buffer[16]; @@ -566,8 +566,8 @@ static int tuner_detach(struct i2c_client *client) return err; } - if (t->release) - t->release(client); + if (t->ops.release) + t->ops.release(client); else { kfree(t->priv); } @@ -591,8 +591,8 @@ static inline int set_mode(struct i2c_client *client, struct tuner *t, int mode, if (check_mode(t, cmd) == EINVAL) { t->mode = T_STANDBY; - if (t->standby) - t->standby (client); + if (t->ops.standby) + t->ops.standby (client); return EINVAL; } return 0; @@ -640,8 +640,8 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) if (check_mode(t, "TUNER_SET_STANDBY") == EINVAL) return 0; t->mode = T_STANDBY; - if (t->standby) - t->standby (client); + if (t->ops.standby) + t->ops.standby (client); break; #ifdef CONFIG_VIDEO_V4L1 case VIDIOCSAUDIO: @@ -700,10 +700,10 @@ static int tuner_command(struct i2c_client *client, 
unsigned int cmd, void *arg) return 0; if (V4L2_TUNER_RADIO == t->mode) { - if (t->has_signal) - vt->signal = t->has_signal(client); - if (t->is_stereo) { - if (t->is_stereo(client)) + if (t->ops.has_signal) + vt->signal = t->ops.has_signal(client); + if (t->ops.is_stereo) { + if (t->ops.is_stereo(client)) vt->flags |= VIDEO_TUNER_STEREO_ON; else @@ -731,8 +731,8 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) if (check_v4l2(t) == EINVAL) return 0; - if (V4L2_TUNER_RADIO == t->mode && t->is_stereo) - va->mode = t->is_stereo(client) + if (V4L2_TUNER_RADIO == t->mode && t->ops.is_stereo) + va->mode = t->ops.is_stereo(client) ? VIDEO_SOUND_STEREO : VIDEO_SOUND_MONO; return 0; } @@ -797,8 +797,8 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) switch_v4l2(); tuner->type = t->mode; - if (t->get_afc) - tuner->afc=t->get_afc(client); + if (t->ops.get_afc) + tuner->afc=t->ops.get_afc(client); if (t->mode == V4L2_TUNER_ANALOG_TV) tuner->capability |= V4L2_TUNER_CAP_NORM; if (t->mode != V4L2_TUNER_RADIO) { @@ -808,13 +808,13 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) } /* radio mode */ - if (t->has_signal) - tuner->signal = t->has_signal(client); + if (t->ops.has_signal) + tuner->signal = t->ops.has_signal(client); tuner->rxsubchans = V4L2_TUNER_SUB_MONO | V4L2_TUNER_SUB_STEREO; - if (t->is_stereo) { - tuner->rxsubchans = t->is_stereo(client) ? + if (t->ops.is_stereo) { + tuner->rxsubchans = t->ops.is_stereo(client) ? 
V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; } @@ -842,8 +842,8 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) break; } case VIDIOC_LOG_STATUS: - if (t->tuner_status) - t->tuner_status(client); + if (t->ops.tuner_status) + t->ops.tuner_status(client); break; } diff --git a/drivers/media/video/tuner-simple.c b/drivers/media/video/tuner-simple.c index b5792d6a73f..fb4addbd5eb 100644 --- a/drivers/media/video/tuner-simple.c +++ b/drivers/media/video/tuner-simple.c @@ -487,11 +487,11 @@ int default_tuner_init(struct i2c_client *c) t->type, tuners[t->type].name); strlcpy(c->name, tuners[t->type].name, sizeof(c->name)); - t->set_tv_freq = default_set_tv_freq; - t->set_radio_freq = default_set_radio_freq; - t->has_signal = tuner_signal; - t->is_stereo = tuner_stereo; - t->standby = NULL; + t->ops.set_tv_freq = default_set_tv_freq; + t->ops.set_radio_freq = default_set_radio_freq; + t->ops.has_signal = tuner_signal; + t->ops.is_stereo = tuner_stereo; + t->ops.standby = NULL; return 0; } diff --git a/include/media/tuner.h b/include/media/tuner.h index da821a027e7..88eaf893020 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -183,6 +183,17 @@ struct tuner_setup { int (*tuner_callback) (void *dev, int command,int arg); }; +struct tuner_operations { + void (*set_tv_freq)(struct i2c_client *c, unsigned int freq); + void (*set_radio_freq)(struct i2c_client *c, unsigned int freq); + int (*has_signal)(struct i2c_client *c); + int (*is_stereo)(struct i2c_client *c); + int (*get_afc)(struct i2c_client *c); + void (*tuner_status)(struct i2c_client *c); + void (*standby)(struct i2c_client *c); + void (*release)(struct i2c_client *c); +}; + struct tuner { /* device */ struct i2c_client i2c; @@ -207,15 +218,7 @@ struct tuner { unsigned int config; int (*tuner_callback) (void *dev, int command,int arg); - /* function ptrs */ - void (*set_tv_freq)(struct i2c_client *c, unsigned int freq); - void (*set_radio_freq)(struct i2c_client *c, 
unsigned int freq); - int (*has_signal)(struct i2c_client *c); - int (*is_stereo)(struct i2c_client *c); - int (*get_afc)(struct i2c_client *c); - void (*tuner_status)(struct i2c_client *c); - void (*standby)(struct i2c_client *c); - void (*release)(struct i2c_client *c); + struct tuner_operations ops; }; extern unsigned const int tuner_count; -- cgit v1.2.3-70-g09d2 From 8218b0b2caecf4af55742e12e9986c15605bb197 Mon Sep 17 00:00:00 2001 From: Michael Krufky Date: Tue, 26 Jun 2007 13:12:08 -0300 Subject: V4L/DVB (5793): Tuner: remove hardware-specific info from public header Move internal structures and debug macros to drivers/media/video/tuner-driver.h Signed-off-by: Michael Krufky Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/mt20xx.c | 2 +- drivers/media/video/tda8290.c | 2 +- drivers/media/video/tda9887.c | 1 + drivers/media/video/tea5761.c | 1 + drivers/media/video/tea5767.c | 2 +- drivers/media/video/tuner-core.c | 1 + drivers/media/video/tuner-driver.h | 107 +++++++++++++++++++++++++++++++++++++ drivers/media/video/tuner-simple.c | 2 + include/media/tuner.h | 68 ----------------------- 9 files changed, 115 insertions(+), 71 deletions(-) create mode 100644 drivers/media/video/tuner-driver.h (limited to 'include') diff --git a/drivers/media/video/mt20xx.c b/drivers/media/video/mt20xx.c index 8a838bdb01b..7549114aaac 100644 --- a/drivers/media/video/mt20xx.c +++ b/drivers/media/video/mt20xx.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include "tuner-driver.h" /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/tda8290.c b/drivers/media/video/tda8290.c index 9c1b64c51c0..59cff5a3c59 100644 --- a/drivers/media/video/tda8290.c +++ b/drivers/media/video/tda8290.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include "tuner-driver.h" /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/tda9887.c 
b/drivers/media/video/tda9887.c index caca1092790..a8f773274fe 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -11,6 +11,7 @@ #include #include +#include "tuner-driver.h" /* Chips: diff --git a/drivers/media/video/tea5761.c b/drivers/media/video/tea5761.c index 2fcb81d4d0d..ae105c2cd0a 100644 --- a/drivers/media/video/tea5761.c +++ b/drivers/media/video/tea5761.c @@ -11,6 +11,7 @@ #include #include #include +#include "tuner-driver.h" #define PREFIX "TEA5761 " diff --git a/drivers/media/video/tea5767.c b/drivers/media/video/tea5767.c index 9cce9be718c..4985d47a508 100644 --- a/drivers/media/video/tea5767.c +++ b/drivers/media/video/tea5767.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include "tuner-driver.h" #define PREFIX "TEA5767 " diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 603476532f5..e646465464a 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -20,6 +20,7 @@ #include #include +#include "tuner-driver.h" #define UNSET (-1U) diff --git a/drivers/media/video/tuner-driver.h b/drivers/media/video/tuner-driver.h new file mode 100644 index 00000000000..0334a912507 --- /dev/null +++ b/drivers/media/video/tuner-driver.h @@ -0,0 +1,107 @@ +/* + tuner-driver.h - interface for different tuners + + Copyright (C) 1997 Markus Schroeder (schroedm@uni-duesseldorf.de) + minor modifications by Ralph Metzler (rjkm@thp.uni-koeln.de) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef __TUNER_HW_H__ +#define __TUNER_HW_H__ + +#include +#include + +extern unsigned const int tuner_count; + +struct tuner_operations { + void (*set_tv_freq)(struct i2c_client *c, unsigned int freq); + void (*set_radio_freq)(struct i2c_client *c, unsigned int freq); + int (*has_signal)(struct i2c_client *c); + int (*is_stereo)(struct i2c_client *c); + int (*get_afc)(struct i2c_client *c); + void (*tuner_status)(struct i2c_client *c); + void (*standby)(struct i2c_client *c); + void (*release)(struct i2c_client *c); +}; + +struct tuner { + /* device */ + struct i2c_client i2c; + + unsigned int type; /* chip type */ + + unsigned int mode; + unsigned int mode_mask; /* Combination of allowable modes */ + + unsigned int tv_freq; /* keep track of the current settings */ + unsigned int radio_freq; + u16 last_div; + unsigned int audmode; + v4l2_std_id std; + + int using_v4l2; + void *priv; + + /* used by tda9887 */ + unsigned int tda9887_config; + + unsigned int config; + int (*tuner_callback) (void *dev, int command,int arg); + + struct tuner_operations ops; +}; + +/* ------------------------------------------------------------------------ */ + +extern int default_tuner_init(struct i2c_client *c); + +extern int tda9887_tuner_init(struct i2c_client *c); + +extern int microtune_init(struct i2c_client *c); + +extern int tda8290_init(struct i2c_client *c); +extern int tda8290_probe(struct i2c_client *c); + +extern int tea5761_tuner_init(struct i2c_client *c); +extern int tea5761_autodetection(struct i2c_client *c); + +extern int tea5767_autodetection(struct i2c_client *c); +extern int tea5767_tuner_init(struct i2c_client *c); + +/* ------------------------------------------------------------------------ */ + +#define tuner_warn(fmt, arg...) 
do {\ + printk(KERN_WARNING "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ + i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) +#define tuner_info(fmt, arg...) do {\ + printk(KERN_INFO "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ + i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) +#define tuner_dbg(fmt, arg...) do {\ + extern int tuner_debug; \ + if (tuner_debug) \ + printk(KERN_DEBUG "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ + i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) + +#endif /* __TUNER_HW_H__ */ + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * --------------------------------------------------------------------------- + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/drivers/media/video/tuner-simple.c b/drivers/media/video/tuner-simple.c index fd23c1d8aa2..2d57e8bc0db 100644 --- a/drivers/media/video/tuner-simple.c +++ b/drivers/media/video/tuner-simple.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include "tuner-driver.h" static int offset = 0; module_param(offset, int, 0664); diff --git a/include/media/tuner.h b/include/media/tuner.h index 88eaf893020..160381c72e4 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -23,8 +23,6 @@ #define _TUNER_H #include -#include -#include extern int tuner_debug; @@ -183,72 +181,6 @@ struct tuner_setup { int (*tuner_callback) (void *dev, int command,int arg); }; -struct tuner_operations { - void (*set_tv_freq)(struct i2c_client *c, unsigned int freq); - void (*set_radio_freq)(struct i2c_client *c, unsigned int freq); - int (*has_signal)(struct i2c_client *c); - int (*is_stereo)(struct i2c_client *c); - int (*get_afc)(struct i2c_client *c); - void (*tuner_status)(struct i2c_client *c); - void (*standby)(struct i2c_client *c); - void (*release)(struct i2c_client *c); -}; - -struct tuner { - /* device */ - struct i2c_client i2c; - - unsigned int type; /* chip type */ - - unsigned int 
mode; - unsigned int mode_mask; /* Combination of allowable modes */ - - unsigned int tv_freq; /* keep track of the current settings */ - unsigned int radio_freq; - u16 last_div; - unsigned int audmode; - v4l2_std_id std; - - int using_v4l2; - void *priv; - - /* used by tda9887 */ - unsigned int tda9887_config; - - unsigned int config; - int (*tuner_callback) (void *dev, int command,int arg); - - struct tuner_operations ops; -}; - -extern unsigned const int tuner_count; - -extern int microtune_init(struct i2c_client *c); -extern int xc3028_init(struct i2c_client *c); -extern int tda8290_init(struct i2c_client *c); -extern int tda8290_probe(struct i2c_client *c); -extern int default_tuner_init(struct i2c_client *c); - -extern int tea5767_autodetection(struct i2c_client *c); -extern int tea5767_tuner_init(struct i2c_client *c); - -extern int tda9887_tuner_init(struct i2c_client *c); - -extern int tea5761_tuner_init(struct i2c_client *c); -extern int tea5761_autodetection(struct i2c_client *c); - -#define tuner_warn(fmt, arg...) do {\ - printk(KERN_WARNING "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ - i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) -#define tuner_info(fmt, arg...) do {\ - printk(KERN_INFO "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ - i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) -#define tuner_dbg(fmt, arg...) do {\ - extern int tuner_debug; \ - if (tuner_debug) \ - printk(KERN_DEBUG "%s %d-%04x: " fmt, t->i2c.driver->driver.name, \ - i2c_adapter_id(t->i2c.adapter), t->i2c.addr , ##arg); } while (0) - #endif /* __KERNEL__ */ #endif /* _TUNER_H */ -- cgit v1.2.3-70-g09d2 From 804b4458943f14bf144d3c3ba50097ced9b27b29 Mon Sep 17 00:00:00 2001 From: Oliver Endriss Date: Thu, 12 Jul 2007 20:37:50 -0300 Subject: V4L/DVB (5835): saa7146/dvb-ttpci: Fix signedness warnings (gcc 4.1.1, kernel 2.6.22) Fix signedness warnings (gcc 4.1.1, kernel 2.6.22). 
Signed-off-by: Oliver Endriss Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/saa7146_core.c | 8 ++++---- drivers/media/common/saa7146_video.c | 8 ++++---- drivers/media/dvb/ttpci/av7110_av.c | 8 ++++---- drivers/media/dvb/ttpci/av7110_ca.c | 4 ++-- drivers/media/dvb/ttpci/av7110_hw.c | 8 ++++---- drivers/media/dvb/ttpci/av7110_hw.h | 2 +- drivers/media/dvb/ttpci/av7110_v4l.c | 2 +- include/media/saa7146.h | 6 +++--- 8 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/media/common/saa7146_core.c b/drivers/media/common/saa7146_core.c index ef3e54cd940..ba6701e9767 100644 --- a/drivers/media/common/saa7146_core.c +++ b/drivers/media/common/saa7146_core.c @@ -27,7 +27,7 @@ static int saa7146_num; unsigned int saa7146_debug; -module_param(saa7146_debug, int, 0644); +module_param(saa7146_debug, uint, 0644); MODULE_PARM_DESC(saa7146_debug, "debug level (default: 0)"); #if 0 @@ -130,10 +130,10 @@ static struct scatterlist* vmalloc_to_sg(unsigned char *virt, int nr_pages) /********************************************************************************/ /* common page table functions */ -char *saa7146_vmalloc_build_pgtable(struct pci_dev *pci, long length, struct saa7146_pgtable *pt) +void *saa7146_vmalloc_build_pgtable(struct pci_dev *pci, long length, struct saa7146_pgtable *pt) { int pages = (length+PAGE_SIZE-1)/PAGE_SIZE; - char *mem = vmalloc_32(length); + void *mem = vmalloc_32(length); int slen = 0; if (NULL == mem) @@ -168,7 +168,7 @@ err_null: return NULL; } -void saa7146_vfree_destroy_pgtable(struct pci_dev *pci, char *mem, struct saa7146_pgtable *pt) +void saa7146_vfree_destroy_pgtable(struct pci_dev *pci, void *mem, struct saa7146_pgtable *pt) { pci_unmap_sg(pci, pt->slist, pt->nents, PCI_DMA_FROMDEVICE); saa7146_pgtable_free(pci, pt); diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c index e3d04a4cef4..664280c78ff 100644 --- a/drivers/media/common/saa7146_video.c 
+++ b/drivers/media/common/saa7146_video.c @@ -889,9 +889,9 @@ int saa7146_video_do_ioctl(struct inode *inode, struct file *file, unsigned int DEB_EE(("VIDIOC_QUERYCAP\n")); - strcpy(cap->driver, "saa7146 v4l2"); - strlcpy(cap->card, dev->ext->name, sizeof(cap->card)); - sprintf(cap->bus_info,"PCI:%s", pci_name(dev->pci)); + strcpy((char *)cap->driver, "saa7146 v4l2"); + strlcpy((char *)cap->card, dev->ext->name, sizeof(cap->card)); + sprintf((char *)cap->bus_info,"PCI:%s", pci_name(dev->pci)); cap->version = SAA7146_VERSION_CODE; cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | @@ -968,7 +968,7 @@ int saa7146_video_do_ioctl(struct inode *inode, struct file *file, unsigned int } memset(f,0,sizeof(*f)); f->index = index; - strlcpy(f->description,formats[index].name,sizeof(f->description)); + strlcpy((char *)f->description,formats[index].name,sizeof(f->description)); f->pixelformat = formats[index].pixelformat; break; } diff --git a/drivers/media/dvb/ttpci/av7110_av.c b/drivers/media/dvb/ttpci/av7110_av.c index 58678c05aa5..f7a8219d7cf 100644 --- a/drivers/media/dvb/ttpci/av7110_av.c +++ b/drivers/media/dvb/ttpci/av7110_av.c @@ -391,7 +391,7 @@ static int get_video_format(struct av7110 *av7110, u8 *buf, int count) ****************************************************************************/ static inline long aux_ring_buffer_write(struct dvb_ringbuffer *rbuf, - const char *buf, unsigned long count) + const u8 *buf, unsigned long count) { unsigned long todo = count; int free; @@ -436,7 +436,7 @@ static void play_audio_cb(u8 *buf, int count, void *priv) #define FREE_COND (dvb_ringbuffer_free(&av7110->avout) >= 20 * 1024 && \ dvb_ringbuffer_free(&av7110->aout) >= 20 * 1024) -static ssize_t dvb_play(struct av7110 *av7110, const u8 __user *buf, +static ssize_t dvb_play(struct av7110 *av7110, const char __user *buf, unsigned long count, int nonblock, int type) { unsigned long todo = count, n; @@ -499,7 +499,7 @@ static ssize_t dvb_play_kernel(struct av7110 *av7110, const u8 
*buf, return count - todo; } -static ssize_t dvb_aplay(struct av7110 *av7110, const u8 __user *buf, +static ssize_t dvb_aplay(struct av7110 *av7110, const char __user *buf, unsigned long count, int nonblock, int type) { unsigned long todo = count, n; @@ -959,7 +959,7 @@ static u8 iframe_header[] = { 0x00, 0x00, 0x01, 0xe0, 0x00, 0x00, 0x80, 0x00, 0x #define MIN_IFRAME 400000 -static int play_iframe(struct av7110 *av7110, u8 __user *buf, unsigned int len, int nonblock) +static int play_iframe(struct av7110 *av7110, char __user *buf, unsigned int len, int nonblock) { int i, n; diff --git a/drivers/media/dvb/ttpci/av7110_ca.c b/drivers/media/dvb/ttpci/av7110_ca.c index e1c1294bb76..c58e3fc509e 100644 --- a/drivers/media/dvb/ttpci/av7110_ca.c +++ b/drivers/media/dvb/ttpci/av7110_ca.c @@ -151,7 +151,7 @@ static ssize_t ci_ll_write(struct dvb_ringbuffer *cibuf, struct file *file, { int free; int non_blocking = file->f_flags & O_NONBLOCK; - char *page = (char *)__get_free_page(GFP_USER); + u8 *page = (u8 *)__get_free_page(GFP_USER); int res; if (!page) @@ -208,7 +208,7 @@ static ssize_t ci_ll_read(struct dvb_ringbuffer *cibuf, struct file *file, return -EINVAL; DVB_RINGBUFFER_SKIP(cibuf, 2); - return dvb_ringbuffer_read(cibuf, buf, len, 1); + return dvb_ringbuffer_read(cibuf, (u8 *)buf, len, 1); } static int dvb_ca_open(struct inode *inode, struct file *file) diff --git a/drivers/media/dvb/ttpci/av7110_hw.c b/drivers/media/dvb/ttpci/av7110_hw.c index 70aee4eb5da..515e8232e02 100644 --- a/drivers/media/dvb/ttpci/av7110_hw.c +++ b/drivers/media/dvb/ttpci/av7110_hw.c @@ -158,7 +158,7 @@ static int load_dram(struct av7110 *av7110, u32 *data, int len) } dprintk(4, "writing DRAM block %d\n", i); mwdebi(av7110, DEBISWAB, bootblock, - ((char*)data) + i * AV7110_BOOT_MAX_SIZE, AV7110_BOOT_MAX_SIZE); + ((u8 *)data) + i * AV7110_BOOT_MAX_SIZE, AV7110_BOOT_MAX_SIZE); bootblock ^= 0x1400; iwdebi(av7110, DEBISWAB, AV7110_BOOT_BASE, swab32(base), 4); iwdebi(av7110, DEBINOSWAP, 
AV7110_BOOT_SIZE, AV7110_BOOT_MAX_SIZE, 2); @@ -173,10 +173,10 @@ static int load_dram(struct av7110 *av7110, u32 *data, int len) } if (rest > 4) mwdebi(av7110, DEBISWAB, bootblock, - ((char*)data) + i * AV7110_BOOT_MAX_SIZE, rest); + ((u8 *)data) + i * AV7110_BOOT_MAX_SIZE, rest); else mwdebi(av7110, DEBISWAB, bootblock, - ((char*)data) + i * AV7110_BOOT_MAX_SIZE - 4, rest + 4); + ((u8 *)data) + i * AV7110_BOOT_MAX_SIZE - 4, rest + 4); iwdebi(av7110, DEBISWAB, AV7110_BOOT_BASE, swab32(base), 4); iwdebi(av7110, DEBINOSWAP, AV7110_BOOT_SIZE, rest, 2); @@ -751,7 +751,7 @@ static int FlushText(struct av7110 *av7110) return 0; } -static int WriteText(struct av7110 *av7110, u8 win, u16 x, u16 y, u8* buf) +static int WriteText(struct av7110 *av7110, u8 win, u16 x, u16 y, char *buf) { int i, ret; unsigned long start; diff --git a/drivers/media/dvb/ttpci/av7110_hw.h b/drivers/media/dvb/ttpci/av7110_hw.h index 673d9b3f064..74d940f75da 100644 --- a/drivers/media/dvb/ttpci/av7110_hw.h +++ b/drivers/media/dvb/ttpci/av7110_hw.h @@ -393,7 +393,7 @@ static inline void iwdebi(struct av7110 *av7110, u32 config, int addr, u32 val, } /* buffer writes */ -static inline void mwdebi(struct av7110 *av7110, u32 config, int addr, char *val, int count) +static inline void mwdebi(struct av7110 *av7110, u32 config, int addr, u8 *val, int count) { memcpy(av7110->debi_virt, val, count); av7110_debiwrite(av7110, config, addr, 0, count); diff --git a/drivers/media/dvb/ttpci/av7110_v4l.c b/drivers/media/dvb/ttpci/av7110_v4l.c index fcd9994058d..87afaebc070 100644 --- a/drivers/media/dvb/ttpci/av7110_v4l.c +++ b/drivers/media/dvb/ttpci/av7110_v4l.c @@ -333,7 +333,7 @@ static int av7110_ioctl(struct saa7146_fh *fh, unsigned int cmd, void *arg) return -EINVAL; memset(t, 0, sizeof(*t)); - strcpy(t->name, "Television"); + strcpy((char *)t->name, "Television"); t->type = V4L2_TUNER_ANALOG_TV; t->capability = V4L2_TUNER_CAP_NORM | V4L2_TUNER_CAP_STEREO | diff --git a/include/media/saa7146.h 
b/include/media/saa7146.h index d3f4f5a3821..67703249b24 100644 --- a/include/media/saa7146.h +++ b/include/media/saa7146.h @@ -114,7 +114,7 @@ struct saa7146_dev struct mutex lock; unsigned char __iomem *mem; /* pointer to mapped IO memory */ - int revision; /* chip revision; needed for bug-workarounds*/ + u32 revision; /* chip revision; needed for bug-workarounds*/ /* pci-device & irq stuff*/ char name[32]; @@ -157,8 +157,8 @@ struct saa7146_format* format_by_fourcc(struct saa7146_dev *dev, int fourcc); int saa7146_pgtable_alloc(struct pci_dev *pci, struct saa7146_pgtable *pt); void saa7146_pgtable_free(struct pci_dev *pci, struct saa7146_pgtable *pt); int saa7146_pgtable_build_single(struct pci_dev *pci, struct saa7146_pgtable *pt, struct scatterlist *list, int length ); -char *saa7146_vmalloc_build_pgtable(struct pci_dev *pci, long length, struct saa7146_pgtable *pt); -void saa7146_vfree_destroy_pgtable(struct pci_dev *pci, char *mem, struct saa7146_pgtable *pt); +void *saa7146_vmalloc_build_pgtable(struct pci_dev *pci, long length, struct saa7146_pgtable *pt); +void saa7146_vfree_destroy_pgtable(struct pci_dev *pci, void *mem, struct saa7146_pgtable *pt); void saa7146_setgpio(struct saa7146_dev *dev, int port, u32 data); int saa7146_wait_for_debi_done(struct saa7146_dev *dev, int nobusyloop); -- cgit v1.2.3-70-g09d2 From c6083cd61b5a64a1c73d1634744382f54cb99595 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Fri, 25 May 2007 18:47:47 -0700 Subject: [AVR32] faster avr32 unaligned access Use a more conventional implementation for unaligned access, and include an AT32AP-specific optimization: the CPU will handle unaligned words. The result is always faster and smaller for 8, 16, and 32 bit values. For 64 bit quantities, it's presumably larger. 
Signed-off-by: David Brownell Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/unaligned.h | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h index 3042723fcbf..791361786fc 100644 --- a/include/asm-avr32/unaligned.h +++ b/include/asm-avr32/unaligned.h @@ -6,20 +6,31 @@ * implementation. The AVR32 AP implementation can handle unaligned * words, but halfwords must be halfword-aligned, and doublewords must * be word-aligned. - * - * TODO: Make all this CPU-specific and optimize. */ -#include +#include -/* Use memmove here, so gcc does not insert a __builtin_memcpy. */ +#ifdef CONFIG_CPU_AT32AP7000 +/* REVISIT calling memmove() may be smaller for 64-bit values ... */ + +#undef get_unaligned #define get_unaligned(ptr) \ - ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) + ___get_unaligned(ptr, sizeof((*ptr))) +#define ___get_unaligned(ptr, size) \ + ((size == 4) ? *(ptr) : __get_unaligned(ptr, size)) + +#undef put_unaligned +#define put_unaligned(val, ptr) \ + ___put_unaligned((__u64)(val), ptr, sizeof((*ptr))) +#define ___put_unaligned(val, ptr, size) \ +do { \ + if (size == 4) \ + *(ptr) = (val); \ + else \ + __put_unaligned(val, ptr, size); \ +} while (0) -#define put_unaligned(val, ptr) \ - ({ __typeof__(*(ptr)) __tmp = (val); \ - memmove((ptr), &__tmp, sizeof(*(ptr))); \ - (void)0; }) +#endif #endif /* __ASM_AVR32_UNALIGNED_H */ -- cgit v1.2.3-70-g09d2 From 7a5b80590772c29bba1d54d3685622177d6fe39f Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 4 Jun 2007 12:58:30 +0200 Subject: [AVR32] Split SM device into PM, RTC, WDT and EIC Split the SM platform device into separate platform devices for PM, RTC, WDT and EIC. This is more correct according to the documentation and allows us to simplify the code a little. Also turn the EIC driver into a real platform driver. 
Signed-off-by: Haavard Skinnemoen Acked-by: Hans-Christian Egtvedt --- arch/avr32/mach-at32ap/at32ap.c | 31 ----- arch/avr32/mach-at32ap/at32ap7000.c | 213 ++++++++++++++++++------------- arch/avr32/mach-at32ap/extint.c | 200 +++++++++++++++++++---------- arch/avr32/mach-at32ap/pm.h | 112 +++++++++++++++++ arch/avr32/mach-at32ap/sm.h | 242 ------------------------------------ include/asm-avr32/arch-at32ap/sm.h | 27 ---- 6 files changed, 372 insertions(+), 453 deletions(-) create mode 100644 arch/avr32/mach-at32ap/pm.h delete mode 100644 arch/avr32/mach-at32ap/sm.h delete mode 100644 include/asm-avr32/arch-at32ap/sm.h (limited to 'include') diff --git a/arch/avr32/mach-at32ap/at32ap.c b/arch/avr32/mach-at32ap/at32ap.c index 90f207e8e96..7c4987f3287 100644 --- a/arch/avr32/mach-at32ap/at32ap.c +++ b/arch/avr32/mach-at32ap/at32ap.c @@ -11,41 +11,10 @@ #include #include -#include - #include -#include - -struct at32_sm system_manager; - -static int __init at32_sm_init(void) -{ - struct resource *regs; - struct at32_sm *sm = &system_manager; - int ret = -ENXIO; - - regs = platform_get_resource(&at32_sm_device, IORESOURCE_MEM, 0); - if (!regs) - goto fail; - - spin_lock_init(&sm->lock); - sm->pdev = &at32_sm_device; - - ret = -ENOMEM; - sm->regs = ioremap(regs->start, regs->end - regs->start + 1); - if (!sm->regs) - goto fail; - - return 0; - -fail: - printk(KERN_ERR "Failed to initialize System Manager: %d\n", ret); - return ret; -} void __init setup_platform(void) { - at32_sm_init(); at32_clock_init(); at32_portmux_init(); } diff --git a/arch/avr32/mach-at32ap/at32ap7000.c b/arch/avr32/mach-at32ap/at32ap7000.c index 4dda42d3f6d..5faa97e5ab1 100644 --- a/arch/avr32/mach-at32ap/at32ap7000.c +++ b/arch/avr32/mach-at32ap/at32ap7000.c @@ -17,14 +17,20 @@ #include #include #include -#include #include