Diffstat (limited to 'arch/sparc64/kernel')
28 files changed, 6105 insertions, 1013 deletions
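The bulk of this diff is two new drivers, ds.c (Domain Services) and ldc.c (Logical Domain Channels), both shown below. Every DS message is framed as a ds_msg_tag { type, len } header whose len counts only the bytes that follow the tag; ds_event() further down consumes a message with two ldc_read() calls, one for the tag and one for the payload. A minimal reader built on the same calls (sketch only; the helper name is hypothetical, error handling is trimmed, and the buffer must hold the largest expected message, ds.c itself uses a 4096-byte rcv_buf):

	/* Read one complete DS message: the fixed tag first, then tag->len
	 * payload bytes placed directly behind it in the same buffer.
	 */
	static int ds_read_one(struct ldc_channel *lp, struct ds_msg_tag *tag)
	{
		int err;

		err = ldc_read(lp, tag, sizeof(*tag));
		if (err <= 0)
			return err;	/* 0 means no data, negative means error */

		err = ldc_read(lp, tag + 1, tag->len);
		if (err < 0)
			return err;

		return sizeof(*tag) + err;	/* total bytes consumed */
	}

Service handles follow a similar convention: register_services() encodes the index of the ds_states[] entry in the upper 32 bits of each 64-bit handle, which is how find_cap() maps the handle in an incoming DS_DATA packet back to its capability.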
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index f964bf28d21..b66876bf410 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o pci_iommu.o \ pci_psycho.o pci_sabre.o pci_schizo.o \ pci_sun4v.o pci_sun4v_asm.o pci_fire.o -obj-$(CONFIG_SMP) += smp.o trampoline.o +obj-$(CONFIG_SMP) += smp.o trampoline.o hvtramp.o obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o @@ -26,6 +26,7 @@ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o obj-y += $(obj-yy) diff --git a/arch/sparc64/kernel/auxio.c b/arch/sparc64/kernel/auxio.c index 826118ee53d..7b379761e9f 100644 --- a/arch/sparc64/kernel/auxio.c +++ b/arch/sparc64/kernel/auxio.c @@ -155,7 +155,7 @@ static struct of_platform_driver auxio_driver = { static int __init auxio_init(void) { - return of_register_driver(&auxio_driver, &of_bus_type); + return of_register_driver(&auxio_driver, &of_platform_bus_type); } /* Must be after subsys_initcall() so that busses are probed. Must diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c new file mode 100644 index 00000000000..9f472a79d37 --- /dev/null +++ b/arch/sparc64/kernel/ds.c @@ -0,0 +1,1246 @@ +/* ds.c: Domain Services driver for Logical Domains + * + * Copyright (C) 2007 David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/kthread.h> +#include <linux/reboot.h> +#include <linux/cpu.h> + +#include <asm/ldc.h> +#include <asm/vio.h> +#include <asm/mdesc.h> +#include <asm/head.h> +#include <asm/irq.h> + +#define DRV_MODULE_NAME "ds" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.0" +#define DRV_MODULE_RELDATE "Jul 11, 2007" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Sun LDOM domain services driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +struct ds_msg_tag { + __u32 type; +#define DS_INIT_REQ 0x00 +#define DS_INIT_ACK 0x01 +#define DS_INIT_NACK 0x02 +#define DS_REG_REQ 0x03 +#define DS_REG_ACK 0x04 +#define DS_REG_NACK 0x05 +#define DS_UNREG_REQ 0x06 +#define DS_UNREG_ACK 0x07 +#define DS_UNREG_NACK 0x08 +#define DS_DATA 0x09 +#define DS_NACK 0x0a + + __u32 len; +}; + +/* Result codes */ +#define DS_OK 0x00 +#define DS_REG_VER_NACK 0x01 +#define DS_REG_DUP 0x02 +#define DS_INV_HDL 0x03 +#define DS_TYPE_UNKNOWN 0x04 + +struct ds_version { + __u16 major; + __u16 minor; +}; + +struct ds_ver_req { + struct ds_msg_tag tag; + struct ds_version ver; +}; + +struct ds_ver_ack { + struct ds_msg_tag tag; + __u16 minor; +}; + +struct ds_ver_nack { + struct ds_msg_tag tag; + __u16 major; +}; + +struct ds_reg_req { + struct ds_msg_tag tag; + __u64 handle; + __u16 major; + __u16 minor; + char svc_id[0]; +}; + +struct ds_reg_ack { + struct ds_msg_tag tag; + __u64 handle; + __u16 minor; +}; + +struct ds_reg_nack { + struct ds_msg_tag tag; + __u64 handle; + __u16 major; +}; + +struct ds_unreg_req { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_unreg_ack { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_unreg_nack { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_data { + struct ds_msg_tag tag; + __u64 handle; +}; + +struct ds_data_nack { + struct ds_msg_tag tag; + __u64 handle; + __u64 result; +}; + +struct ds_info; +struct ds_cap_state { + __u64 handle; + + void (*data)(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); + + const char *service_id; + + u8 state; +#define CAP_STATE_UNKNOWN 0x00 +#define CAP_STATE_REG_SENT 0x01 +#define CAP_STATE_REGISTERED 0x02 +}; + +static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp, + void *buf, int len); +static void domain_shutdown_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +static void domain_panic_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +#ifdef CONFIG_HOTPLUG_CPU +static void dr_cpu_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +#endif +static void ds_pri_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); +static void ds_var_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len); + +struct ds_cap_state ds_states_template[] = { + { + .service_id = "md-update", + .data = md_update_data, + }, + { + .service_id = "domain-shutdown", + .data = domain_shutdown_data, + }, + { + .service_id = "domain-panic", + .data = domain_panic_data, + }, +#ifdef CONFIG_HOTPLUG_CPU + { + .service_id = "dr-cpu", + .data = dr_cpu_data, + }, +#endif + { + .service_id = "pri", + .data = ds_pri_data, + }, + { + .service_id = "var-config", + .data = ds_var_data, + }, + { + .service_id = "var-config-backup", + .data = ds_var_data, + }, +}; + +static DEFINE_SPINLOCK(ds_lock); + +struct ds_info { + struct ldc_channel *lp; + u8 hs_state; +#define DS_HS_START 0x01 +#define DS_HS_DONE 0x02 + + u64 id; + + void *rcv_buf; + int rcv_buf_len; + + struct ds_cap_state *ds_states; + int num_ds_states; + + struct ds_info *next; +}; + +static struct ds_info *ds_info_list; + +static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle) +{ + unsigned int index = handle >> 32; + + if (index >= dp->num_ds_states) + return NULL; + return &dp->ds_states[index]; +} + +static struct 
ds_cap_state *find_cap_by_string(struct ds_info *dp, + const char *name) +{ + int i; + + for (i = 0; i < dp->num_ds_states; i++) { + if (strcmp(dp->ds_states[i].service_id, name)) + continue; + + return &dp->ds_states[i]; + } + return NULL; +} + +static int __ds_send(struct ldc_channel *lp, void *data, int len) +{ + int err, limit = 1000; + + err = -EINVAL; + while (limit-- > 0) { + err = ldc_write(lp, data, len); + if (!err || (err != -EAGAIN)) + break; + udelay(1); + } + + return err; +} + +static int ds_send(struct ldc_channel *lp, void *data, int len) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&ds_lock, flags); + err = __ds_send(lp, data, len); + spin_unlock_irqrestore(&ds_lock, flags); + + return err; +} + +struct ds_md_update_req { + __u64 req_num; +}; + +struct ds_md_update_res { + __u64 req_num; + __u32 result; +}; + +static void md_update_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ldc_channel *lp = dp->lp; + struct ds_data *dpkt = buf; + struct ds_md_update_req *rp; + struct { + struct ds_data data; + struct ds_md_update_res res; + } pkt; + + rp = (struct ds_md_update_req *) (dpkt + 1); + + printk(KERN_INFO "ds-%lu: Machine description update.\n", dp->id); + + mdesc_update(); + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); + pkt.data.handle = cp->handle; + pkt.res.req_num = rp->req_num; + pkt.res.result = DS_OK; + + ds_send(lp, &pkt, sizeof(pkt)); +} + +struct ds_shutdown_req { + __u64 req_num; + __u32 ms_delay; +}; + +struct ds_shutdown_res { + __u64 req_num; + __u32 result; + char reason[1]; +}; + +static void domain_shutdown_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ldc_channel *lp = dp->lp; + struct ds_data *dpkt = buf; + struct ds_shutdown_req *rp; + struct { + struct ds_data data; + struct ds_shutdown_res res; + } pkt; + + rp = (struct ds_shutdown_req *) (dpkt + 1); + + printk(KERN_ALERT "ds-%lu: Shutdown request from " + "LDOM manager received.\n", dp->id); + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); + pkt.data.handle = cp->handle; + pkt.res.req_num = rp->req_num; + pkt.res.result = DS_OK; + pkt.res.reason[0] = 0; + + ds_send(lp, &pkt, sizeof(pkt)); + + orderly_poweroff(true); +} + +struct ds_panic_req { + __u64 req_num; +}; + +struct ds_panic_res { + __u64 req_num; + __u32 result; + char reason[1]; +}; + +static void domain_panic_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ldc_channel *lp = dp->lp; + struct ds_data *dpkt = buf; + struct ds_panic_req *rp; + struct { + struct ds_data data; + struct ds_panic_res res; + } pkt; + + rp = (struct ds_panic_req *) (dpkt + 1); + + printk(KERN_ALERT "ds-%lu: Panic request from " + "LDOM manager received.\n", dp->id); + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag); + pkt.data.handle = cp->handle; + pkt.res.req_num = rp->req_num; + pkt.res.result = DS_OK; + pkt.res.reason[0] = 0; + + ds_send(lp, &pkt, sizeof(pkt)); + + panic("PANIC requested by LDOM manager."); +} + +#ifdef CONFIG_HOTPLUG_CPU +struct dr_cpu_tag { + __u64 req_num; + __u32 type; +#define DR_CPU_CONFIGURE 0x43 +#define DR_CPU_UNCONFIGURE 0x55 +#define DR_CPU_FORCE_UNCONFIGURE 0x46 +#define DR_CPU_STATUS 0x53 + +/* Responses */ +#define DR_CPU_OK 0x6f +#define DR_CPU_ERROR 0x65 + + __u32 
num_records; +}; + +struct dr_cpu_resp_entry { + __u32 cpu; + __u32 result; +#define DR_CPU_RES_OK 0x00 +#define DR_CPU_RES_FAILURE 0x01 +#define DR_CPU_RES_BLOCKED 0x02 +#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03 +#define DR_CPU_RES_NOT_IN_MD 0x04 + + __u32 stat; +#define DR_CPU_STAT_NOT_PRESENT 0x00 +#define DR_CPU_STAT_UNCONFIGURED 0x01 +#define DR_CPU_STAT_CONFIGURED 0x02 + + __u32 str_off; +}; + +static void __dr_cpu_send_error(struct ds_info *dp, + struct ds_cap_state *cp, + struct ds_data *data) +{ + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + struct { + struct ds_data data; + struct dr_cpu_tag tag; + } pkt; + int msg_len; + + memset(&pkt, 0, sizeof(pkt)); + pkt.data.tag.type = DS_DATA; + pkt.data.handle = cp->handle; + pkt.tag.req_num = tag->req_num; + pkt.tag.type = DR_CPU_ERROR; + pkt.tag.num_records = 0; + + msg_len = (sizeof(struct ds_data) + + sizeof(struct dr_cpu_tag)); + + pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + + __ds_send(dp->lp, &pkt, msg_len); +} + +static void dr_cpu_send_error(struct ds_info *dp, + struct ds_cap_state *cp, + struct ds_data *data) +{ + unsigned long flags; + + spin_lock_irqsave(&ds_lock, flags); + __dr_cpu_send_error(dp, cp, data); + spin_unlock_irqrestore(&ds_lock, flags); +} + +#define CPU_SENTINEL 0xffffffff + +static void purge_dups(u32 *list, u32 num_ents) +{ + unsigned int i; + + for (i = 0; i < num_ents; i++) { + u32 cpu = list[i]; + unsigned int j; + + if (cpu == CPU_SENTINEL) + continue; + + for (j = i + 1; j < num_ents; j++) { + if (list[j] == cpu) + list[j] = CPU_SENTINEL; + } + } +} + +static int dr_cpu_size_response(int ncpus) +{ + return (sizeof(struct ds_data) + + sizeof(struct dr_cpu_tag) + + (sizeof(struct dr_cpu_resp_entry) * ncpus)); +} + +static void dr_cpu_init_response(struct ds_data *resp, u64 req_num, + u64 handle, int resp_len, int ncpus, + cpumask_t *mask, u32 default_stat) +{ + struct dr_cpu_resp_entry *ent; + struct dr_cpu_tag *tag; + int i, cpu; + + tag = (struct dr_cpu_tag *) (resp + 1); + ent = (struct dr_cpu_resp_entry *) (tag + 1); + + resp->tag.type = DS_DATA; + resp->tag.len = resp_len - sizeof(struct ds_msg_tag); + resp->handle = handle; + tag->req_num = req_num; + tag->type = DR_CPU_OK; + tag->num_records = ncpus; + + i = 0; + for_each_cpu_mask(cpu, *mask) { + ent[i].cpu = cpu; + ent[i].result = DR_CPU_RES_OK; + ent[i].stat = default_stat; + i++; + } + BUG_ON(i != ncpus); +} + +static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus, + u32 res, u32 stat) +{ + struct dr_cpu_resp_entry *ent; + struct dr_cpu_tag *tag; + int i; + + tag = (struct dr_cpu_tag *) (resp + 1); + ent = (struct dr_cpu_resp_entry *) (tag + 1); + + for (i = 0; i < ncpus; i++) { + if (ent[i].cpu != cpu) + continue; + ent[i].result = res; + ent[i].stat = stat; + break; + } +} + +static int dr_cpu_configure(struct ds_info *dp, + struct ds_cap_state *cp, + u64 req_num, + cpumask_t *mask) +{ + struct ds_data *resp; + int resp_len, ncpus, cpu; + unsigned long flags; + + ncpus = cpus_weight(*mask); + resp_len = dr_cpu_size_response(ncpus); + resp = kzalloc(resp_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + dr_cpu_init_response(resp, req_num, cp->handle, + resp_len, ncpus, mask, + DR_CPU_STAT_CONFIGURED); + + mdesc_fill_in_cpu_data(*mask); + + for_each_cpu_mask(cpu, *mask) { + int err; + + printk(KERN_INFO "ds-%lu: Starting cpu %d...\n", + dp->id, cpu); + err = cpu_up(cpu); + if (err) { + __u32 res = DR_CPU_RES_FAILURE; + __u32 stat = DR_CPU_STAT_UNCONFIGURED; + + if (!cpu_present(cpu)) { + /* CPU not 
present in MD */ + res = DR_CPU_RES_NOT_IN_MD; + stat = DR_CPU_STAT_NOT_PRESENT; + } else if (err == -ENODEV) { + /* CPU did not call in successfully */ + res = DR_CPU_RES_CPU_NOT_RESPONDING; + } + + printk(KERN_INFO "ds-%lu: CPU startup failed err=%d\n", + dp->id, err); + dr_cpu_mark(resp, cpu, ncpus, res, stat); + } + } + + spin_lock_irqsave(&ds_lock, flags); + __ds_send(dp->lp, resp, resp_len); + spin_unlock_irqrestore(&ds_lock, flags); + + kfree(resp); + + /* Redistribute IRQs, taking into account the new cpus. */ + fixup_irqs(); + + return 0; +} + +static int dr_cpu_unconfigure(struct ds_info *dp, + struct ds_cap_state *cp, + u64 req_num, + cpumask_t *mask) +{ + struct ds_data *resp; + int resp_len, ncpus, cpu; + unsigned long flags; + + ncpus = cpus_weight(*mask); + resp_len = dr_cpu_size_response(ncpus); + resp = kzalloc(resp_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + dr_cpu_init_response(resp, req_num, cp->handle, + resp_len, ncpus, mask, + DR_CPU_STAT_UNCONFIGURED); + + for_each_cpu_mask(cpu, *mask) { + int err; + + printk(KERN_INFO "ds-%lu: Shutting down cpu %d...\n", + dp->id, cpu); + err = cpu_down(cpu); + if (err) + dr_cpu_mark(resp, cpu, ncpus, + DR_CPU_RES_FAILURE, + DR_CPU_STAT_CONFIGURED); + } + + spin_lock_irqsave(&ds_lock, flags); + __ds_send(dp->lp, resp, resp_len); + spin_unlock_irqrestore(&ds_lock, flags); + + kfree(resp); + + return 0; +} + +static void dr_cpu_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ds_data *data = buf; + struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1); + u32 *cpu_list = (u32 *) (tag + 1); + u64 req_num = tag->req_num; + cpumask_t mask; + unsigned int i; + int err; + + switch (tag->type) { + case DR_CPU_CONFIGURE: + case DR_CPU_UNCONFIGURE: + case DR_CPU_FORCE_UNCONFIGURE: + break; + + default: + dr_cpu_send_error(dp, cp, data); + return; + } + + purge_dups(cpu_list, tag->num_records); + + cpus_clear(mask); + for (i = 0; i < tag->num_records; i++) { + if (cpu_list[i] == CPU_SENTINEL) + continue; + + if (cpu_list[i] < NR_CPUS) + cpu_set(cpu_list[i], mask); + } + + if (tag->type == DR_CPU_CONFIGURE) + err = dr_cpu_configure(dp, cp, req_num, &mask); + else + err = dr_cpu_unconfigure(dp, cp, req_num, &mask); + + if (err) + dr_cpu_send_error(dp, cp, data); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +struct ds_pri_msg { + __u64 req_num; + __u64 type; +#define DS_PRI_REQUEST 0x00 +#define DS_PRI_DATA 0x01 +#define DS_PRI_UPDATE 0x02 +}; + +static void ds_pri_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ds_data *dpkt = buf; + struct ds_pri_msg *rp; + + rp = (struct ds_pri_msg *) (dpkt + 1); + + printk(KERN_INFO "ds-%lu: PRI REQ [%lx:%lx], len=%d\n", + dp->id, rp->req_num, rp->type, len); +} + +struct ds_var_hdr { + __u32 type; +#define DS_VAR_SET_REQ 0x00 +#define DS_VAR_DELETE_REQ 0x01 +#define DS_VAR_SET_RESP 0x02 +#define DS_VAR_DELETE_RESP 0x03 +}; + +struct ds_var_set_msg { + struct ds_var_hdr hdr; + char name_and_value[0]; +}; + +struct ds_var_delete_msg { + struct ds_var_hdr hdr; + char name[0]; +}; + +struct ds_var_resp { + struct ds_var_hdr hdr; + __u32 result; +#define DS_VAR_SUCCESS 0x00 +#define DS_VAR_NO_SPACE 0x01 +#define DS_VAR_INVALID_VAR 0x02 +#define DS_VAR_INVALID_VAL 0x03 +#define DS_VAR_NOT_PRESENT 0x04 +}; + +static DEFINE_MUTEX(ds_var_mutex); +static int ds_var_doorbell; +static int ds_var_response; + +static void ds_var_data(struct ds_info *dp, + struct ds_cap_state *cp, + void *buf, int len) +{ + struct ds_data *dpkt = buf; + struct 
ds_var_resp *rp; + + rp = (struct ds_var_resp *) (dpkt + 1); + + if (rp->hdr.type != DS_VAR_SET_RESP && + rp->hdr.type != DS_VAR_DELETE_RESP) + return; + + ds_var_response = rp->result; + wmb(); + ds_var_doorbell = 1; +} + +void ldom_set_var(const char *var, const char *value) +{ + struct ds_cap_state *cp; + struct ds_info *dp; + unsigned long flags; + + spin_lock_irqsave(&ds_lock, flags); + cp = NULL; + for (dp = ds_info_list; dp; dp = dp->next) { + struct ds_cap_state *tmp; + + tmp = find_cap_by_string(dp, "var-config"); + if (tmp && tmp->state == CAP_STATE_REGISTERED) { + cp = tmp; + break; + } + } + if (!cp) { + for (dp = ds_info_list; dp; dp = dp->next) { + struct ds_cap_state *tmp; + + tmp = find_cap_by_string(dp, "var-config-backup"); + if (tmp && tmp->state == CAP_STATE_REGISTERED) { + cp = tmp; + break; + } + } + } + spin_unlock_irqrestore(&ds_lock, flags); + + if (cp) { + union { + struct { + struct ds_data data; + struct ds_var_set_msg msg; + } header; + char all[512]; + } pkt; + char *base, *p; + int msg_len, loops; + + memset(&pkt, 0, sizeof(pkt)); + pkt.header.data.tag.type = DS_DATA; + pkt.header.data.handle = cp->handle; + pkt.header.msg.hdr.type = DS_VAR_SET_REQ; + base = p = &pkt.header.msg.name_and_value[0]; + strcpy(p, var); + p += strlen(var) + 1; + strcpy(p, value); + p += strlen(value) + 1; + + msg_len = (sizeof(struct ds_data) + + sizeof(struct ds_var_set_msg) + + (p - base)); + msg_len = (msg_len + 3) & ~3; + pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag); + + mutex_lock(&ds_var_mutex); + + spin_lock_irqsave(&ds_lock, flags); + ds_var_doorbell = 0; + ds_var_response = -1; + + __ds_send(dp->lp, &pkt, msg_len); + spin_unlock_irqrestore(&ds_lock, flags); + + loops = 1000; + while (ds_var_doorbell == 0) { + if (loops-- < 0) + break; + barrier(); + udelay(100); + } + + mutex_unlock(&ds_var_mutex); + + if (ds_var_doorbell == 0 || + ds_var_response != DS_VAR_SUCCESS) + printk(KERN_ERR "ds-%lu: var-config [%s:%s] " + "failed, response(%d).\n", + dp->id, var, value, + ds_var_response); + } else { + printk(KERN_ERR PFX "var-config not registered so " + "could not set (%s) variable to (%s).\n", + var, value); + } +} + +void ldom_reboot(const char *boot_command) +{ + /* Don't bother with any of this if the boot_command + * is empty. 
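+ *
+ * A worked example of the path below (the argument value is
+ * hypothetical): ldom_reboot("disk1") stores "boot disk1" in the
+ * "reboot-command" domain variable via ldom_set_var() above. On the
+ * wire that is a DS_DATA packet whose payload is the ds_var_set_msg
+ * header followed by the strings "reboot-command\0boot disk1\0",
+ * with the total length rounded up to a multiple of four. The domain
+ * is then reset with sun4v_mach_sir().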
+ */ + if (boot_command && strlen(boot_command)) { + char full_boot_str[256]; + + strcpy(full_boot_str, "boot "); + strcpy(full_boot_str + strlen("boot "), boot_command); + + ldom_set_var("reboot-command", full_boot_str); + } + sun4v_mach_sir(); +} + +void ldom_power_off(void) +{ + sun4v_mach_exit(0); +} + +static void ds_conn_reset(struct ds_info *dp) +{ + printk(KERN_ERR "ds-%lu: ds_conn_reset() from %p\n", + dp->id, __builtin_return_address(0)); +} + +static int register_services(struct ds_info *dp) +{ + struct ldc_channel *lp = dp->lp; + int i; + + for (i = 0; i < dp->num_ds_states; i++) { + struct { + struct ds_reg_req req; + u8 id_buf[256]; + } pbuf; + struct ds_cap_state *cp = &dp->ds_states[i]; + int err, msg_len; + u64 new_count; + + if (cp->state == CAP_STATE_REGISTERED) + continue; + + new_count = sched_clock() & 0xffffffff; + cp->handle = ((u64) i << 32) | new_count; + + msg_len = (sizeof(struct ds_reg_req) + + strlen(cp->service_id)); + + memset(&pbuf, 0, sizeof(pbuf)); + pbuf.req.tag.type = DS_REG_REQ; + pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag)); + pbuf.req.handle = cp->handle; + pbuf.req.major = 1; + pbuf.req.minor = 0; + strcpy(pbuf.req.svc_id, cp->service_id); + + err = __ds_send(lp, &pbuf, msg_len); + if (err > 0) + cp->state = CAP_STATE_REG_SENT; + } + return 0; +} + +static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt) +{ + + if (dp->hs_state == DS_HS_START) { + if (pkt->type != DS_INIT_ACK) + goto conn_reset; + + dp->hs_state = DS_HS_DONE; + + return register_services(dp); + } + + if (dp->hs_state != DS_HS_DONE) + goto conn_reset; + + if (pkt->type == DS_REG_ACK) { + struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt; + struct ds_cap_state *cp = find_cap(dp, ap->handle); + + if (!cp) { + printk(KERN_ERR "ds-%lu: REG ACK for unknown " + "handle %lx\n", dp->id, ap->handle); + return 0; + } + printk(KERN_INFO "ds-%lu: Registered %s service.\n", + dp->id, cp->service_id); + cp->state = CAP_STATE_REGISTERED; + } else if (pkt->type == DS_REG_NACK) { + struct ds_reg_nack *np = (struct ds_reg_nack *) pkt; + struct ds_cap_state *cp = find_cap(dp, np->handle); + + if (!cp) { + printk(KERN_ERR "ds-%lu: REG NACK for " + "unknown handle %lx\n", + dp->id, np->handle); + return 0; + } + cp->state = CAP_STATE_UNKNOWN; + } + + return 0; + +conn_reset: + ds_conn_reset(dp); + return -ECONNRESET; +} + +static void __send_ds_nack(struct ds_info *dp, u64 handle) +{ + struct ds_data_nack nack = { + .tag = { + .type = DS_NACK, + .len = (sizeof(struct ds_data_nack) - + sizeof(struct ds_msg_tag)), + }, + .handle = handle, + .result = DS_INV_HDL, + }; + + __ds_send(dp->lp, &nack, sizeof(nack)); +} + +static LIST_HEAD(ds_work_list); +static DECLARE_WAIT_QUEUE_HEAD(ds_wait); + +struct ds_queue_entry { + struct list_head list; + struct ds_info *dp; + int req_len; + int __pad; + u64 req[0]; +}; + +static void process_ds_work(void) +{ + struct ds_queue_entry *qp, *tmp; + unsigned long flags; + LIST_HEAD(todo); + + spin_lock_irqsave(&ds_lock, flags); + list_splice(&ds_work_list, &todo); + INIT_LIST_HEAD(&ds_work_list); + spin_unlock_irqrestore(&ds_lock, flags); + + list_for_each_entry_safe(qp, tmp, &todo, list) { + struct ds_data *dpkt = (struct ds_data *) qp->req; + struct ds_info *dp = qp->dp; + struct ds_cap_state *cp = find_cap(dp, dpkt->handle); + int req_len = qp->req_len; + + if (!cp) { + printk(KERN_ERR "ds-%lu: Data for unknown " + "handle %lu\n", + dp->id, dpkt->handle); + + spin_lock_irqsave(&ds_lock, flags); + __send_ds_nack(dp, dpkt->handle); + 
spin_unlock_irqrestore(&ds_lock, flags); + } else { + cp->data(dp, cp, dpkt, req_len); + } + + list_del(&qp->list); + kfree(qp); + } +} + +static int ds_thread(void *__unused) +{ + DEFINE_WAIT(wait); + + while (1) { + prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE); + if (list_empty(&ds_work_list)) + schedule(); + finish_wait(&ds_wait, &wait); + + if (kthread_should_stop()) + break; + + process_ds_work(); + } + + return 0; +} + +static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len) +{ + struct ds_data *dpkt = (struct ds_data *) pkt; + struct ds_queue_entry *qp; + + qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC); + if (!qp) { + __send_ds_nack(dp, dpkt->handle); + } else { + qp->dp = dp; + memcpy(&qp->req, pkt, len); + list_add_tail(&qp->list, &ds_work_list); + wake_up(&ds_wait); + } + return 0; +} + +static void ds_up(struct ds_info *dp) +{ + struct ldc_channel *lp = dp->lp; + struct ds_ver_req req; + int err; + + req.tag.type = DS_INIT_REQ; + req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag); + req.ver.major = 1; + req.ver.minor = 0; + + err = __ds_send(lp, &req, sizeof(req)); + if (err > 0) + dp->hs_state = DS_HS_START; +} + +static void ds_reset(struct ds_info *dp) +{ + int i; + + dp->hs_state = 0; + + for (i = 0; i < dp->num_ds_states; i++) { + struct ds_cap_state *cp = &dp->ds_states[i]; + + cp->state = CAP_STATE_UNKNOWN; + } +} + +static void ds_event(void *arg, int event) +{ + struct ds_info *dp = arg; + struct ldc_channel *lp = dp->lp; + unsigned long flags; + int err; + + spin_lock_irqsave(&ds_lock, flags); + + if (event == LDC_EVENT_UP) { + ds_up(dp); + spin_unlock_irqrestore(&ds_lock, flags); + return; + } + + if (event == LDC_EVENT_RESET) { + ds_reset(dp); + spin_unlock_irqrestore(&ds_lock, flags); + return; + } + + if (event != LDC_EVENT_DATA_READY) { + printk(KERN_WARNING "ds-%lu: Unexpected LDC event %d\n", + dp->id, event); + spin_unlock_irqrestore(&ds_lock, flags); + return; + } + + err = 0; + while (1) { + struct ds_msg_tag *tag; + + err = ldc_read(lp, dp->rcv_buf, sizeof(*tag)); + + if (unlikely(err < 0)) { + if (err == -ECONNRESET) + ds_conn_reset(dp); + break; + } + if (err == 0) + break; + + tag = dp->rcv_buf; + err = ldc_read(lp, tag + 1, tag->len); + + if (unlikely(err < 0)) { + if (err == -ECONNRESET) + ds_conn_reset(dp); + break; + } + if (err < tag->len) + break; + + if (tag->type < DS_DATA) + err = ds_handshake(dp, dp->rcv_buf); + else + err = ds_data(dp, dp->rcv_buf, + sizeof(*tag) + err); + if (err == -ECONNRESET) + break; + } + + spin_unlock_irqrestore(&ds_lock, flags); +} + +static int __devinit ds_probe(struct vio_dev *vdev, + const struct vio_device_id *id) +{ + static int ds_version_printed; + struct ldc_channel_config ds_cfg = { + .event = ds_event, + .mtu = 4096, + .mode = LDC_MODE_STREAM, + }; + struct mdesc_handle *hp; + struct ldc_channel *lp; + struct ds_info *dp; + const u64 *val; + int err, i; + + if (ds_version_printed++ == 0) + printk(KERN_INFO "%s", version); + + dp = kzalloc(sizeof(*dp), GFP_KERNEL); + err = -ENOMEM; + if (!dp) + goto out_err; + + hp = mdesc_grab(); + val = mdesc_get_property(hp, vdev->mp, "id", NULL); + if (val) + dp->id = *val; + mdesc_release(hp); + + dp->rcv_buf = kzalloc(4096, GFP_KERNEL); + if (!dp->rcv_buf) + goto out_free_dp; + + dp->rcv_buf_len = 4096; + + dp->ds_states = kzalloc(sizeof(ds_states_template), + GFP_KERNEL); + if (!dp->ds_states) + goto out_free_rcv_buf; + + memcpy(dp->ds_states, ds_states_template, + sizeof(ds_states_template)); + dp->num_ds_states = 
ARRAY_SIZE(ds_states_template); + + for (i = 0; i < dp->num_ds_states; i++) + dp->ds_states[i].handle = ((u64)i << 32); + + ds_cfg.tx_irq = vdev->tx_irq; + ds_cfg.rx_irq = vdev->rx_irq; + + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out_free_ds_states; + } + dp->lp = lp; + + err = ldc_bind(lp, "DS"); + if (err) + goto out_free_ldc; + + spin_lock_irq(&ds_lock); + dp->next = ds_info_list; + ds_info_list = dp; + spin_unlock_irq(&ds_lock); + + return err; + +out_free_ldc: + ldc_free(dp->lp); + +out_free_ds_states: + kfree(dp->ds_states); + +out_free_rcv_buf: + kfree(dp->rcv_buf); + +out_free_dp: + kfree(dp); + +out_err: + return err; +} + +static int ds_remove(struct vio_dev *vdev) +{ + return 0; +} + +static struct vio_device_id ds_match[] = { + { + .type = "domain-services-port", + }, + {}, +}; + +static struct vio_driver ds_driver = { + .id_table = ds_match, + .probe = ds_probe, + .remove = ds_remove, + .driver = { + .name = "ds", + .owner = THIS_MODULE, + } +}; + +static int __init ds_init(void) +{ + kthread_run(ds_thread, NULL, "kldomd"); + + return vio_register_driver(&ds_driver); +} + +subsys_initcall(ds_init); diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c index ad55a9bb50d..6d2956179cd 100644 --- a/arch/sparc64/kernel/ebus.c +++ b/arch/sparc64/kernel/ebus.c @@ -362,6 +362,7 @@ static int __init child_regs_nonstandard(struct linux_ebus_device *dev) static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *dev) { struct linux_ebus_child *child; + struct dev_archdata *sd; struct of_device *op; int i, len; @@ -387,6 +388,10 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de dev->irqs[i] = op->irqs[i]; } + sd = &dev->ofdev.dev.archdata; + sd->prom_node = dp; + sd->op = &dev->ofdev; + dev->ofdev.node = dp; dev->ofdev.dev.parent = &dev->bus->ofdev.dev; dev->ofdev.dev.bus = &ebus_bus_type; diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 77259526cb1..35feacb6b8e 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -458,7 +458,6 @@ tlb_fixup_done: or %g6, %lo(init_thread_union), %g6 ldx [%g6 + TI_TASK], %g4 mov %sp, %l6 - mov %o4, %l7 wr %g0, ASI_P, %asi mov 1, %g1 diff --git a/arch/sparc64/kernel/hvtramp.S b/arch/sparc64/kernel/hvtramp.S new file mode 100644 index 00000000000..a55c252e18c --- /dev/null +++ b/arch/sparc64/kernel/hvtramp.S @@ -0,0 +1,140 @@ +/* hvtramp.S: Hypervisor start-cpu trampoline code. + * + * Copyright (C) 2007 David S. Miller <davem@davemloft.net> + */ + +#include <asm/thread_info.h> +#include <asm/hypervisor.h> +#include <asm/scratchpad.h> +#include <asm/spitfire.h> +#include <asm/hvtramp.h> +#include <asm/pstate.h> +#include <asm/ptrace.h> +#include <asm/head.h> +#include <asm/asi.h> + + .text + .align 8 + .globl hv_cpu_startup, hv_cpu_startup_end + + /* This code executes directly out of the hypervisor + * with physical addressing (va==pa). %o0 contains + * our client argument which for Linux points to + * a descriptor data structure which defines the + * MMU entries we need to load up. + * + * After we set things up we enable the MMU and call + * into the kernel. + * + * First setup basic privileged cpu state. 
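+ *
+ * For reference, a sketch of the descriptor %o0 points at, as
+ * declared by asm/hvtramp.h from this same patch series (field
+ * widths match the lduw/ldx accesses below; maps[] carries
+ * num_mappings entries):
+ *
+ *	struct hvtramp_mapping {
+ *		__u64			vaddr;
+ *		__u64			tte;
+ *	};
+ *
+ *	struct hvtramp_descr {
+ *		__u32			cpu;
+ *		__u32			num_mappings;
+ *		__u64			fault_info_va;
+ *		__u64			fault_info_pa;
+ *		__u64			thread_reg;
+ *		struct hvtramp_mapping	maps[];
+ *	};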
+ */ +hv_cpu_startup: + SET_GL(0) + wrpr %g0, 15, %pil + wrpr %g0, 0, %canrestore + wrpr %g0, 0, %otherwin + wrpr %g0, 6, %cansave + wrpr %g0, 6, %cleanwin + wrpr %g0, 0, %cwp + wrpr %g0, 0, %wstate + wrpr %g0, 0, %tl + + sethi %hi(sparc64_ttable_tl0), %g1 + wrpr %g1, %tba + + mov %o0, %l0 + + lduw [%l0 + HVTRAMP_DESCR_CPU], %g1 + mov SCRATCHPAD_CPUID, %g2 + stxa %g1, [%g2] ASI_SCRATCHPAD + + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + mov 0, %l1 + lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2 + add %l0, HVTRAMP_DESCR_MAPS, %l3 + +1: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0 + clr %o1 + ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2 + mov HV_MMU_IMMU | HV_MMU_DMMU, %o3 + mov HV_FAST_MMU_MAP_PERM_ADDR, %o5 + ta HV_FAST_TRAP + + brnz,pn %o0, 80f + nop + + add %l1, 1, %l1 + cmp %l1, %l2 + blt,a,pt %xcc, 1b + add %l3, HVTRAMP_MAPPING_SIZE, %l3 + + ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0 + mov HV_FAST_MMU_FAULT_AREA_CONF, %o5 + ta HV_FAST_TRAP + + brnz,pn %o0, 80f + nop + + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate + + ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6 + + mov 1, %o0 + set 1f, %o1 + mov HV_FAST_MMU_ENABLE, %o5 + ta HV_FAST_TRAP + + ba,pt %xcc, 80f + nop + +1: + wr %g0, 0, %fprs + wr %g0, ASI_P, %asi + + mov PRIMARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov SECONDARY_CONTEXT, %g7 + stxa %g0, [%g7] ASI_MMU + membar #Sync + + mov %l6, %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp + + call init_irqwork_curcpu + nop + call hard_smp_processor_id + nop + + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 + call sun4v_init_mondo_queues + mov 1, %o3 + + call init_cur_cpu_trap + mov %g6, %o0 + + wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate + + call smp_callin + nop + call cpu_idle + mov 0, %o0 + call cpu_panic + nop + +80: ba,pt %xcc, 80b + nop + + .align 8 +hv_cpu_startup_end: diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 6b6165d36fd..db31bf6b42d 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -87,7 +87,11 @@ struct ino_bucket ivector_table[NUM_IVECS] __attribute__ ((aligned (SMP_CACHE_BY */ #define irq_work(__cpu) &(trap_block[(__cpu)].irq_worklist) -static unsigned int virt_to_real_irq_table[NR_IRQS]; +static struct { + unsigned int irq; + unsigned int dev_handle; + unsigned int dev_ino; +} virt_to_real_irq_table[NR_IRQS]; static unsigned char virt_irq_alloc(unsigned int real_irq) { @@ -96,7 +100,7 @@ static unsigned char virt_irq_alloc(unsigned int real_irq) BUILD_BUG_ON(NR_IRQS >= 256); for (ent = 1; ent < NR_IRQS; ent++) { - if (!virt_to_real_irq_table[ent]) + if (!virt_to_real_irq_table[ent].irq) break; } if (ent >= NR_IRQS) { @@ -104,7 +108,7 @@ static unsigned char virt_irq_alloc(unsigned int real_irq) return 0; } - virt_to_real_irq_table[ent] = real_irq; + virt_to_real_irq_table[ent].irq = real_irq; return ent; } @@ -117,8 +121,8 @@ static void virt_irq_free(unsigned int virt_irq) if (virt_irq >= NR_IRQS) return; - real_irq = virt_to_real_irq_table[virt_irq]; - virt_to_real_irq_table[virt_irq] = 0; + real_irq = virt_to_real_irq_table[virt_irq].irq; + virt_to_real_irq_table[virt_irq].irq = 0; __bucket(real_irq)->virt_irq = 0; } @@ -126,7 +130,7 @@ static void virt_irq_free(unsigned int virt_irq) static unsigned int virt_to_real_irq(unsigned char virt_irq) { - return virt_to_real_irq_table[virt_irq]; + return virt_to_real_irq_table[virt_irq].irq; } /* @@ -293,6 +297,11 @@ 
static void sun4u_irq_enable(unsigned int virt_irq) } } +static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) +{ + sun4u_irq_enable(virt_irq); +} + static void sun4u_irq_disable(unsigned int virt_irq) { struct irq_handler_data *data = get_irq_chip_data(virt_irq); @@ -309,6 +318,10 @@ static void sun4u_irq_disable(unsigned int virt_irq) static void sun4u_irq_end(unsigned int virt_irq) { struct irq_handler_data *data = get_irq_chip_data(virt_irq); + struct irq_desc *desc = irq_desc + virt_irq; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; if (likely(data)) upa_writeq(ICLR_IDLE, data->iclr); @@ -327,19 +340,37 @@ static void sun4v_irq_enable(unsigned int virt_irq) err = sun4v_intr_settarget(ino, cpuid); if (err != HV_EOK) - printk("sun4v_intr_settarget(%x,%lu): err(%d)\n", - ino, cpuid, err); + printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " + "err(%d)\n", ino, cpuid, err); err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) - printk("sun4v_intr_setstate(%x): " + printk(KERN_ERR "sun4v_intr_setstate(%x): " "err(%d)\n", ino, err); err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED); if (err != HV_EOK) - printk("sun4v_intr_setenabled(%x): err(%d)\n", + printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n", ino, err); } } +static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) +{ + struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); + unsigned int ino = bucket - &ivector_table[0]; + + if (likely(bucket)) { + unsigned long cpuid; + int err; + + cpuid = irq_choose_cpu(virt_irq); + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " + "err(%d)\n", ino, cpuid, err); + } +} + static void sun4v_irq_disable(unsigned int virt_irq) { struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); @@ -350,7 +381,7 @@ static void sun4v_irq_disable(unsigned int virt_irq) err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); if (err != HV_EOK) - printk("sun4v_intr_setenabled(%x): " + printk(KERN_ERR "sun4v_intr_setenabled(%x): " "err(%d)\n", ino, err); } } @@ -373,13 +404,17 @@ static void sun4v_irq_end(unsigned int virt_irq) { struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); unsigned int ino = bucket - &ivector_table[0]; + struct irq_desc *desc = irq_desc + virt_irq; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; if (likely(bucket)) { int err; err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) - printk("sun4v_intr_setstate(%x): " + printk(KERN_ERR "sun4v_intr_setstate(%x): " "err(%d)\n", ino, err); } } @@ -387,7 +422,6 @@ static void sun4v_irq_end(unsigned int virt_irq) static void sun4v_virq_enable(unsigned int virt_irq) { struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); - unsigned int ino = bucket - &ivector_table[0]; if (likely(bucket)) { unsigned long cpuid, dev_handle, dev_ino; @@ -395,45 +429,65 @@ static void sun4v_virq_enable(unsigned int virt_irq) cpuid = irq_choose_cpu(virt_irq); - dev_handle = ino & IMAP_IGN; - dev_ino = ino & IMAP_INO; + dev_handle = virt_to_real_irq_table[virt_irq].dev_handle; + dev_ino = virt_to_real_irq_table[virt_irq].dev_ino; err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) - printk("sun4v_vintr_set_target(%lx,%lx,%lu): " + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " "err(%d)\n", dev_handle, dev_ino, cpuid, err); err = sun4v_vintr_set_state(dev_handle, dev_ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) - 
printk("sun4v_vintr_set_state(%lx,%lx," + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," "HV_INTR_STATE_IDLE): err(%d)\n", dev_handle, dev_ino, err); err = sun4v_vintr_set_valid(dev_handle, dev_ino, HV_INTR_ENABLED); if (err != HV_EOK) - printk("sun4v_vintr_set_state(%lx,%lx," + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," "HV_INTR_ENABLED): err(%d)\n", dev_handle, dev_ino, err); } } +static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) +{ + struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); + + if (likely(bucket)) { + unsigned long cpuid, dev_handle, dev_ino; + int err; + + cpuid = irq_choose_cpu(virt_irq); + + dev_handle = virt_to_real_irq_table[virt_irq].dev_handle; + dev_ino = virt_to_real_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " + "err(%d)\n", + dev_handle, dev_ino, cpuid, err); + } +} + static void sun4v_virq_disable(unsigned int virt_irq) { struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); - unsigned int ino = bucket - &ivector_table[0]; if (likely(bucket)) { unsigned long dev_handle, dev_ino; int err; - dev_handle = ino & IMAP_IGN; - dev_ino = ino & IMAP_INO; + dev_handle = virt_to_real_irq_table[virt_irq].dev_handle; + dev_ino = virt_to_real_irq_table[virt_irq].dev_ino; err = sun4v_vintr_set_valid(dev_handle, dev_ino, HV_INTR_DISABLED); if (err != HV_EOK) - printk("sun4v_vintr_set_state(%lx,%lx," + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," "HV_INTR_DISABLED): err(%d)\n", dev_handle, dev_ino, err); } @@ -442,19 +496,22 @@ static void sun4v_virq_disable(unsigned int virt_irq) static void sun4v_virq_end(unsigned int virt_irq) { struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq); - unsigned int ino = bucket - &ivector_table[0]; + struct irq_desc *desc = irq_desc + virt_irq; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; if (likely(bucket)) { unsigned long dev_handle, dev_ino; int err; - dev_handle = ino & IMAP_IGN; - dev_ino = ino & IMAP_INO; + dev_handle = virt_to_real_irq_table[virt_irq].dev_handle; + dev_ino = virt_to_real_irq_table[virt_irq].dev_ino; err = sun4v_vintr_set_state(dev_handle, dev_ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) - printk("sun4v_vintr_set_state(%lx,%lx," + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," "HV_INTR_STATE_IDLE): err(%d)\n", dev_handle, dev_ino, err); } @@ -477,6 +534,7 @@ static struct irq_chip sun4u_irq = { .enable = sun4u_irq_enable, .disable = sun4u_irq_disable, .end = sun4u_irq_end, + .set_affinity = sun4u_set_affinity, }; static struct irq_chip sun4u_irq_ack = { @@ -485,6 +543,7 @@ static struct irq_chip sun4u_irq_ack = { .disable = sun4u_irq_disable, .ack = run_pre_handler, .end = sun4u_irq_end, + .set_affinity = sun4u_set_affinity, }; static struct irq_chip sun4v_irq = { @@ -492,6 +551,7 @@ static struct irq_chip sun4v_irq = { .enable = sun4v_irq_enable, .disable = sun4v_irq_disable, .end = sun4v_irq_end, + .set_affinity = sun4v_set_affinity, }; static struct irq_chip sun4v_irq_ack = { @@ -500,6 +560,7 @@ static struct irq_chip sun4v_irq_ack = { .disable = sun4v_irq_disable, .ack = run_pre_handler, .end = sun4v_irq_end, + .set_affinity = sun4v_set_affinity, }; #ifdef CONFIG_PCI_MSI @@ -511,6 +572,7 @@ static struct irq_chip sun4v_msi = { .disable = sun4v_msi_disable, .ack = run_pre_handler, .end = sun4v_irq_end, + .set_affinity = sun4v_set_affinity, }; #endif @@ -519,6 +581,7 @@ static struct irq_chip sun4v_virq = { .enable = 
sun4v_virq_enable, .disable = sun4v_virq_disable, .end = sun4v_virq_end, + .set_affinity = sun4v_virt_set_affinity, }; static struct irq_chip sun4v_virq_ack = { @@ -527,6 +590,7 @@ static struct irq_chip sun4v_virq_ack = { .disable = sun4v_virq_disable, .ack = run_pre_handler, .end = sun4v_virq_end, + .set_affinity = sun4v_virt_set_affinity, }; void irq_install_pre_handler(int virt_irq, @@ -636,11 +700,12 @@ unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) { unsigned long sysino, hv_err; + unsigned int virq; - BUG_ON(devhandle & ~IMAP_IGN); - BUG_ON(devino & ~IMAP_INO); + BUG_ON(devhandle & devino); sysino = devhandle | devino; + BUG_ON(sysino & ~(IMAP_IGN | IMAP_INO)); hv_err = sun4v_vintr_set_cookie(devhandle, devino, sysino); if (hv_err) { @@ -649,7 +714,12 @@ unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) prom_halt(); } - return sun4v_build_common(sysino, &sun4v_virq); + virq = sun4v_build_common(sysino, &sun4v_virq); + + virt_to_real_irq_table[virq].dev_handle = devhandle; + virt_to_real_irq_table[virq].dev_ino = devino; + + return virq; } #ifdef CONFIG_PCI_MSI @@ -739,6 +809,26 @@ void handler_irq(int irq, struct pt_regs *regs) set_irq_regs(old_regs); } +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(void) +{ + unsigned int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + unsigned long flags; + + spin_lock_irqsave(&irq_desc[irq].lock, flags); + if (irq_desc[irq].action && + !(irq_desc[irq].status & IRQ_PER_CPU)) { + if (irq_desc[irq].chip->set_affinity) + irq_desc[irq].chip->set_affinity(irq, + irq_desc[irq].affinity); + } + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } +} +#endif + struct sun5_timer { u64 count0; u64 limit0; diff --git a/arch/sparc64/kernel/isa.c b/arch/sparc64/kernel/isa.c index 6a6882e57ff..1a1043fcf97 100644 --- a/arch/sparc64/kernel/isa.c +++ b/arch/sparc64/kernel/isa.c @@ -79,6 +79,7 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br) while (dp) { struct sparc_isa_device *isa_dev; + struct dev_archdata *sd; isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL); if (!isa_dev) { @@ -86,6 +87,10 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br) return; } + sd = &isa_dev->ofdev.dev.archdata; + sd->prom_node = dp; + sd->op = &isa_dev->ofdev; + isa_dev->ofdev.node = dp; isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev; isa_dev->ofdev.dev.bus = &isa_bus_type; diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c new file mode 100644 index 00000000000..85a2be0b096 --- /dev/null +++ b/arch/sparc64/kernel/ldc.c @@ -0,0 +1,2373 @@ +/* ldc.c: Logical Domain Channel link-layer protocol driver. + * + * Copyright (C) 2007 David S. 
Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/scatterlist.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/init.h> + +#include <asm/hypervisor.h> +#include <asm/iommu.h> +#include <asm/page.h> +#include <asm/ldc.h> +#include <asm/mdesc.h> + +#define DRV_MODULE_NAME "ldc" +#define PFX DRV_MODULE_NAME ": " +#define DRV_MODULE_VERSION "1.0" +#define DRV_MODULE_RELDATE "June 25, 2007" + +static char version[] __devinitdata = + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; +#define LDC_PACKET_SIZE 64 + +/* Packet header layout for unreliable and reliable mode frames. + * When in RAW mode, packets are simply straight 64-byte payloads + * with no headers. + */ +struct ldc_packet { + u8 type; +#define LDC_CTRL 0x01 +#define LDC_DATA 0x02 +#define LDC_ERR 0x10 + + u8 stype; +#define LDC_INFO 0x01 +#define LDC_ACK 0x02 +#define LDC_NACK 0x04 + + u8 ctrl; +#define LDC_VERS 0x01 /* Link Version */ +#define LDC_RTS 0x02 /* Request To Send */ +#define LDC_RTR 0x03 /* Ready To Receive */ +#define LDC_RDX 0x04 /* Ready for Data eXchange */ +#define LDC_CTRL_MSK 0x0f + + u8 env; +#define LDC_LEN 0x3f +#define LDC_FRAG_MASK 0xc0 +#define LDC_START 0x40 +#define LDC_STOP 0x80 + + u32 seqid; + + union { + u8 u_data[LDC_PACKET_SIZE - 8]; + struct { + u32 pad; + u32 ackid; + u8 r_data[LDC_PACKET_SIZE - 8 - 8]; + } r; + } u; +}; + +struct ldc_version { + u16 major; + u16 minor; +}; + +/* Ordered from largest major to lowest. */ +static struct ldc_version ver_arr[] = { + { .major = 1, .minor = 0 }, +}; + +#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE) +#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE) + +struct ldc_channel; + +struct ldc_mode_ops { + int (*write)(struct ldc_channel *, const void *, unsigned int); + int (*read)(struct ldc_channel *, void *, unsigned int); +}; + +static const struct ldc_mode_ops raw_ops; +static const struct ldc_mode_ops nonraw_ops; +static const struct ldc_mode_ops stream_ops; + +int ldom_domaining_enabled; + +struct ldc_iommu { + /* Protects arena alloc/free. */ + spinlock_t lock; + struct iommu_arena arena; + struct ldc_mtable_entry *page_table; +}; + +struct ldc_channel { + /* Protects all operations that depend upon channel state. 
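+ * Both the ldc_rx() and ldc_tx() interrupt handlers take this lock
+ * while processing the queues and drop it again before delivering
+ * events to the client's callback via send_events().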
*/ + spinlock_t lock; + + unsigned long id; + + u8 *mssbuf; + u32 mssbuf_len; + u32 mssbuf_off; + + struct ldc_packet *tx_base; + unsigned long tx_head; + unsigned long tx_tail; + unsigned long tx_num_entries; + unsigned long tx_ra; + + unsigned long tx_acked; + + struct ldc_packet *rx_base; + unsigned long rx_head; + unsigned long rx_tail; + unsigned long rx_num_entries; + unsigned long rx_ra; + + u32 rcv_nxt; + u32 snd_nxt; + + unsigned long chan_state; + + struct ldc_channel_config cfg; + void *event_arg; + + const struct ldc_mode_ops *mops; + + struct ldc_iommu iommu; + + struct ldc_version ver; + + u8 hs_state; +#define LDC_HS_CLOSED 0x00 +#define LDC_HS_OPEN 0x01 +#define LDC_HS_GOTVERS 0x02 +#define LDC_HS_SENTRTR 0x03 +#define LDC_HS_GOTRTR 0x04 +#define LDC_HS_COMPLETE 0x10 + + u8 flags; +#define LDC_FLAG_ALLOCED_QUEUES 0x01 +#define LDC_FLAG_REGISTERED_QUEUES 0x02 +#define LDC_FLAG_REGISTERED_IRQS 0x04 +#define LDC_FLAG_RESET 0x10 + + u8 mss; + u8 state; + +#define LDC_IRQ_NAME_MAX 32 + char rx_irq_name[LDC_IRQ_NAME_MAX]; + char tx_irq_name[LDC_IRQ_NAME_MAX]; + + struct hlist_head mh_list; + + struct hlist_node list; +}; + +#define ldcdbg(TYPE, f, a...) \ +do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \ + printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \ +} while (0) + +static const char *state_to_str(u8 state) +{ + switch (state) { + case LDC_STATE_INVALID: + return "INVALID"; + case LDC_STATE_INIT: + return "INIT"; + case LDC_STATE_BOUND: + return "BOUND"; + case LDC_STATE_READY: + return "READY"; + case LDC_STATE_CONNECTED: + return "CONNECTED"; + default: + return "<UNKNOWN>"; + } +} + +static void ldc_set_state(struct ldc_channel *lp, u8 state) +{ + ldcdbg(STATE, "STATE (%s) --> (%s)\n", + state_to_str(lp->state), + state_to_str(state)); + + lp->state = state; +} + +static unsigned long __advance(unsigned long off, unsigned long num_entries) +{ + off += LDC_PACKET_SIZE; + if (off == (num_entries * LDC_PACKET_SIZE)) + off = 0; + + return off; +} + +static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off) +{ + return __advance(off, lp->rx_num_entries); +} + +static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off) +{ + return __advance(off, lp->tx_num_entries); +} + +static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp, + unsigned long *new_tail) +{ + struct ldc_packet *p; + unsigned long t; + + t = tx_advance(lp, lp->tx_tail); + if (t == lp->tx_head) + return NULL; + + *new_tail = t; + + p = lp->tx_base; + return p + (lp->tx_tail / LDC_PACKET_SIZE); +} + +/* When we are in reliable or stream mode, we must track the next packet + * we haven't gotten an ACK for in the TX queue using tx_acked. We have + * to be careful not to stomp over the queue past that point. During + * the handshake, we don't have TX data packets pending in the queue + * and that's why handshake_get_tx_packet() need not be mindful of + * lp->tx_acked. 
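+ *
+ * Note also that tx_has_space_for() below always leaves one slot in
+ * the ring unused: the queue counts as full as soon as advancing the
+ * tail by one packet would make it meet the head (or tx_acked in
+ * stream mode), the usual way of distinguishing a full ring from an
+ * empty one.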
+ */ +static unsigned long head_for_data(struct ldc_channel *lp) +{ + if (lp->cfg.mode == LDC_MODE_STREAM) + return lp->tx_acked; + return lp->tx_head; +} + +static int tx_has_space_for(struct ldc_channel *lp, unsigned int size) +{ + unsigned long limit, tail, new_tail, diff; + unsigned int mss; + + limit = head_for_data(lp); + tail = lp->tx_tail; + new_tail = tx_advance(lp, tail); + if (new_tail == limit) + return 0; + + if (limit > new_tail) + diff = limit - new_tail; + else + diff = (limit + + ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail)); + diff /= LDC_PACKET_SIZE; + mss = lp->mss; + + if (diff * mss < size) + return 0; + + return 1; +} + +static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp, + unsigned long *new_tail) +{ + struct ldc_packet *p; + unsigned long h, t; + + h = head_for_data(lp); + t = tx_advance(lp, lp->tx_tail); + if (t == h) + return NULL; + + *new_tail = t; + + p = lp->tx_base; + return p + (lp->tx_tail / LDC_PACKET_SIZE); +} + +static int set_tx_tail(struct ldc_channel *lp, unsigned long tail) +{ + unsigned long orig_tail = lp->tx_tail; + int limit = 1000; + + lp->tx_tail = tail; + while (limit-- > 0) { + unsigned long err; + + err = sun4v_ldc_tx_set_qtail(lp->id, tail); + if (!err) + return 0; + + if (err != HV_EWOULDBLOCK) { + lp->tx_tail = orig_tail; + return -EINVAL; + } + udelay(1); + } + + lp->tx_tail = orig_tail; + return -EBUSY; +} + +/* This just updates the head value in the hypervisor using + * a polling loop with a timeout. The caller takes care of + * updating software state representing the head change, if any. + */ +static int __set_rx_head(struct ldc_channel *lp, unsigned long head) +{ + int limit = 1000; + + while (limit-- > 0) { + unsigned long err; + + err = sun4v_ldc_rx_set_qhead(lp->id, head); + if (!err) + return 0; + + if (err != HV_EWOULDBLOCK) + return -EINVAL; + + udelay(1); + } + + return -EBUSY; +} + +static int send_tx_packet(struct ldc_channel *lp, + struct ldc_packet *p, + unsigned long new_tail) +{ + BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE))); + + return set_tx_tail(lp, new_tail); +} + +static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp, + u8 stype, u8 ctrl, + void *data, int dlen, + unsigned long *new_tail) +{ + struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail); + + if (p) { + memset(p, 0, sizeof(*p)); + p->type = LDC_CTRL; + p->stype = stype; + p->ctrl = ctrl; + if (data) + memcpy(p->u.u_data, data, dlen); + } + return p; +} + +static int start_handshake(struct ldc_channel *lp) +{ + struct ldc_packet *p; + struct ldc_version *ver; + unsigned long new_tail; + + ver = &ver_arr[0]; + + ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n", + ver->major, ver->minor); + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS, + ver, sizeof(*ver), &new_tail); + if (p) { + int err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->flags &= ~LDC_FLAG_RESET; + return err; + } + return -EBUSY; +} + +static int send_version_nack(struct ldc_channel *lp, + u16 major, u16 minor) +{ + struct ldc_packet *p; + struct ldc_version ver; + unsigned long new_tail; + + ver.major = major; + ver.minor = minor; + + p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS, + &ver, sizeof(ver), &new_tail); + if (p) { + ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n", + ver.major, ver.minor); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_version_ack(struct ldc_channel *lp, + struct ldc_version *vp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = 
handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS, + vp, sizeof(*vp), &new_tail); + if (p) { + ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n", + vp->major, vp->minor); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_rts(struct ldc_channel *lp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0, + &new_tail); + if (p) { + p->env = lp->cfg.mode; + p->seqid = 0; + lp->rcv_nxt = 0; + + ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n", + p->env, p->seqid); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_rtr(struct ldc_channel *lp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0, + &new_tail); + if (p) { + p->env = lp->cfg.mode; + p->seqid = 0; + + ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n", + p->env, p->seqid); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_rdx(struct ldc_channel *lp) +{ + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0, + &new_tail); + if (p) { + p->env = 0; + p->seqid = ++lp->snd_nxt; + p->u.r.ackid = lp->rcv_nxt; + + ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n", + p->env, p->seqid, p->u.r.ackid); + + return send_tx_packet(lp, p, new_tail); + } + return -EBUSY; +} + +static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt) +{ + struct ldc_packet *p; + unsigned long new_tail; + int err; + + p = data_get_tx_packet(lp, &new_tail); + if (!p) + return -EBUSY; + memset(p, 0, sizeof(*p)); + p->type = data_pkt->type; + p->stype = LDC_NACK; + p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK; + p->seqid = lp->snd_nxt + 1; + p->u.r.ackid = lp->rcv_nxt; + + ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n", + p->type, p->ctrl, p->seqid, p->u.r.ackid); + + err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->snd_nxt++; + + return err; +} + +static int ldc_abort(struct ldc_channel *lp) +{ + unsigned long hv_err; + + ldcdbg(STATE, "ABORT\n"); + + /* We report but do not act upon the hypervisor errors because + * there really isn't much we can do if they fail at this point. + */ + hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n", + lp->id, lp->tx_ra, lp->tx_num_entries, hv_err); + + hv_err = sun4v_ldc_tx_get_state(lp->id, + &lp->tx_head, + &lp->tx_tail, + &lp->chan_state); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n", + lp->id, hv_err); + + hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n", + lp->id, lp->rx_ra, lp->rx_num_entries, hv_err); + + /* Refetch the RX queue state as well, because we could be invoked + * here in the queue processing context. + */ + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + printk(KERN_ERR PFX "ldc_abort: " + "sun4v_ldc_rx_get_state(%lx,...) 
failed, err=%lu\n", + lp->id, hv_err); + + return -ECONNRESET; +} + +static struct ldc_version *find_by_major(u16 major) +{ + struct ldc_version *ret = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(ver_arr); i++) { + struct ldc_version *v = &ver_arr[i]; + if (v->major <= major) { + ret = v; + break; + } + } + return ret; +} + +static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp) +{ + struct ldc_version *vap; + int err; + + ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n", + vp->major, vp->minor); + + if (lp->hs_state == LDC_HS_GOTVERS) { + lp->hs_state = LDC_HS_OPEN; + memset(&lp->ver, 0, sizeof(lp->ver)); + } + + vap = find_by_major(vp->major); + if (!vap) { + err = send_version_nack(lp, 0, 0); + } else if (vap->major != vp->major) { + err = send_version_nack(lp, vap->major, vap->minor); + } else { + struct ldc_version ver = *vp; + if (ver.minor > vap->minor) + ver.minor = vap->minor; + err = send_version_ack(lp, &ver); + if (!err) { + lp->ver = ver; + lp->hs_state = LDC_HS_GOTVERS; + } + } + if (err) + return ldc_abort(lp); + + return 0; +} + +static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp) +{ + ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n", + vp->major, vp->minor); + + if (lp->hs_state == LDC_HS_GOTVERS) { + if (lp->ver.major != vp->major || + lp->ver.minor != vp->minor) + return ldc_abort(lp); + } else { + lp->ver = *vp; + lp->hs_state = LDC_HS_GOTVERS; + } + if (send_rts(lp)) + return ldc_abort(lp); + return 0; +} + +static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp) +{ + struct ldc_version *vap; + + if ((vp->major == 0 && vp->minor == 0) || + !(vap = find_by_major(vp->major))) { + return ldc_abort(lp); + } else { + struct ldc_packet *p; + unsigned long new_tail; + + p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS, + vap, sizeof(*vap), + &new_tail); + if (p) + return send_tx_packet(lp, p, new_tail); + else + return ldc_abort(lp); + } +} + +static int process_version(struct ldc_channel *lp, + struct ldc_packet *p) +{ + struct ldc_version *vp; + + vp = (struct ldc_version *) p->u.u_data; + + switch (p->stype) { + case LDC_INFO: + return process_ver_info(lp, vp); + + case LDC_ACK: + return process_ver_ack(lp, vp); + + case LDC_NACK: + return process_ver_nack(lp, vp); + + default: + return ldc_abort(lp); + } +} + +static int process_rts(struct ldc_channel *lp, + struct ldc_packet *p) +{ + ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n", + p->stype, p->seqid, p->env); + + if (p->stype != LDC_INFO || + lp->hs_state != LDC_HS_GOTVERS || + p->env != lp->cfg.mode) + return ldc_abort(lp); + + lp->snd_nxt = p->seqid; + lp->rcv_nxt = p->seqid; + lp->hs_state = LDC_HS_SENTRTR; + if (send_rtr(lp)) + return ldc_abort(lp); + + return 0; +} + +static int process_rtr(struct ldc_channel *lp, + struct ldc_packet *p) +{ + ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n", + p->stype, p->seqid, p->env); + + if (p->stype != LDC_INFO || + p->env != lp->cfg.mode) + return ldc_abort(lp); + + lp->snd_nxt = p->seqid; + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + send_rdx(lp); + + return LDC_EVENT_UP; +} + +static int rx_seq_ok(struct ldc_channel *lp, u32 seqid) +{ + return lp->rcv_nxt + 1 == seqid; +} + +static int process_rdx(struct ldc_channel *lp, + struct ldc_packet *p) +{ + ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n", + p->stype, p->seqid, p->env, p->u.r.ackid); + + if (p->stype != LDC_INFO || + !(rx_seq_ok(lp, p->seqid))) + return ldc_abort(lp); + + lp->rcv_nxt = 
p->seqid; + + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + + return LDC_EVENT_UP; +} + +static int process_control_frame(struct ldc_channel *lp, + struct ldc_packet *p) +{ + switch (p->ctrl) { + case LDC_VERS: + return process_version(lp, p); + + case LDC_RTS: + return process_rts(lp, p); + + case LDC_RTR: + return process_rtr(lp, p); + + case LDC_RDX: + return process_rdx(lp, p); + + default: + return ldc_abort(lp); + } +} + +static int process_error_frame(struct ldc_channel *lp, + struct ldc_packet *p) +{ + return ldc_abort(lp); +} + +static int process_data_ack(struct ldc_channel *lp, + struct ldc_packet *ack) +{ + unsigned long head = lp->tx_acked; + u32 ackid = ack->u.r.ackid; + + while (1) { + struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE); + + head = tx_advance(lp, head); + + if (p->seqid == ackid) { + lp->tx_acked = head; + return 0; + } + if (head == lp->tx_tail) + return ldc_abort(lp); + } + + return 0; +} + +static void send_events(struct ldc_channel *lp, unsigned int event_mask) +{ + if (event_mask & LDC_EVENT_RESET) + lp->cfg.event(lp->event_arg, LDC_EVENT_RESET); + if (event_mask & LDC_EVENT_UP) + lp->cfg.event(lp->event_arg, LDC_EVENT_UP); + if (event_mask & LDC_EVENT_DATA_READY) + lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY); +} + +static irqreturn_t ldc_rx(int irq, void *dev_id) +{ + struct ldc_channel *lp = dev_id; + unsigned long orig_state, hv_err, flags; + unsigned int event_mask; + + spin_lock_irqsave(&lp->lock, flags); + + orig_state = lp->chan_state; + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + + ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n", + orig_state, lp->chan_state, lp->rx_head, lp->rx_tail); + + event_mask = 0; + + if (lp->cfg.mode == LDC_MODE_RAW && + lp->chan_state == LDC_CHANNEL_UP) { + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + + event_mask |= LDC_EVENT_UP; + + orig_state = lp->chan_state; + } + + /* If we are in reset state, flush the RX queue and ignore + * everything. + */ + if (lp->flags & LDC_FLAG_RESET) { + (void) __set_rx_head(lp, lp->rx_tail); + goto out; + } + + /* Once we finish the handshake, we let the ldc_read() + * paths do all of the control frame and state management. + * Just trigger the callback. 
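+	 *
+	 * As a rough sketch of the intended usage (my_port and
+	 * my_event are hypothetical client-side names, not defined
+	 * anywhere in this file), an event callback typically just
+	 * wakes a sleeper which then does the ldc_read() calls
+	 * outside of IRQ context:
+	 *
+	 *	static void my_event(void *arg, int event)
+	 *	{
+	 *		struct my_port *port = arg;
+	 *
+	 *		if (event == LDC_EVENT_DATA_READY)
+	 *			wake_up(&port->wait);
+	 *	}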
+ */ + if (lp->hs_state == LDC_HS_COMPLETE) { +handshake_complete: + if (lp->chan_state != orig_state) { + unsigned int event = LDC_EVENT_RESET; + + if (lp->chan_state == LDC_CHANNEL_UP) + event = LDC_EVENT_UP; + + event_mask |= event; + } + if (lp->rx_head != lp->rx_tail) + event_mask |= LDC_EVENT_DATA_READY; + + goto out; + } + + if (lp->chan_state != orig_state) + goto out; + + while (lp->rx_head != lp->rx_tail) { + struct ldc_packet *p; + unsigned long new; + int err; + + p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE); + + switch (p->type) { + case LDC_CTRL: + err = process_control_frame(lp, p); + if (err > 0) + event_mask |= err; + break; + + case LDC_DATA: + event_mask |= LDC_EVENT_DATA_READY; + err = 0; + break; + + case LDC_ERR: + err = process_error_frame(lp, p); + break; + + default: + err = ldc_abort(lp); + break; + } + + if (err < 0) + break; + + new = lp->rx_head; + new += LDC_PACKET_SIZE; + if (new == (lp->rx_num_entries * LDC_PACKET_SIZE)) + new = 0; + lp->rx_head = new; + + err = __set_rx_head(lp, new); + if (err < 0) { + (void) ldc_abort(lp); + break; + } + if (lp->hs_state == LDC_HS_COMPLETE) + goto handshake_complete; + } + +out: + spin_unlock_irqrestore(&lp->lock, flags); + + send_events(lp, event_mask); + + return IRQ_HANDLED; +} + +static irqreturn_t ldc_tx(int irq, void *dev_id) +{ + struct ldc_channel *lp = dev_id; + unsigned long flags, hv_err, orig_state; + unsigned int event_mask = 0; + + spin_lock_irqsave(&lp->lock, flags); + + orig_state = lp->chan_state; + hv_err = sun4v_ldc_tx_get_state(lp->id, + &lp->tx_head, + &lp->tx_tail, + &lp->chan_state); + + ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n", + orig_state, lp->chan_state, lp->tx_head, lp->tx_tail); + + if (lp->cfg.mode == LDC_MODE_RAW && + lp->chan_state == LDC_CHANNEL_UP) { + lp->hs_state = LDC_HS_COMPLETE; + ldc_set_state(lp, LDC_STATE_CONNECTED); + + event_mask |= LDC_EVENT_UP; + } + + spin_unlock_irqrestore(&lp->lock, flags); + + send_events(lp, event_mask); + + return IRQ_HANDLED; +} + +/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so + * XXX that addition and removal from the ldc_channel_list has + * XXX atomicity, otherwise the __ldc_channel_exists() check is + * XXX totally pointless as another thread can slip into ldc_alloc() + * XXX and add a channel with the same ID. There also needs to be + * XXX a spinlock for ldc_channel_list. + */ +static HLIST_HEAD(ldc_channel_list); + +static int __ldc_channel_exists(unsigned long id) +{ + struct ldc_channel *lp; + struct hlist_node *n; + + hlist_for_each_entry(lp, n, &ldc_channel_list, list) { + if (lp->id == id) + return 1; + } + return 0; +} + +static int alloc_queue(const char *name, unsigned long num_entries, + struct ldc_packet **base, unsigned long *ra) +{ + unsigned long size, order; + void *q; + + size = num_entries * LDC_PACKET_SIZE; + order = get_order(size); + + q = (void *) __get_free_pages(GFP_KERNEL, order); + if (!q) { + printk(KERN_ERR PFX "Alloc of %s queue failed with " + "size=%lu order=%lu\n", name, size, order); + return -ENOMEM; + } + + memset(q, 0, PAGE_SIZE << order); + + *base = q; + *ra = __pa(q); + + return 0; +} + +static void free_queue(unsigned long num_entries, struct ldc_packet *q) +{ + unsigned long size, order; + + if (!q) + return; + + size = num_entries * LDC_PACKET_SIZE; + order = get_order(size); + + free_pages((unsigned long)q, order); +} + +/* XXX Make this configurable... 
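+ * XXX
+ * XXX For scale: with the default below each channel gets an
+ * XXX 8K-entry map table, so the table itself costs
+ * XXX 8K * sizeof(struct ldc_mtable_entry) bytes, and at one
+ * XXX mapped page per entry with sparc64's default 8K pages a
+ * XXX channel can export roughly 64MB at a time.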
XXX */ +#define LDC_IOTABLE_SIZE (8 * 1024) + +static int ldc_iommu_init(struct ldc_channel *lp) +{ + unsigned long sz, num_tsb_entries, tsbsize, order; + struct ldc_iommu *iommu = &lp->iommu; + struct ldc_mtable_entry *table; + unsigned long hv_err; + int err; + + num_tsb_entries = LDC_IOTABLE_SIZE; + tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); + + spin_lock_init(&iommu->lock); + + sz = num_tsb_entries / 8; + sz = (sz + 7UL) & ~7UL; + iommu->arena.map = kzalloc(sz, GFP_KERNEL); + if (!iommu->arena.map) { + printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz); + return -ENOMEM; + } + + iommu->arena.limit = num_tsb_entries; + + order = get_order(tsbsize); + + table = (struct ldc_mtable_entry *) + __get_free_pages(GFP_KERNEL, order); + err = -ENOMEM; + if (!table) { + printk(KERN_ERR PFX "Alloc of MTE table failed, " + "size=%lu order=%lu\n", tsbsize, order); + goto out_free_map; + } + + memset(table, 0, PAGE_SIZE << order); + + iommu->page_table = table; + + hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table), + num_tsb_entries); + err = -EINVAL; + if (hv_err) + goto out_free_table; + + return 0; + +out_free_table: + free_pages((unsigned long) table, order); + iommu->page_table = NULL; + +out_free_map: + kfree(iommu->arena.map); + iommu->arena.map = NULL; + + return err; +} + +static void ldc_iommu_release(struct ldc_channel *lp) +{ + struct ldc_iommu *iommu = &lp->iommu; + unsigned long num_tsb_entries, tsbsize, order; + + (void) sun4v_ldc_set_map_table(lp->id, 0, 0); + + num_tsb_entries = iommu->arena.limit; + tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry); + order = get_order(tsbsize); + + free_pages((unsigned long) iommu->page_table, order); + iommu->page_table = NULL; + + kfree(iommu->arena.map); + iommu->arena.map = NULL; +} + +struct ldc_channel *ldc_alloc(unsigned long id, + const struct ldc_channel_config *cfgp, + void *event_arg) +{ + struct ldc_channel *lp; + const struct ldc_mode_ops *mops; + unsigned long dummy1, dummy2, hv_err; + u8 mss, *mssbuf; + int err; + + err = -ENODEV; + if (!ldom_domaining_enabled) + goto out_err; + + err = -EINVAL; + if (!cfgp) + goto out_err; + + switch (cfgp->mode) { + case LDC_MODE_RAW: + mops = &raw_ops; + mss = LDC_PACKET_SIZE; + break; + + case LDC_MODE_UNRELIABLE: + mops = &nonraw_ops; + mss = LDC_PACKET_SIZE - 8; + break; + + case LDC_MODE_STREAM: + mops = &stream_ops; + mss = LDC_PACKET_SIZE - 8 - 8; + break; + + default: + goto out_err; + } + + if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq) + goto out_err; + + hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2); + err = -ENODEV; + if (hv_err == HV_ECHANNEL) + goto out_err; + + err = -EEXIST; + if (__ldc_channel_exists(id)) + goto out_err; + + mssbuf = NULL; + + lp = kzalloc(sizeof(*lp), GFP_KERNEL); + err = -ENOMEM; + if (!lp) + goto out_err; + + spin_lock_init(&lp->lock); + + lp->id = id; + + err = ldc_iommu_init(lp); + if (err) + goto out_free_ldc; + + lp->mops = mops; + lp->mss = mss; + + lp->cfg = *cfgp; + if (!lp->cfg.mtu) + lp->cfg.mtu = LDC_DEFAULT_MTU; + + if (lp->cfg.mode == LDC_MODE_STREAM) { + mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL); + if (!mssbuf) { + err = -ENOMEM; + goto out_free_iommu; + } + lp->mssbuf = mssbuf; + } + + lp->event_arg = event_arg; + + /* XXX allow setting via ldc_channel_config to override defaults + * XXX or use some formula based upon mtu + */ + lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES; + lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES; + + err = alloc_queue("TX", lp->tx_num_entries, + &lp->tx_base, 
&lp->tx_ra); + if (err) + goto out_free_mssbuf; + + err = alloc_queue("RX", lp->rx_num_entries, + &lp->rx_base, &lp->rx_ra); + if (err) + goto out_free_txq; + + lp->flags |= LDC_FLAG_ALLOCED_QUEUES; + + lp->hs_state = LDC_HS_CLOSED; + ldc_set_state(lp, LDC_STATE_INIT); + + INIT_HLIST_NODE(&lp->list); + hlist_add_head(&lp->list, &ldc_channel_list); + + INIT_HLIST_HEAD(&lp->mh_list); + + return lp; + +out_free_txq: + free_queue(lp->tx_num_entries, lp->tx_base); + +out_free_mssbuf: + if (mssbuf) + kfree(mssbuf); + +out_free_iommu: + ldc_iommu_release(lp); + +out_free_ldc: + kfree(lp); + +out_err: + return ERR_PTR(err); +} +EXPORT_SYMBOL(ldc_alloc); + +void ldc_free(struct ldc_channel *lp) +{ + if (lp->flags & LDC_FLAG_REGISTERED_IRQS) { + free_irq(lp->cfg.rx_irq, lp); + free_irq(lp->cfg.tx_irq, lp); + } + + if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) { + sun4v_ldc_tx_qconf(lp->id, 0, 0); + sun4v_ldc_rx_qconf(lp->id, 0, 0); + lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES; + } + if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) { + free_queue(lp->tx_num_entries, lp->tx_base); + free_queue(lp->rx_num_entries, lp->rx_base); + lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES; + } + + hlist_del(&lp->list); + + if (lp->mssbuf) + kfree(lp->mssbuf); + + ldc_iommu_release(lp); + + kfree(lp); +} +EXPORT_SYMBOL(ldc_free); + +/* Bind the channel. This registers the LDC queues with + * the hypervisor and puts the channel into a pseudo-listening + * state. This does not initiate a handshake, ldc_connect() does + * that. + */ +int ldc_bind(struct ldc_channel *lp, const char *name) +{ + unsigned long hv_err, flags; + int err = -EINVAL; + + spin_lock_irqsave(&lp->lock, flags); + + if (!name) + goto out_err; + + if (lp->state != LDC_STATE_INIT) + goto out_err; + + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + + err = request_irq(lp->cfg.rx_irq, ldc_rx, + IRQF_SAMPLE_RANDOM | IRQF_SHARED, + lp->rx_irq_name, lp); + if (err) + goto out_err; + + err = request_irq(lp->cfg.tx_irq, ldc_tx, + IRQF_SAMPLE_RANDOM | IRQF_SHARED, + lp->tx_irq_name, lp); + if (err) + goto out_free_rx_irq; + + + lp->flags |= LDC_FLAG_REGISTERED_IRQS; + + err = -ENODEV; + hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_free_tx_irq; + + hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); + if (hv_err) + goto out_free_tx_irq; + + hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_unmap_tx; + + hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); + if (hv_err) + goto out_unmap_tx; + + lp->flags |= LDC_FLAG_REGISTERED_QUEUES; + + hv_err = sun4v_ldc_tx_get_state(lp->id, + &lp->tx_head, + &lp->tx_tail, + &lp->chan_state); + err = -EBUSY; + if (hv_err) + goto out_unmap_rx; + + lp->tx_acked = lp->tx_head; + + lp->hs_state = LDC_HS_OPEN; + ldc_set_state(lp, LDC_STATE_BOUND); + + spin_unlock_irqrestore(&lp->lock, flags); + + return 0; + +out_unmap_rx: + lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES; + sun4v_ldc_rx_qconf(lp->id, 0, 0); + +out_unmap_tx: + sun4v_ldc_tx_qconf(lp->id, 0, 0); + +out_free_tx_irq: + lp->flags &= ~LDC_FLAG_REGISTERED_IRQS; + free_irq(lp->cfg.tx_irq, lp); + +out_free_rx_irq: + free_irq(lp->cfg.rx_irq, lp); + +out_err: + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_bind); + +int ldc_connect(struct ldc_channel *lp) +{ + unsigned long flags; + int err; + + if (lp->cfg.mode == LDC_MODE_RAW) + return -EINVAL; + + spin_lock_irqsave(&lp->lock, flags); + + if (!(lp->flags & 
LDC_FLAG_ALLOCED_QUEUES) || + !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) || + lp->hs_state != LDC_HS_OPEN) + err = -EINVAL; + else + err = start_handshake(lp); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_connect); + +int ldc_disconnect(struct ldc_channel *lp) +{ + unsigned long hv_err, flags; + int err; + + if (lp->cfg.mode == LDC_MODE_RAW) + return -EINVAL; + + if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) || + !(lp->flags & LDC_FLAG_REGISTERED_QUEUES)) + return -EINVAL; + + spin_lock_irqsave(&lp->lock, flags); + + err = -ENODEV; + hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_err; + + hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries); + if (hv_err) + goto out_err; + + hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0); + if (hv_err) + goto out_err; + + hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries); + if (hv_err) + goto out_err; + + ldc_set_state(lp, LDC_STATE_BOUND); + lp->hs_state = LDC_HS_OPEN; + lp->flags |= LDC_FLAG_RESET; + + spin_unlock_irqrestore(&lp->lock, flags); + + return 0; + +out_err: + sun4v_ldc_tx_qconf(lp->id, 0, 0); + sun4v_ldc_rx_qconf(lp->id, 0, 0); + free_irq(lp->cfg.tx_irq, lp); + free_irq(lp->cfg.rx_irq, lp); + lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS | + LDC_FLAG_REGISTERED_QUEUES); + ldc_set_state(lp, LDC_STATE_INIT); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_disconnect); + +int ldc_state(struct ldc_channel *lp) +{ + return lp->state; +} +EXPORT_SYMBOL(ldc_state); + +static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size) +{ + struct ldc_packet *p; + unsigned long new_tail; + int err; + + if (size > LDC_PACKET_SIZE) + return -EMSGSIZE; + + p = data_get_tx_packet(lp, &new_tail); + if (!p) + return -EAGAIN; + + memcpy(p, buf, size); + + err = send_tx_packet(lp, p, new_tail); + if (!err) + err = size; + + return err; +} + +static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size) +{ + struct ldc_packet *p; + unsigned long hv_err, new; + int err; + + if (size < LDC_PACKET_SIZE) + return -EINVAL; + + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + return ldc_abort(lp); + + if (lp->chan_state == LDC_CHANNEL_DOWN || + lp->chan_state == LDC_CHANNEL_RESETTING) + return -ECONNRESET; + + if (lp->rx_head == lp->rx_tail) + return 0; + + p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE); + memcpy(buf, p, LDC_PACKET_SIZE); + + new = rx_advance(lp, lp->rx_head); + lp->rx_head = new; + + err = __set_rx_head(lp, new); + if (err < 0) + err = -ECONNRESET; + else + err = LDC_PACKET_SIZE; + + return err; +} + +static const struct ldc_mode_ops raw_ops = { + .write = write_raw, + .read = read_raw, +}; + +static int write_nonraw(struct ldc_channel *lp, const void *buf, + unsigned int size) +{ + unsigned long hv_err, tail; + unsigned int copied; + u32 seq; + int err; + + hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail, + &lp->chan_state); + if (unlikely(hv_err)) + return -EBUSY; + + if (unlikely(lp->chan_state != LDC_CHANNEL_UP)) + return ldc_abort(lp); + + if (!tx_has_space_for(lp, size)) + return -EAGAIN; + + seq = lp->snd_nxt; + copied = 0; + tail = lp->tx_tail; + while (copied < size) { + struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE); + u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ? 
+ p->u.u_data : + p->u.r.r_data); + int data_len; + + p->type = LDC_DATA; + p->stype = LDC_INFO; + p->ctrl = 0; + + data_len = size - copied; + if (data_len > lp->mss) + data_len = lp->mss; + + BUG_ON(data_len > LDC_LEN); + + p->env = (data_len | + (copied == 0 ? LDC_START : 0) | + (data_len == size - copied ? LDC_STOP : 0)); + + p->seqid = ++seq; + + ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n", + p->type, + p->stype, + p->ctrl, + p->env, + p->seqid); + + memcpy(data, buf, data_len); + buf += data_len; + copied += data_len; + + tail = tx_advance(lp, tail); + } + + err = set_tx_tail(lp, tail); + if (!err) { + lp->snd_nxt = seq; + err = size; + } + + return err; +} + +static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p, + struct ldc_packet *first_frag) +{ + int err; + + if (first_frag) + lp->rcv_nxt = first_frag->seqid - 1; + + err = send_data_nack(lp, p); + if (err) + return err; + + err = __set_rx_head(lp, lp->rx_tail); + if (err < 0) + return ldc_abort(lp); + + return 0; +} + +static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p) +{ + if (p->stype & LDC_ACK) { + int err = process_data_ack(lp, p); + if (err) + return err; + } + if (p->stype & LDC_NACK) + return ldc_abort(lp); + + return 0; +} + +static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head) +{ + unsigned long dummy; + int limit = 1000; + + ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n", + cur_head, lp->rx_head, lp->rx_tail); + while (limit-- > 0) { + unsigned long hv_err; + + hv_err = sun4v_ldc_rx_get_state(lp->id, + &dummy, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + return ldc_abort(lp); + + if (lp->chan_state == LDC_CHANNEL_DOWN || + lp->chan_state == LDC_CHANNEL_RESETTING) + return -ECONNRESET; + + if (cur_head != lp->rx_tail) { + ldcdbg(DATA, "DATA WAIT DONE " + "head[%lx] tail[%lx] chan_state[%lx]\n", + dummy, lp->rx_tail, lp->chan_state); + return 0; + } + + udelay(1); + } + return -EAGAIN; +} + +static int rx_set_head(struct ldc_channel *lp, unsigned long head) +{ + int err = __set_rx_head(lp, head); + + if (err < 0) + return ldc_abort(lp); + + lp->rx_head = head; + return 0; +} + +static void send_data_ack(struct ldc_channel *lp) +{ + unsigned long new_tail; + struct ldc_packet *p; + + p = data_get_tx_packet(lp, &new_tail); + if (likely(p)) { + int err; + + memset(p, 0, sizeof(*p)); + p->type = LDC_DATA; + p->stype = LDC_ACK; + p->ctrl = 0; + p->seqid = lp->snd_nxt + 1; + p->u.r.ackid = lp->rcv_nxt; + + err = send_tx_packet(lp, p, new_tail); + if (!err) + lp->snd_nxt++; + } +} + +static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size) +{ + struct ldc_packet *first_frag; + unsigned long hv_err, new; + int err, copied; + + hv_err = sun4v_ldc_rx_get_state(lp->id, + &lp->rx_head, + &lp->rx_tail, + &lp->chan_state); + if (hv_err) + return ldc_abort(lp); + + if (lp->chan_state == LDC_CHANNEL_DOWN || + lp->chan_state == LDC_CHANNEL_RESETTING) + return -ECONNRESET; + + if (lp->rx_head == lp->rx_tail) + return 0; + + first_frag = NULL; + copied = err = 0; + new = lp->rx_head; + while (1) { + struct ldc_packet *p; + int pkt_len; + + BUG_ON(new == lp->rx_tail); + p = lp->rx_base + (new / LDC_PACKET_SIZE); + + ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] " + "rcv_nxt[%08x]\n", + p->type, + p->stype, + p->ctrl, + p->env, + p->seqid, + p->u.r.ackid, + lp->rcv_nxt); + + if (unlikely(!rx_seq_ok(lp, p->seqid))) { + err = rx_bad_seq(lp, p, first_frag); + copied = 0; + break; + } + + if (p->type & LDC_CTRL) { + err = 
process_control_frame(lp, p);
+			if (err < 0)
+				break;
+			err = 0;
+		}
+
+		lp->rcv_nxt = p->seqid;
+
+		if (!(p->type & LDC_DATA)) {
+			new = rx_advance(lp, new);
+			goto no_data;
+		}
+		if (p->stype & (LDC_ACK | LDC_NACK)) {
+			err = data_ack_nack(lp, p);
+			if (err)
+				break;
+		}
+		if (!(p->stype & LDC_INFO)) {
+			new = rx_advance(lp, new);
+			err = rx_set_head(lp, new);
+			if (err)
+				break;
+			goto no_data;
+		}
+
+		pkt_len = p->env & LDC_LEN;
+
+		/* Every initial packet starts with the START bit set.
+		 *
+		 * Singleton packets will have both START+STOP set.
+		 *
+		 * Fragments will have START set in the first frame, STOP
+		 * set in the last frame, and neither bit set in middle
+		 * frames of the packet.
+		 *
+		 * Therefore if we are at the beginning of a packet and
+		 * we don't see START, or we are in the middle of a fragmented
+		 * packet and do see START, we are unsynchronized and should
+		 * flush the RX queue.
+		 */
+		if ((first_frag == NULL && !(p->env & LDC_START)) ||
+		    (first_frag != NULL && (p->env & LDC_START))) {
+			if (!first_frag)
+				new = rx_advance(lp, new);
+
+			err = rx_set_head(lp, new);
+			if (err)
+				break;
+
+			if (!first_frag)
+				goto no_data;
+		}
+		if (!first_frag)
+			first_frag = p;
+
+		if (pkt_len > size - copied) {
+			/* User didn't give us a big enough buffer,
+			 * what to do? This is a pretty serious error.
+			 *
+			 * Since we haven't updated the RX ring head to
+			 * consume any of the packets, signal the error
+			 * to the user and just leave the RX ring alone.
+			 *
+			 * This seems the best behavior because this allows
+			 * a user of the LDC layer to start with a small
+			 * RX buffer for ldc_read() calls and use -EMSGSIZE
+			 * as a cue to enlarge its read buffer.
+			 */
+			err = -EMSGSIZE;
+			break;
+		}
+
+		/* Ok, we are gonna eat this one. */
+		new = rx_advance(lp, new);
+
+		memcpy(buf,
+		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
+ p->u.u_data : p->u.r.r_data), pkt_len); + buf += pkt_len; + copied += pkt_len; + + if (p->env & LDC_STOP) + break; + +no_data: + if (new == lp->rx_tail) { + err = rx_data_wait(lp, new); + if (err) + break; + } + } + + if (!err) + err = rx_set_head(lp, new); + + if (err && first_frag) + lp->rcv_nxt = first_frag->seqid - 1; + + if (!err) { + err = copied; + if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE) + send_data_ack(lp); + } + + return err; +} + +static const struct ldc_mode_ops nonraw_ops = { + .write = write_nonraw, + .read = read_nonraw, +}; + +static int write_stream(struct ldc_channel *lp, const void *buf, + unsigned int size) +{ + if (size > lp->cfg.mtu) + size = lp->cfg.mtu; + return write_nonraw(lp, buf, size); +} + +static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size) +{ + if (!lp->mssbuf_len) { + int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu); + if (err < 0) + return err; + + lp->mssbuf_len = err; + lp->mssbuf_off = 0; + } + + if (size > lp->mssbuf_len) + size = lp->mssbuf_len; + memcpy(buf, lp->mssbuf + lp->mssbuf_off, size); + + lp->mssbuf_off += size; + lp->mssbuf_len -= size; + + return size; +} + +static const struct ldc_mode_ops stream_ops = { + .write = write_stream, + .read = read_stream, +}; + +int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size) +{ + unsigned long flags; + int err; + + if (!buf) + return -EINVAL; + + if (!size) + return 0; + + spin_lock_irqsave(&lp->lock, flags); + + if (lp->hs_state != LDC_HS_COMPLETE) + err = -ENOTCONN; + else + err = lp->mops->write(lp, buf, size); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_write); + +int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size) +{ + unsigned long flags; + int err; + + if (!buf) + return -EINVAL; + + if (!size) + return 0; + + spin_lock_irqsave(&lp->lock, flags); + + if (lp->hs_state != LDC_HS_COMPLETE) + err = -ENOTCONN; + else + err = lp->mops->read(lp, buf, size); + + spin_unlock_irqrestore(&lp->lock, flags); + + return err; +} +EXPORT_SYMBOL(ldc_read); + +static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages) +{ + struct iommu_arena *arena = &iommu->arena; + unsigned long n, i, start, end, limit; + int pass; + + limit = arena->limit; + start = arena->hint; + pass = 0; + +again: + n = find_next_zero_bit(arena->map, limit, start); + end = n + npages; + if (unlikely(end >= limit)) { + if (likely(pass < 1)) { + limit = start; + start = 0; + pass++; + goto again; + } else { + /* Scanned the whole thing, give up. 
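+			 *
+			 * (The first pass searched from the hint up to
+			 * the arena limit; the second, after the wrap
+			 * above, searched from 0 up to where the first
+			 * pass had been looking.  With both exhausted,
+			 * no run of npages free entries exists in the
+			 * map.)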
*/ + return -1; + } + } + + for (i = n; i < end; i++) { + if (test_bit(i, arena->map)) { + start = i + 1; + goto again; + } + } + + for (i = n; i < end; i++) + __set_bit(i, arena->map); + + arena->hint = end; + + return n; +} + +#define COOKIE_PGSZ_CODE 0xf000000000000000ULL +#define COOKIE_PGSZ_CODE_SHIFT 60ULL + +static u64 pagesize_code(void) +{ + switch (PAGE_SIZE) { + default: + case (8ULL * 1024ULL): + return 0; + case (64ULL * 1024ULL): + return 1; + case (512ULL * 1024ULL): + return 2; + case (4ULL * 1024ULL * 1024ULL): + return 3; + case (32ULL * 1024ULL * 1024ULL): + return 4; + case (256ULL * 1024ULL * 1024ULL): + return 5; + } +} + +static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset) +{ + return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) | + (index << PAGE_SHIFT) | + page_offset); +} + +static u64 cookie_to_index(u64 cookie, unsigned long *shift) +{ + u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT; + + cookie &= ~COOKIE_PGSZ_CODE; + + *shift = szcode * 3; + + return (cookie >> (13ULL + (szcode * 3ULL))); +} + +static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, + unsigned long npages) +{ + long entry; + + entry = arena_alloc(iommu, npages); + if (unlikely(entry < 0)) + return NULL; + + return iommu->page_table + entry; +} + +static u64 perm_to_mte(unsigned int map_perm) +{ + u64 mte_base; + + mte_base = pagesize_code(); + + if (map_perm & LDC_MAP_SHADOW) { + if (map_perm & LDC_MAP_R) + mte_base |= LDC_MTE_COPY_R; + if (map_perm & LDC_MAP_W) + mte_base |= LDC_MTE_COPY_W; + } + if (map_perm & LDC_MAP_DIRECT) { + if (map_perm & LDC_MAP_R) + mte_base |= LDC_MTE_READ; + if (map_perm & LDC_MAP_W) + mte_base |= LDC_MTE_WRITE; + if (map_perm & LDC_MAP_X) + mte_base |= LDC_MTE_EXEC; + } + if (map_perm & LDC_MAP_IO) { + if (map_perm & LDC_MAP_R) + mte_base |= LDC_MTE_IOMMU_R; + if (map_perm & LDC_MAP_W) + mte_base |= LDC_MTE_IOMMU_W; + } + + return mte_base; +} + +static int pages_in_region(unsigned long base, long len) +{ + int count = 0; + + do { + unsigned long new = (base + PAGE_SIZE) & PAGE_MASK; + + len -= (new - base); + base = new; + count++; + } while (len > 0); + + return count; +} + +struct cookie_state { + struct ldc_mtable_entry *page_table; + struct ldc_trans_cookie *cookies; + u64 mte_base; + u64 prev_cookie; + u32 pte_idx; + u32 nc; +}; + +static void fill_cookies(struct cookie_state *sp, unsigned long pa, + unsigned long off, unsigned long len) +{ + do { + unsigned long tlen, new = pa + PAGE_SIZE; + u64 this_cookie; + + sp->page_table[sp->pte_idx].mte = sp->mte_base | pa; + + tlen = PAGE_SIZE; + if (off) + tlen = PAGE_SIZE - off; + if (tlen > len) + tlen = len; + + this_cookie = make_cookie(sp->pte_idx, + pagesize_code(), off); + + off = 0; + + if (this_cookie == sp->prev_cookie) { + sp->cookies[sp->nc - 1].cookie_size += tlen; + } else { + sp->cookies[sp->nc].cookie_addr = this_cookie; + sp->cookies[sp->nc].cookie_size = tlen; + sp->nc++; + } + sp->prev_cookie = this_cookie + tlen; + + sp->pte_idx++; + + len -= tlen; + pa = new; + } while (len > 0); +} + +static int sg_count_one(struct scatterlist *sg) +{ + unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT; + long len = sg->length; + + if ((sg->offset | len) & (8UL - 1)) + return -EFAULT; + + return pages_in_region(base + sg->offset, len); +} + +static int sg_count_pages(struct scatterlist *sg, int num_sg) +{ + int count; + int i; + + count = 0; + for (i = 0; i < num_sg; i++) { + int err = sg_count_one(sg + i); + if (err < 0) + return err; + count += err; + } + + return count; +} + +int 
ldc_map_sg(struct ldc_channel *lp, + struct scatterlist *sg, int num_sg, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm) +{ + unsigned long i, npages, flags; + struct ldc_mtable_entry *base; + struct cookie_state state; + struct ldc_iommu *iommu; + int err; + + if (map_perm & ~LDC_MAP_ALL) + return -EINVAL; + + err = sg_count_pages(sg, num_sg); + if (err < 0) + return err; + + npages = err; + if (err > ncookies) + return -EMSGSIZE; + + iommu = &lp->iommu; + + spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (!base) + return -ENOMEM; + + state.page_table = iommu->page_table; + state.cookies = cookies; + state.mte_base = perm_to_mte(map_perm); + state.prev_cookie = ~(u64)0; + state.pte_idx = (base - iommu->page_table); + state.nc = 0; + + for (i = 0; i < num_sg; i++) + fill_cookies(&state, page_to_pfn(sg[i].page) << PAGE_SHIFT, + sg[i].offset, sg[i].length); + + return state.nc; +} +EXPORT_SYMBOL(ldc_map_sg); + +int ldc_map_single(struct ldc_channel *lp, + void *buf, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies, + unsigned int map_perm) +{ + unsigned long npages, pa, flags; + struct ldc_mtable_entry *base; + struct cookie_state state; + struct ldc_iommu *iommu; + + if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1)) + return -EINVAL; + + pa = __pa(buf); + if ((pa | len) & (8UL - 1)) + return -EFAULT; + + npages = pages_in_region(pa, len); + + iommu = &lp->iommu; + + spin_lock_irqsave(&iommu->lock, flags); + base = alloc_npages(iommu, npages); + spin_unlock_irqrestore(&iommu->lock, flags); + + if (!base) + return -ENOMEM; + + state.page_table = iommu->page_table; + state.cookies = cookies; + state.mte_base = perm_to_mte(map_perm); + state.prev_cookie = ~(u64)0; + state.pte_idx = (base - iommu->page_table); + state.nc = 0; + fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len); + BUG_ON(state.nc != 1); + + return state.nc; +} +EXPORT_SYMBOL(ldc_map_single); + +static void free_npages(unsigned long id, struct ldc_iommu *iommu, + u64 cookie, u64 size) +{ + struct iommu_arena *arena = &iommu->arena; + unsigned long i, shift, index, npages; + struct ldc_mtable_entry *base; + + npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT; + index = cookie_to_index(cookie, &shift); + base = iommu->page_table + index; + + BUG_ON(index > arena->limit || + (index + npages) > arena->limit); + + for (i = 0; i < npages; i++) { + if (base->cookie) + sun4v_ldc_revoke(id, cookie + (i << shift), + base->cookie); + base->mte = 0; + __clear_bit(index + i, arena->map); + } +} + +void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies, + int ncookies) +{ + struct ldc_iommu *iommu = &lp->iommu; + unsigned long flags; + int i; + + spin_lock_irqsave(&iommu->lock, flags); + for (i = 0; i < ncookies; i++) { + u64 addr = cookies[i].cookie_addr; + u64 size = cookies[i].cookie_size; + + free_npages(lp->id, iommu, addr, size); + } + spin_unlock_irqrestore(&iommu->lock, flags); +} +EXPORT_SYMBOL(ldc_unmap); + +int ldc_copy(struct ldc_channel *lp, int copy_dir, + void *buf, unsigned int len, unsigned long offset, + struct ldc_trans_cookie *cookies, int ncookies) +{ + unsigned int orig_len; + unsigned long ra; + int i; + + if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n", + lp->id, copy_dir); + return -EINVAL; + } + + ra = __pa(buf); + if ((ra | len | offset) & (8UL - 1)) { + printk(KERN_ERR PFX 
"ldc_copy: ID[%lu] Unaligned buffer " + "ra[%lx] len[%x] offset[%lx]\n", + lp->id, ra, len, offset); + return -EFAULT; + } + + if (lp->hs_state != LDC_HS_COMPLETE || + (lp->flags & LDC_FLAG_RESET)) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] " + "flags[%x]\n", lp->id, lp->hs_state, lp->flags); + return -ECONNRESET; + } + + orig_len = len; + for (i = 0; i < ncookies; i++) { + unsigned long cookie_raddr = cookies[i].cookie_addr; + unsigned long this_len = cookies[i].cookie_size; + unsigned long actual_len; + + if (unlikely(offset)) { + unsigned long this_off = offset; + + if (this_off > this_len) + this_off = this_len; + + offset -= this_off; + this_len -= this_off; + if (!this_len) + continue; + cookie_raddr += this_off; + } + + if (this_len > len) + this_len = len; + + while (1) { + unsigned long hv_err; + + hv_err = sun4v_ldc_copy(lp->id, copy_dir, + cookie_raddr, ra, + this_len, &actual_len); + if (unlikely(hv_err)) { + printk(KERN_ERR PFX "ldc_copy: ID[%lu] " + "HV error %lu\n", + lp->id, hv_err); + if (lp->hs_state != LDC_HS_COMPLETE || + (lp->flags & LDC_FLAG_RESET)) + return -ECONNRESET; + else + return -EFAULT; + } + + cookie_raddr += actual_len; + ra += actual_len; + len -= actual_len; + if (actual_len == this_len) + break; + + this_len -= actual_len; + } + + if (!len) + break; + } + + /* It is caller policy what to do about short copies. + * For example, a networking driver can declare the + * packet a runt and drop it. + */ + + return orig_len - len; +} +EXPORT_SYMBOL(ldc_copy); + +void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len, + struct ldc_trans_cookie *cookies, int *ncookies, + unsigned int map_perm) +{ + void *buf; + int err; + + if (len & (8UL - 1)) + return ERR_PTR(-EINVAL); + + buf = kzalloc(len, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm); + if (err < 0) { + kfree(buf); + return ERR_PTR(err); + } + *ncookies = err; + + return buf; +} +EXPORT_SYMBOL(ldc_alloc_exp_dring); + +void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len, + struct ldc_trans_cookie *cookies, int ncookies) +{ + ldc_unmap(lp, cookies, ncookies); + kfree(buf); +} +EXPORT_SYMBOL(ldc_free_exp_dring); + +static int __init ldc_init(void) +{ + unsigned long major, minor; + struct mdesc_handle *hp; + const u64 *v; + u64 mp; + + hp = mdesc_grab(); + if (!hp) + return -ENODEV; + + mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); + if (mp == MDESC_NODE_NULL) + return -ENODEV; + + v = mdesc_get_property(hp, mp, "domaining-enabled", NULL); + if (!v) + return -ENODEV; + + major = 1; + minor = 0; + if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) { + printk(KERN_INFO PFX "Could not register LDOM hvapi.\n"); + return -ENODEV; + } + + printk(KERN_INFO "%s", version); + + if (!*v) { + printk(KERN_INFO PFX "Domaining disabled.\n"); + return -ENODEV; + } + ldom_domaining_enabled = 1; + + return 0; +} + +core_initcall(ldc_init); diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index f0e16045fb1..cce4d0ddf5d 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -6,6 +6,9 @@ #include <linux/types.h> #include <linux/bootmem.h> #include <linux/log2.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/mm.h> #include <asm/hypervisor.h> #include <asm/mdesc.h> @@ -29,7 +32,7 @@ struct mdesc_hdr { u32 node_sz; /* node block size */ u32 name_sz; /* name block size */ u32 data_sz; /* data block size */ -}; +} 
__attribute__((aligned(16))); struct mdesc_elem { u8 tag; @@ -53,306 +56,516 @@ struct mdesc_elem { } d; }; -static struct mdesc_hdr *main_mdesc; -static struct mdesc_node *allnodes; +struct mdesc_mem_ops { + struct mdesc_handle *(*alloc)(unsigned int mdesc_size); + void (*free)(struct mdesc_handle *handle); +}; -static struct mdesc_node *allnodes_tail; -static unsigned int unique_id; +struct mdesc_handle { + struct list_head list; + struct mdesc_mem_ops *mops; + void *self_base; + atomic_t refcnt; + unsigned int handle_size; + struct mdesc_hdr mdesc; +}; + +static void mdesc_handle_init(struct mdesc_handle *hp, + unsigned int handle_size, + void *base) +{ + BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1)); -static struct mdesc_node **mdesc_hash; -static unsigned int mdesc_hash_size; + memset(hp, 0, handle_size); + INIT_LIST_HEAD(&hp->list); + hp->self_base = base; + atomic_set(&hp->refcnt, 1); + hp->handle_size = handle_size; +} -static inline unsigned int node_hashfn(u64 node) +static struct mdesc_handle * __init mdesc_bootmem_alloc(unsigned int mdesc_size) { - return ((unsigned int) (node ^ (node >> 8) ^ (node >> 16))) - & (mdesc_hash_size - 1); + struct mdesc_handle *hp; + unsigned int handle_size, alloc_size; + + handle_size = (sizeof(struct mdesc_handle) - + sizeof(struct mdesc_hdr) + + mdesc_size); + alloc_size = PAGE_ALIGN(handle_size); + + hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL); + if (hp) + mdesc_handle_init(hp, handle_size, hp); + + return hp; } -static inline void hash_node(struct mdesc_node *mp) +static void mdesc_bootmem_free(struct mdesc_handle *hp) { - struct mdesc_node **head = &mdesc_hash[node_hashfn(mp->node)]; + unsigned int alloc_size, handle_size = hp->handle_size; + unsigned long start, end; - mp->hash_next = *head; - *head = mp; + BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(!list_empty(&hp->list)); - if (allnodes_tail) { - allnodes_tail->allnodes_next = mp; - allnodes_tail = mp; - } else { - allnodes = allnodes_tail = mp; + alloc_size = PAGE_ALIGN(handle_size); + + start = (unsigned long) hp; + end = start + alloc_size; + + while (start < end) { + struct page *p; + + p = virt_to_page(start); + ClearPageReserved(p); + __free_page(p); + start += PAGE_SIZE; } } -static struct mdesc_node *find_node(u64 node) +static struct mdesc_mem_ops bootmem_mdesc_ops = { + .alloc = mdesc_bootmem_alloc, + .free = mdesc_bootmem_free, +}; + +static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) { - struct mdesc_node *mp = mdesc_hash[node_hashfn(node)]; + unsigned int handle_size; + void *base; - while (mp) { - if (mp->node == node) - return mp; + handle_size = (sizeof(struct mdesc_handle) - + sizeof(struct mdesc_hdr) + + mdesc_size); - mp = mp->hash_next; + base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_NOFAIL); + if (base) { + struct mdesc_handle *hp; + unsigned long addr; + + addr = (unsigned long)base; + addr = (addr + 15UL) & ~15UL; + hp = (struct mdesc_handle *) addr; + + mdesc_handle_init(hp, handle_size, base); + return hp; } + return NULL; } -struct property *md_find_property(const struct mdesc_node *mp, - const char *name, - int *lenp) +static void mdesc_kfree(struct mdesc_handle *hp) { - struct property *pp; + BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(!list_empty(&hp->list)); - for (pp = mp->properties; pp != 0; pp = pp->next) { - if (strcasecmp(pp->name, name) == 0) { - if (lenp) - *lenp = pp->length; - break; - } - } - return pp; + kfree(hp->self_base); } -EXPORT_SYMBOL(md_find_property); -/* - * Find a property with a given name for a 
given node - * and return the value. - */ -const void *md_get_property(const struct mdesc_node *mp, const char *name, - int *lenp) +static struct mdesc_mem_ops kmalloc_mdesc_memops = { + .alloc = mdesc_kmalloc, + .free = mdesc_kfree, +}; + +static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size, + struct mdesc_mem_ops *mops) { - struct property *pp = md_find_property(mp, name, lenp); - return pp ? pp->value : NULL; + struct mdesc_handle *hp = mops->alloc(mdesc_size); + + if (hp) + hp->mops = mops; + + return hp; } -EXPORT_SYMBOL(md_get_property); -struct mdesc_node *md_find_node_by_name(struct mdesc_node *from, - const char *name) +static void mdesc_free(struct mdesc_handle *hp) { - struct mdesc_node *mp; + hp->mops->free(hp); +} - mp = from ? from->allnodes_next : allnodes; - for (; mp != NULL; mp = mp->allnodes_next) { - if (strcmp(mp->name, name) == 0) - break; +static struct mdesc_handle *cur_mdesc; +static LIST_HEAD(mdesc_zombie_list); +static DEFINE_SPINLOCK(mdesc_lock); + +struct mdesc_handle *mdesc_grab(void) +{ + struct mdesc_handle *hp; + unsigned long flags; + + spin_lock_irqsave(&mdesc_lock, flags); + hp = cur_mdesc; + if (hp) + atomic_inc(&hp->refcnt); + spin_unlock_irqrestore(&mdesc_lock, flags); + + return hp; +} +EXPORT_SYMBOL(mdesc_grab); + +void mdesc_release(struct mdesc_handle *hp) +{ + unsigned long flags; + + spin_lock_irqsave(&mdesc_lock, flags); + if (atomic_dec_and_test(&hp->refcnt)) { + list_del_init(&hp->list); + hp->mops->free(hp); } - return mp; + spin_unlock_irqrestore(&mdesc_lock, flags); } -EXPORT_SYMBOL(md_find_node_by_name); +EXPORT_SYMBOL(mdesc_release); -static unsigned int mdesc_early_allocated; +static DEFINE_MUTEX(mdesc_mutex); +static struct mdesc_notifier_client *client_list; -static void * __init mdesc_early_alloc(unsigned long size) +void mdesc_register_notifier(struct mdesc_notifier_client *client) { - void *ret; + u64 node; - ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); - if (ret == NULL) { - prom_printf("MDESC: alloc of %lu bytes failed.\n", size); - prom_halt(); + mutex_lock(&mdesc_mutex); + client->next = client_list; + client_list = client; + + mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name) + client->add(cur_mdesc, node); + + mutex_unlock(&mdesc_mutex); +} + +static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node) +{ + const u64 *id; + u64 a; + + id = NULL; + mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + id = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (id) + break; } - memset(ret, 0, size); + return id; +} - mdesc_early_allocated += size; +/* Run 'func' on nodes which are in A but not in B. */ +static void invoke_on_missing(const char *name, + struct mdesc_handle *a, + struct mdesc_handle *b, + void (*func)(struct mdesc_handle *, u64)) +{ + u64 node; - return ret; + mdesc_for_each_node_by_name(a, node, name) { + int found = 0, is_vdc_port = 0; + const char *name_prop; + const u64 *id; + u64 fnode; + + name_prop = mdesc_get_property(a, node, "name", NULL); + if (name_prop && !strcmp(name_prop, "vdc-port")) { + is_vdc_port = 1; + id = parent_cfg_handle(a, node); + } else + id = mdesc_get_property(a, node, "id", NULL); + + if (!id) { + printk(KERN_ERR "MD: Cannot find ID for %s node.\n", + (name_prop ? 
name_prop : name)); + continue; + } + + mdesc_for_each_node_by_name(b, fnode, name) { + const u64 *fid; + + if (is_vdc_port) { + name_prop = mdesc_get_property(b, fnode, + "name", NULL); + if (!name_prop || + strcmp(name_prop, "vdc-port")) + continue; + fid = parent_cfg_handle(b, fnode); + if (!fid) { + printk(KERN_ERR "MD: Cannot find ID " + "for vdc-port node.\n"); + continue; + } + } else + fid = mdesc_get_property(b, fnode, + "id", NULL); + + if (*id == *fid) { + found = 1; + break; + } + } + if (!found) + func(a, node); + } } -static unsigned int __init count_arcs(struct mdesc_elem *ep) +static void notify_one(struct mdesc_notifier_client *p, + struct mdesc_handle *old_hp, + struct mdesc_handle *new_hp) { - unsigned int ret = 0; + invoke_on_missing(p->node_name, old_hp, new_hp, p->remove); + invoke_on_missing(p->node_name, new_hp, old_hp, p->add); +} - ep++; - while (ep->tag != MD_NODE_END) { - if (ep->tag == MD_PROP_ARC) - ret++; - ep++; +static void mdesc_notify_clients(struct mdesc_handle *old_hp, + struct mdesc_handle *new_hp) +{ + struct mdesc_notifier_client *p = client_list; + + while (p) { + notify_one(p, old_hp, new_hp); + p = p->next; } - return ret; } -static void __init mdesc_node_alloc(u64 node, struct mdesc_elem *ep, const char *names) +void mdesc_update(void) { - unsigned int num_arcs = count_arcs(ep); - struct mdesc_node *mp; + unsigned long len, real_len, status; + struct mdesc_handle *hp, *orig_hp; + unsigned long flags; + + mutex_lock(&mdesc_mutex); + + (void) sun4v_mach_desc(0UL, 0UL, &len); + + hp = mdesc_alloc(len, &kmalloc_mdesc_memops); + if (!hp) { + printk(KERN_ERR "MD: mdesc alloc fails\n"); + goto out; + } + + status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); + if (status != HV_EOK || real_len > len) { + printk(KERN_ERR "MD: mdesc reread fails with %lu\n", + status); + atomic_dec(&hp->refcnt); + mdesc_free(hp); + goto out; + } - mp = mdesc_early_alloc(sizeof(*mp) + - (num_arcs * sizeof(struct mdesc_arc))); - mp->name = names + ep->name_offset; - mp->node = node; - mp->unique_id = unique_id++; - mp->num_arcs = num_arcs; + spin_lock_irqsave(&mdesc_lock, flags); + orig_hp = cur_mdesc; + cur_mdesc = hp; + spin_unlock_irqrestore(&mdesc_lock, flags); - hash_node(mp); + mdesc_notify_clients(orig_hp, hp); + + spin_lock_irqsave(&mdesc_lock, flags); + if (atomic_dec_and_test(&orig_hp->refcnt)) + mdesc_free(orig_hp); + else + list_add(&orig_hp->list, &mdesc_zombie_list); + spin_unlock_irqrestore(&mdesc_lock, flags); + +out: + mutex_unlock(&mdesc_mutex); } -static inline struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) +static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { return (struct mdesc_elem *) (mdesc + 1); } -static inline void *name_block(struct mdesc_hdr *mdesc) +static void *name_block(struct mdesc_hdr *mdesc) { return ((void *) node_block(mdesc)) + mdesc->node_sz; } -static inline void *data_block(struct mdesc_hdr *mdesc) +static void *data_block(struct mdesc_hdr *mdesc) { return ((void *) name_block(mdesc)) + mdesc->name_sz; } -/* In order to avoid recursion (the graph can be very deep) we use a - * two pass algorithm. First we allocate all the nodes and hash them. - * Then we iterate over each node, filling in the arcs and properties. 
- */ -static void __init build_all_nodes(struct mdesc_hdr *mdesc) +u64 mdesc_node_by_name(struct mdesc_handle *hp, + u64 from_node, const char *name) { - struct mdesc_elem *start, *ep; - struct mdesc_node *mp; - const char *names; - void *data; - u64 last_node; + struct mdesc_elem *ep = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + u64 ret; + + if (from_node == MDESC_NODE_NULL) { + ret = from_node = 0; + } else if (from_node >= last_node) { + return MDESC_NODE_NULL; + } else { + ret = ep[from_node].d.val; + } - start = ep = node_block(mdesc); - last_node = mdesc->node_sz / 16; + while (ret < last_node) { + if (ep[ret].tag != MD_NODE) + return MDESC_NODE_NULL; + if (!strcmp(names + ep[ret].name_offset, name)) + break; + ret = ep[ret].d.val; + } + if (ret >= last_node) + ret = MDESC_NODE_NULL; + return ret; +} +EXPORT_SYMBOL(mdesc_node_by_name); - names = name_block(mdesc); +const void *mdesc_get_property(struct mdesc_handle *hp, u64 node, + const char *name, int *lenp) +{ + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + void *data = data_block(&hp->mdesc); + struct mdesc_elem *ep; - while (1) { - u64 node = ep - start; + if (node == MDESC_NODE_NULL || node >= last_node) + return NULL; - if (ep->tag == MD_LIST_END) + ep = node_block(&hp->mdesc) + node; + ep++; + for (; ep->tag != MD_NODE_END; ep++) { + void *val = NULL; + int len = 0; + + switch (ep->tag) { + case MD_PROP_VAL: + val = &ep->d.val; + len = 8; break; - if (ep->tag != MD_NODE) { - prom_printf("MDESC: Inconsistent element list.\n"); - prom_halt(); - } - - mdesc_node_alloc(node, ep, names); + case MD_PROP_STR: + case MD_PROP_DATA: + val = data + ep->d.data.data_offset; + len = ep->d.data.data_len; + break; - if (ep->d.val >= last_node) { - printk("MDESC: Warning, early break out of node scan.\n"); - printk("MDESC: Next node [%lu] last_node [%lu].\n", - node, last_node); + default: break; } + if (!val) + continue; - ep = start + ep->d.val; + if (!strcmp(names + ep->name_offset, name)) { + if (lenp) + *lenp = len; + return val; + } } - data = data_block(mdesc); - for (mp = allnodes; mp; mp = mp->allnodes_next) { - struct mdesc_elem *ep = start + mp->node; - struct property **link = &mp->properties; - unsigned int this_arc = 0; - - ep++; - while (ep->tag != MD_NODE_END) { - switch (ep->tag) { - case MD_PROP_ARC: { - struct mdesc_node *target; - - if (this_arc >= mp->num_arcs) { - prom_printf("MDESC: ARC overrun [%u:%u]\n", - this_arc, mp->num_arcs); - prom_halt(); - } - target = find_node(ep->d.val); - if (!target) { - printk("MDESC: Warning, arc points to " - "missing node, ignoring.\n"); - break; - } - mp->arcs[this_arc].name = - (names + ep->name_offset); - mp->arcs[this_arc].arc = target; - this_arc++; - break; - } + return NULL; +} +EXPORT_SYMBOL(mdesc_get_property); - case MD_PROP_VAL: - case MD_PROP_STR: - case MD_PROP_DATA: { - struct property *p = mdesc_early_alloc(sizeof(*p)); - - p->unique_id = unique_id++; - p->name = (char *) names + ep->name_offset; - if (ep->tag == MD_PROP_VAL) { - p->value = &ep->d.val; - p->length = 8; - } else { - p->value = data + ep->d.data.data_offset; - p->length = ep->d.data.data_len; - } - *link = p; - link = &p->next; - break; - } +u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; - case MD_NOOP: - break; + if 
(from == MDESC_NODE_NULL || from >= last_node) + return MDESC_NODE_NULL; - default: - printk("MDESC: Warning, ignoring unknown tag type %02x\n", - ep->tag); - } - ep++; - } + ep = base + from; + + ep++; + for (; ep->tag != MD_NODE_END; ep++) { + if (ep->tag != MD_PROP_ARC) + continue; + + if (strcmp(names + ep->name_offset, arc_type)) + continue; + + return ep - base; } + + return MDESC_NODE_NULL; } +EXPORT_SYMBOL(mdesc_next_arc); -static unsigned int __init count_nodes(struct mdesc_hdr *mdesc) +u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc) { - struct mdesc_elem *ep = node_block(mdesc); - struct mdesc_elem *end; - unsigned int cnt = 0; + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); - end = ((void *)ep) + mdesc->node_sz; - while (ep < end) { - if (ep->tag == MD_NODE) - cnt++; - ep++; - } - return cnt; + ep = base + arc; + + return ep->d.val; +} +EXPORT_SYMBOL(mdesc_arc_target); + +const char *mdesc_node_name(struct mdesc_handle *hp, u64 node) +{ + struct mdesc_elem *ep, *base = node_block(&hp->mdesc); + const char *names = name_block(&hp->mdesc); + u64 last_node = hp->mdesc.node_sz / 16; + + if (node == MDESC_NODE_NULL || node >= last_node) + return NULL; + + ep = base + node; + if (ep->tag != MD_NODE) + return NULL; + + return names + ep->name_offset; } +EXPORT_SYMBOL(mdesc_node_name); static void __init report_platform_properties(void) { - struct mdesc_node *pn = md_find_node_by_name(NULL, "platform"); + struct mdesc_handle *hp = mdesc_grab(); + u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); const char *s; const u64 *v; - if (!pn) { + if (pn == MDESC_NODE_NULL) { prom_printf("No platform node in machine-description.\n"); prom_halt(); } - s = md_get_property(pn, "banner-name", NULL); + s = mdesc_get_property(hp, pn, "banner-name", NULL); printk("PLATFORM: banner-name [%s]\n", s); - s = md_get_property(pn, "name", NULL); + s = mdesc_get_property(hp, pn, "name", NULL); printk("PLATFORM: name [%s]\n", s); - v = md_get_property(pn, "hostid", NULL); + v = mdesc_get_property(hp, pn, "hostid", NULL); if (v) printk("PLATFORM: hostid [%08lx]\n", *v); - v = md_get_property(pn, "serial#", NULL); + v = mdesc_get_property(hp, pn, "serial#", NULL); if (v) printk("PLATFORM: serial# [%08lx]\n", *v); - v = md_get_property(pn, "stick-frequency", NULL); + v = mdesc_get_property(hp, pn, "stick-frequency", NULL); printk("PLATFORM: stick-frequency [%08lx]\n", *v); - v = md_get_property(pn, "mac-address", NULL); + v = mdesc_get_property(hp, pn, "mac-address", NULL); if (v) printk("PLATFORM: mac-address [%lx]\n", *v); - v = md_get_property(pn, "watchdog-resolution", NULL); + v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL); if (v) printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v); - v = md_get_property(pn, "watchdog-max-timeout", NULL); + v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL); if (v) printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v); - v = md_get_property(pn, "max-cpus", NULL); + v = mdesc_get_property(hp, pn, "max-cpus", NULL); if (v) printk("PLATFORM: max-cpus [%lu]\n", *v); + +#ifdef CONFIG_SMP + { + int max_cpu, i; + + if (v) { + max_cpu = *v; + if (max_cpu > NR_CPUS) + max_cpu = NR_CPUS; + } else { + max_cpu = NR_CPUS; + } + for (i = 0; i < max_cpu; i++) + cpu_set(i, cpu_possible_map); + } +#endif + + mdesc_release(hp); } static int inline find_in_proplist(const char *list, const char *match, int len) @@ -369,15 +582,17 @@ static int inline find_in_proplist(const char *list, const char *match, int len) return 0; } -static void 
__init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp) +static void __devinit fill_in_one_cache(cpuinfo_sparc *c, + struct mdesc_handle *hp, + u64 mp) { - const u64 *level = md_get_property(mp, "level", NULL); - const u64 *size = md_get_property(mp, "size", NULL); - const u64 *line_size = md_get_property(mp, "line-size", NULL); + const u64 *level = mdesc_get_property(hp, mp, "level", NULL); + const u64 *size = mdesc_get_property(hp, mp, "size", NULL); + const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL); const char *type; int type_len; - type = md_get_property(mp, "type", &type_len); + type = mdesc_get_property(hp, mp, "type", &type_len); switch (*level) { case 1: @@ -400,48 +615,45 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp) } if (*level == 1) { - unsigned int i; + u64 a; - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(hp, a); + const char *name = mdesc_node_name(hp, target); - if (strcmp(mp->arcs[i].name, "fwd")) - continue; - - if (!strcmp(t->name, "cache")) - fill_in_one_cache(c, t); + if (!strcmp(name, "cache")) + fill_in_one_cache(c, hp, target); } } } -static void __init mark_core_ids(struct mdesc_node *mp, int core_id) +static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp, + int core_id) { - unsigned int i; + u64 a; - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 t = mdesc_arc_target(hp, a); + const char *name; const u64 *id; - if (strcmp(mp->arcs[i].name, "back")) - continue; - - if (!strcmp(t->name, "cpu")) { - id = md_get_property(t, "id", NULL); + name = mdesc_node_name(hp, t); + if (!strcmp(name, "cpu")) { + id = mdesc_get_property(hp, t, "id", NULL); if (*id < NR_CPUS) cpu_data(*id).core_id = core_id; } else { - unsigned int j; + u64 j; - for (j = 0; j < t->num_arcs; j++) { - struct mdesc_node *n = t->arcs[j].arc; + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; - if (strcmp(t->arcs[j].name, "back")) + n_name = mdesc_node_name(hp, n); + if (strcmp(n_name, "cpu")) continue; - if (strcmp(n->name, "cpu")) - continue; - - id = md_get_property(n, "id", NULL); + id = mdesc_get_property(hp, n, "id", NULL); if (*id < NR_CPUS) cpu_data(*id).core_id = core_id; } @@ -449,78 +661,81 @@ static void __init mark_core_ids(struct mdesc_node *mp, int core_id) } } -static void __init set_core_ids(void) +static void __devinit set_core_ids(struct mdesc_handle *hp) { - struct mdesc_node *mp; int idx; + u64 mp; idx = 1; - md_for_each_node_by_name(mp, "cache") { - const u64 *level = md_get_property(mp, "level", NULL); + mdesc_for_each_node_by_name(hp, mp, "cache") { + const u64 *level; const char *type; int len; + level = mdesc_get_property(hp, mp, "level", NULL); if (*level != 1) continue; - type = md_get_property(mp, "type", &len); + type = mdesc_get_property(hp, mp, "type", &len); if (!find_in_proplist(type, "instn", len)) continue; - mark_core_ids(mp, idx); + mark_core_ids(hp, mp, idx); idx++; } } -static void __init mark_proc_ids(struct mdesc_node *mp, int proc_id) +static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp, + int proc_id) { - int i; + u64 a; - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 t = mdesc_arc_target(hp, a); + const 
char *name; const u64 *id; - if (strcmp(mp->arcs[i].name, "back")) + name = mdesc_node_name(hp, t); + if (strcmp(name, "cpu")) continue; - if (strcmp(t->name, "cpu")) - continue; - - id = md_get_property(t, "id", NULL); + id = mdesc_get_property(hp, t, "id", NULL); if (*id < NR_CPUS) cpu_data(*id).proc_id = proc_id; } } -static void __init __set_proc_ids(const char *exec_unit_name) +static void __devinit __set_proc_ids(struct mdesc_handle *hp, + const char *exec_unit_name) { - struct mdesc_node *mp; int idx; + u64 mp; idx = 0; - md_for_each_node_by_name(mp, exec_unit_name) { + mdesc_for_each_node_by_name(hp, mp, exec_unit_name) { const char *type; int len; - type = md_get_property(mp, "type", &len); + type = mdesc_get_property(hp, mp, "type", &len); if (!find_in_proplist(type, "int", len) && !find_in_proplist(type, "integer", len)) continue; - mark_proc_ids(mp, idx); + mark_proc_ids(hp, mp, idx); idx++; } } -static void __init set_proc_ids(void) +static void __devinit set_proc_ids(struct mdesc_handle *hp) { - __set_proc_ids("exec_unit"); - __set_proc_ids("exec-unit"); + __set_proc_ids(hp, "exec_unit"); + __set_proc_ids(hp, "exec-unit"); } -static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) +static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask, + unsigned char def) { u64 val; @@ -538,35 +753,37 @@ use_default: *mask = ((1U << def) * 64U) - 1U; } -static void __init get_mondo_data(struct mdesc_node *mp, struct trap_per_cpu *tb) +static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp, + struct trap_per_cpu *tb) { const u64 *val; - val = md_get_property(mp, "q-cpu-mondo-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL); get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); - val = md_get_property(mp, "q-dev-mondo-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL); get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); - val = md_get_property(mp, "q-resumable-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL); get_one_mondo_bits(val, &tb->resum_qmask, 6); - val = md_get_property(mp, "q-nonresumable-#bits", NULL); + val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL); get_one_mondo_bits(val, &tb->nonresum_qmask, 2); } -static void __init mdesc_fill_in_cpu_data(void) +void __devinit mdesc_fill_in_cpu_data(cpumask_t mask) { - struct mdesc_node *mp; + struct mdesc_handle *hp = mdesc_grab(); + u64 mp; ncpus_probed = 0; - md_for_each_node_by_name(mp, "cpu") { - const u64 *id = md_get_property(mp, "id", NULL); - const u64 *cfreq = md_get_property(mp, "clock-frequency", NULL); + mdesc_for_each_node_by_name(hp, mp, "cpu") { + const u64 *id = mdesc_get_property(hp, mp, "id", NULL); + const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); struct trap_per_cpu *tb; cpuinfo_sparc *c; - unsigned int i; int cpuid; + u64 a; ncpus_probed++; @@ -575,6 +792,8 @@ static void __init mdesc_fill_in_cpu_data(void) #ifdef CONFIG_SMP if (cpuid >= NR_CPUS) continue; + if (!cpu_isset(cpuid, mask)) + continue; #else /* On uniprocessor we only want the values for the * real physical cpu the kernel booted onto, however @@ -589,35 +808,30 @@ static void __init mdesc_fill_in_cpu_data(void) c->clock_tick = *cfreq; tb = &trap_block[cpuid]; - get_mondo_data(mp, tb); + get_mondo_data(hp, mp, tb); - for (i = 0; i < mp->num_arcs; i++) { - struct mdesc_node *t = mp->arcs[i].arc; - unsigned int j; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + u64 j, t = 
mdesc_arc_target(hp, a); + const char *t_name; - if (strcmp(mp->arcs[i].name, "fwd")) - continue; - - if (!strcmp(t->name, "cache")) { - fill_in_one_cache(c, t); + t_name = mdesc_node_name(hp, t); + if (!strcmp(t_name, "cache")) { + fill_in_one_cache(c, hp, t); continue; } - for (j = 0; j < t->num_arcs; j++) { - struct mdesc_node *n; - - n = t->arcs[j].arc; - if (strcmp(t->arcs[j].name, "fwd")) - continue; + mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) { + u64 n = mdesc_arc_target(hp, j); + const char *n_name; - if (!strcmp(n->name, "cache")) - fill_in_one_cache(c, n); + n_name = mdesc_node_name(hp, n); + if (!strcmp(n_name, "cache")) + fill_in_one_cache(c, hp, n); } } #ifdef CONFIG_SMP cpu_set(cpuid, cpu_present_map); - cpu_set(cpuid, phys_cpu_present_map); #endif c->core_id = 0; @@ -628,45 +842,43 @@ static void __init mdesc_fill_in_cpu_data(void) sparc64_multi_core = 1; #endif - set_core_ids(); - set_proc_ids(); + set_core_ids(hp); + set_proc_ids(hp); smp_fill_in_sib_core_maps(); + + mdesc_release(hp); } void __init sun4v_mdesc_init(void) { + struct mdesc_handle *hp; unsigned long len, real_len, status; + cpumask_t mask; (void) sun4v_mach_desc(0UL, 0UL, &len); printk("MDESC: Size is %lu bytes.\n", len); - main_mdesc = mdesc_early_alloc(len); + hp = mdesc_alloc(len, &bootmem_mdesc_ops); + if (hp == NULL) { + prom_printf("MDESC: alloc of %lu bytes failed.\n", len); + prom_halt(); + } - status = sun4v_mach_desc(__pa(main_mdesc), len, &real_len); + status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len); if (status != HV_EOK || real_len > len) { prom_printf("sun4v_mach_desc fails, err(%lu), " "len(%lu), real_len(%lu)\n", status, len, real_len); + mdesc_free(hp); prom_halt(); } - len = count_nodes(main_mdesc); - printk("MDESC: %lu nodes.\n", len); - - len = roundup_pow_of_two(len); - - mdesc_hash = mdesc_early_alloc(len * sizeof(struct mdesc_node *)); - mdesc_hash_size = len; - - printk("MDESC: Hash size %lu entries.\n", len); - - build_all_nodes(main_mdesc); - - printk("MDESC: Built graph with %u bytes of memory.\n", - mdesc_early_allocated); + cur_mdesc = hp; report_platform_properties(); - mdesc_fill_in_cpu_data(); + + cpus_setall(mask); + mdesc_fill_in_cpu_data(mask); } diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index 6676b93219d..4cc77485f53 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -1,132 +1,13 @@ #include <linux/string.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mod_devicetable.h> #include <linux/slab.h> - -#include <asm/errno.h> -#include <asm/of_device.h> - -/** - * of_match_device - Tell if an of_device structure has a matching - * of_match structure - * @ids: array of of device match structures to search in - * @dev: the of device structure to match against - * - * Used by a driver to check whether an of_device present in the - * system is in its list of supported devices. 
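The machine-description hunks above replace pointer-chasing over struct mdesc_node with opaque u64 node handles resolved through a grabbed-and-released mdesc_handle, which is what lets the MD be replaced at runtime later in this series. A minimal sketch of the new traversal idiom, using only the accessors these hunks introduce (walk_caches is a hypothetical helper, not part of the patch):

/* Sketch only: handle-based MD traversal; needs asm/mdesc.h from
 * this patch set. Nodes and arcs are u64 indices, not pointers.
 */
static void walk_caches(struct mdesc_handle *hp, u64 node)
{
        u64 a;

        mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_FWD) {
                u64 target = mdesc_arc_target(hp, a);

                if (!strcmp(mdesc_node_name(hp, target), "cache")) {
                        const u64 *level = mdesc_get_property(hp, target,
                                                              "level", NULL);
                        if (level)
                                printk(KERN_INFO "cache level %lu\n", *level);
                }
        }
}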
- */ -const struct of_device_id *of_match_device(const struct of_device_id *matches, - const struct of_device *dev) -{ - if (!dev->node) - return NULL; - while (matches->name[0] || matches->type[0] || matches->compatible[0]) { - int match = 1; - if (matches->name[0]) - match &= dev->node->name - && !strcmp(matches->name, dev->node->name); - if (matches->type[0]) - match &= dev->node->type - && !strcmp(matches->type, dev->node->type); - if (matches->compatible[0]) - match &= of_device_is_compatible(dev->node, - matches->compatible); - if (match) - return matches; - matches++; - } - return NULL; -} - -static int of_platform_bus_match(struct device *dev, struct device_driver *drv) -{ - struct of_device * of_dev = to_of_device(dev); - struct of_platform_driver * of_drv = to_of_platform_driver(drv); - const struct of_device_id * matches = of_drv->match_table; - - if (!matches) - return 0; - - return of_match_device(matches, of_dev) != NULL; -} - -struct of_device *of_dev_get(struct of_device *dev) -{ - struct device *tmp; - - if (!dev) - return NULL; - tmp = get_device(&dev->dev); - if (tmp) - return to_of_device(tmp); - else - return NULL; -} - -void of_dev_put(struct of_device *dev) -{ - if (dev) - put_device(&dev->dev); -} - - -static int of_device_probe(struct device *dev) -{ - int error = -ENODEV; - struct of_platform_driver *drv; - struct of_device *of_dev; - const struct of_device_id *match; - - drv = to_of_platform_driver(dev->driver); - of_dev = to_of_device(dev); - - if (!drv->probe) - return error; - - of_dev_get(of_dev); - - match = of_match_device(drv->match_table, of_dev); - if (match) - error = drv->probe(of_dev, match); - if (error) - of_dev_put(of_dev); - - return error; -} - -static int of_device_remove(struct device *dev) -{ - struct of_device * of_dev = to_of_device(dev); - struct of_platform_driver * drv = to_of_platform_driver(dev->driver); - - if (dev->driver && drv->remove) - drv->remove(of_dev); - return 0; -} - -static int of_device_suspend(struct device *dev, pm_message_t state) -{ - struct of_device * of_dev = to_of_device(dev); - struct of_platform_driver * drv = to_of_platform_driver(dev->driver); - int error = 0; - - if (dev->driver && drv->suspend) - error = drv->suspend(of_dev, state); - return error; -} - -static int of_device_resume(struct device * dev) -{ - struct of_device * of_dev = to_of_device(dev); - struct of_platform_driver * drv = to_of_platform_driver(dev->driver); - int error = 0; - - if (dev->driver && drv->resume) - error = drv->resume(of_dev); - return error; -} +#include <linux/errno.h> +#include <linux/of_device.h> +#include <linux/of_platform.h> void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name) { @@ -163,7 +44,7 @@ static int node_match(struct device *dev, void *data) struct of_device *of_find_device_by_node(struct device_node *dp) { - struct device *dev = bus_find_device(&of_bus_type, NULL, + struct device *dev = bus_find_device(&of_platform_bus_type, NULL, dp, node_match); if (dev) @@ -174,48 +55,20 @@ struct of_device *of_find_device_by_node(struct device_node *dp) EXPORT_SYMBOL(of_find_device_by_node); #ifdef CONFIG_PCI -struct bus_type isa_bus_type = { - .name = "isa", - .match = of_platform_bus_match, - .probe = of_device_probe, - .remove = of_device_remove, - .suspend = of_device_suspend, - .resume = of_device_resume, -}; +struct bus_type isa_bus_type; EXPORT_SYMBOL(isa_bus_type); -struct bus_type ebus_bus_type = { - .name = "ebus", - .match = of_platform_bus_match, - .probe = 
of_device_probe, - .remove = of_device_remove, - .suspend = of_device_suspend, - .resume = of_device_resume, -}; +struct bus_type ebus_bus_type; EXPORT_SYMBOL(ebus_bus_type); #endif #ifdef CONFIG_SBUS -struct bus_type sbus_bus_type = { - .name = "sbus", - .match = of_platform_bus_match, - .probe = of_device_probe, - .remove = of_device_remove, - .suspend = of_device_suspend, - .resume = of_device_resume, -}; +struct bus_type sbus_bus_type; EXPORT_SYMBOL(sbus_bus_type); #endif -struct bus_type of_bus_type = { - .name = "of", - .match = of_platform_bus_match, - .probe = of_device_probe, - .remove = of_device_remove, - .suspend = of_device_suspend, - .resume = of_device_resume, -}; -EXPORT_SYMBOL(of_bus_type); +struct bus_type of_platform_bus_type; +EXPORT_SYMBOL(of_platform_bus_type); static inline u64 of_read_addr(const u32 *cell, int size) { @@ -899,11 +752,16 @@ static struct of_device * __init scan_one_device(struct device_node *dp, { struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL); const unsigned int *irq; + struct dev_archdata *sd; int len, i; if (!op) return NULL; + sd = &op->dev.archdata; + sd->prom_node = dp; + sd->op = op; + op->node = dp; op->clock_freq = of_getintprop_default(dp, "clock-frequency", @@ -933,7 +791,7 @@ static struct of_device * __init scan_one_device(struct device_node *dp, op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]); op->dev.parent = parent; - op->dev.bus = &of_bus_type; + op->dev.bus = &of_platform_bus_type; if (!parent) strcpy(op->dev.bus_id, "root"); else @@ -977,16 +835,16 @@ static int __init of_bus_driver_init(void) { int err; - err = bus_register(&of_bus_type); + err = of_bus_type_init(&of_platform_bus_type, "of"); #ifdef CONFIG_PCI if (!err) - err = bus_register(&isa_bus_type); + err = of_bus_type_init(&isa_bus_type, "isa"); if (!err) - err = bus_register(&ebus_bus_type); + err = of_bus_type_init(&ebus_bus_type, "ebus"); #endif #ifdef CONFIG_SBUS if (!err) - err = bus_register(&sbus_bus_type); + err = of_bus_type_init(&sbus_bus_type, "sbus"); #endif if (!err) @@ -1020,61 +878,13 @@ int of_register_driver(struct of_platform_driver *drv, struct bus_type *bus) /* register with core */ return driver_register(&drv->driver); } +EXPORT_SYMBOL(of_register_driver); void of_unregister_driver(struct of_platform_driver *drv) { driver_unregister(&drv->driver); } - - -static ssize_t dev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct of_device *ofdev; - - ofdev = to_of_device(dev); - return sprintf(buf, "%s", ofdev->node->full_name); -} - -static DEVICE_ATTR(devspec, S_IRUGO, dev_show_devspec, NULL); - -/** - * of_release_dev - free an of device structure when all users of it are finished. - * @dev: device that's been disconnected - * - * Will be called only by the device core when all users of this of device are - * done. 
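With of_bus_type renamed to of_platform_bus_type and the match/probe/remove/suspend/resume plumbing moved to generic code, a sparc64 bus driver now only declares its match table and callbacks. A minimal registration sketch under that assumption (my_match, my_probe and my_driver are hypothetical names; the shape mirrors power_init() and clock_init() further below):

/* Sketch: driver registration on the shared of_platform_bus_type. */
static struct of_device_id my_match[] = {
        { .name = "my-device", },
        {},
};

static int __devinit my_probe(struct of_device *op,
                              const struct of_device_id *match)
{
        /* map registers, request the IRQ, etc. */
        return 0;
}

static struct of_platform_driver my_driver = {
        .name           = "my-device",
        .match_table    = my_match,
        .probe          = my_probe,
};

static int __init my_init(void)
{
        return of_register_driver(&my_driver, &of_platform_bus_type);
}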
- */ -void of_release_dev(struct device *dev) -{ - struct of_device *ofdev; - - ofdev = to_of_device(dev); - - kfree(ofdev); -} - -int of_device_register(struct of_device *ofdev) -{ - int rc; - - BUG_ON(ofdev->node == NULL); - - rc = device_register(&ofdev->dev); - if (rc) - return rc; - - rc = device_create_file(&ofdev->dev, &dev_attr_devspec); - if (rc) - device_unregister(&ofdev->dev); - - return rc; -} - -void of_device_unregister(struct of_device *ofdev) -{ - device_remove_file(&ofdev->dev, &dev_attr_devspec); - device_unregister(&ofdev->dev); -} +EXPORT_SYMBOL(of_unregister_driver); struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id, @@ -1100,13 +910,4 @@ struct of_device* of_platform_device_create(struct device_node *np, return dev; } - -EXPORT_SYMBOL(of_match_device); -EXPORT_SYMBOL(of_register_driver); -EXPORT_SYMBOL(of_unregister_driver); -EXPORT_SYMBOL(of_device_register); -EXPORT_SYMBOL(of_device_unregister); -EXPORT_SYMBOL(of_dev_get); -EXPORT_SYMBOL(of_dev_put); EXPORT_SYMBOL(of_platform_device_create); -EXPORT_SYMBOL(of_release_dev); diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 81f4a5ea05f..55ad1b899bb 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -448,6 +448,7 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, */ pci_read_config_dword(dev, PCI_CLASS_REVISION, &class); dev->class = class >> 8; + dev->revision = class & 0xff; sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus), dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn)); diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index 6b3fe2c1d65..639cf06ca37 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -1129,7 +1129,7 @@ static void pci_sun4v_msi_init(struct pci_pbm_info *pbm) } #endif /* !(CONFIG_PCI_MSI) */ -static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle) +static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node *dp, u32 devhandle) { struct pci_pbm_info *pbm; @@ -1163,7 +1163,7 @@ static void pci_sun4v_pbm_init(struct pci_controller_info *p, struct device_node pci_sun4v_msi_init(pbm); } -void sun4v_pci_init(struct device_node *dp, char *model_name) +void __init sun4v_pci_init(struct device_node *dp, char *model_name) { static int hvapi_negotiated = 0; struct pci_controller_info *p; diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c index 5d6adea3967..881a09ee4c4 100644 --- a/arch/sparc64/kernel/power.c +++ b/arch/sparc64/kernel/power.c @@ -1,7 +1,6 @@ -/* $Id: power.c,v 1.10 2001/12/11 01:57:16 davem Exp $ - * power.c: Power management driver. +/* power.c: Power management driver. * - * Copyright (C) 1999 David S. Miller (davem@redhat.com) + * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net) */ #include <linux/kernel.h> @@ -13,6 +12,7 @@ #include <linux/interrupt.h> #include <linux/pm.h> #include <linux/syscalls.h> +#include <linux/reboot.h> #include <asm/system.h> #include <asm/auxio.h> @@ -29,24 +29,15 @@ */ int scons_pwroff = 1; -#ifdef CONFIG_PCI -#include <linux/pci.h> static void __iomem *power_reg; -static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); -static int button_pressed; - static irqreturn_t power_handler(int irq, void *dev_id) { - if (button_pressed == 0) { - button_pressed = 1; - wake_up(&powerd_wait); - } + orderly_poweroff(true); /* FIXME: Check registers for status... 
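The powerd kernel thread and its wait-queue handshake are deleted below: orderly_poweroff() lets the interrupt handler hand the whole shutdown policy to userspace, forcing a direct power-off only if the helper cannot be run. A minimal sketch of the pattern in isolation (button_handler is a hypothetical handler, not this driver's):

#include <linux/interrupt.h>
#include <linux/reboot.h>

/* Sketch: power-button IRQ handler; orderly_poweroff(true) runs the
 * userspace poweroff helper and falls back to a forced power-off.
 */
static irqreturn_t button_handler(int irq, void *dev_id)
{
        orderly_poweroff(true);
        return IRQ_HANDLED;
}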
*/ return IRQ_HANDLED; } -#endif /* CONFIG_PCI */ extern void machine_halt(void); extern void machine_alt_power_off(void); @@ -55,20 +46,19 @@ static void (*poweroff_method)(void) = machine_alt_power_off; void machine_power_off(void) { sstate_poweroff(); - if (!serial_console || scons_pwroff) { -#ifdef CONFIG_PCI + if (strcmp(of_console_device->type, "serial") || scons_pwroff) { if (power_reg) { /* Both register bits seem to have the * same effect, so until I figure out * what the difference is... */ writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg); - } else -#endif /* CONFIG_PCI */ + } else { if (poweroff_method != NULL) { poweroff_method(); /* not reached */ } + } } machine_halt(); } @@ -76,40 +66,9 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL(pm_power_off); -#ifdef CONFIG_PCI -static int powerd(void *__unused) -{ - static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; - DECLARE_WAITQUEUE(wait, current); - - daemonize("powerd"); - - add_wait_queue(&powerd_wait, &wait); -again: - for (;;) { - set_task_state(current, TASK_INTERRUPTIBLE); - if (button_pressed) - break; - flush_signals(current); - schedule(); - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&powerd_wait, &wait); - - /* Ok, down we go... */ - button_pressed = 0; - if (kernel_execve("/sbin/shutdown", argv, envp) < 0) { - printk("powerd: shutdown execution failed\n"); - add_wait_queue(&powerd_wait, &wait); - goto again; - } - return 0; -} - static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) { - if (irq == PCI_IRQ_NONE) + if (irq == 0xffffffff) return 0; if (!of_find_property(dp, "button", NULL)) return 0; @@ -124,23 +83,15 @@ static int __devinit power_probe(struct of_device *op, const struct of_device_id power_reg = of_ioremap(res, 0, 0x4, "power"); - printk("%s: Control reg at %lx ... ", + printk(KERN_INFO "%s: Control reg at %lx\n", op->node->name, res->start); poweroff_method = machine_halt; /* able to use the standard halt */ if (has_button_interrupt(irq, op->node)) { - if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { - printk("Failed to start power daemon.\n"); - return 0; - } - printk("powerd running.\n"); - if (request_irq(irq, power_handler, 0, "power", NULL) < 0) - printk("power: Error, cannot register IRQ handler.\n"); - } else { - printk("not using powerd.\n"); + printk(KERN_ERR "power: Cannot setup IRQ handler.\n"); } return 0; @@ -161,7 +112,6 @@ static struct of_platform_driver power_driver = { void __init power_init(void) { - of_register_driver(&power_driver, &of_bus_type); + of_register_driver(&power_driver, &of_platform_bus_type); return; } -#endif /* CONFIG_PCI */ diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index f5f97e2c669..fd7899ba1d7 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -29,6 +29,7 @@ #include <linux/compat.h> #include <linux/tick.h> #include <linux/init.h> +#include <linux/cpu.h> #include <asm/oplib.h> #include <asm/uaccess.h> @@ -49,7 +50,7 @@ /* #define VERBOSE_SHOWREGS */ -static void sparc64_yield(void) +static void sparc64_yield(int cpu) { if (tlb_type != hypervisor) return; @@ -57,7 +58,7 @@ static void sparc64_yield(void) clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - while (!need_resched()) { + while (!need_resched() && !cpu_is_offline(cpu)) { unsigned long pstate; /* Disable interrupts. 
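The sparc64_yield() rework in progress here keeps the classic idle-loop discipline: the hypervisor yield is entered only with interrupts disabled and only after re-checking need_resched() and cpu_is_offline(), so a wakeup IPI landing between the check and the yield stays pending instead of being lost. The same shape in plainer form (a fragment; local_irq_disable()/local_irq_enable() stand in for the raw %pstate asm used in the real function):

/* Sketch: race-free cpu yield; check, sleep, re-enable. */
while (!need_resched() && !cpu_is_offline(cpu)) {
        local_irq_disable();
        if (!need_resched() && !cpu_is_offline(cpu))
                sun4v_cpu_yield();      /* resumes on the next interrupt */
        local_irq_enable();             /* any pending wakeup fires now  */
}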
*/ @@ -68,7 +69,7 @@ static void sparc64_yield(void) : "=&r" (pstate) : "i" (PSTATE_IE)); - if (!need_resched()) + if (!need_resched() && !cpu_is_offline(cpu)) sun4v_cpu_yield(); /* Re-enable interrupts. */ @@ -86,15 +87,25 @@ static void sparc64_yield(void) /* The idle loop on sparc64. */ void cpu_idle(void) { + int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); while(1) { tick_nohz_stop_sched_tick(); - while (!need_resched()) - sparc64_yield(); + + while (!need_resched() && !cpu_is_offline(cpu)) + sparc64_yield(cpu); + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_is_offline(cpu)) + cpu_play_dead(); +#endif + schedule(); preempt_disable(); } @@ -108,7 +119,7 @@ extern void (*prom_keyboard)(void); void machine_halt(void) { sstate_halt(); - if (!serial_console && prom_palette) + if (prom_palette) prom_palette (1); if (prom_keyboard) prom_keyboard(); @@ -119,7 +130,7 @@ void machine_halt(void) void machine_alt_power_off(void) { sstate_poweroff(); - if (!serial_console && prom_palette) + if (prom_palette) prom_palette(1); if (prom_keyboard) prom_keyboard(); @@ -134,7 +145,7 @@ void machine_restart(char * cmd) sstate_reboot(); p = strchr (reboot_command, '\n'); if (p) *p = 0; - if (!serial_console && prom_palette) + if (prom_palette) prom_palette (1); if (prom_keyboard) prom_keyboard(); diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c index 61036b34666..f4e0a9ad9be 100644 --- a/arch/sparc64/kernel/prom.c +++ b/arch/sparc64/kernel/prom.c @@ -30,73 +30,9 @@ #include <asm/upa.h> #include <asm/smp.h> -static struct device_node *allnodes; +extern struct device_node *allnodes; /* temporary while merging */ -/* use when traversing tree through the allnext, child, sibling, - * or parent members of struct device_node. - */ -static DEFINE_RWLOCK(devtree_lock); - -int of_device_is_compatible(const struct device_node *device, - const char *compat) -{ - const char* cp; - int cplen, l; - - cp = of_get_property(device, "compatible", &cplen); - if (cp == NULL) - return 0; - while (cplen > 0) { - if (strncmp(cp, compat, strlen(compat)) == 0) - return 1; - l = strlen(cp) + 1; - cp += l; - cplen -= l; - } - - return 0; -} -EXPORT_SYMBOL(of_device_is_compatible); - -struct device_node *of_get_parent(const struct device_node *node) -{ - struct device_node *np; - - if (!node) - return NULL; - - np = node->parent; - - return np; -} -EXPORT_SYMBOL(of_get_parent); - -struct device_node *of_get_next_child(const struct device_node *node, - struct device_node *prev) -{ - struct device_node *next; - - next = prev ? prev->sibling : node->child; - for (; next != 0; next = next->sibling) { - break; - } - - return next; -} -EXPORT_SYMBOL(of_get_next_child); - -struct device_node *of_find_node_by_path(const char *path) -{ - struct device_node *np = allnodes; - - for (; np != 0; np = np->allnext) { - if (np->full_name != 0 && strcmp(np->full_name, path) == 0) - break; - } - - return np; -} -EXPORT_SYMBOL(of_find_node_by_path); +extern rwlock_t devtree_lock; /* temporary while merging */ struct device_node *of_find_node_by_phandle(phandle handle) { @@ -110,81 +46,6 @@ struct device_node *of_find_node_by_phandle(phandle handle) } EXPORT_SYMBOL(of_find_node_by_phandle); -struct device_node *of_find_node_by_name(struct device_node *from, - const char *name) -{ - struct device_node *np; - - np = from ? 
from->allnext : allnodes; - for (; np != NULL; np = np->allnext) - if (np->name != NULL && strcmp(np->name, name) == 0) - break; - - return np; -} -EXPORT_SYMBOL(of_find_node_by_name); - -struct device_node *of_find_node_by_type(struct device_node *from, - const char *type) -{ - struct device_node *np; - - np = from ? from->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (np->type != 0 && strcmp(np->type, type) == 0) - break; - - return np; -} -EXPORT_SYMBOL(of_find_node_by_type); - -struct device_node *of_find_compatible_node(struct device_node *from, - const char *type, const char *compatible) -{ - struct device_node *np; - - np = from ? from->allnext : allnodes; - for (; np != 0; np = np->allnext) { - if (type != NULL - && !(np->type != 0 && strcmp(np->type, type) == 0)) - continue; - if (of_device_is_compatible(np, compatible)) - break; - } - - return np; -} -EXPORT_SYMBOL(of_find_compatible_node); - -struct property *of_find_property(const struct device_node *np, - const char *name, - int *lenp) -{ - struct property *pp; - - for (pp = np->properties; pp != 0; pp = pp->next) { - if (strcasecmp(pp->name, name) == 0) { - if (lenp != 0) - *lenp = pp->length; - break; - } - } - return pp; -} -EXPORT_SYMBOL(of_find_property); - -/* - * Find a property with a given name for a given node - * and return the value. - */ -const void *of_get_property(const struct device_node *np, const char *name, - int *lenp) -{ - struct property *pp = of_find_property(np,name,lenp); - return pp ? pp->value : NULL; -} -EXPORT_SYMBOL(of_get_property); - int of_getintprop_default(struct device_node *np, const char *name, int def) { struct property *prop; @@ -198,36 +59,6 @@ int of_getintprop_default(struct device_node *np, const char *name, int def) } EXPORT_SYMBOL(of_getintprop_default); -int of_n_addr_cells(struct device_node *np) -{ - const int* ip; - do { - if (np->parent) - np = np->parent; - ip = of_get_property(np, "#address-cells", NULL); - if (ip != NULL) - return *ip; - } while (np->parent); - /* No #address-cells property for the root node, default to 2 */ - return 2; -} -EXPORT_SYMBOL(of_n_addr_cells); - -int of_n_size_cells(struct device_node *np) -{ - const int* ip; - do { - if (np->parent) - np = np->parent; - ip = of_get_property(np, "#size-cells", NULL); - if (ip != NULL) - return *ip; - } while (np->parent); - /* No #size-cells property for the root node, default to 1 */ - return 1; -} -EXPORT_SYMBOL(of_n_size_cells); - int of_set_property(struct device_node *dp, const char *name, void *val, int len) { struct property **prevp; @@ -1808,13 +1639,67 @@ static void __init of_fill_in_cpu_data(void) #ifdef CONFIG_SMP cpu_set(cpuid, cpu_present_map); - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); #endif } smp_fill_in_sib_core_maps(); } +struct device_node *of_console_device; +EXPORT_SYMBOL(of_console_device); + +char *of_console_path; +EXPORT_SYMBOL(of_console_path); + +char *of_console_options; +EXPORT_SYMBOL(of_console_options); + +static void __init of_console_init(void) +{ + char *msg = "OF stdout device is: %s\n"; + struct device_node *dp; + const char *type; + phandle node; + + of_console_path = prom_early_alloc(256); + if (prom_ihandle2path(prom_stdout, of_console_path, 256) < 0) { + prom_printf("Cannot obtain path of stdout.\n"); + prom_halt(); + } + of_console_options = strrchr(of_console_path, ':'); + if (of_console_options) { + of_console_options++; + if (*of_console_options == '\0') + of_console_options = NULL; + } + + node = 
prom_inst2pkg(prom_stdout); + if (!node) { + prom_printf("Cannot resolve stdout node from " + "instance %08x.\n", prom_stdout); + prom_halt(); + } + + dp = of_find_node_by_phandle(node); + type = of_get_property(dp, "device_type", NULL); + if (!type) { + prom_printf("Console stdout lacks device_type property.\n"); + prom_halt(); + } + + if (strcmp(type, "display") && strcmp(type, "serial")) { + prom_printf("Console device_type is neither display " + "nor serial.\n"); + prom_halt(); + } + + of_console_device = dp; + + prom_printf(msg, of_console_path); + printk(msg, of_console_path); +} + void __init prom_build_devicetree(void) { struct device_node **nextp; @@ -1827,6 +1712,8 @@ void __init prom_build_devicetree(void) allnodes->child = build_tree(allnodes, prom_getchild(allnodes->node), &nextp); + of_console_init(); + printk("PROM: Built device tree with %u bytes of memory.\n", prom_early_allocated); diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 7490cc670a5..0f5be828ee9 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -133,33 +133,6 @@ static void __init process_switch(char c) } } -static void __init process_console(char *commands) -{ - serial_console = 0; - commands += 8; - /* Linux-style serial */ - if (!strncmp(commands, "ttyS", 4)) - serial_console = simple_strtoul(commands + 4, NULL, 10) + 1; - else if (!strncmp(commands, "tty", 3)) { - char c = *(commands + 3); - /* Solaris-style serial */ - if (c == 'a' || c == 'b') { - serial_console = c - 'a' + 1; - prom_printf ("Using /dev/tty%c as console.\n", c); - } - /* else Linux-style fbcon, not serial */ - } -#if defined(CONFIG_PROM_CONSOLE) - if (!strncmp(commands, "prom", 4)) { - char *p; - - for (p = commands - 8; *p && *p != ' '; p++) - *p = ' '; - conswitchp = &prom_con; - } -#endif -} - static void __init boot_flags_init(char *commands) { while (*commands) { @@ -176,9 +149,7 @@ static void __init boot_flags_init(char *commands) process_switch(*commands++); continue; } - if (!strncmp(commands, "console=", 8)) { - process_console(commands); - } else if (!strncmp(commands, "mem=", 4)) { + if (!strncmp(commands, "mem=", 4)) { /* * "mem=XXX[kKmM]" overrides the PROM-reported * memory size. @@ -378,44 +349,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); } -static int __init set_preferred_console(void) -{ - int idev, odev; - - /* The user has requested a console so this is already set up. */ - if (serial_console >= 0) - return -EBUSY; - - idev = prom_query_input_device(); - odev = prom_query_output_device(); - if (idev == PROMDEV_IKBD && odev == PROMDEV_OSCREEN) { - serial_console = 0; - } else if (idev == PROMDEV_ITTYA && odev == PROMDEV_OTTYA) { - serial_console = 1; - } else if (idev == PROMDEV_ITTYB && odev == PROMDEV_OTTYB) { - serial_console = 2; - } else if (idev == PROMDEV_IRSC && odev == PROMDEV_ORSC) { - serial_console = 3; - } else if (idev == PROMDEV_IVCONS && odev == PROMDEV_OVCONS) { - /* sunhv_console_init() doesn't check the serial_console - * value anyways... - */ - serial_console = 4; - return add_preferred_console("ttyHV", 0, NULL); - } else { - prom_printf("Inconsistent console: " - "input %d, output %d\n", - idev, odev); - prom_halt(); - } - - if (serial_console) - return add_preferred_console("ttyS", serial_console - 1, NULL); - - return -ENODEV; -} -console_initcall(set_preferred_console); - /* BUFFER is PAGE_SIZE bytes long. 
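of_console_init() supersedes both the console= command-line parsing and the prom_query_*_device() guessing removed from setup.c above: the firmware's stdout instance is resolved directly into a device node and validated. The chain, condensed (buffer length and error paths elided):

/* Sketch: firmware stdout -> device node, per of_console_init(). */
char *path = prom_early_alloc(256);
phandle node;
struct device_node *dp;

prom_ihandle2path(prom_stdout, path, 256);  /* instance -> full path   */
node = prom_inst2pkg(prom_stdout);          /* instance -> package     */
dp = of_find_node_by_phandle(node);         /* package  -> device node */
/* "device_type" must then be "display" or "serial", else prom_halt() */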
*/ extern char *sparc_cpu_type; @@ -442,7 +375,6 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) "D$ parity tl1\t: %u\n" "I$ parity tl1\t: %u\n" #ifndef CONFIG_SMP - "Cpu0Bogo\t: %lu.%02lu\n" "Cpu0ClkTck\t: %016lx\n" #endif , @@ -457,9 +389,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) dcache_parity_tl1_occurred, icache_parity_tl1_occurred #ifndef CONFIG_SMP - , cpu_data(0).udelay_val/(500000/HZ), - (cpu_data(0).udelay_val/(5000/HZ)) % 100, - cpu_data(0).clock_tick + , cpu_data(0).clock_tick #endif ); #ifdef CONFIG_SMP @@ -511,5 +441,4 @@ void sun_do_break(void) prom_cmdline(); } -int serial_console = -1; int stop_a_enabled = 1; diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index 203e8730100..fb13775b368 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -289,9 +289,7 @@ void do_rt_sigreturn(struct pt_regs *regs) struct rt_signal_frame __user *sf; unsigned long tpc, tnpc, tstate; __siginfo_fpu_t __user *fpu_save; - mm_segment_t old_fs; sigset_t set; - stack_t st; int err; /* Always make any pending restarted system calls return -EINTR */ @@ -327,20 +325,13 @@ void do_rt_sigreturn(struct pt_regs *regs) err |= restore_fpu_state(regs, &sf->fpu_state); err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); - err |= __copy_from_user(&st, &sf->stack, sizeof(stack_t)); - + err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf); + if (err) goto segv; - + regs->tpc = tpc; regs->tnpc = tnpc; - - /* It is more difficult to avoid calling this function than to - call it and ignore errors. */ - old_fs = get_fs(); - set_fs(KERNEL_DS); - do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf); - set_fs(old_fs); sigdelsetmask(&set, ~_BLOCKABLE); spin_lock_irq(¤t->sighand->siglock); diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 40e40f968d6..b448d33321c 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1,6 +1,6 @@ /* smp.c: Sparc64 SMP support. * - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net) */ #include <linux/module.h> @@ -28,6 +28,8 @@ #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/cpudata.h> +#include <asm/hvtramp.h> +#include <asm/io.h> #include <asm/irq.h> #include <asm/irq_regs.h> @@ -41,22 +43,26 @@ #include <asm/sections.h> #include <asm/prom.h> #include <asm/mdesc.h> +#include <asm/ldc.h> +#include <asm/hypervisor.h> extern void calibrate_delay(void); int sparc64_multi_core __read_mostly; -/* Please don't make this stuff initdata!!! --DaveM */ -unsigned char boot_cpu_id; - +cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE; cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = CPU_MASK_NONE }; + +EXPORT_SYMBOL(cpu_possible_map); +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_sibling_map); +EXPORT_SYMBOL(cpu_core_map); + static cpumask_t smp_commenced_mask; -static cpumask_t cpu_callout_map; void smp_info(struct seq_file *m) { @@ -73,18 +79,17 @@ void smp_bogo(struct seq_file *m) for_each_online_cpu(i) seq_printf(m, - "Cpu%dBogo\t: %lu.%02lu\n" "Cpu%dClkTck\t: %016lx\n", - i, cpu_data(i).udelay_val / (500000/HZ), - (cpu_data(i).udelay_val / (5000/HZ)) % 100, i, cpu_data(i).clock_tick); } +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); + extern void setup_sparc64_timer(void); static volatile unsigned long callin_flag = 0; -void __init smp_callin(void) +void __devinit smp_callin(void) { int cpuid = hard_smp_processor_id(); @@ -102,8 +107,6 @@ void __init smp_callin(void) local_irq_enable(); - calibrate_delay(); - cpu_data(cpuid).udelay_val = loops_per_jiffy; callin_flag = 1; __asm__ __volatile__("membar #Sync\n\t" "flush %%g6" : : : "memory"); @@ -120,7 +123,9 @@ void __init smp_callin(void) while (!cpu_isset(cpuid, smp_commenced_mask)) rmb(); + spin_lock(&call_lock); cpu_set(cpuid, cpu_online_map); + spin_unlock(&call_lock); /* idle thread is expected to have preempt disabled */ preempt_disable(); @@ -268,6 +273,67 @@ static void smp_synchronize_one_tick(int cpu) spin_unlock_irqrestore(&itc_sync_lock, flags); } +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) +/* XXX Put this in some common place. XXX */ +static unsigned long kimage_addr_to_ra(void *p) +{ + unsigned long val = (unsigned long) p; + + return kern_base + (val - KERNBASE); +} + +static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) +{ + extern unsigned long sparc64_ttable_tl0; + extern unsigned long kern_locked_tte_data; + extern int bigkernel; + struct hvtramp_descr *hdesc; + unsigned long trampoline_ra; + struct trap_per_cpu *tb; + u64 tte_vaddr, tte_data; + unsigned long hv_err; + + hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL); + if (!hdesc) { + printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " + "hvtramp_descr.\n"); + return; + } + + hdesc->cpu = cpu; + hdesc->num_mappings = (bigkernel ? 
2 : 1); + + tb = &trap_block[cpu]; + tb->hdesc = hdesc; + + hdesc->fault_info_va = (unsigned long) &tb->fault_info; + hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); + + hdesc->thread_reg = thread_reg; + + tte_vaddr = (unsigned long) KERNBASE; + tte_data = kern_locked_tte_data; + + hdesc->maps[0].vaddr = tte_vaddr; + hdesc->maps[0].tte = tte_data; + if (bigkernel) { + tte_vaddr += 0x400000; + tte_data += 0x400000; + hdesc->maps[1].vaddr = tte_vaddr; + hdesc->maps[1].tte = tte_data; + } + + trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); + + hv_err = sun4v_cpu_start(cpu, trampoline_ra, + kimage_addr_to_ra(&sparc64_ttable_tl0), + __pa(hdesc)); + if (hv_err) + printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() " + "gives error %lu\n", hv_err); +} +#endif + extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); extern unsigned long sparc64_cpu_startup; @@ -280,6 +346,7 @@ static struct thread_info *cpu_new_thread = NULL; static int __devinit smp_boot_one_cpu(unsigned int cpu) { + struct trap_per_cpu *tb = &trap_block[cpu]; unsigned long entry = (unsigned long)(&sparc64_cpu_startup); unsigned long cookie = @@ -290,20 +357,25 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) p = fork_idle(cpu); callin_flag = 0; cpu_new_thread = task_thread_info(p); - cpu_set(cpu, cpu_callout_map); if (tlb_type == hypervisor) { /* Alloc the mondo queues, cpu will load them. */ sun4v_init_mondo_queues(0, cpu, 1, 0); - prom_startcpu_cpuid(cpu, entry, cookie); +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) + if (ldom_domaining_enabled) + ldom_startcpu_cpuid(cpu, + (unsigned long) cpu_new_thread); + else +#endif + prom_startcpu_cpuid(cpu, entry, cookie); } else { struct device_node *dp = of_find_node_by_cpuid(cpu); prom_startcpu(dp->node, entry, cookie); } - for (timeout = 0; timeout < 5000000; timeout++) { + for (timeout = 0; timeout < 50000; timeout++) { if (callin_flag) break; udelay(100); @@ -313,11 +385,15 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) ret = 0; } else { printk("Processor %d is stuck.\n", cpu); - cpu_clear(cpu, cpu_callout_map); ret = -ENODEV; } cpu_new_thread = NULL; + if (tb->hdesc) { + kfree(tb->hdesc); + tb->hdesc = NULL; + } + return ret; } @@ -720,7 +796,6 @@ struct call_data_struct { int wait; }; -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); static struct call_data_struct *call_data; extern unsigned long xcall_call_function; @@ -1152,34 +1227,14 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) preempt_enable(); } -void __init smp_tick_init(void) -{ - boot_cpu_id = hard_smp_processor_id(); -} - /* /proc/profile writes can call this, don't __init it please. */ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } -/* Constrain the number of cpus to max_cpus. 
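Note the retuned callin loop in smp_boot_one_cpu() above: 50000 polls at udelay(100) bound the wait for a secondary cpu at about 5 seconds, where the old 5000000-iteration loop allowed roughly 500 seconds; a cpu that has not set callin_flag by then is declared stuck. The shape of the wait:

/* Sketch: bounded callin wait, 50000 * 100us ~= 5 seconds. */
for (timeout = 0; timeout < 50000; timeout++) {
        if (callin_flag)        /* set by the new cpu in smp_callin() */
                break;
        udelay(100);
}
ret = callin_flag ? 0 : -ENODEV;        /* -ENODEV: "Processor stuck" */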
*/ void __init smp_prepare_cpus(unsigned int max_cpus) { - int i; - - if (num_possible_cpus() > max_cpus) { - for_each_possible_cpu(i) { - if (i != boot_cpu_id) { - cpu_clear(i, phys_cpu_present_map); - cpu_clear(i, cpu_present_map); - if (num_possible_cpus() <= max_cpus) - break; - } - } - } - - cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy; } void __devinit smp_prepare_boot_cpu(void) @@ -1190,30 +1245,32 @@ void __devinit smp_fill_in_sib_core_maps(void) { unsigned int i; - for_each_possible_cpu(i) { + for_each_present_cpu(i) { unsigned int j; + cpus_clear(cpu_core_map[i]); if (cpu_data(i).core_id == 0) { cpu_set(i, cpu_core_map[i]); continue; } - for_each_possible_cpu(j) { + for_each_present_cpu(j) { if (cpu_data(i).core_id == cpu_data(j).core_id) cpu_set(j, cpu_core_map[i]); } } - for_each_possible_cpu(i) { + for_each_present_cpu(i) { unsigned int j; + cpus_clear(cpu_sibling_map[i]); if (cpu_data(i).proc_id == -1) { cpu_set(i, cpu_sibling_map[i]); continue; } - for_each_possible_cpu(j) { + for_each_present_cpu(j) { if (cpu_data(i).proc_id == cpu_data(j).proc_id) cpu_set(j, cpu_sibling_map[i]); @@ -1242,18 +1299,112 @@ int __cpuinit __cpu_up(unsigned int cpu) return ret; } -void __init smp_cpus_done(unsigned int max_cpus) +#ifdef CONFIG_HOTPLUG_CPU +void cpu_play_dead(void) +{ + int cpu = smp_processor_id(); + unsigned long pstate; + + idle_task_exit(); + + if (tlb_type == hypervisor) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO, + tb->cpu_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO, + tb->dev_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR, + tb->resum_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR, + tb->nonresum_mondo_pa, 0); + } + + cpu_clear(cpu, smp_commenced_mask); + membar_safe("#Sync"); + + local_irq_disable(); + + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + while (1) + barrier(); +} + +int __cpu_disable(void) { - unsigned long bogosum = 0; + int cpu = smp_processor_id(); + cpuinfo_sparc *c; int i; - for_each_online_cpu(i) - bogosum += cpu_data(i).udelay_val; - printk("Total of %ld processors activated " - "(%lu.%02lu BogoMIPS).\n", - (long) num_online_cpus(), - bogosum/(500000/HZ), - (bogosum/(5000/HZ))%100); + for_each_cpu_mask(i, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[i]); + cpus_clear(cpu_core_map[cpu]); + + for_each_cpu_mask(i, cpu_sibling_map[cpu]) + cpu_clear(cpu, cpu_sibling_map[i]); + cpus_clear(cpu_sibling_map[cpu]); + + c = &cpu_data(cpu); + + c->core_id = 0; + c->proc_id = -1; + + spin_lock(&call_lock); + cpu_clear(cpu, cpu_online_map); + spin_unlock(&call_lock); + + smp_wmb(); + + /* Make sure no interrupts point to this cpu. 
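__cpu_disable(), whose tail continues just below, orders the teardown deliberately: drop out of the topology maps, leave cpu_online_map while holding call_lock so a concurrent smp_call_function() cannot target the dying cpu mid-update, retarget device interrupts, then briefly re-enable interrupts to drain any already in flight. Condensed, with the reasoning as comments:

/* Sketch: tail of __cpu_disable(), annotated. */
spin_lock(&call_lock);
cpu_clear(cpu, cpu_online_map);         /* no new cross calls to us    */
spin_unlock(&call_lock);

smp_wmb();                              /* publish before irq rework   */

fixup_irqs();                           /* point device irqs elsewhere */

local_irq_enable();                     /* drain interrupts in flight  */
mdelay(1);
local_irq_disable();                    /* go quiet for cpu_play_dead  */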
*/ + fixup_irqs(); + + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + int i; + + for (i = 0; i < 100; i++) { + smp_rmb(); + if (!cpu_isset(cpu, smp_commenced_mask)) + break; + msleep(100); + } + if (cpu_isset(cpu, smp_commenced_mask)) { + printk(KERN_ERR "CPU %u didn't die...\n", cpu); + } else { +#if defined(CONFIG_SUN_LDOMS) + unsigned long hv_err; + int limit = 100; + + do { + hv_err = sun4v_cpu_stop(cpu); + if (hv_err == HV_EOK) { + cpu_clear(cpu, cpu_present_map); + break; + } + } while (--limit > 0); + if (limit <= 0) { + printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n", + hv_err); + } +#endif + } +} +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ } void smp_send_reschedule(int cpu) diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 6fa76161289..d270c2f0be0 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -1,7 +1,6 @@ -/* $Id: sparc64_ksyms.c,v 1.121 2002/02/09 19:49:31 davem Exp $ - * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. +/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support. * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net) * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) */ @@ -28,7 +27,6 @@ #include <net/compat.h> #include <asm/oplib.h> -#include <asm/delay.h> #include <asm/system.h> #include <asm/auxio.h> #include <asm/pgtable.h> @@ -124,10 +122,6 @@ EXPORT_SYMBOL(__write_lock); EXPORT_SYMBOL(__write_unlock); EXPORT_SYMBOL(__write_trylock); -/* CPU online map and active count. */ -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(phys_cpu_present_map); - EXPORT_SYMBOL(smp_call_function); #endif /* CONFIG_SMP */ @@ -286,6 +280,7 @@ EXPORT_SYMBOL(sys_getgid); EXPORT_SYMBOL(svr4_getcontext); EXPORT_SYMBOL(svr4_setcontext); EXPORT_SYMBOL(compat_sys_ioctl); +EXPORT_SYMBOL(sys_ioctl); EXPORT_SYMBOL(sparc32_open); #endif @@ -330,19 +325,12 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(strncmp); -/* Delay routines. */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__delay); - void VISenter(void); /* RAID code needs this */ EXPORT_SYMBOL(VISenter); /* for input/keybdev */ EXPORT_SYMBOL(sun_do_break); -EXPORT_SYMBOL(serial_console); EXPORT_SYMBOL(stop_a_enabled); #ifdef CONFIG_DEBUG_BUGVERBOSE diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index abd83129b2e..e8dce90d05d 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1,8 +1,7 @@ -/* $Id: sys_sparc32.c,v 1.184 2002/02/09 19:49:31 davem Exp $ - * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. +/* sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. * * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net) * * These routines maintain argument size conversion between 32bit and 64bit * environment. 
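__cpu_die() above is two-phase: first wait politely for the dying cpu to take itself out of smp_commenced_mask, then, on LDOM configurations, have the hypervisor stop it outright and drop it from cpu_present_map. Phase one, annotated:

/* Sketch: phase one of __cpu_die(). */
for (i = 0; i < 100; i++) {
        smp_rmb();                      /* see cpu_play_dead()'s clear */
        if (!cpu_isset(cpu, smp_commenced_mask))
                break;                  /* target acknowledged death   */
        msleep(100);                    /* bound the wait at ~10s      */
}
/* Then, under CONFIG_SUN_LDOMS, sun4v_cpu_stop(cpu) is retried until
 * HV_EOK and the cpu is cleared from cpu_present_map. */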
@@ -1028,3 +1027,10 @@ long compat_sync_file_range(int fd, unsigned long off_high, unsigned long off_lo (nb_high << 32) | nb_low, flags); } + +asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, + u32 lenhi, u32 lenlo) +{ + return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, + ((loff_t)lenhi << 32) | lenlo); +} diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c index cdb1477af89..52816c7be0b 100644 --- a/arch/sparc64/kernel/sysfs.c +++ b/arch/sparc64/kernel/sysfs.c @@ -193,7 +193,6 @@ static ssize_t show_##NAME(struct sys_device *dev, char *buf) \ } SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick); -SHOW_CPUDATA_ULONG_NAME(udelay_val, udelay_val); SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size); SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size); SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size); @@ -203,7 +202,6 @@ SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size); static struct sysdev_attribute cpu_core_attrs[] = { _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL), - _SYSDEV_ATTR(udelay_val, 0444, show_udelay_val, NULL), _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 8765e32155a..06d10907d8c 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -1,8 +1,7 @@ -/* $Id: systbls.S,v 1.81 2002/02/08 03:57:14 davem Exp $ - * systbls.S: System call entry point tables for OS compatibility. +/* systbls.S: System call entry point tables for OS compatibility. * The native Linux system call table lives here also. * - * Copyright (C) 1995, 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1995, 1996, 2007 David S. 
Miller (davem@davemloft.net) * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * * Based upon preliminary work which is: @@ -81,7 +80,7 @@ sys_call_table32: .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare /*300*/ .word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait -/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, compat_sys_timerfd, sys_eventfd +/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, compat_sys_timerfd, sys_eventfd, compat_sys_fallocate #endif /* CONFIG_COMPAT */ @@ -153,7 +152,7 @@ sys_call_table: .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare /*300*/ .word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait -/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd +/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd, sys_eventfd, sys_fallocate #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \ defined(CONFIG_SOLARIS_EMUL_MODULE) @@ -272,6 +271,6 @@ sunos_sys_table: .word sunos_nosys, sunos_nosys, sunos_nosys .word sunos_nosys /*310*/ .word sunos_nosys, sunos_nosys, sunos_nosys - .word sunos_nosys + .word sunos_nosys, sunos_nosys #endif diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index a31a0439244..49063ca2efc 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -403,58 +403,9 @@ static struct sparc64_tick_ops hbtick_operations __read_mostly = { static unsigned long timer_ticks_per_nsec_quotient __read_mostly; -#define TICK_SIZE (tick_nsec / 1000) - -#define USEC_AFTER 500000 -#define USEC_BEFORE 500000 - -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); - -static void sync_cmos_clock(unsigned long dummy) +int update_persistent_clock(struct timespec now) { - struct timeval now, next; - int fail = 1; - - /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). - */ - return; - - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; - } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) -{ - mod_timer(&sync_cmos_timer, jiffies + 1); + return set_rtc_mmss(now.tv_sec); } /* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). 
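compat_sys_fallocate(), added in sys_sparc32.c and wired into the syscall tables above, uses the standard compat trick: a 64-bit loff_t from a 32-bit task arrives as two register-sized halves and is reassembled with a shift and an OR. A self-contained illustration of the arithmetic (userspace C; the values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* e.g. offhi:offlo as a 32-bit task would pass them */
        uint32_t offhi = 0x1, offlo = 0x80000000u;
        int64_t offset = ((int64_t)offhi << 32) | offlo;

        /* prints 0x180000000, i.e. a 6 GiB offset */
        printf("offset = %#llx\n", (unsigned long long)offset);
        return 0;
}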
*/ @@ -835,7 +786,7 @@ static int __init clock_init(void) return 0; } - return of_register_driver(&clock_driver, &of_bus_type); + return of_register_driver(&clock_driver, &of_platform_bus_type); } /* Must be after subsys_initcall() so that busses are probed. Must @@ -849,9 +800,6 @@ static unsigned long sparc64_init_timers(void) { struct device_node *dp; unsigned long clock; -#ifdef CONFIG_SMP - extern void smp_tick_init(void); -#endif dp = of_find_node_by_path("/"); if (tlb_type == spitfire) { @@ -874,10 +822,6 @@ static unsigned long sparc64_init_timers(void) clock = of_getintprop_default(dp, "stick-frequency", 0); } -#ifdef CONFIG_SMP - smp_tick_init(); -#endif - return clock; } @@ -938,6 +882,7 @@ static void sparc64_timer_setup(enum clock_event_mode mode, { switch (mode) { case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: break; case CLOCK_EVT_MODE_SHUTDOWN: @@ -1038,10 +983,31 @@ static void __init setup_clockevent_multiplier(unsigned long hz) sparc64_clockevent.mult = mult; } +static unsigned long tb_ticks_per_usec __read_mostly; + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + bclock = tick_ops->get_tick(); + do { + now = tick_ops->get_tick(); + } while ((now-bclock) < loops); +} +EXPORT_SYMBOL(__delay); + +void udelay(unsigned long usecs) +{ + __delay(tb_ticks_per_usec * usecs); +} +EXPORT_SYMBOL(udelay); + void __init time_init(void) { unsigned long clock = sparc64_init_timers(); + tb_ticks_per_usec = clock / USEC_PER_SEC; + timer_ticks_per_nsec_quotient = clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT); @@ -1420,6 +1386,78 @@ static int bq4802_set_rtc_time(struct rtc_time *time) return 0; } + +static void cmos_get_rtc_time(struct rtc_time *rtc_tm) +{ + unsigned char ctrl; + + rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS); + rtc_tm->tm_min = CMOS_READ(RTC_MINUTES); + rtc_tm->tm_hour = CMOS_READ(RTC_HOURS); + rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); + rtc_tm->tm_mon = CMOS_READ(RTC_MONTH); + rtc_tm->tm_year = CMOS_READ(RTC_YEAR); + rtc_tm->tm_wday = CMOS_READ(RTC_DAY_OF_WEEK); + + ctrl = CMOS_READ(RTC_CONTROL); + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BCD_TO_BIN(rtc_tm->tm_sec); + BCD_TO_BIN(rtc_tm->tm_min); + BCD_TO_BIN(rtc_tm->tm_hour); + BCD_TO_BIN(rtc_tm->tm_mday); + BCD_TO_BIN(rtc_tm->tm_mon); + BCD_TO_BIN(rtc_tm->tm_year); + BCD_TO_BIN(rtc_tm->tm_wday); + } + + if (rtc_tm->tm_year <= 69) + rtc_tm->tm_year += 100; + + rtc_tm->tm_mon--; +} + +static int cmos_set_rtc_time(struct rtc_time *rtc_tm) +{ + unsigned char mon, day, hrs, min, sec; + unsigned char save_control, save_freq_select; + unsigned int yrs; + + yrs = rtc_tm->tm_year; + mon = rtc_tm->tm_mon + 1; + day = rtc_tm->tm_mday; + hrs = rtc_tm->tm_hour; + min = rtc_tm->tm_min; + sec = rtc_tm->tm_sec; + + if (yrs >= 100) + yrs -= 100; + + if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + BIN_TO_BCD(sec); + BIN_TO_BCD(min); + BIN_TO_BCD(hrs); + BIN_TO_BCD(day); + BIN_TO_BCD(mon); + BIN_TO_BCD(yrs); + } + + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + + CMOS_WRITE(yrs, RTC_YEAR); + CMOS_WRITE(mon, RTC_MONTH); + CMOS_WRITE(day, RTC_DAY_OF_MONTH); + CMOS_WRITE(hrs, RTC_HOURS); + CMOS_WRITE(min, RTC_MINUTES); + CMOS_WRITE(sec, RTC_SECONDS); + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + + return 0; +} #endif /* CONFIG_PCI */ struct mini_rtc_ops { @@ 
-1442,6 +1480,11 @@ static struct mini_rtc_ops bq4802_rtc_ops = { .get_rtc_time = bq4802_get_rtc_time, .set_rtc_time = bq4802_set_rtc_time, }; + +static struct mini_rtc_ops cmos_rtc_ops = { + .get_rtc_time = cmos_get_rtc_time, + .set_rtc_time = cmos_set_rtc_time, +}; #endif /* CONFIG_PCI */ static struct mini_rtc_ops *mini_rtc_ops; @@ -1569,6 +1612,8 @@ static int __init rtc_mini_init(void) #ifdef CONFIG_PCI else if (bq4802_regs) mini_rtc_ops = &bq4802_rtc_ops; + else if (ds1287_regs) + mini_rtc_ops = &cmos_rtc_ops; #endif /* CONFIG_PCI */ else return -ENODEV; diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 00a9e3286c8..6ef2d299fb1 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2225,6 +2225,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV); __asm__ __volatile__("flushw"); __show_regs(regs); + add_taint(TAINT_DIE); if (regs->tstate & TSTATE_PRIV) { struct reg_window *rw = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS); diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c new file mode 100644 index 00000000000..3685daf5157 --- /dev/null +++ b/arch/sparc64/kernel/vio.c @@ -0,0 +1,443 @@ +/* vio.c: Virtual I/O channel devices probing infrastructure. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard <hollisb@us.ibm.com> + * Stephen Rothwell + * + * Adapted to sparc64 by David S. Miller davem@davemloft.net + */ + +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/init.h> + +#include <asm/mdesc.h> +#include <asm/vio.h> + +static inline int find_in_proplist(const char *list, const char *match, + int len) +{ + while (len > 0) { + int l; + + if (!strcmp(list, match)) + return 1; + l = strlen(list) + 1; + list += l; + len -= l; + } + return 0; +} + +static const struct vio_device_id *vio_match_device( + const struct vio_device_id *matches, + const struct vio_dev *dev) +{ + const char *type, *compat; + int len; + + type = dev->type; + compat = dev->compat; + len = dev->compat_len; + + while (matches->type[0] || matches->compat[0]) { + int match = 1; + if (matches->type[0]) + match &= !strcmp(matches->type, type); + + if (matches->compat[0]) { + match &= len && + find_in_proplist(compat, matches->compat, len); + } + if (match) + return matches; + matches++; + } + return NULL; +} + +static int vio_bus_match(struct device *dev, struct device_driver *drv) +{ + struct vio_dev *vio_dev = to_vio_dev(dev); + struct vio_driver *vio_drv = to_vio_driver(drv); + const struct vio_device_id *matches = vio_drv->id_table; + + if (!matches) + return 0; + + return vio_match_device(matches, vio_dev) != NULL; +} + +static int vio_device_probe(struct device *dev) +{ + struct vio_dev *vdev = to_vio_dev(dev); + struct vio_driver *drv = to_vio_driver(dev->driver); + const struct vio_device_id *id; + int error = -ENODEV; + + if (drv->probe) { + id = vio_match_device(drv->id_table, vdev); + if (id) + error = drv->probe(vdev, id); + } + + return error; +} + +static int vio_device_remove(struct device *dev) +{ + struct vio_dev *vdev = to_vio_dev(dev); + struct vio_driver *drv = to_vio_driver(dev->driver); + + if (drv->remove) + return drv->remove(vdev); + + return 1; +} + +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *vdev = to_vio_dev(dev); + const char *str = "none"; + + if (!strcmp(vdev->type, "vnet-port")) + str = 
"vnet"; + else if (!strcmp(vdev->type, "vdc-port")) + str = "vdisk"; + + return sprintf(buf, "%s\n", str); +} + +static ssize_t type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vio_dev *vdev = to_vio_dev(dev); + return sprintf(buf, "%s\n", vdev->type); +} + +static struct device_attribute vio_dev_attrs[] = { + __ATTR_RO(devspec), + __ATTR_RO(type), + __ATTR_NULL +}; + +static struct bus_type vio_bus_type = { + .name = "vio", + .dev_attrs = vio_dev_attrs, + .match = vio_bus_match, + .probe = vio_device_probe, + .remove = vio_device_remove, +}; + +int vio_register_driver(struct vio_driver *viodrv) +{ + viodrv->driver.bus = &vio_bus_type; + + return driver_register(&viodrv->driver); +} +EXPORT_SYMBOL(vio_register_driver); + +void vio_unregister_driver(struct vio_driver *viodrv) +{ + driver_unregister(&viodrv->driver); +} +EXPORT_SYMBOL(vio_unregister_driver); + +static void __devinit vio_dev_release(struct device *dev) +{ + kfree(to_vio_dev(dev)); +} + +static ssize_t +show_pciobppath_attr(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct vio_dev *vdev; + struct device_node *dp; + + vdev = to_vio_dev(dev); + dp = vdev->dp; + + return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name); +} + +static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, + show_pciobppath_attr, NULL); + +struct device_node *cdev_node; + +static struct vio_dev *root_vdev; +static u64 cdev_cfg_handle; + +static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp, + struct vio_dev *vdev) +{ + u64 a; + + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) { + const u64 *chan_id; + const u64 *irq; + u64 target; + + target = mdesc_arc_target(hp, a); + + irq = mdesc_get_property(hp, target, "tx-ino", NULL); + if (irq) + vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + + irq = mdesc_get_property(hp, target, "rx-ino", NULL); + if (irq) + vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq); + + chan_id = mdesc_get_property(hp, target, "id", NULL); + if (chan_id) + vdev->channel_id = *chan_id; + } +} + +static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, + struct device *parent) +{ + const char *type, *compat, *bus_id_name; + struct device_node *dp; + struct vio_dev *vdev; + int err, tlen, clen; + const u64 *id, *cfg_handle; + u64 a; + + type = mdesc_get_property(hp, mp, "device-type", &tlen); + if (!type) { + type = mdesc_get_property(hp, mp, "name", &tlen); + if (!type) { + type = mdesc_node_name(hp, mp); + tlen = strlen(type) + 1; + } + } + if (tlen > VIO_MAX_TYPE_LEN) { + printk(KERN_ERR "VIO: Type string [%s] is too long.\n", + type); + return NULL; + } + + id = mdesc_get_property(hp, mp, "id", NULL); + + cfg_handle = NULL; + mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) { + u64 target; + + target = mdesc_arc_target(hp, a); + cfg_handle = mdesc_get_property(hp, target, + "cfg-handle", NULL); + if (cfg_handle) + break; + } + + bus_id_name = type; + if (!strcmp(type, "domain-services-port")) + bus_id_name = "ds"; + + if (strlen(bus_id_name) >= KOBJ_NAME_LEN - 4) { + printk(KERN_ERR "VIO: bus_id_name [%s] is too long.\n", + bus_id_name); + return NULL; + } + + compat = mdesc_get_property(hp, mp, "device-type", &clen); + if (!compat) { + clen = 0; + } else if (clen > VIO_MAX_COMPAT_LEN) { + printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n", + clen, type); + return NULL; + } + + vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); + if (!vdev) { + printk(KERN_ERR "VIO: Could not allocate vio_dev\n"); + return NULL; + } + + 
vdev->mp = mp; + memcpy(vdev->type, type, tlen); + if (compat) + memcpy(vdev->compat, compat, clen); + else + memset(vdev->compat, 0, sizeof(vdev->compat)); + vdev->compat_len = clen; + + vdev->channel_id = ~0UL; + vdev->tx_irq = ~0; + vdev->rx_irq = ~0; + + vio_fill_channel_info(hp, mp, vdev); + + if (!id) { + snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s", + bus_id_name); + vdev->dev_no = ~(u64)0; + } else if (!cfg_handle) { + snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s-%lu", + bus_id_name, *id); + vdev->dev_no = *id; + } else { + snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%s-%lu-%lu", + bus_id_name, *cfg_handle, *id); + vdev->dev_no = *cfg_handle; + } + + vdev->dev.parent = parent; + vdev->dev.bus = &vio_bus_type; + vdev->dev.release = vio_dev_release; + + if (parent == NULL) { + dp = cdev_node; + } else if (to_vio_dev(parent) == root_vdev) { + dp = of_get_next_child(cdev_node, NULL); + while (dp) { + if (!strcmp(dp->type, type)) + break; + + dp = of_get_next_child(cdev_node, dp); + } + } else { + dp = to_vio_dev(parent)->dp; + } + vdev->dp = dp; + + printk(KERN_ERR "VIO: Adding device %s\n", vdev->dev.bus_id); + + err = device_register(&vdev->dev); + if (err) { + printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", + vdev->dev.bus_id, err); + kfree(vdev); + return NULL; + } + if (vdev->dp) + err = sysfs_create_file(&vdev->dev.kobj, + &dev_attr_obppath.attr); + + return vdev; +} + +static void vio_add(struct mdesc_handle *hp, u64 node) +{ + (void) vio_create_one(hp, node, &root_vdev->dev); +} + +static int vio_md_node_match(struct device *dev, void *arg) +{ + struct vio_dev *vdev = to_vio_dev(dev); + + if (vdev->mp == (u64) arg) + return 1; + + return 0; +} + +static void vio_remove(struct mdesc_handle *hp, u64 node) +{ + struct device *dev; + + dev = device_find_child(&root_vdev->dev, (void *) node, + vio_md_node_match); + if (dev) { + printk(KERN_INFO "VIO: Removing device %s\n", dev->bus_id); + + device_unregister(dev); + } +} + +static struct mdesc_notifier_client vio_device_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "virtual-device-port", +}; + +static struct mdesc_notifier_client vio_ds_notifier = { + .add = vio_add, + .remove = vio_remove, + .node_name = "domain-services-port", +}; + +const char *channel_devices_node = "channel-devices"; +const char *channel_devices_compat = "SUNW,sun4v-channel-devices"; +const char *cfg_handle_prop = "cfg-handle"; + +static int __init vio_init(void) +{ + struct mdesc_handle *hp; + const char *compat; + const u64 *cfg_handle; + int err, len; + u64 root; + + err = bus_register(&vio_bus_type); + if (err) { + printk(KERN_ERR "VIO: Could not register bus type err=%d\n", + err); + return err; + } + + hp = mdesc_grab(); + if (!hp) + return 0; + + root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node); + if (root == MDESC_NODE_NULL) { + printk(KERN_INFO "VIO: No channel-devices MDESC node.\n"); + mdesc_release(hp); + return 0; + } + + cdev_node = of_find_node_by_name(NULL, "channel-devices"); + err = -ENODEV; + if (!cdev_node) { + printk(KERN_INFO "VIO: No channel-devices OBP node.\n"); + goto out_release; + } + + compat = mdesc_get_property(hp, root, "compatible", &len); + if (!compat) { + printk(KERN_ERR "VIO: Channel devices lacks compatible " + "property\n"); + goto out_release; + } + if (!find_in_proplist(compat, channel_devices_compat, len)) { + printk(KERN_ERR "VIO: Channel devices node lacks (%s) " + "compat entry.\n", channel_devices_compat); + goto out_release; + } + + cfg_handle = 
+static int __init vio_init(void)
+{
+	struct mdesc_handle *hp;
+	const char *compat;
+	const u64 *cfg_handle;
+	int err, len;
+	u64 root;
+
+	err = bus_register(&vio_bus_type);
+	if (err) {
+		printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
+		       err);
+		return err;
+	}
+
+	hp = mdesc_grab();
+	if (!hp)
+		return 0;
+
+	root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node);
+	if (root == MDESC_NODE_NULL) {
+		printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
+		mdesc_release(hp);
+		return 0;
+	}
+
+	cdev_node = of_find_node_by_name(NULL, "channel-devices");
+	err = -ENODEV;
+	if (!cdev_node) {
+		printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
+		goto out_release;
+	}
+
+	compat = mdesc_get_property(hp, root, "compatible", &len);
+	if (!compat) {
+		printk(KERN_ERR "VIO: Channel devices node lacks compatible "
+		       "property\n");
+		goto out_release;
+	}
+	if (!find_in_proplist(compat, channel_devices_compat, len)) {
+		printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
+		       "compat entry.\n", channel_devices_compat);
+		goto out_release;
+	}
+
+	cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL);
+	if (!cfg_handle) {
+		printk(KERN_ERR "VIO: Channel devices node lacks %s "
+		       "property\n", cfg_handle_prop);
+		goto out_release;
+	}
+
+	cdev_cfg_handle = *cfg_handle;
+
+	root_vdev = vio_create_one(hp, root, NULL);
+	err = -ENODEV;
+	if (!root_vdev) {
+		printk(KERN_ERR "VIO: Could not create root device.\n");
+		goto out_release;
+	}
+
+	mdesc_register_notifier(&vio_device_notifier);
+	mdesc_register_notifier(&vio_ds_notifier);
+
+	mdesc_release(hp);
+
+	return 0;
+
+out_release:
+	mdesc_release(hp);
+	return err;
+}
+
+postcore_initcall(vio_init);
diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c
new file mode 100644
index 00000000000..09126fc338b
--- /dev/null
+++ b/arch/sparc64/kernel/viohs.c
@@ -0,0 +1,822 @@
+/* viohs.c: LDOM Virtual I/O handshake helper layer.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/ldc.h>
+#include <asm/vio.h>
+
+int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
+{
+	int err, limit = 1000;
+
+	err = -EINVAL;
+	while (limit-- > 0) {
+		err = ldc_write(vio->lp, data, len);
+		if (!err || (err != -EAGAIN))
+			break;
+		udelay(1);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(vio_ldc_send);
+
+static int send_ctrl(struct vio_driver_state *vio,
+		     struct vio_msg_tag *tag, int len)
+{
+	tag->sid = vio_send_sid(vio);
+	return vio_ldc_send(vio, tag, len);
+}
+
+static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
+{
+	tag->type = type;
+	tag->stype = stype;
+	tag->stype_env = stype_env;
+}
+
+static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
+{
+	struct vio_ver_info pkt;
+
+	vio->_local_sid = (u32) sched_clock();
+
+	memset(&pkt, 0, sizeof(pkt));
+	init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
+	pkt.major = major;
+	pkt.minor = minor;
+	pkt.dev_class = vio->dev_class;
+
+	viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
+	       major, minor, vio->dev_class);
+
+	return send_ctrl(vio, &pkt.tag, sizeof(pkt));
+}
+
+static int start_handshake(struct vio_driver_state *vio)
+{
+	int err;
+
+	viodbg(HS, "START HANDSHAKE\n");
+
+	vio->hs_state = VIO_HS_INVALID;
+
+	err = send_version(vio,
+			   vio->ver_table[0].major,
+			   vio->ver_table[0].minor);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void flush_rx_dring(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr;
+	u64 ident;
+
+	BUG_ON(!(vio->dr_state & VIO_DR_STATE_RXREG));
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+	ident = dr->ident;
+
+	BUG_ON(!vio->desc_buf);
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+
+	memset(dr, 0, sizeof(*dr));
+	dr->ident = ident;
+}
+
+void vio_link_state_change(struct vio_driver_state *vio, int event)
+{
+	if (event == LDC_EVENT_UP) {
+		vio->hs_state = VIO_HS_INVALID;
+
+		switch (vio->dev_class) {
+		case VDEV_NETWORK:
+		case VDEV_NETWORK_SWITCH:
+			vio->dr_state = (VIO_DR_STATE_TXREQ |
+					 VIO_DR_STATE_RXREQ);
+			break;
+
+		case VDEV_DISK:
+			vio->dr_state = VIO_DR_STATE_TXREQ;
+			break;
+		case VDEV_DISK_SERVER:
+			vio->dr_state = VIO_DR_STATE_RXREQ;
+			break;
+		}
+		start_handshake(vio);
+	} else if (event == LDC_EVENT_RESET) {
+		vio->hs_state = VIO_HS_INVALID;
+
+		if (vio->dr_state & VIO_DR_STATE_RXREG)
+			flush_rx_dring(vio);
+
+		vio->dr_state = 0x00;
+		memset(&vio->ver, 0, sizeof(vio->ver));
+
+		ldc_disconnect(vio->lp);
+	}
+}
+EXPORT_SYMBOL(vio_link_state_change);
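[Editor's note: one subtlety worth calling out here. start_handshake() above always offers ver_table[0] first, and the find_by_major() helper below returns the first table entry whose major is <= the peer's proposal. A driver's version table must therefore be sorted highest-preference first. A minimal sketch (the driver name is hypothetical):

	/* Ordered highest-first: negotiation scans from index 0, and
	 * find_by_major() takes the first entry with major <= the
	 * peer's offer. */
	static struct vio_version mydrv_ver_table[] = {
		{ .major = 2, .minor = 0 },
		{ .major = 1, .minor = 5 },
	};
]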
+
+static int handshake_failure(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr;
+
+	/* XXX Put policy here... Perhaps start a timer to fire
+	 * XXX in 100 ms, which will bring the link up and retry
+	 * XXX the handshake.
+	 */
+
+	viodbg(HS, "HANDSHAKE FAILURE\n");
+
+	vio->dr_state &= ~(VIO_DR_STATE_TXREG |
+			   VIO_DR_STATE_RXREG);
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+	memset(dr, 0, sizeof(*dr));
+
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;
+
+	vio->hs_state = VIO_HS_INVALID;
+
+	return -ECONNRESET;
+}
+
+static int process_unknown(struct vio_driver_state *vio, void *arg)
+{
+	struct vio_msg_tag *pkt = arg;
+
+	viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
+	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
+
+	printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
+	       vio->vdev->channel_id);
+
+	ldc_disconnect(vio->lp);
+
+	return -ECONNRESET;
+}
+
+static int send_dreg(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
+	union {
+		struct vio_dring_register pkt;
+		char all[sizeof(struct vio_dring_register) +
+			 (sizeof(struct ldc_trans_cookie) *
+			  dr->ncookies)];
+	} u;
+	int i;
+
+	memset(&u, 0, sizeof(u));
+	init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
+	u.pkt.dring_ident = 0;
+	u.pkt.num_descr = dr->num_entries;
+	u.pkt.descr_size = dr->entry_size;
+	u.pkt.options = VIO_TX_DRING;
+	u.pkt.num_cookies = dr->ncookies;
+
+	viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
+	       "ncookies[%u]\n",
+	       u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
+	       u.pkt.num_cookies);
+
+	for (i = 0; i < dr->ncookies; i++) {
+		u.pkt.cookies[i] = dr->cookies[i];
+
+		viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
+		       i,
+		       (unsigned long long) u.pkt.cookies[i].cookie_addr,
+		       (unsigned long long) u.pkt.cookies[i].cookie_size);
+	}
+
+	return send_ctrl(vio, &u.pkt.tag, sizeof(u));
+}
+
+static int send_rdx(struct vio_driver_state *vio)
+{
+	struct vio_rdx pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
+
+	viodbg(HS, "SEND RDX INFO\n");
+
+	return send_ctrl(vio, &pkt.tag, sizeof(pkt));
+}
+
+static int send_attr(struct vio_driver_state *vio)
+{
+	return vio->ops->send_attr(vio);
+}
+
+static struct vio_version *find_by_major(struct vio_driver_state *vio,
+					 u16 major)
+{
+	struct vio_version *ret = NULL;
+	int i;
+
+	for (i = 0; i < vio->ver_table_entries; i++) {
+		struct vio_version *v = &vio->ver_table[i];
+		if (v->major <= major) {
+			ret = v;
+			break;
+		}
+	}
+	return ret;
+}
+
+static int process_ver_info(struct vio_driver_state *vio,
+			    struct vio_ver_info *pkt)
+{
+	struct vio_version *vap;
+	int err;
+
+	viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
+	       pkt->major, pkt->minor, pkt->dev_class);
+
+	if (vio->hs_state != VIO_HS_INVALID) {
+		/* XXX Perhaps invoke start_handshake? XXX */
+		memset(&vio->ver, 0, sizeof(vio->ver));
+		vio->hs_state = VIO_HS_INVALID;
+	}
+
+	vap = find_by_major(vio, pkt->major);
+
+	vio->_peer_sid = pkt->tag.sid;
+
+	if (!vap) {
+		pkt->tag.stype = VIO_SUBTYPE_NACK;
+		pkt->major = 0;
+		pkt->minor = 0;
+		viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
+		err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+	} else if (vap->major != pkt->major) {
+		pkt->tag.stype = VIO_SUBTYPE_NACK;
+		pkt->major = vap->major;
+		pkt->minor = vap->minor;
+		viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
+		       pkt->major, pkt->minor);
+		err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+	} else {
+		struct vio_version ver = {
+			.major = pkt->major,
+			.minor = pkt->minor,
+		};
+		if (ver.minor > vap->minor)
+			ver.minor = vap->minor;
+		pkt->minor = ver.minor;
+		pkt->tag.stype = VIO_SUBTYPE_ACK;
+		viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
+		       pkt->major, pkt->minor);
+		err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+		if (err > 0) {
+			vio->ver = ver;
+			vio->hs_state = VIO_HS_GOTVERS;
+		}
+	}
+	if (err < 0)
+		return handshake_failure(vio);
+
+	return 0;
+}
+
+static int process_ver_ack(struct vio_driver_state *vio,
+			   struct vio_ver_info *pkt)
+{
+	viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
+	       pkt->major, pkt->minor, pkt->dev_class);
+
+	if (vio->hs_state & VIO_HS_GOTVERS) {
+		if (vio->ver.major != pkt->major ||
+		    vio->ver.minor != pkt->minor) {
+			pkt->tag.stype = VIO_SUBTYPE_NACK;
+			(void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+			return handshake_failure(vio);
+		}
+	} else {
+		vio->ver.major = pkt->major;
+		vio->ver.minor = pkt->minor;
+		vio->hs_state = VIO_HS_GOTVERS;
+	}
+
+	switch (vio->dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_DISK:
+		if (send_attr(vio) < 0)
+			return handshake_failure(vio);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int process_ver_nack(struct vio_driver_state *vio,
+			    struct vio_ver_info *pkt)
+{
+	struct vio_version *nver;
+
+	viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
+	       pkt->major, pkt->minor, pkt->dev_class);
+
+	if ((pkt->major == 0 && pkt->minor == 0) ||
+	    !(nver = find_by_major(vio, pkt->major)))
+		return handshake_failure(vio);
+
+	if (send_version(vio, nver->major, nver->minor) < 0)
+		return handshake_failure(vio);
+
+	return 0;
+}
+
+static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
+{
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_ver_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_ver_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_ver_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+
+static int process_attr(struct vio_driver_state *vio, void *pkt)
+{
+	int err;
+
+	if (!(vio->hs_state & VIO_HS_GOTVERS))
+		return handshake_failure(vio);
+
+	err = vio->ops->handle_attr(vio, pkt);
+	if (err < 0) {
+		return handshake_failure(vio);
+	} else {
+		vio->hs_state |= VIO_HS_GOT_ATTR;
+
+		if ((vio->dr_state & VIO_DR_STATE_TXREQ) &&
+		    !(vio->hs_state & VIO_HS_SENT_DREG)) {
+			if (send_dreg(vio) < 0)
+				return handshake_failure(vio);
+
+			vio->hs_state |= VIO_HS_SENT_DREG;
+		}
+	}
+	return 0;
+}
+
+static int all_drings_registered(struct vio_driver_state *vio)
+{
+	int need_rx, need_tx;
+
+	need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ);
+	need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ);
+
+	if (need_rx &&
+	    !(vio->dr_state & VIO_DR_STATE_RXREG))
+		return 0;
+
+	if (need_tx &&
+	    !(vio->dr_state & VIO_DR_STATE_TXREG))
+		return 0;
+
+	return 1;
+}
+
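[Editor's note: taken together, the handlers above and below implement the following control-message flow. The trace is illustrative, shown for a disk client (VDEV_DISK) whose peer accepts the first version offered; the server side normally sends its own RDX as well, which process_rdx_info() acknowledges:

	client                            server
	VER   INFO ------------------>
	           <------------------ VER   ACK    process_ver_ack(): send_attr()
	ATTR  INFO ------------------>
	           <------------------ ATTR  ACK    handle_attr(); send_dreg() since
	                                             VIO_DR_STATE_TXREQ is set
	DRING REG  ------------------>
	           <------------------ DRING ACK    process_dreg_ack(): send_rdx() once
	                                             all_drings_registered()
	RDX        ------------------>
	           <------------------ RDX   ACK    process_rdx_ack(); handshake_complete()
	                                             fires when VIO_HS_COMPLETE is reached
]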
+static int process_dreg_info(struct vio_driver_state *vio,
+			     struct vio_dring_register *pkt)
+{
+	struct vio_dring_state *dr;
+	int i, len;
+
+	viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
+		goto send_nack;
+
+	if (vio->dr_state & VIO_DR_STATE_RXREG)
+		goto send_nack;
+
+	/* The cookie array in vio_dring_state is fixed size, so
+	 * bounds-check the peer-supplied count before copying.
+	 */
+	if (pkt->num_cookies > VIO_MAX_RING_COOKIES)
+		goto send_nack;
+
+	BUG_ON(vio->desc_buf);
+
+	vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
+	if (!vio->desc_buf)
+		goto send_nack;
+
+	vio->desc_buf_len = pkt->descr_size;
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+
+	dr->num_entries = pkt->num_descr;
+	dr->entry_size = pkt->descr_size;
+	dr->ncookies = pkt->num_cookies;
+	for (i = 0; i < dr->ncookies; i++) {
+		dr->cookies[i] = pkt->cookies[i];
+
+		viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
+		       i,
+		       (unsigned long long)
+		       pkt->cookies[i].cookie_addr,
+		       (unsigned long long)
+		       pkt->cookies[i].cookie_size);
+	}
+
+	pkt->tag.stype = VIO_SUBTYPE_ACK;
+	pkt->dring_ident = ++dr->ident;
+
+	viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
+	       (unsigned long long) pkt->dring_ident);
+
+	len = (sizeof(*pkt) +
+	       (dr->ncookies * sizeof(struct ldc_trans_cookie)));
+	if (send_ctrl(vio, &pkt->tag, len) < 0)
+		goto send_nack;
+
+	vio->dr_state |= VIO_DR_STATE_RXREG;
+
+	return 0;
+
+send_nack:
+	pkt->tag.stype = VIO_SUBTYPE_NACK;
+	viodbg(HS, "SEND DRING_REG NACK\n");
+	(void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+
+	return handshake_failure(vio);
+}
+
+static int process_dreg_ack(struct vio_driver_state *vio,
+			    struct vio_dring_register *pkt)
+{
+	struct vio_dring_state *dr;
+
+	viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	dr = &vio->drings[VIO_DRIVER_TX_RING];
+
+	if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
+		return handshake_failure(vio);
+
+	dr->ident = pkt->dring_ident;
+	vio->dr_state |= VIO_DR_STATE_TXREG;
+
+	if (all_drings_registered(vio)) {
+		if (send_rdx(vio) < 0)
+			return handshake_failure(vio);
+		vio->hs_state = VIO_HS_SENT_RDX;
+	}
+	return 0;
+}
+
+static int process_dreg_nack(struct vio_driver_state *vio,
+			     struct vio_dring_register *pkt)
+{
+	viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	return handshake_failure(vio);
+}
+
+static int process_dreg(struct vio_driver_state *vio,
+			struct vio_dring_register *pkt)
+{
+	if (!(vio->hs_state & VIO_HS_GOTVERS))
+		return handshake_failure(vio);
+
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_dreg_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_dreg_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_dreg_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+
+static int process_dunreg(struct vio_driver_state *vio,
+			  struct vio_dring_unregister *pkt)
+{
+	struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
+
+	viodbg(HS, "GOT DRING_UNREG\n");
+
+	if (pkt->dring_ident != dr->ident)
+		return 0;
+
+	vio->dr_state &= ~VIO_DR_STATE_RXREG;
+
+	memset(dr, 0, sizeof(*dr));
+
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;
+
+	return 0;
+}
+
+static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	viodbg(HS, "GOT RDX INFO\n");
+
+	pkt->tag.stype = VIO_SUBTYPE_ACK;
+	viodbg(HS, "SEND RDX ACK\n");
+	if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
+		return handshake_failure(vio);
+
+	vio->hs_state |= VIO_HS_SENT_RDX_ACK;
+	return 0;
+}
+
+static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	viodbg(HS, "GOT RDX ACK\n");
+
+	if (!(vio->hs_state & VIO_HS_SENT_RDX))
+		return handshake_failure(vio);
+
+	vio->hs_state |= VIO_HS_GOT_RDX_ACK;
+	return 0;
+}
+
+static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	viodbg(HS, "GOT RDX NACK\n");
+
+	return handshake_failure(vio);
+}
+
+static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	if (!all_drings_registered(vio))
+		return handshake_failure(vio);
+
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_rdx_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_rdx_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_rdx_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+
+int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
+{
+	struct vio_msg_tag *tag = pkt;
+	u8 prev_state = vio->hs_state;
+	int err;
+
+	switch (tag->stype_env) {
+	case VIO_VER_INFO:
+		err = process_ver(vio, pkt);
+		break;
+
+	case VIO_ATTR_INFO:
+		err = process_attr(vio, pkt);
+		break;
+
+	case VIO_DRING_REG:
+		err = process_dreg(vio, pkt);
+		break;
+
+	case VIO_DRING_UNREG:
+		err = process_dunreg(vio, pkt);
+		break;
+
+	case VIO_RDX:
+		err = process_rdx(vio, pkt);
+		break;
+
+	default:
+		err = process_unknown(vio, pkt);
+		break;
+	}
+	if (!err &&
+	    vio->hs_state != prev_state &&
+	    (vio->hs_state & VIO_HS_COMPLETE))
+		vio->ops->handshake_complete(vio);
+
+	return err;
+}
+EXPORT_SYMBOL(vio_control_pkt_engine);
+
+void vio_conn_reset(struct vio_driver_state *vio)
+{
+}
+EXPORT_SYMBOL(vio_conn_reset);
+
+/* The issue is that the Solaris virtual disk server just mirrors the
+ * SID values it gets from the client peer.  So we work around that
+ * here in vio_{validate,send}_sid() so that the drivers don't need
+ * to be aware of this crap.
+ */
+int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
+{
+	u32 sid;
+
+	/* Always let VERSION+INFO packets through unchecked, they
+	 * define the new SID.
+	 */
+	if (tp->type == VIO_TYPE_CTRL &&
+	    tp->stype == VIO_SUBTYPE_INFO &&
+	    tp->stype_env == VIO_VER_INFO)
+		return 0;
+
+	/* Ok, now figure out which SID to use. */
+	switch (vio->dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK_SERVER:
+	default:
+		sid = vio->_peer_sid;
+		break;
+
+	case VDEV_DISK:
+		sid = vio->_local_sid;
+		break;
+	}
+
+	if (sid == tp->sid)
+		return 0;
+	viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
+	       tp->sid, vio->_peer_sid, vio->_local_sid);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vio_validate_sid);
+
+u32 vio_send_sid(struct vio_driver_state *vio)
+{
+	switch (vio->dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK:
+	default:
+		return vio->_local_sid;
+
+	case VDEV_DISK_SERVER:
+		return vio->_peer_sid;
+	}
+}
+EXPORT_SYMBOL(vio_send_sid);
+
+int vio_ldc_alloc(struct vio_driver_state *vio,
+		  struct ldc_channel_config *base_cfg,
+		  void *event_arg)
+{
+	struct ldc_channel_config cfg = *base_cfg;
+	struct ldc_channel *lp;
+
+	cfg.tx_irq = vio->vdev->tx_irq;
+	cfg.rx_irq = vio->vdev->rx_irq;
+
+	lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
+	if (IS_ERR(lp))
+		return PTR_ERR(lp);
+
+	vio->lp = lp;
+
+	return 0;
+}
+EXPORT_SYMBOL(vio_ldc_alloc);
+
+void vio_ldc_free(struct vio_driver_state *vio)
+{
+	ldc_free(vio->lp);
+	vio->lp = NULL;
+
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;
+}
+EXPORT_SYMBOL(vio_ldc_free);
+
+void vio_port_up(struct vio_driver_state *vio)
+{
+	unsigned long flags;
+	int err, state;
+
+	spin_lock_irqsave(&vio->lock, flags);
+
+	state = ldc_state(vio->lp);
+
+	err = 0;
+	if (state == LDC_STATE_INIT) {
+		err = ldc_bind(vio->lp, vio->name);
+		if (err)
+			printk(KERN_WARNING "%s: Port %lu bind failed, "
+			       "err=%d\n",
+			       vio->name, vio->vdev->channel_id, err);
+	}
+
+	if (!err) {
+		err = ldc_connect(vio->lp);
+		if (err)
+			printk(KERN_WARNING "%s: Port %lu connect failed, "
+			       "err=%d\n",
+			       vio->name, vio->vdev->channel_id, err);
+	}
+	if (err) {
+		unsigned long expires = jiffies + HZ;
+
+		expires = round_jiffies(expires);
+		mod_timer(&vio->timer, expires);
+	}
+
+	spin_unlock_irqrestore(&vio->lock, flags);
+}
+EXPORT_SYMBOL(vio_port_up);
+
+static void vio_port_timer(unsigned long _arg)
+{
+	struct vio_driver_state *vio = (struct vio_driver_state *) _arg;
+
+	vio_port_up(vio);
+}
+
+int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
+		    u8 dev_class, struct vio_version *ver_table,
+		    int ver_table_size, struct vio_driver_ops *ops,
+		    char *name)
+{
+	switch (dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK:
+	case VDEV_DISK_SERVER:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (!ops->send_attr ||
+	    !ops->handle_attr ||
+	    !ops->handshake_complete)
+		return -EINVAL;
+
+	if (!ver_table || ver_table_size < 1)
+		return -EINVAL;
+
+	if (!name)
+		return -EINVAL;
+
+	spin_lock_init(&vio->lock);
+
+	vio->name = name;
+
+	vio->dev_class = dev_class;
+	vio->vdev = vdev;
+
+	vio->ver_table = ver_table;
+	vio->ver_table_entries = ver_table_size;
+
+	vio->ops = ops;
+
+	setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
+
+	return 0;
+}
+EXPORT_SYMBOL(vio_driver_init);
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index 3ad10f3027e..481861764de 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -90,10 +90,8 @@ SECTIONS
   __initramfs_end = .;
 #endif
 
-  . = ALIGN(PAGE_SIZE);
-  __per_cpu_start = .;
-  .data.percpu  : { *(.data.percpu) }
-  __per_cpu_end = .;
+  PERCPU(PAGE_SIZE)
+
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
   __bss_start = .;
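[Editor's note: to show how a client is expected to drive the viohs.c layer above, here is a minimal sketch of a hypothetical consumer. Only the vio_* calls are from this patch; the driver name "mydrv", the ops bodies, and the ldc_channel_config field names (.event, .mtu, .mode, from asm/ldc.h) are illustrative assumptions, not part of the patch.

	#include <linux/kernel.h>
	#include <asm/ldc.h>
	#include <asm/vio.h>

	static struct vio_version mydrv_versions[] = {
		{ .major = 1, .minor = 0 },
	};

	static int mydrv_send_attr(struct vio_driver_state *vio)
	{
		/* Build this device class's ATTR INFO packet and send
		 * it with vio_ldc_send(). */
		return 0;
	}

	static int mydrv_handle_attr(struct vio_driver_state *vio, void *arg)
	{
		/* Validate the peer's attributes; returning < 0 fails
		 * the handshake. */
		return 0;
	}

	static void mydrv_handshake_complete(struct vio_driver_state *vio)
	{
		/* Invoked from vio_control_pkt_engine() once the
		 * handshake reaches VIO_HS_COMPLETE. */
	}

	static struct vio_driver_ops mydrv_ops = {
		.send_attr		= mydrv_send_attr,
		.handle_attr		= mydrv_handle_attr,
		.handshake_complete	= mydrv_handshake_complete,
	};

	static void mydrv_event(void *arg, int event)
	{
		struct vio_driver_state *vio = arg;

		if (event == LDC_EVENT_UP || event == LDC_EVENT_RESET) {
			vio_link_state_change(vio, event);
			return;
		}
		/* On a data-ready event, read each packet from the LDC
		 * channel, check it with vio_validate_sid(), and feed
		 * VIO_TYPE_CTRL packets to vio_control_pkt_engine(). */
	}

	static struct vio_driver_state mydrv_vio;

	static int mydrv_attach(struct vio_dev *vdev)
	{
		struct ldc_channel_config cfg = {
			.event	= mydrv_event,	/* assumed field names */
			.mtu	= 64,
			.mode	= LDC_MODE_UNRELIABLE,
		};
		int err;

		err = vio_driver_init(&mydrv_vio, vdev, VDEV_DISK,
				      mydrv_versions,
				      ARRAY_SIZE(mydrv_versions),
				      &mydrv_ops, "mydrv");
		if (err)
			return err;

		err = vio_ldc_alloc(&mydrv_vio, &cfg, &mydrv_vio);
		if (err)
			return err;

		/* Bind and connect; on failure vio->timer retries the
		 * bring-up roughly once a second via vio_port_timer(). */
		vio_port_up(&mydrv_vio);
		return 0;
	}

Note the design choice visible in vio_ldc_alloc(): the caller passes a template config, and the tx/rx IRQs discovered from the machine description in vio_fill_channel_info() are patched in, so individual drivers never deal with virtual-IRQ plumbing themselves.]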