/*
* SN Platform GRU Driver
*
* KERNEL SERVICES THAT USE THE GRU
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <asm/io_apic.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"
#include "grukservices.h"
#include "gru_instructions.h"
#include <asm/uv/uv_hub.h>
/*
* Kernel GRU Usage
*
* The following is an interim algorithm for management of kernel GRU
* resources. This will likely be replaced when we better understand the
* kernel/user requirements.
*
* Blade percpu resources reserved for kernel use. These resources are
* reserved whenever the kernel context for the blade is loaded. Note
* that the kernel context is not guaranteed to be always available. It is
* loaded on demand & can be stolen by a user if the user demand exceeds the
* kernel demand. The kernel can always reload the kernel context but
* a SLEEP may be required!!!.
*
* Async Overview:
*
* Each blade has one "kernel context" that owns GRU kernel resources
* located on the blade. Kernel drivers use GRU resources in this context
* for sending messages, zeroing memory, etc.
*
* The kernel context is dynamically loaded on demand. If it is not in
* use by the kernel, the kernel context can be unloaded & given to a user.
* The kernel context will be reloaded when needed. This may require that
* a context be stolen from a user.
* NOTE: frequent unloading/reloading of the kernel context is
* expensive. We are depending on batch schedulers, cpusets, sane
* drivers or some other mechanism to prevent the need for frequent
* stealing/reloading.
*
* The kernel context consists of two parts:
* - 1 CB & a few DSRs that are reserved for each cpu on the blade.
* Each cpu has its own private resources & does not share them
* with other cpus. These resources are used serially, ie,
* locked, used & unlocked on each call to a function in
* grukservices.
* (Now that we have dynamic loading of kernel contexts, I
* may rethink this & allow sharing between cpus....)
*
* - Additional resources can be reserved long term & used directly
* by UV drivers located in the kernel. Drivers using these GRU
* resources can use asynchronous GRU instructions that send
* interrupts on completion.
* - these resources must be explicitly locked/unlocked
* - locked resources prevent (obviously) the kernel
* context from being unloaded.
* - drivers using these resources directly issue their own
* GRU instruction and must wait/check completion.
*
* When these resources are reserved, the caller can optionally
* associate a wait_queue with the resources and use asynchronous
* GRU instructions. When an async GRU instruction completes, the
* driver will do a wakeup on the event.
*
*/
/*
 * Async resource handles are the blade id biased by one: handle h names
 * blade (h - 1), and blade b is handed out as handle (b + 1).
 * NOTE(review): presumably the bias keeps 0 from being a valid handle -
 * confirm against the code that reserves async resources.
 */
#define ASYNC_HAN_TO_BID(h) ((h) - 1)
#define ASYNC_BID_TO_HAN(b) ((b) + 1)
/* Index gru_base[] with the blade id that an async handle names */
#define ASYNC_HAN_TO_BS(h) gru_base[ASYNC_HAN_TO_BID(h)]
/*
 * Kernel resources reserved per cpu in the blade's kernel context: the
 * number of CBRs and the number of DSR bytes. Both are multiplied by the
 * blade's possible-cpu count when the kernel context is sized.
 */
#define GRU_NUM_KERNEL_CBR 1
#define GRU_NUM_KERNEL_DSR_BYTES 256
/* The per-cpu DSR reservation expressed in GRU cache lines */
#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \
GRU_CACHE_LINE_BYTES)
/* GRU instruction attributes for all instructions */
#define IMA IMA_CB_DELAY
/* GRU cacheline size is always 64 bytes - even on arches with 128 byte lines */
#define __gru_cacheline_aligned__ \
__attribute__((__aligned__(GRU_CACHE_LINE_BYTES)))
/*
 * NOTE(review): no use of MAGIC is visible in this part of the file; its
 * purpose should be confirmed against the code that references it.
 */
#define MAGIC 0x1234567887654321UL
/* Default retry count for GRU errors on kernel instructions */
#define EXCEPTION_RETRY_LIMIT 3
/* Status of message queue sections */
#define MQS_EMPTY 0
#define MQS_FULL 1
#define MQS_NOOP 2
/*----------------- RESOURCE MANAGEMENT -------------------------------------*/
/*
 * Header that sits at the start of a GRU message queue.
 *
 * The members are grouped by GRU cache line (CL): "head" starts CL 0,
 * "next" starts CL 1 and "data" starts CL 2. Do not reorder members or
 * change their alignment attributes without checking every user of the
 * layout.
 *
 * NOTE(review): "data" is aligned with the kernel's ____cacheline_aligned
 * while the other members use __gru_cacheline_aligned__; confirm this is
 * intended on arches whose cache line is not GRU_CACHE_LINE_BYTES.
 *
 * optimized for x86_64
 */
struct message_queue {
union gru_mesqhead head __gru_cacheline_aligned__; /* CL 0 */
int qlines; /* DW 1 */
long hstatus[2]; /* two status words, addressed via HSTATUS() */
void *next __gru_cacheline_aligned__;/* CL 1 */
void *limit; /* presumably the end of the queue storage - TODO confirm */
void *start; /* presumably the first message slot - TODO confirm */
void *start2; /* presumably the start of the second section - TODO confirm */
char data ____cacheline_aligned; /* CL 2 */
};
/*
 * First word in every message - used by mesq interface.
 *
 * Four single-byte fields, so the header occupies 4 bytes.
 * NOTE(review): the field descriptions below are inferred from the names
 * only; the code that reads and writes them is not visible here.
 */
struct message_header {
char present; /* presumably marks the message as present - TODO confirm */
char present2; /* presumably a second present marker - TODO confirm */
char lines; /* presumably the message length in cache lines - TODO confirm */
char fill; /* presumably padding to fill out the word - TODO confirm */
};
/*
 * Address of hstatus[h] inside the message queue that starts at mq: the
 * byte offset of that member is added to mq.
 * NOTE(review): the offset is in bytes, so mq is expected to be an address
 * held in an integer type rather than a struct message_queue pointer -
 * TODO confirm against the callers.
 */
#define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h]))
/*
* Reload the blade's kernel context into a GRU chiplet. Called holding
* the bs_kgts_sema for READ. Will steal user contexts if necessary.
*/
static void gru_load_kernel_context(struct gru_blade_state *bs, int blade_id)
{
struct gru_state *gru;
struct gru_thread_state *kgts;
void *vaddr;
int ctxnum, ncpus;
up_read(&bs->bs_kgts_sema);
down_write(&bs->bs_kgts_sema);
if (!bs->bs_kgts) {
bs->bs_kgts = gru_alloc_gts(NULL, 0, 0, 0, 0, 0);
bs->bs_kgts->ts_user_blade_id = blade_id;
}
kgts = bs->bs_kgts;
if (!kgts->ts_gru) {
STAT(load_kernel_context);
ncpus = uv_blade_nr_possible_cpus(blade_id);
kgts->ts_cbr_au_count = GRU_CB_COUNT_TO_AU(
GRU_NUM_KERNEL_CBR * ncpus + bs->bs_async_cbrs);
kgts->ts_dsr_au_count = GRU_DS_BYTES_TO_AU(
GRU_NUM_KERNEL_DSR_BYTES * ncpus +
bs->bs_async_dsr_bytes);
while (!gru_assign_gru_context(kgts)) {