aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rwxr-xr-xtools/hv/hv_get_dhcp_info.sh28
-rwxr-xr-xtools/hv/hv_get_dns_info.sh13
-rw-r--r--tools/hv/hv_kvp_daemon.c1036
-rwxr-xr-xtools/hv/hv_set_ifconfig.sh68
-rw-r--r--tools/lguest/lguest.c1
-rw-r--r--tools/perf/Makefile5
-rw-r--r--tools/perf/perf.h6
-rw-r--r--tools/perf/util/include/linux/rbtree_augmented.h2
-rw-r--r--tools/power/acpi/Makefile18
-rw-r--r--tools/power/acpi/acpidump.859
-rw-r--r--tools/power/acpi/acpidump.c560
-rw-r--r--tools/power/cpupower/Makefile2
-rw-r--r--tools/power/x86/turbostat/turbostat.855
-rw-r--r--tools/power/x86/turbostat/turbostat.c214
-rwxr-xr-xtools/testing/ktest/ktest.pl15
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/epoll/Makefile11
-rw-r--r--tools/testing/selftests/epoll/test_epoll.c344
-rw-r--r--tools/usb/testusb.c25
-rw-r--r--tools/virtio/virtio-trace/Makefile13
-rw-r--r--tools/virtio/virtio-trace/README118
-rw-r--r--tools/virtio/virtio-trace/trace-agent-ctl.c137
-rw-r--r--tools/virtio/virtio-trace/trace-agent-rw.c192
-rw-r--r--tools/virtio/virtio-trace/trace-agent.c270
-rw-r--r--tools/virtio/virtio-trace/trace-agent.h75
25 files changed, 3085 insertions, 184 deletions
diff --git a/tools/hv/hv_get_dhcp_info.sh b/tools/hv/hv_get_dhcp_info.sh
new file mode 100755
index 00000000000..ccd3e953276
--- /dev/null
+++ b/tools/hv/hv_get_dhcp_info.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# This example script retrieves the DHCP state of a given interface.
+# In the interest of keeping the KVP daemon code free of distro specific
+# information; the kvp daemon code invokes this external script to gather
+# DHCP setting for the specific interface.
+#
+# Input: Name of the interface
+#
+# Output: The script prints the string "Enabled" to stdout to indicate
+# that DHCP is enabled on the interface. If DHCP is not enabled,
+# the script prints the string "Disabled" to stdout.
+#
+# Each Distro is expected to implement this script in a distro specific
+# fashion. For instance on Distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for retrieving DHCP
+# information.
+
+if_file="/etc/sysconfig/network-scripts/ifcfg-$1"
+
+# Quote the path so an interface name containing whitespace or glob
+# characters is passed to grep as a single argument.
+if grep -q "dhcp" "$if_file" 2>/dev/null
+then
+	echo "Enabled"
+else
+	echo "Disabled"
+fi
diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh
new file mode 100755
index 00000000000..058c17b46ff
--- /dev/null
+++ b/tools/hv/hv_get_dns_info.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This example script parses /etc/resolv.conf to retrieve DNS information.
+# In the interest of keeping the KVP daemon code free of distro specific
+# information; the kvp daemon code invokes this external script to gather
+# DNS information.
+# This script is expected to print the nameserver values to stdout.
+# Each Distro is expected to implement this script in a distro specific
+# fashion. For instance on Distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for retrieving DNS
+# entries.
+
+cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }'
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index d9834b36294..5959affd882 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -31,6 +31,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
+#include <ctype.h>
#include <errno.h>
#include <arpa/inet.h>
#include <linux/connector.h>
@@ -41,6 +42,7 @@
#include <syslog.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <dirent.h>
/*
* KVP protocol: The user mode component first registers with the
@@ -68,25 +70,39 @@ enum key_index {
ProcessorArchitecture
};
+
+enum {
+ IPADDR = 0,
+ NETMASK,
+ GATEWAY,
+ DNS
+};
+
static char kvp_send_buffer[4096];
-static char kvp_recv_buffer[4096];
+static char kvp_recv_buffer[4096 * 2];
static struct sockaddr_nl addr;
+static int in_hand_shake = 1;
static char *os_name = "";
static char *os_major = "";
static char *os_minor = "";
static char *processor_arch;
static char *os_build;
-static char *lic_version;
+static char *lic_version = "Unknown version";
static struct utsname uts_buf;
+/*
+ * The location of the interface configuration file.
+ */
+
+#define KVP_CONFIG_LOC "/var/opt/"
#define MAX_FILE_NAME 100
#define ENTRIES_PER_BLOCK 50
struct kvp_record {
- __u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
- __u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+ char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+ char value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
};
struct kvp_file_state {
@@ -94,7 +110,7 @@ struct kvp_file_state {
int num_blocks;
struct kvp_record *records;
int num_records;
- __u8 fname[MAX_FILE_NAME];
+ char fname[MAX_FILE_NAME];
};
static struct kvp_file_state kvp_file_info[KVP_POOL_COUNT];
@@ -106,7 +122,7 @@ static void kvp_acquire_lock(int pool)
if (fcntl(kvp_file_info[pool].fd, F_SETLKW, &fl) == -1) {
syslog(LOG_ERR, "Failed to acquire the lock pool: %d", pool);
- exit(-1);
+ exit(EXIT_FAILURE);
}
}
@@ -118,7 +134,7 @@ static void kvp_release_lock(int pool)
if (fcntl(kvp_file_info[pool].fd, F_SETLK, &fl) == -1) {
perror("fcntl");
syslog(LOG_ERR, "Failed to release the lock pool: %d", pool);
- exit(-1);
+ exit(EXIT_FAILURE);
}
}
@@ -137,14 +153,19 @@ static void kvp_update_file(int pool)
if (!filep) {
kvp_release_lock(pool);
syslog(LOG_ERR, "Failed to open file, pool: %d", pool);
- exit(-1);
+ exit(EXIT_FAILURE);
}
bytes_written = fwrite(kvp_file_info[pool].records,
sizeof(struct kvp_record),
kvp_file_info[pool].num_records, filep);
- fflush(filep);
+ if (ferror(filep) || fclose(filep)) {
+ kvp_release_lock(pool);
+ syslog(LOG_ERR, "Failed to write file, pool: %d", pool);
+ exit(EXIT_FAILURE);
+ }
+
kvp_release_lock(pool);
}
@@ -163,14 +184,19 @@ static void kvp_update_mem_state(int pool)
if (!filep) {
kvp_release_lock(pool);
syslog(LOG_ERR, "Failed to open file, pool: %d", pool);
- exit(-1);
+ exit(EXIT_FAILURE);
}
- while (!feof(filep)) {
+ for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
ENTRIES_PER_BLOCK * num_blocks,
filep);
+ if (ferror(filep)) {
+ syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+ exit(EXIT_FAILURE);
+ }
+
if (!feof(filep)) {
/*
* We have more data to read.
@@ -180,7 +206,7 @@ static void kvp_update_mem_state(int pool)
if (record == NULL) {
syslog(LOG_ERR, "malloc failed");
- exit(-1);
+ exit(EXIT_FAILURE);
}
continue;
}
@@ -191,14 +217,15 @@ static void kvp_update_mem_state(int pool)
kvp_file_info[pool].records = record;
kvp_file_info[pool].num_records = records_read;
+ fclose(filep);
kvp_release_lock(pool);
}
static int kvp_file_init(void)
{
- int ret, fd;
+ int fd;
FILE *filep;
size_t records_read;
- __u8 *fname;
+ char *fname;
struct kvp_record *record;
struct kvp_record *readp;
int num_blocks;
@@ -208,7 +235,7 @@ static int kvp_file_init(void)
if (access("/var/opt/hyperv", F_OK)) {
if (mkdir("/var/opt/hyperv", S_IRUSR | S_IWUSR | S_IROTH)) {
syslog(LOG_ERR, " Failed to create /var/opt/hyperv");
- exit(-1);
+ exit(EXIT_FAILURE);
}
}
@@ -232,12 +259,18 @@ static int kvp_file_init(void)
fclose(filep);
return 1;
}
- while (!feof(filep)) {
+ for (;;) {
readp = &record[records_read];
records_read += fread(readp, sizeof(struct kvp_record),
ENTRIES_PER_BLOCK,
filep);
+ if (ferror(filep)) {
+ syslog(LOG_ERR, "Failed to read file, pool: %d",
+ i);
+ exit(EXIT_FAILURE);
+ }
+
if (!feof(filep)) {
/*
* We have more data to read.
@@ -311,7 +344,6 @@ static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value,
int value_size)
{
int i;
- int j, k;
int num_records;
struct kvp_record *record;
int num_blocks;
@@ -394,7 +426,7 @@ static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value,
return 1;
}
-static void kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size,
+static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size,
__u8 *value, int value_size)
{
struct kvp_record *record;
@@ -406,16 +438,12 @@ static void kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size,
record = kvp_file_info[pool].records;
if (index >= kvp_file_info[pool].num_records) {
- /*
- * This is an invalid index; terminate enumeration;
- * - a NULL value will do the trick.
- */
- strcpy(value, "");
- return;
+ return 1;
}
memcpy(key, record[index].key, key_size);
memcpy(value, record[index].value, value_size);
+ return 0;
}
@@ -426,6 +454,7 @@ void kvp_get_os_info(void)
uname(&uts_buf);
os_build = uts_buf.release;
+ os_name = uts_buf.sysname;
processor_arch = uts_buf.machine;
/*
@@ -437,20 +466,70 @@ void kvp_get_os_info(void)
if (p)
*p = '\0';
+ /*
+ * Parse the /etc/os-release file if present:
+ * http://www.freedesktop.org/software/systemd/man/os-release.html
+ */
+ file = fopen("/etc/os-release", "r");
+ if (file != NULL) {
+ while (fgets(buf, sizeof(buf), file)) {
+ char *value, *q;
+
+ /* Ignore comments */
+ if (buf[0] == '#')
+ continue;
+
+ /* Split into name=value */
+ p = strchr(buf, '=');
+ if (!p)
+ continue;
+ *p++ = 0;
+
+ /* Remove quotes and newline; un-escape */
+ value = p;
+ q = p;
+ while (*p) {
+ if (*p == '\\') {
+ ++p;
+ if (!*p)
+ break;
+ *q++ = *p++;
+ } else if (*p == '\'' || *p == '"' ||
+ *p == '\n') {
+ ++p;
+ } else {
+ *q++ = *p++;
+ }
+ }
+ *q = 0;
+
+ if (!strcmp(buf, "NAME")) {
+ p = strdup(value);
+ if (!p)
+ break;
+ os_name = p;
+ } else if (!strcmp(buf, "VERSION_ID")) {
+ p = strdup(value);
+ if (!p)
+ break;
+ os_major = p;
+ }
+ }
+ fclose(file);
+ return;
+ }
+
+ /* Fallback for older RH/SUSE releases */
file = fopen("/etc/SuSE-release", "r");
if (file != NULL)
goto kvp_osinfo_found;
file = fopen("/etc/redhat-release", "r");
if (file != NULL)
goto kvp_osinfo_found;
- /*
- * Add code for other supported platforms.
- */
/*
* We don't have information about the os.
*/
- os_name = uts_buf.sysname;
return;
kvp_osinfo_found:
@@ -494,82 +573,458 @@ done:
return;
}
+
+
+/*
+ * Retrieve the interface name corresponding to the specified guid by
+ * comparing it with the first line of /sys/class/net/<if>/device/device_id
+ * for each entry under /sys/class/net/. Returns a strdup()'d name on a
+ * match; NULL if nothing matches, the directory cannot be opened or
+ * strdup() fails. The caller is responsible for freeing the memory.
+ */
+
+static char *kvp_get_if_name(char *guid)
+{
+	DIR *dir;
+	struct dirent *entry;
+	FILE *file;
+	char *p, *q, *x;
+	char *if_name = NULL;
+	char buf[256];
+	char *kvp_net_dir = "/sys/class/net/";
+	char dev_id[256];
+
+	dir = opendir(kvp_net_dir);
+	if (dir == NULL)
+		return NULL;
+
+	snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir);
+	q = dev_id + strlen(kvp_net_dir);
+
+	while ((entry = readdir(dir)) != NULL) {
+		/*
+		 * Rebuild the path after the directory prefix. snprintf()
+		 * truncates instead of overrunning dev_id on a long name.
+		 */
+		snprintf(q, sizeof(dev_id) - (q - dev_id), "%s%s",
+			 entry->d_name, "/device/device_id");
+
+		file = fopen(dev_id, "r");
+		if (file == NULL)
+			continue;
+
+		p = fgets(buf, sizeof(buf), file);
+		if (p) {
+			x = strchr(p, '\n');
+			if (x)
+				*x = '\0';
+
+			if (!strcmp(p, guid)) {
+				/*
+				 * Found the guid match; return the interface
+				 * name. The caller will free the memory.
+				 */
+				if_name = strdup(entry->d_name);
+				fclose(file);
+				break;
+			}
+		}
+		fclose(file);
+	}
+
+	closedir(dir);
+	return if_name;
+}
+
+/*
+ * Return a strdup()'d, upper-cased copy of the first line of
+ * /sys/class/net/<if_name>/address (NULL on failure); caller frees it.
+ */
+static char *kvp_if_name_to_mac(char *if_name)
+{
+	FILE *file;
+	char *p;
+	char buf[256];
+	char addr_file[256];
+	int i;
+	char *mac_addr = NULL;
+
+	snprintf(addr_file, sizeof(addr_file), "%s%s%s", "/sys/class/net/",
+		if_name, "/address");
+
+	file = fopen(addr_file, "r");
+	if (file == NULL)
+		return NULL;
+
+	p = fgets(buf, sizeof(buf), file);
+	if (p) {
+		p[strcspn(p, "\n")] = '\0';
+
+		/* toupper() is only defined for unsigned char values. */
+		for (i = 0; p[i] != '\0'; i++)
+			p[i] = toupper((unsigned char)p[i]);
+		mac_addr = strdup(p);
+	}
+
+	fclose(file);
+	return mac_addr;
+}
+
+
+/*
+ * Return a strdup()'d name of the interface whose upper-cased sysfs MAC
+ * address equals mac, or NULL if there is none; the caller frees it.
+ */
+static char *kvp_mac_to_if_name(char *mac)
+{
+	DIR *dir;
+	struct dirent *entry;
+	FILE *file;
+	char *p, *q, *x;
+	char *if_name = NULL;
+	char buf[256];
+	char *kvp_net_dir = "/sys/class/net/";
+	char dev_id[256];
+	int i;
+
+	dir = opendir(kvp_net_dir);
+	if (dir == NULL)
+		return NULL;
+
+	snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir);
+	q = dev_id + strlen(kvp_net_dir);
+
+	while ((entry = readdir(dir)) != NULL) {
+		/*
+		 * Rebuild the path after the directory prefix. snprintf()
+		 * truncates instead of overrunning dev_id on a long name.
+		 */
+		snprintf(q, sizeof(dev_id) - (q - dev_id), "%s%s",
+			 entry->d_name, "/address");
+
+		file = fopen(dev_id, "r");
+		if (file == NULL)
+			continue;
+
+		p = fgets(buf, sizeof(buf), file);
+		if (p) {
+			x = strchr(p, '\n');
+			if (x)
+				*x = '\0';
+
+			/* toupper() is only defined for unsigned char values. */
+			for (i = 0; p[i] != '\0'; i++)
+				p[i] = toupper((unsigned char)p[i]);
+
+			if (!strcmp(p, mac)) {
+				/*
+				 * Found the MAC match; return the interface
+				 * name. The caller will free the memory.
+				 */
+				if_name = strdup(entry->d_name);
+				fclose(file);
+				break;
+			}
+		}
+		fclose(file);
+	}
+
+	closedir(dir);
+	return if_name;
+}
+
+/* Run cmd and append each line of its output to config_buf as "line;". */
+static void kvp_process_ipconfig_file(char *cmd,
+					char *config_buf, int len,
+					int element_size, int offset)
+{
+	char buf[256];
+	char *p;
+	FILE *file;
+
+	/*
+	 * First execute the command. A zero offset means config_buf is
+	 * cleared first; otherwise the output is appended to its contents.
+	 */
+	file = popen(cmd, "r");
+	if (file == NULL)
+		return;
+
+	if (offset == 0)
+		memset(config_buf, 0, len);
+	while ((p = fgets(buf, sizeof(buf), file)) != NULL) {
+		/* The last line may lack '\n'; strcspn() then hits the NUL. */
+		p[strcspn(p, "\n")] = '\0';
+		if ((len - strlen(config_buf)) < (element_size + 1) ||
+		    (len - strlen(config_buf)) < (strlen(p) + 2))
+			break;
+		strcat(config_buf, p);
+		strcat(config_buf, ";");
+	}
+	pclose(file);
+}
+
+/* Fill buffer with if_name's default gateways, DNS servers and DHCP state. */
+static void kvp_get_ipconfig_info(char *if_name,
+				 struct hv_kvp_ipaddr_value *buffer)
+{
+	char cmd[512];
+	char dhcp_info[128];
+	char *p;
+	FILE *file;
+
+	/*
+	 * Get the address of default gateway (ipv4); snprintf() bounds cmd.
+	 */
+	snprintf(cmd, sizeof(cmd), "%s %s%s", "ip route show dev", if_name,
+		 " | awk '/default/ {print $3 }'");
+
+	/*
+	 * Execute the command to gather gateway info.
+	 */
+	kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
+				(MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
+
+	/*
+	 * Get the address of default gateway (ipv6).
+	 */
+	snprintf(cmd, sizeof(cmd), "%s %s%s", "ip -f inet6 route show dev",
+		 if_name, " | awk '/default/ {print $3 }'");
+
+	/*
+	 * Execute the command to gather gateway info (ipv6).
+	 */
+	kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
+				(MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
+
+	/*
+	 * Gather the DNS state.
+	 * Since there is no standard way to get this information
+	 * across various distributions of interest; we just invoke
+	 * an external script that needs to be ported across distros
+	 * of interest.
+	 *
+	 * Following is the expected format of the information from the script:
+	 *
+	 * ipaddr1 (nameserver1)
+	 * ipaddr2 (nameserver2)
+	 * .
+	 * .
+	 */
+
+	snprintf(cmd, sizeof(cmd), "%s", "hv_get_dns_info");
+
+	/*
+	 * Execute the command to gather DNS info.
+	 */
+	kvp_process_ipconfig_file(cmd, (char *)buffer->dns_addr,
+				(MAX_IP_ADDR_SIZE * 2), INET_ADDRSTRLEN, 0);
+
+	/*
+	 * Gather the DHCP state.
+	 * We will gather this state by invoking an external script.
+	 * The parameter to the script is the interface name.
+	 * Here is the expected output:
+	 *
+	 * Enabled: DHCP enabled.
+	 */
+
+	snprintf(cmd, sizeof(cmd), "%s %s", "hv_get_dhcp_info", if_name);
+
+	file = popen(cmd, "r");
+	if (file == NULL)
+		return;
+
+	p = fgets(dhcp_info, sizeof(dhcp_info), file);
+	if (p == NULL) {
+		pclose(file);
+		return;
+	}
+
+	if (!strncmp(p, "Enabled", 7))
+		buffer->dhcp_enabled = 1;
+	else
+		buffer->dhcp_enabled = 0;
+
+	pclose(file);
+}
+
+/* Return the number of bits set in *w (the masks assume a 32-bit value). */
+static unsigned int hweight32(unsigned int *w)
+{
+	unsigned int bits = *w - ((*w >> 1) & 0x55555555);
+	bits = ((bits >> 2) & 0x33333333) + (bits & 0x33333333);
+	bits = ((bits >> 4) + bits) & 0x0F0F0F0F;
+	bits += bits >> 8;
+	return ((bits >> 16) + bits) & 0x000000FF;
+}
+
+/* Append addrp as text plus ';' to buffer; 0 on success, else HV_E_FAIL. */
+static int kvp_process_ip_address(void *addrp,
+				int family, char *buffer,
+				int length, int *offset)
+{
+	const void *raw;
+	int needed;
+	char text[50];
+	const char *str;
+
+	if (family == AF_INET) {
+		raw = &((struct sockaddr_in *)addrp)->sin_addr;
+		needed = INET_ADDRSTRLEN;
+	} else {
+		raw = &((struct sockaddr_in6 *)addrp)->sin6_addr.s6_addr;
+		needed = INET6_ADDRSTRLEN;
+	}
+	str = inet_ntop(family, raw, text, 50);
+
+	/* Require room for a worst-case address of this family first. */
+	if ((length - *offset) < needed + 1)
+		return HV_E_FAIL;
+	if (str == NULL) {
+		strcpy(buffer, "inet_ntop failed\n");
+		return HV_E_FAIL;
+	}
+	/* The first entry overwrites buffer; later ones are appended. */
+	if (*offset == 0)
+		strcpy(buffer, text);
+	else
+		strcat(buffer, text);
+	strcat(buffer, ";");
+	*offset += strlen(str) + 1;
+	return 0;
+}
+
static int
-kvp_get_ip_address(int family, char *buffer, int length)
+kvp_get_ip_info(int family, char *if_name, int op,
+ void *out_buffer, int length)
{
struct ifaddrs *ifap;
struct ifaddrs *curp;
- int ipv4_len = strlen("255.255.255.255") + 1;
- int ipv6_len = strlen("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")+1;
int offset = 0;
- const char *str;
- char tmp[50];
+ int sn_offset = 0;
int error = 0;
-
+ char *buffer;
+ struct hv_kvp_ipaddr_value *ip_buffer;
+ char cidr_mask[5]; /* /xyz */
+ int weight;
+ int i;
+ unsigned int *w;
+ char *sn_str;
+ struct sockaddr_in6 *addr6;
+
+ if (op == KVP_OP_ENUMERATE) {
+ buffer = out_buffer;
+ } else {
+ ip_buffer = out_buffer;
+ buffer = (char *)ip_buffer->ip_addr;
+ ip_buffer->addr_family = 0;
+ }
/*
* On entry into this function, the buffer is capable of holding the
- * maximum key value (2048 bytes).
+ * maximum key value.
*/
if (getifaddrs(&ifap)) {
strcpy(buffer, "getifaddrs failed\n");
- return 1;
+ return HV_E_FAIL;
}
curp = ifap;
while (curp != NULL) {
- if ((curp->ifa_addr != NULL) &&
- (curp->ifa_addr->sa_family == family)) {
- if (family == AF_INET) {
- struct sockaddr_in *addr =
- (struct sockaddr_in *) curp->ifa_addr;
-
- str = inet_ntop(family, &addr->sin_addr,
- tmp, 50);
- if (str == NULL) {
- strcpy(buffer, "inet_ntop failed\n");
- error = 1;
- goto getaddr_done;
- }
- if (offset == 0)
- strcpy(buffer, tmp);
- else
- strcat(buffer, tmp);
- strcat(buffer, ";");
+ if (curp->ifa_addr == NULL) {
+ curp = curp->ifa_next;
+ continue;
+ }
- offset += strlen(str) + 1;
- if ((length - offset) < (ipv4_len + 1))
- goto getaddr_done;
+ if ((if_name != NULL) &&
+ (strncmp(curp->ifa_name, if_name, strlen(if_name)))) {
+ /*
+ * We want info about a specific interface;
+ * just continue.
+ */
+ curp = curp->ifa_next;
+ continue;
+ }
- } else {
+ /*
+ * We only support two address families: AF_INET and AF_INET6.
+ * If a family value of 0 is specified, we collect both
+ * supported address families; if not we gather info on
+ * the specified address family.
+ */
+ if ((family != 0) && (curp->ifa_addr->sa_family != family)) {
+ curp = curp->ifa_next;
+ continue;
+ }
+ if ((curp->ifa_addr->sa_family != AF_INET) &&
+ (curp->ifa_addr->sa_family != AF_INET6)) {
+ curp = curp->ifa_next;
+ continue;
+ }
+ if (op == KVP_OP_GET_IP_INFO) {
/*
- * We only support AF_INET and AF_INET6
- * and the list of addresses is separated by a ";".
+ * Gather info other than the IP address.
+ * IP address info will be gathered later.
*/
- struct sockaddr_in6 *addr =
- (struct sockaddr_in6 *) curp->ifa_addr;
-
- str = inet_ntop(family,
- &addr->sin6_addr.s6_addr,
- tmp, 50);
- if (str == NULL) {
- strcpy(buffer, "inet_ntop failed\n");
- error = 1;
- goto getaddr_done;
- }
- if (offset == 0)
- strcpy(buffer, tmp);
- else
- strcat(buffer, tmp);
- strcat(buffer, ";");
- offset += strlen(str) + 1;
- if ((length - offset) < (ipv6_len + 1))
- goto getaddr_done;
+ if (curp->ifa_addr->sa_family == AF_INET) {
+ ip_buffer->addr_family |= ADDR_FAMILY_IPV4;
+ /*
+ * Get subnet info.
+ */
+ error = kvp_process_ip_address(
+ curp->ifa_netmask,
+ AF_INET,
+ (char *)
+ ip_buffer->sub_net,
+ length,
+ &sn_offset);
+ if (error)
+ goto gather_ipaddr;
+ } else {
+ ip_buffer->addr_family |= ADDR_FAMILY_IPV6;
+ /*
+ * Get subnet info in CIDR format.
+ */
+ weight = 0;
+ sn_str = (char *)ip_buffer->sub_net;
+ addr6 = (struct sockaddr_in6 *)
+ curp->ifa_netmask;
+ w = addr6->sin6_addr.s6_addr32;
+
+ for (i = 0; i < 4; i++)
+ weight += hweight32(&w[i]);
+
+ sprintf(cidr_mask, "/%d", weight);
+ if ((length - sn_offset) <
+ (strlen(cidr_mask) + 1))
+ goto gather_ipaddr;
+
+ if (sn_offset == 0)
+ strcpy(sn_str, cidr_mask);
+ else
+ strcat(sn_str, cidr_mask);
+ strcat((char *)ip_buffer->sub_net, ";");
+ sn_offset += strlen(sn_str) + 1;
}
+ /*
+ * Collect other ip related configuration info.
+ */
+
+ kvp_get_ipconfig_info(if_name, ip_buffer);
}
+
+gather_ipaddr:
+ error = kvp_process_ip_address(curp->ifa_addr,
+ curp->ifa_addr->sa_family,
+ buffer,
+ length, &offset);
+ if (error)
+ goto getaddr_done;
+
curp = curp->ifa_next;
}
@@ -579,6 +1034,315 @@ getaddr_done:
}
+/*
+ * Rewrite addr in place as a fully expanded IPv6 address: eight groups
+ * of four hex digits, so addr must have room for 39 characters plus NUL.
+ * Returns 1 on success. Text that inet_pton() rejects is left untouched
+ * and yields 0, except for NETMASK entries, which yield 1 as they are.
+ */
+static int expand_ipv6(char *addr, int type)
+{
+	struct in6_addr v6;
+	const unsigned char *b = v6.s6_addr;
+
+	if (inet_pton(AF_INET6, addr, &v6) != 1)
+		return type == NETMASK;
+
+	sprintf(addr, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:"
+		"%02x%02x:%02x%02x:%02x%02x",
+		(int)b[0], (int)b[1],
+		(int)b[2], (int)b[3],
+		(int)b[4], (int)b[5],
+		(int)b[6], (int)b[7],
+		(int)b[8], (int)b[9],
+		(int)b[10], (int)b[11],
+		(int)b[12], (int)b[13],
+		(int)b[14], (int)b[15]);
+
+	return 1;
+}
+
+/* Return 1 if addr parses as a dotted-decimal IPv4 address, else 0. */
+static int is_ipv4(char *addr)
+{
+	struct in_addr parsed;
+
+	/*
+	 * inet_pton() returns 1 only for a successful conversion; every
+	 * other result is treated as "not IPv4".
+	 */
+	return inet_pton(AF_INET, addr, &parsed) == 1;
+}
+
+static int parse_ip_val_buffer(char *in_buf, int *offset,
+				char *out_buf, int out_len)
+{
+	char *x;
+	char *start;
+
+	/*
+	 * in_buf holds entries separated by ';' (the last may lack it).
+	 * Copy the entry at *offset, minus leading spaces, to out_buf and
+	 * return 1; return 0 if it is empty or longer than out_len chars.
+	 */
+	start = in_buf + *offset;
+
+	/* x ends at the next entry, or on the NUL after the last one. */
+	x = strchr(start, ';');
+	if (x)
+		*x++ = 0;
+	else
+		x = start + strlen(start);
+
+	if (strlen(start) != 0) {
+		int i = 0;
+
+		/* Get rid of leading spaces. */
+		while (start[i] == ' ')
+			i++;
+
+		if ((int)strlen(start) <= out_len) {
+			strcpy(out_buf, (start + i));
+			*offset = x - in_buf;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Write one "<s1><s2>=<s3>" line to f; 0 on success, else HV_E_FAIL.
+ */
+static int kvp_write_file(FILE *f, char *s1, char *s2, char *s3)
+{
+	/* fprintf() signals an output error with a negative return. */
+	if (fprintf(f, "%s%s%s%s\n", s1, s2, "=", s3) < 0)
+		return HV_E_FAIL;
+
+	return 0;
+}
+
+
+/* Write every ';'-separated entry of ip_string to f as a tagged line. */
+static int process_ip_string(FILE *f, char *ip_string, int type)
+{
+	int error = 0;
+	char addr[INET6_ADDRSTRLEN];
+	int i = 0;
+	int j = 0;
+	char str[256];
+	char sub_str[10];
+	int offset = 0;
+
+	memset(addr, 0, sizeof(addr));
+	/* Cap each entry at what addr can hold, terminator excluded. */
+	while (parse_ip_val_buffer(ip_string, &offset, addr,
+					sizeof(addr) - 1)) {
+
+		sub_str[0] = 0;
+		if (is_ipv4(addr)) {
+			switch (type) {
+			case IPADDR:
+				snprintf(str, sizeof(str), "%s", "IPADDR");
+				break;
+			case NETMASK:
+				snprintf(str, sizeof(str), "%s", "NETMASK");
+				break;
+			case GATEWAY:
+				snprintf(str, sizeof(str), "%s", "GATEWAY");
+				break;
+			case DNS:
+				snprintf(str, sizeof(str), "%s", "DNS");
+				break;
+			}
+			if (type == DNS) {
+				snprintf(sub_str, sizeof(sub_str),
+					"%d", ++i);
+			} else {
+				/* First entry is untagged, then _1, _2 ... */
+				if (i != 0) {
+					snprintf(sub_str, sizeof(sub_str),
+						"_%d", i);
+				}
+				i++;
+			}
+
+		} else if (expand_ipv6(addr, type)) {
+			switch (type) {
+			case IPADDR:
+				snprintf(str, sizeof(str), "%s", "IPV6ADDR");
+				break;
+			case NETMASK:
+				snprintf(str, sizeof(str), "%s", "IPV6NETMASK");
+				break;
+			case GATEWAY:
+				snprintf(str, sizeof(str), "%s",
+					"IPV6_DEFAULTGW");
+				break;
+			case DNS:
+				snprintf(str, sizeof(str), "%s", "DNS");
+				break;
+			}
+			if (type == DNS) {
+				/* DNS entries share counter i with IPv4. */
+				snprintf(sub_str, sizeof(sub_str),
+					"%d", ++i);
+			} else {
+				/* First entry is untagged, then _1, _2 ... */
+				if (j != 0) {
+					snprintf(sub_str, sizeof(sub_str),
+						"_%d", j);
+				}
+				j++;
+			}
+		} else {
+			return HV_INVALIDARG;
+		}
+
+		error = kvp_write_file(f, str, sub_str, addr);
+		if (error)
+			return error;
+		memset(addr, 0, sizeof(addr));
+	}
+
+	return 0;
+}
+
+static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
+{
+	int error = 0;
+	char if_file[128];
+	FILE *file;
+	char cmd[512];
+	char *mac_addr;
+
+	/*
+	 * Set the configuration for the specified interface with
+	 * the information provided. Since there is no standard
+	 * way to configure an interface, we will have an external
+	 * script that does the job of configuring the interface and
+	 * flushing the configuration.
+	 *
+	 * The parameters passed to this external script are:
+	 * 1. A configuration file that has the specified configuration.
+	 *
+	 * We will embed the name of the interface in the configuration
+	 * file: ifcfg-ethx (where ethx is the interface name).
+	 *
+	 * The information provided here may be more than what is needed
+	 * in a given distro to configure the interface, so the script is
+	 * free to ignore information that may not be relevant.
+	 *
+	 * Here is the format of the ip configuration file:
+	 *
+	 * HWADDR=macaddr
+	 * IF_NAME=interface name
+	 * DHCP=yes (This is optional; if yes, DHCP is configured)
+	 *
+	 * IPADDR=ipaddr1
+	 * IPADDR_1=ipaddr2
+	 * IPADDR_x=ipaddry (where y = x + 1)
+	 *
+	 * NETMASK=netmask1
+	 * NETMASK_x=netmasky (where y = x + 1)
+	 *
+	 * GATEWAY=ipaddr1
+	 * GATEWAY_x=ipaddry (where y = x + 1)
+	 *
+	 * DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc)
+	 *
+	 * IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be
+	 * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as
+	 * IPV6NETMASK.
+	 *
+	 * The host can specify multiple ipv4 and ipv6 addresses to be
+	 * configured for the interface. Furthermore, the configuration
+	 * needs to be persistent. A subsequent GET call on the interface
+	 * is expected to return the configuration that is set via the SET
+	 * call.
+	 */
+
+	snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC,
+		"hyperv/ifcfg-", if_name);
+
+	file = fopen(if_file, "w");
+
+	if (file == NULL) {
+		syslog(LOG_ERR, "Failed to open config file");
+		return HV_E_FAIL;
+	}
+
+	/*
+	 * First write out the MAC address.
+	 */
+
+	mac_addr = kvp_if_name_to_mac(if_name);
+	if (mac_addr == NULL) {
+		error = HV_E_FAIL;
+		goto setval_error;
+	}
+
+	error = kvp_write_file(file, "HWADDR", "", mac_addr);
+	if (error)
+		goto setval_error;
+
+	error = kvp_write_file(file, "IF_NAME", "", if_name);
+	if (error)
+		goto setval_error;
+
+	if (new_val->dhcp_enabled) {
+		error = kvp_write_file(file, "DHCP", "", "yes");
+		if (error)
+			goto setval_error;
+
+		/*
+		 * We are done!.
+		 */
+		goto setval_done;
+	}
+
+	/*
+	 * Write the configuration for ipaddress, netmask, gateway and
+	 * name servers.
+	 */
+
+	error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR);
+	if (error)
+		goto setval_error;
+
+	error = process_ip_string(file, (char *)new_val->sub_net, NETMASK);
+	if (error)
+		goto setval_error;
+
+	error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY);
+	if (error)
+		goto setval_error;
+
+	error = process_ip_string(file, (char *)new_val->dns_addr, DNS);
+	if (error)
+		goto setval_error;
+
+setval_done:
+	free(mac_addr);
+	fclose(file);
+
+	/*
+	 * The configuration file is populated; run the external script
+	 * and report HV_E_FAIL if system() returns a non-zero status.
+	 */
+
+	snprintf(cmd, sizeof(cmd), "%s %s", "hv_set_ifconfig", if_file);
+	error = system(cmd) ? HV_E_FAIL : 0;
+	return error;
+
+setval_error:
+	syslog(LOG_ERR, "Failed to write config file");
+	free(mac_addr);
+	fclose(file);
+	return error;
+}
+
+
static int
kvp_get_domain_name(char *buffer, int length)
{
@@ -646,6 +1410,10 @@ int main(void)
char *p;
char *key_value;
char *key_name;
+ int op;
+ int pool;
+ char *if_name;
+ struct hv_kvp_ipaddr_value *kvp_ip_val;
daemon(1, 0);
openlog("KVP", 0, LOG_USER);
@@ -657,13 +1425,13 @@ int main(void)
if (kvp_file_init()) {
syslog(LOG_ERR, "Failed to initialize the pools");
- exit(-1);
+ exit(EXIT_FAILURE);
}
fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
if (fd < 0) {
syslog(LOG_ERR, "netlink socket creation failed; error:%d", fd);
- exit(-1);
+ exit(EXIT_FAILURE);
}
addr.nl_family = AF_NETLINK;
addr.nl_pad = 0;
@@ -675,7 +1443,7 @@ int main(void)
if (error < 0) {
syslog(LOG_ERR, "bind failed; error:%d", error);
close(fd);
- exit(-1);
+ exit(EXIT_FAILURE);
}
sock_opt = addr.nl_groups;
setsockopt(fd, 270, 1, &sock_opt, sizeof(sock_opt));
@@ -687,7 +1455,7 @@ int main(void)
message->id.val = CN_KVP_VAL;
hv_msg = (struct hv_kvp_msg *)message->data;
- hv_msg->kvp_hdr.operation = KVP_OP_REGISTER;
+ hv_msg->kvp_hdr.operation = KVP_OP_REGISTER1;
message->ack = 0;
message->len = sizeof(struct hv_kvp_msg);
@@ -695,7 +1463,7 @@ int main(void)
if (len < 0) {
syslog(LOG_ERR, "netlink_send failed; error:%d", len);
close(fd);
- exit(-1);
+ exit(EXIT_FAILURE);
}
pfd.fd = fd;
@@ -721,12 +1489,21 @@ int main(void)
incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data;
- switch (hv_msg->kvp_hdr.operation) {
- case KVP_OP_REGISTER:
+ /*
+ * We will use the KVP header information to pass back
+ * the error from this daemon. So, first copy the state
+ * and set the error code to success.
+ */
+ op = hv_msg->kvp_hdr.operation;
+ pool = hv_msg->kvp_hdr.pool;
+ hv_msg->error = HV_S_OK;
+
+ if ((in_hand_shake) && (op == KVP_OP_REGISTER1)) {
/*
* Driver is registering with us; stash away the version
* information.
*/
+ in_hand_shake = 0;
p = (char *)hv_msg->body.kvp_register.version;
lic_version = malloc(strlen(p) + 1);
if (lic_version) {
@@ -737,44 +1514,82 @@ int main(void)
syslog(LOG_ERR, "malloc failed");
}
continue;
+ }
- /*
- * The current protocol with the kernel component uses a
- * NULL key name to pass an error condition.
- * For the SET, GET and DELETE operations,
- * use the existing protocol to pass back error.
- */
+ switch (op) {
+ case KVP_OP_GET_IP_INFO:
+ kvp_ip_val = &hv_msg->body.kvp_ip_val;
+ if_name =
+ kvp_mac_to_if_name((char *)kvp_ip_val->adapter_id);
+
+ if (if_name == NULL) {
+ /*
+ * We could not map the mac address to an
+ * interface name; return error.
+ */
+ hv_msg->error = HV_E_FAIL;
+ break;
+ }
+ error = kvp_get_ip_info(
+ 0, if_name, KVP_OP_GET_IP_INFO,
+ kvp_ip_val,
+ (MAX_IP_ADDR_SIZE * 2));
+
+ if (error)
+ hv_msg->error = error;
+
+ free(if_name);
+ break;
+
+ case KVP_OP_SET_IP_INFO:
+ kvp_ip_val = &hv_msg->body.kvp_ip_val;
+ if_name = kvp_get_if_name(
+ (char *)kvp_ip_val->adapter_id);
+ if (if_name == NULL) {
+ /*
+ * We could not map the guid to an
+ * interface name; return error.
+ */
+ hv_msg->error = HV_GUID_NOTFOUND;
+ break;
+ }
+ error = kvp_set_ip_info(if_name, kvp_ip_val);
+ if (error)
+ hv_msg->error = error;
+
+ free(if_name);
+ break;
case KVP_OP_SET:
- if (kvp_key_add_or_modify(hv_msg->kvp_hdr.pool,
+ if (kvp_key_add_or_modify(pool,
hv_msg->body.kvp_set.data.key,
hv_msg->body.kvp_set.data.key_size,
hv_msg->body.kvp_set.data.value,
hv_msg->body.kvp_set.data.value_size))
- strcpy(hv_msg->body.kvp_set.data.key, "");
+ hv_msg->error = HV_S_CONT;
break;
case KVP_OP_GET:
- if (kvp_get_value(hv_msg->kvp_hdr.pool,
+ if (kvp_get_value(pool,
hv_msg->body.kvp_set.data.key,
hv_msg->body.kvp_set.data.key_size,
hv_msg->body.kvp_set.data.value,
hv_msg->body.kvp_set.data.value_size))
- strcpy(hv_msg->body.kvp_set.data.key, "");
+ hv_msg->error = HV_S_CONT;
break;
case KVP_OP_DELETE:
- if (kvp_key_delete(hv_msg->kvp_hdr.pool,
+ if (kvp_key_delete(pool,
hv_msg->body.kvp_delete.key,
hv_msg->body.kvp_delete.key_size))
- strcpy(hv_msg->body.kvp_delete.key, "");
+ hv_msg->error = HV_S_CONT;
break;
default:
break;
}
- if (hv_msg->kvp_hdr.operation != KVP_OP_ENUMERATE)
+ if (op != KVP_OP_ENUMERATE)
goto kvp_done;
/*
@@ -782,13 +1597,14 @@ int main(void)
* both the key and the value; if not read from the
* appropriate pool.
*/
- if (hv_msg->kvp_hdr.pool != KVP_POOL_AUTO) {
- kvp_pool_enumerate(hv_msg->kvp_hdr.pool,
+ if (pool != KVP_POOL_AUTO) {
+ if (kvp_pool_enumerate(pool,
hv_msg->body.kvp_enum_data.index,
hv_msg->body.kvp_enum_data.data.key,
HV_KVP_EXCHANGE_MAX_KEY_SIZE,
hv_msg->body.kvp_enum_data.data.value,
- HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ hv_msg->error = HV_S_CONT;
goto kvp_done;
}
@@ -807,13 +1623,13 @@ int main(void)
strcpy(key_value, lic_version);
break;
case NetworkAddressIPv4:
- kvp_get_ip_address(AF_INET, key_value,
- HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ kvp_get_ip_info(AF_INET, NULL, KVP_OP_ENUMERATE,
+ key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
strcpy(key_name, "NetworkAddressIPv4");
break;
case NetworkAddressIPv6:
- kvp_get_ip_address(AF_INET6, key_value,
- HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ kvp_get_ip_info(AF_INET6, NULL, KVP_OP_ENUMERATE,
+ key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
strcpy(key_name, "NetworkAddressIPv6");
break;
case OSBuildNumber:
@@ -841,11 +1657,7 @@ int main(void)
strcpy(key_name, "ProcessorArchitecture");
break;
default:
- strcpy(key_value, "Unknown Key");
- /*
- * We use a null key name to terminate enumeration.
- */
- strcpy(key_name, "");
+ hv_msg->error = HV_S_CONT;
break;
}
/*
@@ -863,7 +1675,7 @@ kvp_done:
len = netlink_send(fd, incoming_cn_msg);
if (len < 0) {
syslog(LOG_ERR, "net_link send failed; error:%d", len);
- exit(-1);
+ exit(EXIT_FAILURE);
}
}
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
new file mode 100755
index 00000000000..3e9427e08d8
--- /dev/null
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# This example script activates an interface based on the specified
+# configuration.
+#
+# In the interest of keeping the KVP daemon code free of distro specific
+# information; the kvp daemon code invokes this external script to configure
+# the interface.
+#
+# The only argument to this script is the configuration file that is to
+# be used to configure the interface.
+#
+# Each Distro is expected to implement this script in a distro specific
+# fashion. For instance on Distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for configuring the
+# interface.
+#
+# This example script is based on a RHEL environment.
+#
+# Here is the format of the ip configuration file:
+#
+# HWADDR=macaddr
+# IF_NAME=interface name
+# DHCP=yes (This is optional; if yes, DHCP is configured)
+#
+# IPADDR=ipaddr1
+# IPADDR_1=ipaddr2
+# IPADDR_x=ipaddry (where y = x + 1)
+#
+# NETMASK=netmask1
+# NETMASK_x=netmasky (where y = x + 1)
+#
+# GATEWAY=ipaddr1
+# GATEWAY_x=ipaddry (where y = x + 1)
+#
+# DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc)
+#
+# IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be
+# tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as
+# IPV6NETMASK.
+#
+# The host can specify multiple ipv4 and ipv6 addresses to be
+# configured for the interface. Furthermore, the configuration
+# needs to be persistent. A subsequent GET call on the interface
+# is expected to return the configuration that is set via the SET
+# call.
+#
+
+
+
+# Append the settings that this example applies to every interface.
+echo "IPV6INIT=yes" >> $1
+echo "NM_CONTROLLED=no" >> $1
+echo "PEERDNS=yes" >> $1
+echo "ONBOOT=yes" >> $1
+
+# If the configuration file mentions DHCP, select DHCP as the boot protocol.
+dhcp=$(grep "DHCP" $1 2>/dev/null)
+if [ "$dhcp" != "" ];
+then
+echo "BOOTPROTO=dhcp" >> $1;
+fi
+
+# Install the configuration file into the network-scripts directory.
+cp $1 /etc/sysconfig/network-scripts/
+
+
+# The interface name is the second '-'-separated field of the argument.
+interface=$(echo $1 | awk -F - '{ print $2 }')
+
+# Restart the interface; ifup must use the same variable as ifdown.
+/sbin/ifdown $interface 2>/dev/null
+/sbin/ifup $interface 2>/dev/null
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index f759f4f097c..fd2f9221b24 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type)
dev->feature_len = 0;
dev->num_vq = 0;
dev->running = false;
+ dev->next = NULL;
/*
* Append to device list. Prepending to a single-linked list is
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index f9126f89efe..247264502fb 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -74,7 +74,7 @@ ifeq ($(ARCH),x86_64)
override ARCH := x86
IS_X86_64 := 0
ifeq (, $(findstring m32,$(EXTRA_CFLAGS)))
- IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1)
+ IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
endif
ifeq (${IS_X86_64}, 1)
RAW_ARCH := x86_64
@@ -270,6 +270,7 @@ LIB_H += util/include/linux/magic.h
LIB_H += util/include/linux/poison.h
LIB_H += util/include/linux/prefetch.h
LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/rbtree_augmented.h
LIB_H += util/include/linux/string.h
LIB_H += util/include/linux/types.h
LIB_H += util/include/linux/linkage.h
@@ -881,7 +882,7 @@ $(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+ $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-redundant-decls $<
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 87f4ec6d1f3..a89cbbb6180 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -88,6 +88,12 @@ void get_term_dimensions(struct winsize *ws);
#define CPUINFO_PROC "Processor"
#endif
+#ifdef __aarch64__
+#include "../../arch/arm64/include/asm/unistd.h"
+#define rmb() asm volatile("dmb ld" ::: "memory")
+#define cpu_relax() asm volatile("yield" ::: "memory")
+#endif
+
#ifdef __mips__
#include "../../arch/mips/include/asm/unistd.h"
#define rmb() asm volatile( \
diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h
new file mode 100644
index 00000000000..9d6fcdf1788
--- /dev/null
+++ b/tools/perf/util/include/linux/rbtree_augmented.h
@@ -0,0 +1,2 @@
+#include <stdbool.h>
+#include "../../../../include/linux/rbtree_augmented.h"
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
new file mode 100644
index 00000000000..6b9cf7a987c
--- /dev/null
+++ b/tools/power/acpi/Makefile
@@ -0,0 +1,18 @@
+PROG= acpidump
+SRCS= acpidump.c
+KERNEL_INCLUDE := ../../../include
+CFLAGS += -Wall -Wstrict-prototypes -Wdeclaration-after-statement -Os -s -D_LINUX -DDEFINE_ALTERNATE_TYPES -I$(KERNEL_INCLUDE)
+
+all: acpidump
+$(PROG) : $(SRCS)
+ $(CC) $(CFLAGS) $(SRCS) -o $(PROG)
+
+CLEANFILES= $(PROG)
+
+clean :
+ rm -f $(CLEANFILES) $(patsubst %.c,%.o, $(SRCS)) *~
+
+install :
+ install acpidump /usr/bin/acpidump
+ install acpidump.8 /usr/share/man/man8
+
diff --git a/tools/power/acpi/acpidump.8 b/tools/power/acpi/acpidump.8
new file mode 100644
index 00000000000..adfa99166e5
--- /dev/null
+++ b/tools/power/acpi/acpidump.8
@@ -0,0 +1,59 @@
+.TH ACPIDUMP 8
+.SH NAME
+acpidump \- Dump system's ACPI tables to an ASCII file.
+.SH SYNOPSIS
+.ft B
+.B acpidump > acpidump.out
+.SH DESCRIPTION
+\fBacpidump \fP dumps the system's ACPI tables to an ASCII file
+appropriate for attaching to a bug report.
+
+Subsequently, they can be processed by utilities in the ACPICA package.
+.SS Options
+no options worth worrying about.
+.PP
+.SH EXAMPLE
+
+.nf
+# acpidump > acpidump.out
+
+$ acpixtract -a acpidump.out
+ Acpi table [DSDT] - 15974 bytes written to DSDT.dat
+ Acpi table [FACS] - 64 bytes written to FACS.dat
+ Acpi table [FACP] - 116 bytes written to FACP.dat
+ Acpi table [APIC] - 120 bytes written to APIC.dat
+ Acpi table [MCFG] - 60 bytes written to MCFG.dat
+ Acpi table [SSDT] - 444 bytes written to SSDT1.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT2.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT3.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT4.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT5.dat
+ Acpi table [RSDT] - 76 bytes written to RSDT.dat
+ Acpi table [RSDP] - 20 bytes written to RSDP.dat
+
+$ iasl -d *.dat
+...
+.fi
+creates *.dsl, a human readable form which can be edited
+and compiled using iasl.
+
+
+.SH NOTES
+
+.B "acpidump "
+must be run as root.
+
+.SH REFERENCES
+ACPICA: https://acpica.org/
+
+.SH FILES
+.ta
+.nf
+/dev/mem
+/sys/firmware/acpi/tables/dynamic/*
+.fi
+
+.PP
+.SH AUTHOR
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/acpi/acpidump.c b/tools/power/acpi/acpidump.c
new file mode 100644
index 00000000000..07779871421
--- /dev/null
+++ b/tools/power/acpi/acpidump.c
@@ -0,0 +1,560 @@
+/*
+ * (c) Alexey Starikovskiy, Intel, 2005-2006.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifdef DEFINE_ALTERNATE_TYPES
+/* hack to enable building old application with new headers -lenb */
+#define acpi_fadt_descriptor acpi_table_fadt
+#define acpi_rsdp_descriptor acpi_table_rsdp
+#define DSDT_SIG ACPI_SIG_DSDT
+#define FACS_SIG ACPI_SIG_FACS
+#define FADT_SIG ACPI_SIG_FADT
+#define xfirmware_ctrl Xfacs
+#define firmware_ctrl facs
+
+typedef int s32;
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef long long s64;
+#endif
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <acpi/acconfig.h>
+#include <acpi/platform/acenv.h>
+#include <acpi/actypes.h>
+#include <acpi/actbl.h>
+
+/*
+ * Return the 8-bit wrapping sum of the first @length bytes at @buffer.
+ * Callers treat a non-zero result as a checksum failure.
+ */
+static inline u8 checksum(u8 * buffer, u32 length)
+{
+	u8 total = 0;
+	u8 *end = buffer + length;
+	u8 *p;
+
+	for (p = buffer; p < end; p++)
+		total += *p;
+	return total;
+}
+
+static unsigned long psz, addr, length;
+static int print, connect, skip;
+static u8 select_sig[4];
+
+/*
+ * Scan /sys/firmware/efi/systab for a line of the form "ACPI20=0x<hex>".
+ *
+ * Returns the value parsed from the first such line, or 0 if the file
+ * cannot be opened or contains no matching line.
+ */
+static unsigned long read_efi_systab( void )
+{
+	char buffer[80];
+	unsigned long addr;
+	unsigned long found = 0;
+	FILE *f = fopen("/sys/firmware/efi/systab", "r");
+
+	if (!f)
+		return 0;
+	while (fgets(buffer, sizeof(buffer), f)) {
+		if (sscanf(buffer, "ACPI20=0x%lx", &addr) == 1) {
+			found = addr;
+			break;
+		}
+	}
+	/* Close the stream on every path, including after a match. */
+	fclose(f);
+	return found;
+}
+
+/*
+ * Map @length bytes starting at offset @where of /dev/mem, read-only.
+ * The mapping itself begins on a multiple of psz; the returned pointer
+ * is advanced so that it refers to @where.
+ *
+ * Exits the program if /dev/mem cannot be opened; returns 0 if mmap()
+ * fails.
+ */
+static u8 *acpi_map_memory(unsigned long where, unsigned length)
+{
+	unsigned long page_off;
+	u8 *base;
+	int memfd;
+
+	memfd = open("/dev/mem", O_RDONLY);
+	if (memfd < 0) {
+		fprintf(stderr, "acpi_os_map_memory: cannot open /dev/mem\n");
+		exit(1);
+	}
+	page_off = where % psz;
+	base = mmap(NULL, length + page_off, PROT_READ, MAP_PRIVATE,
+		    memfd, where - page_off);
+	/* The descriptor is not needed once mmap() has been attempted. */
+	close(memfd);
+	if (base == MAP_FAILED)
+		return 0;
+	return base + page_off;
+}
+
+/*
+ * Release a mapping made by acpi_map_memory(). @there is the pointer it
+ * returned and @length the length that was requested; the psz-aligned
+ * start of the mapping is recomputed from @there.
+ */
+static void acpi_unmap_memory(u8 * there, unsigned length)
+{
+	unsigned long page_off = (unsigned long)there % psz;
+	u8 *base = there - page_off;
+
+	munmap(base, length + page_off);
+}
+
+/*
+ * Map the ACPI table located at @where.
+ *
+ * Only the table header is mapped first, to check the signature and to
+ * learn the table length; the table is then mapped again at full length.
+ * If @sig is non-NULL, its first four bytes must equal the table
+ * signature.
+ *
+ * Returns the mapped table, or 0 if mapping fails or the signature does
+ * not match. Release the result with acpi_unmap_table().
+ */
+static struct acpi_table_header *acpi_map_table(unsigned long where, char *sig)
+{
+	unsigned size;
+	int sig_ok;
+	struct acpi_table_header *tbl = (struct acpi_table_header *)
+		acpi_map_memory(where, sizeof(struct acpi_table_header));
+
+	if (!tbl)
+		return 0;
+	sig_ok = !sig || !memcmp(sig, tbl->signature, 4);
+	size = tbl->length;
+	/*
+	 * Drop the header-only mapping on every path, so that a signature
+	 * mismatch does not leave it mapped.
+	 */
+	acpi_unmap_memory((u8 *) tbl, sizeof(struct acpi_table_header));
+	if (!sig_ok)
+		return 0;
+	return (struct acpi_table_header *)acpi_map_memory(where, size);
+}
+
+/*
+ * Release a table mapping, using the length stored in the table's own
+ * header as the mapping length.
+ */
+static void acpi_unmap_table(struct acpi_table_header *tbl)
+{
+	u8 *mapped = (u8 *)tbl;
+
+	acpi_unmap_memory(mapped, tbl->length);
+}
+
+/*
+ * Search @length bytes starting at @begin, in steps of
+ * ACPI_RSDP_SCAN_STEP, for an RSDP: the "RSD PTR " signature followed
+ * by a valid checksum. Revisions below 2 are checksummed over
+ * ACPI_RSDP_CHECKSUM_LENGTH bytes, later ones over
+ * ACPI_RSDP_XCHECKSUM_LENGTH bytes.
+ *
+ * Returns a pointer into the scanned region, or 0 if nothing matched.
+ */
+static struct acpi_rsdp_descriptor *acpi_scan_for_rsdp(u8 *begin, u32 length)
+{
+	u8 *pos;
+	u8 *limit = begin + length;
+
+	for (pos = begin; pos < limit; pos += ACPI_RSDP_SCAN_STEP) {
+		struct acpi_rsdp_descriptor *candidate;
+		u32 sum_len;
+
+		/* Both the signature and the checksum must be correct */
+		if (memcmp((char *)pos, "RSD PTR ", 8) != 0)
+			continue;
+		candidate = (struct acpi_rsdp_descriptor *)pos;
+		if (candidate->revision < 2)
+			sum_len = ACPI_RSDP_CHECKSUM_LENGTH;
+		else
+			sum_len = ACPI_RSDP_XCHECKSUM_LENGTH;
+		if (checksum((u8 *) candidate, sum_len) == 0)
+			return candidate;
+	}
+	/* The whole block was searched without finding an RSDP */
+	return 0;
+}
+
+/*
+ * Output data
+ *
+ * Write @size bytes starting at @data to @fd as a text hex dump, one
+ * row per 16 input bytes. Each row holds the offset of its first byte
+ * in hex, the bytes as two-digit hex values, padding when the last row
+ * is short, and the same bytes as characters with '.' standing in for
+ * non-printable ones.
+ */
+static void acpi_show_data(int fd, u8 * data, int size)
+{
+ char buffer[256];
+ int len;
+ int i, remain = size;
+ while (remain > 0) {
+ /* Row prefix: offset of this row from the start of the data */
+ len = snprintf(buffer, 256, " %04x:", size - remain);
+ /* Hex column: up to 16 bytes, fewer on the final row */
+ for (i = 0; i < 16 && i < remain; i++) {
+ len +=
+ snprintf(&buffer[len], 256 - len, " %02x", data[i]);
+ }
+ /* Pad a short final row so the character column stays aligned */
+ for (; i < 16; i++) {
+ len += snprintf(&buffer[len], 256 - len, " ");
+ }
+ len += snprintf(&buffer[len], 256 - len, " ");
+ /* Character column: printable bytes as-is, everything else as '.' */
+ for (i = 0; i < 16 && i < remain; i++) {
+ buffer[len++] = (isprint(data[i])) ? data[i] : '.';
+ }
+ buffer[len++] = '\n';
+ write(fd, buffer, len);
+ data += 16;
+ remain -= 16;
+ }
+}
+
+/*
+ * Output ACPI table
+ *
+ * Write a "<signature> @ <addr>" heading line to @fd, then the hex dump
+ * of the whole table (table->length bytes), then one empty line.
+ */
+static void acpi_show_table(int fd, struct acpi_table_header *table, unsigned long addr)
+{
+	char line[80];
+	int n;
+
+	n = snprintf(line, 80, "%.4s @ %p\n", table->signature, (void *)addr);
+	write(fd, line, n);
+	acpi_show_data(fd, (u8 *) table, table->length);
+	line[0] = '\n';
+	write(fd, line, 1);
+}
+
+/*
+ * Emit one table to @fd, honouring the global output options.
+ *
+ * With no signature selected (select_sig[0] == 0) every table is
+ * emitted. Otherwise only tables whose signature equals select_sig are
+ * considered: the first 'skip' matches are dropped, the next one is
+ * emitted, and every later call is ignored.
+ *
+ * The table is written as a hex dump when 'print' is set and as raw
+ * bytes otherwise.
+ */
+static void write_table(int fd, struct acpi_table_header *tbl, unsigned long addr)
+{
+	static int select_done = 0;
+
+	if (select_sig[0]) {
+		if (select_done || memcmp(select_sig, tbl->signature, 4))
+			return;
+		if (skip > 0) {
+			--skip;
+			return;
+		}
+		select_done = 1;
+	}
+	if (print)
+		acpi_show_table(fd, tbl, addr);
+	else
+		write(fd, tbl, tbl->length);
+}
+
+/*
+ * Dump the FADT in @tbl together with the DSDT and FACS it points to.
+ *
+ * The FADT is first copied into the local 'x', truncated to
+ * sizeof(struct acpi_fadt_descriptor). The DSDT and FACS are written
+ * first, then the local FADT copy is written with @xaddr as its address.
+ * In connect mode the DSDT/FACS address fields of the copy are replaced
+ * by the current offset of @fd before the corresponding table is
+ * written.
+ */
+static void acpi_dump_FADT(int fd, struct acpi_table_header *tbl, unsigned long xaddr) {
+ struct acpi_fadt_descriptor x;
+ unsigned long addr;
+ size_t len = sizeof(struct acpi_fadt_descriptor);
+ /* Never copy more than the table actually holds */
+ if (len > tbl->length) len = tbl->length;
+ memcpy(&x, tbl, len);
+ x.header.length = len;
+ if (checksum((u8 *)tbl, len)) {
+ fprintf(stderr, "Wrong checksum for FADT!\n");
+ }
+ /* Use Xdsdt when the table is long enough to hold it and it is set, else dsdt */
+ if (x.header.length >= 148 && x.Xdsdt) {
+ addr = (unsigned long)x.Xdsdt;
+ if (connect) {
+ x.Xdsdt = lseek(fd, 0, SEEK_CUR);
+ }
+ } else if (x.header.length >= 44 && x.dsdt) {
+ addr = (unsigned long)x.dsdt;
+ if (connect) {
+ x.dsdt = lseek(fd, 0, SEEK_CUR);
+ }
+ } else {
+ fprintf(stderr, "No DSDT in FADT!\n");
+ goto no_dsdt;
+ }
+ /* From here on tbl refers to the DSDT, not to the FADT passed in */
+ tbl = acpi_map_table(addr, DSDT_SIG);
+ if (!tbl) goto no_dsdt;
+ if (checksum((u8 *)tbl, tbl->length))
+ fprintf(stderr, "Wrong checksum for DSDT!\n");
+ write_table(fd, tbl, addr);
+ acpi_unmap_table(tbl);
+no_dsdt:
+ /* Same selection for the FACS: the X field first, then the other one */
+ if (x.header.length >= 140 && x.xfirmware_ctrl) {
+ addr = (unsigned long)x.xfirmware_ctrl;
+ if (connect) {
+ x.xfirmware_ctrl = lseek(fd, 0, SEEK_CUR);
+ }
+ } else if (x.header.length >= 40 && x.firmware_ctrl) {
+ addr = (unsigned long)x.firmware_ctrl;
+ if (connect) {
+ x.firmware_ctrl = lseek(fd, 0, SEEK_CUR);
+ }
+ } else {
+ fprintf(stderr, "No FACS in FADT!\n");
+ goto no_facs;
+ }
+ tbl = acpi_map_table(addr, FACS_SIG);
+ if (!tbl) goto no_facs;
+ /* do not checksum FACS */
+ write_table(fd, tbl, addr);
+ acpi_unmap_table(tbl);
+no_facs:
+ /* Finally emit the local FADT copy, including any fields rewritten above */
+ write_table(fd, (struct acpi_table_header *)&x, xaddr);
+}
+
+/*
+ * Dump every table referenced by the XSDT or RSDT that @rsdp points to,
+ * then the XSDT/RSDT itself.
+ *
+ * The XSDT is used when the RSDP revision is above 1 and it can be
+ * mapped; otherwise the RSDT is used. The FADT is handled by
+ * acpi_dump_FADT(); every other table is written with write_table().
+ * In connect mode each entry of the local XSDT/RSDT copy, and the
+ * matching address field in @rsdp, is replaced by the offset of @fd
+ * taken with lseek() at that point.
+ *
+ * Returns 1 on success. Returns 0 if neither table could be mapped, if
+ * the mapped table is shorter than a table header, or if memory for
+ * the local copy could not be allocated.
+ */
+static int acpi_dump_SDT(int fd, struct acpi_rsdp_descriptor *rsdp)
+{
+	struct acpi_table_header *sdt, *tbl = 0;
+	int xsdt = 1, i, num;
+	char *offset;
+	unsigned long addr;
+
+	if (rsdp->revision > 1 && rsdp->xsdt_physical_address) {
+		tbl = acpi_map_table(rsdp->xsdt_physical_address, "XSDT");
+	}
+	if (!tbl && rsdp->rsdt_physical_address) {
+		xsdt = 0;
+		tbl = acpi_map_table(rsdp->rsdt_physical_address, "RSDT");
+	}
+	if (!tbl) return 0;
+	/*
+	 * A length below the header size would make the entry count
+	 * computed below wrap around.
+	 */
+	if (tbl->length < sizeof(struct acpi_table_header)) {
+		fprintf(stderr, "Invalid length for %s!\n", (xsdt)?"XSDT":"RSDT");
+		acpi_unmap_table(tbl);
+		return 0;
+	}
+	sdt = malloc(tbl->length);
+	if (!sdt) {
+		fprintf(stderr, "Out of memory copying %s!\n", (xsdt)?"XSDT":"RSDT");
+		acpi_unmap_table(tbl);
+		return 0;
+	}
+	/* Work on a private copy so entries can be rewritten in connect mode */
+	memcpy(sdt, tbl, tbl->length);
+	acpi_unmap_table(tbl);
+	if (checksum((u8 *)sdt, sdt->length))
+		fprintf(stderr, "Wrong checksum for %s!\n", (xsdt)?"XSDT":"RSDT");
+	num = (sdt->length - sizeof(struct acpi_table_header))/((xsdt)?sizeof(u64):sizeof(u32));
+	offset = (char *)sdt + sizeof(struct acpi_table_header);
+	for (i = 0; i < num; ++i, offset += ((xsdt) ? sizeof(u64) : sizeof(u32))) {
+		addr = (xsdt) ? (unsigned long)(*(u64 *)offset):
+				(unsigned long)(*(u32 *)offset);
+		if (!addr) continue;
+		tbl = acpi_map_table(addr, 0);
+		if (!tbl) continue;
+		if (!memcmp(tbl->signature, FADT_SIG, 4)) {
+			acpi_dump_FADT(fd, tbl, addr);
+		} else {
+			if (checksum((u8 *)tbl, tbl->length))
+				fprintf(stderr, "Wrong checksum for generic table!\n");
+			write_table(fd, tbl, addr);
+		}
+		acpi_unmap_table(tbl);
+		if (connect) {
+			if (xsdt)
+				(*(u64*)offset) = lseek(fd, 0, SEEK_CUR);
+			else
+				(*(u32*)offset) = lseek(fd, 0, SEEK_CUR);
+		}
+	}
+	if (xsdt) {
+		addr = (unsigned long)rsdp->xsdt_physical_address;
+		if (connect) {
+			rsdp->xsdt_physical_address = lseek(fd, 0, SEEK_CUR);
+		}
+	} else {
+		addr = (unsigned long)rsdp->rsdt_physical_address;
+		if (connect) {
+			rsdp->rsdt_physical_address = lseek(fd, 0, SEEK_CUR);
+		}
+	}
+	write_table(fd, sdt, addr);
+	free (sdt);
+	return 1;
+}
+
+#define DYNAMIC_SSDT "/sys/firmware/acpi/tables/dynamic"
+
+/*
+ * Dump every table found as a file under DYNAMIC_SSDT.
+ *
+ * Each file whose name does not start with '.' is read completely,
+ * using the length field of its table header as the file size, and
+ * passed to write_table() with address 0. Files that cannot be opened,
+ * are shorter than their header claims, or cannot be buffered are
+ * skipped. Nothing is done if the directory is missing or cannot be
+ * opened.
+ */
+static void acpi_dump_dynamic_SSDT(int fd)
+{
+	struct stat file_stat;
+	char filename[256], *ptr;
+	DIR *tabledir;
+	struct dirent *entry;
+	FILE *fp;
+	size_t count, length;
+	int n;
+	struct acpi_table_header table_header, *ptable;
+
+	if (stat(DYNAMIC_SSDT, &file_stat) == -1) {
+		/* The directory doesn't exist */
+		return;
+	}
+	tabledir = opendir(DYNAMIC_SSDT);
+	if(!tabledir){
+		/*can't open the directory */
+		return;
+	}
+
+	while ((entry = readdir(tabledir)) != 0){
+		/* skip the file of . /.. */
+		if (entry->d_name[0] == '.')
+			continue;
+
+		/* Bounded formatting: d_name alone can nearly fill filename[] */
+		n = snprintf(filename, sizeof(filename), "%s/%s",
+			     DYNAMIC_SSDT, entry->d_name);
+		if (n < 0 || (size_t)n >= sizeof(filename)) {
+			fprintf(stderr, "File name too long: %s/%s\n",
+				DYNAMIC_SSDT, entry->d_name);
+			continue;
+		}
+		fp = fopen(filename, "r");
+		if (fp == NULL) {
+			fprintf(stderr, "Can't open the file of %s\n",
+				filename);
+			continue;
+		}
+		/* Read the Table header to parse the table length */
+		count = fread(&table_header, 1, sizeof(struct acpi_table_header), fp);
+		if (count < sizeof(table_header)) {
+			/* the length is less than ACPI table header. skip it */
+			fclose(fp);
+			continue;
+		}
+		length = table_header.length;
+		if (length < sizeof(table_header)) {
+			/* the header claims less than a header's worth of data */
+			fclose(fp);
+			continue;
+		}
+		ptr = malloc(length);
+		if (ptr == NULL) {
+			fprintf(stderr, "Out of memory reading %s\n", filename);
+			fclose(fp);
+			continue;
+		}
+		if (fseek(fp, 0, SEEK_SET) != 0) {
+			fclose(fp);
+			free(ptr);
+			continue;
+		}
+		/*
+		 * Read exactly 'length' bytes so the buffer is never
+		 * overrun, and stop on a short read instead of looping.
+		 */
+		count = fread(ptr, 1, length, fp);
+		fclose(fp);
+		ptable = (struct acpi_table_header *) ptr;
+		if (count != length || ptable->length != length) {
+			fprintf(stderr, "Can't read the whole table from %s\n",
+				filename);
+			free(ptr);
+			continue;
+		}
+		if (checksum((u8 *) ptable, ptable->length))
+			fprintf(stderr, "Wrong checksum "
+				"for dynamic SSDT table!\n");
+		write_table(fd, ptable, 0);
+		free(ptr);
+	}
+	closedir(tabledir);
+	return;
+}
+
+/*
+ * Print the command line help to standard output and exit with
+ * status 0. @progname is shown as the program name in the synopsis.
+ */
+static void usage(const char *progname)
+{
+	puts("Usage:");
+	printf("%s [--addr 0x1234][--table DSDT][--output filename]"
+		"[--binary][--length 0x456][--help]\n", progname);
+	puts("\t--addr 0x1234 or -a 0x1234 -- look for tables at this physical address");
+	puts("\t--table DSDT or -t DSDT -- only dump table with DSDT signature");
+	/* Output goes to stdout unless --output is given */
+	puts("\t--output filename or -o filename -- redirect output from stdout to filename");
+	puts("\t--binary or -b -- dump data in binary form rather than in hex-dump format");
+	puts("\t--length 0x456 or -l 0x456 -- works only with --addr, dump physical memory"
+		"\n\t\tregion without trying to understand it's contents");
+	puts("\t--skip 2 or -s 2 -- skip 2 tables of the given name and output only 3rd one");
+	puts("\t--help or -h -- this help message");
+	exit(0);
+}
+
+/*
+ * Long options for getopt_long(). Every entry has a zero flag and a
+ * zero val, so getopt_long() returns 0 for all of them and main()
+ * tells them apart by option_index. The order of the entries therefore
+ * has to match the case numbers of the switch (option_index) in main().
+ */
+static struct option long_options[] = {
+ {"addr", 1, 0, 0},
+ {"table", 1, 0, 0},
+ {"output", 1, 0, 0},
+ {"binary", 0, 0, 0},
+ {"length", 1, 0, 0},
+ {"skip", 1, 0, 0},
+ {"help", 0, 0, 0},
+ {0, 0, 0, 0}
+};
+/*
+ * Entry point.
+ *
+ * After option parsing there are two modes:
+ *  - with both an address and a length, that memory region is written
+ *    to the output unmodified;
+ *  - otherwise the RSDP is located (at the given address, at the
+ *    address from read_efi_systab(), or by scanning the
+ *    ACPI_HI_RSDP_WINDOW_BASE window), the tables reachable from it
+ *    are dumped, then the RSDP itself, then the dynamic SSDTs.
+ *
+ * Binary output without a table selection enables connect mode, in
+ * which the RSDP is written at offset 0 of the output after the tables
+ * have been written behind it.
+ *
+ * Returns 0 on success and 1 if no ACPI tables were found. If the
+ * output file cannot be created, the negative creat() result is
+ * returned.
+ */
+int main(int argc, char **argv)
+{
+	int option_index, c, fd;
+	u8 *raw;
+	struct acpi_rsdp_descriptor rsdpx, *x = 0;
+	char *filename = 0;
+	char buff[80];
+	memset(select_sig, 0, 4);
+	print = 1;
+	connect = 0;
+	addr = length = 0;
+	skip = 0;
+	while (1) {
+		option_index = 0;
+		c = getopt_long(argc, argv, "a:t:o:bl:s:h",
+				long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			/* Long option: option_index is its position in long_options */
+			switch (option_index) {
+			case 0:
+				addr = strtoul(optarg, (char **)NULL, 16);
+				break;
+			case 1:
+				memcpy(select_sig, optarg, 4);
+				break;
+			case 2:
+				filename = optarg;
+				break;
+			case 3:
+				print = 0;
+				break;
+			case 4:
+				length = strtoul(optarg, (char **)NULL, 16);
+				break;
+			case 5:
+				skip = strtoul(optarg, (char **)NULL, 10);
+				break;
+			case 6:
+				usage(argv[0]);
+				exit(0);
+			}
+			break;
+		case 'a':
+			addr = strtoul(optarg, (char **)NULL, 16);
+			break;
+		case 't':
+			memcpy(select_sig, optarg, 4);
+			break;
+		case 'o':
+			filename = optarg;
+			break;
+		case 'b':
+			print = 0;
+			break;
+		case 'l':
+			length = strtoul(optarg, (char **)NULL, 16);
+			break;
+		case 's':
+			skip = strtoul(optarg, (char **)NULL, 10);
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			printf("Unknown option!\n");
+			usage(argv[0]);
+			exit(0);
+		}
+	}
+
+	fd = STDOUT_FILENO;
+	if (filename) {
+		fd = creat(filename, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
+		if (fd < 0)
+			return fd;
+	}
+
+	/* Binary dump of all tables: link them together by file offset */
+	if (!select_sig[0] && !print) {
+		connect = 1;
+	}
+
+	psz = sysconf(_SC_PAGESIZE);
+	if (length && addr) {
+		/* We know length and address, it means we just want a memory dump */
+		if (!(raw = acpi_map_memory(addr, length)))
+			goto not_found;
+		write(fd, raw, length);
+		acpi_unmap_memory(raw, length);
+		close(fd);
+		return 0;
+	}
+
+	length = sizeof(struct acpi_rsdp_descriptor);
+	if (!addr) {
+		addr = read_efi_systab();
+		if (!addr) {
+			addr = ACPI_HI_RSDP_WINDOW_BASE;
+			length = ACPI_HI_RSDP_WINDOW_SIZE;
+		}
+	}
+
+	if (!(raw = acpi_map_memory(addr, length)) ||
+	    !(x = acpi_scan_for_rsdp(raw, length)))
+		goto not_found;
+
+	/* Find RSDP and print all found tables */
+	memcpy(&rsdpx, x, sizeof(struct acpi_rsdp_descriptor));
+	acpi_unmap_memory(raw, length);
+	if (connect) {
+		/* Leave room at the start of the output for the RSDP */
+		lseek(fd, sizeof(struct acpi_rsdp_descriptor), SEEK_SET);
+	}
+	if (!acpi_dump_SDT(fd, &rsdpx))
+		goto not_found;
+	if (connect) {
+		lseek(fd, 0, SEEK_SET);
+		/*
+		 * Write the local copy: 'x' points into the mapping that
+		 * was released above, and rsdpx is the copy whose RSDT/XSDT
+		 * address acpi_dump_SDT() replaced with a file offset.
+		 */
+		write(fd, &rsdpx, (rsdpx.revision < 2) ?
+			ACPI_RSDP_CHECKSUM_LENGTH : ACPI_RSDP_XCHECKSUM_LENGTH);
+	} else if (!select_sig[0] || !memcmp("RSD PTR ", select_sig, 4)) {
+		/* Only pointer values are used here; 'x' is not dereferenced */
+		addr += (long)x - (long)raw;
+		length = snprintf(buff, 80, "RSD PTR @ %p\n", (void *)addr);
+		write(fd, buff, length);
+		acpi_show_data(fd, (u8 *) & rsdpx, (rsdpx.revision < 2) ?
+				ACPI_RSDP_CHECKSUM_LENGTH : ACPI_RSDP_XCHECKSUM_LENGTH);
+		buff[0] = '\n';
+		write(fd, buff, 1);
+	}
+	acpi_dump_dynamic_SSDT(fd);
+	close(fd);
+	return 0;
+not_found:
+	close(fd);
+	fprintf(stderr, "ACPI tables were not found. If you know location "
+		"of RSD PTR table (from dmesg, etc), "
+		"supply it with either --addr or -a option\n");
+	return 1;
+}
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index a93e06cfcc2..cf397bd26d0 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -111,7 +111,7 @@ GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo;
export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS
# check if compiler option is supported
-cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
# use '-Os' optimization if available, else use -O2
OPTIMIZATION := $(call cc-supports,-Os,-O2)
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 74e44507dfe..e4d0690cccf 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -4,15 +4,11 @@ turbostat \- Report processor frequency and idle statistics
.SH SYNOPSIS
.ft B
.B turbostat
-.RB [ "\-s" ]
-.RB [ "\-v" ]
-.RB [ "\-M MSR#" ]
+.RB [ Options ]
.RB command
.br
.B turbostat
-.RB [ "\-s" ]
-.RB [ "\-v" ]
-.RB [ "\-M MSR#" ]
+.RB [ Options ]
.RB [ "\-i interval_sec" ]
.SH DESCRIPTION
\fBturbostat \fP reports processor topology, frequency
@@ -27,16 +23,23 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs.
on processors that additionally support C-state residency counters.
.SS Options
-The \fB-s\fP option limits output to a 1-line system summary for each interval.
+The \fB-p\fP option limits output to the 1st thread in 1st core of each package.
.PP
-The \fB-c\fP option limits output to the 1st thread in each core.
+The \fB-P\fP option limits output to the 1st thread in each Package.
.PP
-The \fB-p\fP option limits output to the 1st thread in each package.
+The \fB-S\fP option limits output to a 1-line System Summary for each interval.
.PP
The \fB-v\fP option increases verbosity.
.PP
-The \fB-M MSR#\fP option dumps the specified MSR,
-in addition to the usual frequency and idle statistics.
+The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34"
+.PP
+The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter.
+.PP
+The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter.
+.PP
+The \fB-m MSR#\fP option includes the specified 32-bit MSR value.
+.PP
+The \fB-M MSR#\fP option includes the specified 64-bit MSR value.
.PP
The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds.
The default is 5 seconds.
@@ -150,6 +153,29 @@ Note that turbostat reports average GHz of 3.63, while
the arithmetic average of the GHz column above is lower.
This is a weighted average, where the weight is %c0. ie. it is the total number of
un-halted cycles elapsed per time divided by the number of CPUs.
+.SH SMI COUNTING EXAMPLE
+On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
+Using the -m option, you can display how many SMIs have fired since reset, or if there
+are SMIs during the measurement interval, you can display the delta using the -c option.
+.nf
+[root@x980 ~]# turbostat -m 0x34
+cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6
+ 1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55
+ 0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55
+ 0 6 0.14 1.63 3.38 0x00000056 5.30
+ 1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52
+ 1 8 0.10 1.65 3.38 0x00000056 18.05
+ 2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50
+ 2 10 0.10 1.63 3.38 0x00000056 6.93
+ 8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16
+ 8 7 0.08 1.64 3.38 0x00000056 5.12
+ 9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38
+ 9 9 0.09 1.68 3.38 0x00000056 9.32
+ 10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23
+ 10 11 1.72 1.65 3.38 0x00000056 15.05
+^C
+[root@x980 ~]#
+.fi
.SH NOTES
.B "turbostat "
@@ -165,6 +191,13 @@ may work poorly on Linux-2.6.20 through 2.6.29,
as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF
in those kernels.
+If the TSC column does not make sense, then
+the other numbers will also make no sense.
+Turbostat is lightweight, and its data collection is not atomic.
+These issues are usually caused by an extremely short measurement
+interval (much less than 1 second), or system activity that prevents
+turbostat from being able to run on all CPUS to quickly collect data.
+
The APERF, MPERF MSRs are defined to count non-halted cycles.
Although it is not guaranteed by the architecture, turbostat assumes
that they count at TSC rate, which is true on all processors tested to date.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 861d7719020..2655ae9a3ad 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -35,9 +35,9 @@
#include <ctype.h>
#include <sched.h>
-#define MSR_TSC 0x10
#define MSR_NEHALEM_PLATFORM_INFO 0xCE
#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD
+#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE
#define MSR_APERF 0xE8
#define MSR_MPERF 0xE7
#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */
@@ -62,7 +62,11 @@ unsigned int genuine_intel;
unsigned int has_invariant_tsc;
unsigned int do_nehalem_platform_info;
unsigned int do_nehalem_turbo_ratio_limit;
-unsigned int extra_msr_offset;
+unsigned int do_ivt_turbo_ratio_limit;
+unsigned int extra_msr_offset32;
+unsigned int extra_msr_offset64;
+unsigned int extra_delta_offset32;
+unsigned int extra_delta_offset64;
double bclk;
unsigned int show_pkg;
unsigned int show_core;
@@ -83,7 +87,10 @@ struct thread_data {
unsigned long long aperf;
unsigned long long mperf;
unsigned long long c1; /* derived */
- unsigned long long extra_msr;
+ unsigned long long extra_msr64;
+ unsigned long long extra_delta64;
+ unsigned long long extra_msr32;
+ unsigned long long extra_delta32;
unsigned int cpu_id;
unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
@@ -222,6 +229,14 @@ void print_header(void)
if (has_aperf)
outp += sprintf(outp, " GHz");
outp += sprintf(outp, " TSC");
+ if (extra_delta_offset32)
+ outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
+ if (extra_delta_offset64)
+ outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64);
+ if (extra_msr_offset32)
+ outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
+ if (extra_msr_offset64)
+ outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
if (do_nhm_cstates)
outp += sprintf(outp, " %%c1");
if (do_nhm_cstates)
@@ -238,8 +253,6 @@ void print_header(void)
outp += sprintf(outp, " %%pc6");
if (do_snb_cstates)
outp += sprintf(outp, " %%pc7");
- if (extra_msr_offset)
- outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset);
outp += sprintf(outp, "\n");
}
@@ -255,8 +268,14 @@ int dump_counters(struct thread_data *t, struct core_data *c,
fprintf(stderr, "aperf: %016llX\n", t->aperf);
fprintf(stderr, "mperf: %016llX\n", t->mperf);
fprintf(stderr, "c1: %016llX\n", t->c1);
+ fprintf(stderr, "msr0x%x: %08llX\n",
+ extra_delta_offset32, t->extra_delta32);
fprintf(stderr, "msr0x%x: %016llX\n",
- extra_msr_offset, t->extra_msr);
+ extra_delta_offset64, t->extra_delta64);
+ fprintf(stderr, "msr0x%x: %08llX\n",
+ extra_msr_offset32, t->extra_msr32);
+ fprintf(stderr, "msr0x%x: %016llX\n",
+ extra_msr_offset64, t->extra_msr64);
}
if (c) {
@@ -360,6 +379,21 @@ int format_counters(struct thread_data *t, struct core_data *c,
/* TSC */
outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
+ /* delta */
+ if (extra_delta_offset32)
+ outp += sprintf(outp, " %11llu", t->extra_delta32);
+
+ /* DELTA */
+ if (extra_delta_offset64)
+ outp += sprintf(outp, " %11llu", t->extra_delta64);
+ /* msr */
+ if (extra_msr_offset32)
+ outp += sprintf(outp, " 0x%08llx", t->extra_msr32);
+
+ /* MSR */
+ if (extra_msr_offset64)
+ outp += sprintf(outp, " 0x%016llx", t->extra_msr64);
+
if (do_nhm_cstates) {
if (!skip_c1)
outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);
@@ -391,8 +425,6 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
done:
- if (extra_msr_offset)
- outp += sprintf(outp, " 0x%016llx", t->extra_msr);
outp += sprintf(outp, "\n");
return 0;
@@ -502,10 +534,16 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->mperf = 1; /* divide by 0 protection */
}
+ old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
+ old->extra_delta32 &= 0xFFFFFFFF;
+
+ old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
+
/*
- * for "extra msr", just copy the latest w/o subtracting
+ * Extra MSR is just a snapshot, simply copy latest w/o subtracting
*/
- old->extra_msr = new->extra_msr;
+ old->extra_msr32 = new->extra_msr32;
+ old->extra_msr64 = new->extra_msr64;
}
int delta_cpu(struct thread_data *t, struct core_data *c,
@@ -533,6 +571,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->mperf = 0;
t->c1 = 0;
+ t->extra_delta32 = 0;
+ t->extra_delta64 = 0;
+
/* tells format_counters to dump all fields from this set */
t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
@@ -553,6 +594,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.threads.mperf += t->mperf;
average.threads.c1 += t->c1;
+ average.threads.extra_delta32 += t->extra_delta32;
+ average.threads.extra_delta64 += t->extra_delta64;
+
/* sum per-core values only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
@@ -588,6 +632,11 @@ void compute_average(struct thread_data *t, struct core_data *c,
average.threads.mperf /= topo.num_cpus;
average.threads.c1 /= topo.num_cpus;
+ average.threads.extra_delta32 /= topo.num_cpus;
+ average.threads.extra_delta32 &= 0xFFFFFFFF;
+
+ average.threads.extra_delta64 /= topo.num_cpus;
+
average.cores.c3 /= topo.num_cores;
average.cores.c6 /= topo.num_cores;
average.cores.c7 /= topo.num_cores;
@@ -629,8 +678,24 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -4;
}
- if (extra_msr_offset)
- if (get_msr(cpu, extra_msr_offset, &t->extra_msr))
+ if (extra_delta_offset32) {
+ if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32))
+ return -5;
+ t->extra_delta32 &= 0xFFFFFFFF;
+ }
+
+ if (extra_delta_offset64)
+ if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
+ return -5;
+
+ if (extra_msr_offset32) {
+ if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32))
+ return -5;
+ t->extra_msr32 &= 0xFFFFFFFF;
+ }
+
+ if (extra_msr_offset64)
+ if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
return -5;
/* collect core counters only for 1st thread in core */
@@ -677,6 +742,9 @@ void print_verbose_header(void)
get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr);
+ if (verbose > 1)
+ fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
+
ratio = (msr >> 40) & 0xFF;
fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
ratio, bclk, ratio * bclk);
@@ -685,14 +753,84 @@ void print_verbose_header(void)
fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
ratio, bclk, ratio * bclk);
+ if (!do_ivt_turbo_ratio_limit)
+ goto print_nhm_turbo_ratio_limits;
+
+ get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
+
if (verbose > 1)
- fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
+ fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr);
+
+ ratio = (msr >> 56) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 48) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 40) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 32) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 24) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+print_nhm_turbo_ratio_limits:
if (!do_nehalem_turbo_ratio_limit)
return;
get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr);
+ if (verbose > 1)
+ fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr);
+
+ ratio = (msr >> 56) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 48) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 40) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 32) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
+ ratio, bclk, ratio * bclk);
+
ratio = (msr >> 24) & 0xFF;
if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
@@ -712,7 +850,6 @@ void print_verbose_header(void)
if (ratio)
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
ratio, bclk, ratio * bclk);
-
}
void free_all_buffers(void)
@@ -1038,7 +1175,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
case 0x2A: /* SNB */
case 0x2D: /* SNB Xeon */
case 0x3A: /* IVB */
- case 0x3D: /* IVB Xeon */
+ case 0x3E: /* IVB Xeon */
return 1;
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
@@ -1046,6 +1183,22 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
return 0;
}
}
+/*
+ * Returns 1 when the CPU is a GenuineIntel family 6 model 0x3E part,
+ * 0 for everything else. The result is stored in do_ivt_turbo_ratio_limit
+ * by check_cpuid().
+ */
+int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel || family != 6)
+		return 0;
+
+	return (model == 0x3E) ? 1 : 0;	/* IVB Xeon */
+}
+
int is_snb(unsigned int family, unsigned int model)
{
@@ -1056,7 +1209,7 @@ int is_snb(unsigned int family, unsigned int model)
case 0x2A:
case 0x2D:
case 0x3A: /* IVB */
- case 0x3D: /* IVB Xeon */
+ case 0x3E: /* IVB Xeon */
return 1;
}
return 0;
@@ -1145,12 +1298,13 @@ void check_cpuid()
bclk = discover_bclk(family, model);
do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
+ do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
}
void usage()
{
- fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
+ fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
progname);
exit(1);
}
@@ -1440,15 +1594,15 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) {
+ while ((opt = getopt(argc, argv, "+pPSvisc:sC:m:M:")) != -1) {
switch (opt) {
- case 'c':
+ case 'p':
show_core_only++;
break;
- case 'p':
+ case 'P':
show_pkg_only++;
break;
- case 's':
+ case 'S':
summary_only++;
break;
case 'v':
@@ -1457,10 +1611,20 @@ void cmdline(int argc, char **argv)
case 'i':
interval_sec = atoi(optarg);
break;
+ case 'c':
+ sscanf(optarg, "%x", &extra_delta_offset32);
+ break;
+ case 's':
+ extra_delta_offset32 = 0x34; /* SMI counter */
+ break;
+ case 'C':
+ sscanf(optarg, "%x", &extra_delta_offset64);
+ break;
+ case 'm':
+ sscanf(optarg, "%x", &extra_msr_offset32);
+ break;
case 'M':
- sscanf(optarg, "%x", &extra_msr_offset);
- if (verbose > 1)
- fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
+ sscanf(optarg, "%x", &extra_msr_offset64);
break;
default:
usage();
@@ -1473,7 +1637,7 @@ int main(int argc, char **argv)
cmdline(argc, argv);
if (verbose > 1)
- fprintf(stderr, "turbostat v2.0 May 16, 2012"
+ fprintf(stderr, "turbostat v2.1 October 6, 2012"
" - Len Brown <lenb@kernel.org>\n");
turbostat_init();
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index c05bcd293d8..b51d787176d 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1873,10 +1873,10 @@ sub make_oldconfig {
apply_min_config;
}
- if (!run_command "$make oldnoconfig") {
- # Perhaps oldnoconfig doesn't exist in this version of the kernel
+ if (!run_command "$make olddefconfig") {
+ # Perhaps olddefconfig doesn't exist in this version of the kernel
# try a yes '' | oldconfig
- doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+ doprint "olddefconfig failed, trying yes '' | make oldconfig\n";
run_command "yes '' | $make oldconfig" or
dodie "failed make config oldconfig";
}
@@ -1929,7 +1929,7 @@ sub build {
# old config can ask questions
if ($type eq "oldconfig") {
- $type = "oldnoconfig";
+ $type = "olddefconfig";
# allow for empty configs
run_command "touch $output_config";
@@ -1959,7 +1959,7 @@ sub build {
load_force_config($minconfig);
}
- if ($type ne "oldnoconfig") {
+ if ($type ne "olddefconfig") {
run_command "$make $type" or
dodie "failed make config";
}
@@ -2458,8 +2458,7 @@ my %config_set;
# config_off holds the set of configs that the bad config had disabled.
# We need to record them and set them in the .config when running
-# oldnoconfig, because oldnoconfig does not turn off new symbols, but
-# instead just keeps the defaults.
+# olddefconfig, because olddefconfig keeps the defaults.
my %config_off;
# config_off_tmp holds a set of configs to turn off for now
@@ -3250,7 +3249,7 @@ sub test_this_config {
}
# Remove this config from the list of configs
- # do a make oldnoconfig and then read the resulting
+ # do a make olddefconfig and then read the resulting
# .config to make sure it is missing the config that
# we had before
my %configs = %min_configs;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 85baf11e2ac..43480149119 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,4 +1,4 @@
-TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug
+TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll
all:
for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile
new file mode 100644
index 00000000000..19806ed62f5
--- /dev/null
+++ b/tools/testing/selftests/epoll/Makefile
@@ -0,0 +1,11 @@
+# Makefile for epoll selftests
+
+all: test_epoll
+%: %.c
+ gcc -pthread -g -o $@ $^
+
+run_tests: all
+ ./test_epoll
+
+clean:
+ $(RM) test_epoll
diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c
new file mode 100644
index 00000000000..e0fcff1e833
--- /dev/null
+++ b/tools/testing/selftests/epoll/test_epoll.c
@@ -0,0 +1,344 @@
+/*
+ * tools/testing/selftests/epoll/test_epoll.c
+ *
+ * Copyright 2012 Adobe Systems Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Paton J. Lewis <palewis@adobe.com>
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+
+/*
+ * A pointer to an epoll_item_private structure will be stored in the epoll
+ * item's event structure so that we can get access to the epoll_item_private
+ * data after calling epoll_wait:
+ */
+struct epoll_item_private {
+ int index; /* Position of this struct within the epoll_items array. */
+ int fd; /* Socket registered with the epoll set and read by the read threads. */
+ uint32_t events; /* Event mask used when adding and when re-enabling the item. */
+ pthread_mutex_t mutex; /* Guards the following variables... */
+ int stop; /* Non-zero once main has asked for this item to be cancelled. */
+ int status; /* Stores any error encountered while handling item. */
+ /* The following variable allows us to test whether we have encountered
+ a problem while attempting to cancel and delete the associated
+ event. When the test program exits, 'deleted' should be exactly
+ one. If it is greater than one, then the failed test reflects a real
+ world situation where we would have tried to access the epoll item's
+ private data after deleting it: */
+ int deleted;
+};
+
+struct epoll_item_private *epoll_items;
+
+/*
+ * Delete the specified item from the epoll set. In a real-world scenario this
+ * is where we would free the associated data structure, but in this testing
+ * environment we retain the structure so that we can test for double-deletion:
+ * the only effect here is an atomic increment of the item's 'deleted' count.
+ */
+void delete_item(int index)
+{
+ __sync_fetch_and_add(&epoll_items[index].deleted, 1);
+}
+
+/*
+ * A pointer to a read_thread_data structure will be passed as the argument to
+ * each read thread:
+ */
+struct read_thread_data {
+ int stop; /* Set non-zero by main to make the thread return. */
+ int status; /* Indicates any error encountered by the read thread. */
+ int epoll_set; /* epoll descriptor the thread waits on and re-arms items in. */
+};
+
+/*
+ * Body of every read thread. function_data points to the thread's own
+ * struct read_thread_data. The thread waits for one epoll event at a time;
+ * for each event it either records the item's deletion (when the item's
+ * 'stop' flag is set) or drains the socket and re-enables the one-shot item.
+ * Errors are reported through the 'status' fields; the return value is
+ * always a null pointer.
+ */
+void *read_thread_function(void *function_data)
+{
+	struct read_thread_data *thread_data =
+		(struct read_thread_data *)function_data;
+	struct epoll_item_private *item_data;
+	struct epoll_event event_data;
+	char socket_data;
+	int n_events;
+
+	/* Keep handling events until an error occurs or this thread's 'stop'
+	   flag is observed: */
+	for (;;) {
+		n_events = epoll_wait(thread_data->epoll_set,
+				      &event_data,
+				      1,	/* Number of desired events */
+				      1000);	/* Timeout in ms */
+		if (n_events < 0) {
+			/* Breakpoints signal all threads. Ignore that while
+			   debugging: */
+			if (errno == EINTR)
+				continue;
+			thread_data->status = errno;
+			return NULL;
+		}
+		if (thread_data->stop)
+			return NULL;
+		if (n_events == 0)	/* Timeout */
+			continue;
+
+		/* Checking the stop condition and re-enabling the epoll item
+		   must happen together as one atomic operation when
+		   EPOLL_CTL_DISABLE is available, hence the mutex: */
+		item_data = (struct epoll_item_private *)event_data.data.ptr;
+		pthread_mutex_lock(&item_data->mutex);
+
+		/* A stopped item is deleted instead of being serviced: */
+		if (item_data->stop) {
+			delete_item(item_data->index);
+			pthread_mutex_unlock(&item_data->mutex);
+			continue;
+		}
+
+		/* Consume what was written to the other end of our
+		   non-blocking socket (repeatedly for edge-triggered items): */
+		do {
+			if (read(item_data->fd, &socket_data, 1) < 1) {
+				if ((errno != EAGAIN) &&
+				    (errno != EWOULDBLOCK))
+					goto error_unlock;
+				break;
+			}
+		} while (item_data->events & EPOLLET);
+
+		/* The item was one-shot, so re-enable it: */
+		event_data.events = item_data->events;
+		if (epoll_ctl(thread_data->epoll_set,
+			      EPOLL_CTL_MOD,
+			      item_data->fd,
+			      &event_data) < 0)
+			goto error_unlock;
+
+		pthread_mutex_unlock(&item_data->mutex);
+	}
+
+error_unlock:
+	thread_data->status = item_data->status = errno;
+	pthread_mutex_unlock(&item_data->mutex);
+	return NULL;
+}
+
+/*
+ * A pointer to a write_thread_data structure will be passed as the argument to
+ * the write thread:
+ */
+struct write_thread_data {
+ int stop; /* Set non-zero by main to make the write thread finish. */
+ int status; /* Indicates any error encountered by the write thread. */
+ int n_fds; /* Number of entries in fds. */
+ int *fds; /* Sockets the write thread writes to, one byte at a time. */
+};
+
+/*
+ * The function executed by the write thread. It writes a single byte to each
+ * socket in turn until the stop condition for this thread is set. If writing to
+ * a socket would block (i.e. errno was EAGAIN or EWOULDBLOCK), we leave that
+ * socket alone for the moment and just move on to the next socket in the list.
+ * We don't care about the order in which we deliver events to the epoll set. In
+ * fact we don't care about the data we're writing to the sockets at all; we
+ * just want to trigger epoll events.
+ *
+ * function_data points to the thread's struct write_thread_data. Any other
+ * write error is stored in that structure's 'status' field and ends the
+ * thread. The return value is always NULL.
+ */
+void *write_thread_function(void *function_data)
+{
+	const char data = 'X';
+	int index;
+	struct write_thread_data *thread_data =
+		(struct write_thread_data *)function_data;
+
+	/* Use the local pointer throughout: 'write_thread_data' is only the
+	   struct tag here and cannot be dereferenced. */
+	while (!thread_data->stop)
+		for (index = 0;
+		     !thread_data->stop && (index < thread_data->n_fds);
+		     ++index)
+			if ((write(thread_data->fds[index], &data, 1) < 1) &&
+			    (errno != EAGAIN) &&
+			    (errno != EWOULDBLOCK)) {
+				thread_data->status = errno;
+				return NULL;
+			}
+
+	return NULL;
+}
+
+/*
+ * Test driver. Creates the socket pairs and epoll items, starts the read
+ * threads and the write thread, cancels every epoll item, shuts the threads
+ * down and finally checks that each item was deleted exactly once.
+ *
+ * Arguments are currently ignored.
+ *
+ * Prints "[PASS]" and returns 0 on success. On failure prints "[FAIL]" and
+ * returns a non-zero value: errno when it is set, EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+	const int n_read_threads = 100;
+	const int n_epoll_items = 500;
+	int index;
+	int saved_errno;
+	int epoll_set = epoll_create1(0);
+	struct write_thread_data write_thread_data = {
+		0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int))
+	};
+	struct read_thread_data *read_thread_data =
+		malloc(n_read_threads * sizeof(struct read_thread_data));
+	pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t));
+	pthread_t write_thread;
+
+	printf("-----------------\n");
+	printf("Runing test_epoll\n");
+	printf("-----------------\n");
+
+	epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private));
+
+	if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 ||
+	    read_thread_data == 0 || read_threads == 0)
+		goto error;
+
+	if (sysconf(_SC_NPROCESSORS_ONLN) < 2) {
+		printf("Error: please run this test on a multi-core system.\n");
+		goto error;
+	}
+
+	/* Create the socket pairs and epoll items: */
+	for (index = 0; index < n_epoll_items; ++index) {
+		int socket_pair[2];
+		struct epoll_event event_data;
+		if (socketpair(AF_UNIX,
+			       SOCK_STREAM | SOCK_NONBLOCK,
+			       0,
+			       socket_pair) < 0)
+			goto error;
+		write_thread_data.fds[index] = socket_pair[0];
+		epoll_items[index].index = index;
+		epoll_items[index].fd = socket_pair[1];
+		if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0)
+			goto error;
+		/* We always use EPOLLONESHOT because this test is currently
+		   structured to demonstrate the need for EPOLL_CTL_DISABLE,
+		   which only produces useful information in the EPOLLONESHOT
+		   case (without EPOLLONESHOT, calling epoll_ctl with
+		   EPOLL_CTL_DISABLE will never return EBUSY). If support for
+		   testing events without EPOLLONESHOT is desired, it should
+		   probably be implemented in a separate unit test. */
+		epoll_items[index].events = EPOLLIN | EPOLLONESHOT;
+		if (index < n_epoll_items / 2)
+			epoll_items[index].events |= EPOLLET;
+		epoll_items[index].stop = 0;
+		epoll_items[index].status = 0;
+		epoll_items[index].deleted = 0;
+		event_data.events = epoll_items[index].events;
+		event_data.data.ptr = &epoll_items[index];
+		if (epoll_ctl(epoll_set,
+			      EPOLL_CTL_ADD,
+			      epoll_items[index].fd,
+			      &event_data) < 0)
+			goto error;
+	}
+
+	/* Create and start the read threads: */
+	for (index = 0; index < n_read_threads; ++index) {
+		read_thread_data[index].stop = 0;
+		read_thread_data[index].status = 0;
+		read_thread_data[index].epoll_set = epoll_set;
+		if (pthread_create(&read_threads[index],
+				   NULL,
+				   read_thread_function,
+				   &read_thread_data[index]) != 0)
+			goto error;
+	}
+
+	if (pthread_create(&write_thread,
+			   NULL,
+			   write_thread_function,
+			   &write_thread_data) != 0)
+		goto error;
+
+	/* Cancel all event pollers: */
+#ifdef EPOLL_CTL_DISABLE
+	for (index = 0; index < n_epoll_items; ++index) {
+		pthread_mutex_lock(&epoll_items[index].mutex);
+		++epoll_items[index].stop;
+		if (epoll_ctl(epoll_set,
+			      EPOLL_CTL_DISABLE,
+			      epoll_items[index].fd,
+			      NULL) == 0)
+			delete_item(index);
+		else if (errno != EBUSY) {
+			pthread_mutex_unlock(&epoll_items[index].mutex);
+			goto error;
+		}
+		/* EBUSY means events were being handled; allow the other thread
+		   to delete the item. */
+		pthread_mutex_unlock(&epoll_items[index].mutex);
+	}
+#else
+	for (index = 0; index < n_epoll_items; ++index) {
+		pthread_mutex_lock(&epoll_items[index].mutex);
+		++epoll_items[index].stop;
+		pthread_mutex_unlock(&epoll_items[index].mutex);
+		/* Wait in case a thread running read_thread_function is
+		   currently executing code between epoll_wait and
+		   pthread_mutex_lock with this item. Note that a longer delay
+		   would make double-deletion less likely (at the expense of
+		   performance), but there is no guarantee that any delay would
+		   ever be sufficient. Note also that we delete all event
+		   pollers at once for testing purposes, but in a real-world
+		   environment we are likely to want to be able to cancel event
+		   pollers at arbitrary times. Therefore we can't improve this
+		   situation by just splitting this loop into two loops
+		   (i.e. signal 'stop' for all items, sleep, and then delete all
+		   items). We also can't fix the problem via EPOLL_CTL_DEL
+		   because that command can't prevent the case where some other
+		   thread is executing read_thread_function within the region
+		   mentioned above: */
+		usleep(1);
+		pthread_mutex_lock(&epoll_items[index].mutex);
+		if (!epoll_items[index].deleted)
+			delete_item(index);
+		pthread_mutex_unlock(&epoll_items[index].mutex);
+	}
+#endif
+
+	/* Shut down the read threads: */
+	for (index = 0; index < n_read_threads; ++index)
+		__sync_fetch_and_add(&read_thread_data[index].stop, 1);
+	for (index = 0; index < n_read_threads; ++index) {
+		if (pthread_join(read_threads[index], NULL) != 0)
+			goto error;
+		if (read_thread_data[index].status)
+			goto error;
+	}
+
+	/* Shut down the write thread: */
+	__sync_fetch_and_add(&write_thread_data.stop, 1);
+	if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status)
+		goto error;
+
+	/* Check for final error conditions: */
+	for (index = 0; index < n_epoll_items; ++index) {
+		if (epoll_items[index].status != 0)
+			goto error;
+		/* pthread functions return a positive error number on failure,
+		   so test for non-zero rather than negative: */
+		if (pthread_mutex_destroy(&epoll_items[index].mutex) != 0)
+			goto error;
+	}
+	for (index = 0; index < n_epoll_items; ++index)
+		if (epoll_items[index].deleted != 1) {
+			printf("Error: item data deleted %1d times.\n",
+				epoll_items[index].deleted);
+			goto error;
+		}
+
+	printf("[PASS]\n");
+	return 0;
+
+ error:
+	/* Several failure paths above (pthread_* calls, the status and
+	   'deleted' checks, the CPU-count check) need not set errno, so make
+	   sure a failure never exits with status 0. errno is saved first
+	   because printf may change it: */
+	saved_errno = errno;
+	printf("[FAIL]\n");
+	return saved_errno ? saved_errno : EXIT_FAILURE;
+}
diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c
index b0adb2710c0..68d0734b208 100644
--- a/tools/usb/testusb.c
+++ b/tools/usb/testusb.c
@@ -253,9 +253,6 @@ static int find_testdev(const char *name, const struct stat *sb, int flag)
if (flag != FTW_F)
return 0;
- /* ignore /proc/bus/usb/{devices,drivers} */
- if (strrchr(name, '/')[1] == 'd')
- return 0;
fd = fopen(name, "rb");
if (!fd) {
@@ -356,28 +353,8 @@ restart:
static const char *usbfs_dir_find(void)
{
- static char usbfs_path_0[] = "/dev/usb/devices";
- static char usbfs_path_1[] = "/proc/bus/usb/devices";
static char udev_usb_path[] = "/dev/bus/usb";
- static char *const usbfs_paths[] = {
- usbfs_path_0, usbfs_path_1
- };
-
- static char *const *
- end = usbfs_paths + sizeof usbfs_paths / sizeof *usbfs_paths;
-
- char *const *it = usbfs_paths;
- do {
- int fd = open(*it, O_RDONLY);
- close(fd);
- if (fd >= 0) {
- strrchr(*it, '/')[0] = '\0';
- return *it;
- }
- } while (++it != end);
-
- /* real device-nodes managed by udev */
if (access(udev_usb_path, F_OK) == 0)
return udev_usb_path;
@@ -489,7 +466,7 @@ usage:
goto usage;
if (!all && !device) {
fprintf (stderr, "must specify '-a' or '-D dev', "
- "or DEVICE=/proc/bus/usb/BBB/DDD in env\n");
+ "or DEVICE=/dev/bus/usb/BBB/DDD in env\n");
goto usage;
}
diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile
new file mode 100644
index 00000000000..0d238163347
--- /dev/null
+++ b/tools/virtio/virtio-trace/Makefile
@@ -0,0 +1,13 @@
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+ $(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ rm -f *.o trace-agent
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
new file mode 100644
index 00000000000..b64845b823a
--- /dev/null
+++ b/tools/virtio/virtio-trace/README
@@ -0,0 +1,118 @@
+Trace Agent for virtio-trace
+============================
+
+Trace agent is a user tool for sending trace data of a guest to a Host in low
+overhead. Trace agent has the following functions:
+ - splice a page of ring-buffer to read_pipe without memory copying
+ - splice the page from write_pipe to virtio-console without memory copying
+ - write trace data to stdout by using -o option
+ - controlled by start/stop orders from a Host
+
+The trace agent operates as follows:
+ 1) Initialize all structures.
+ 2) Create a read/write thread per CPU. Each thread is bound to a CPU.
+ The read/write threads hold it.
+ 3) A controller thread does poll() for a start order of a host.
+ 4) After the controller of the trace agent receives a start order from a host,
+ the controller wakes the read/write threads.
+ 5) The read/write threads start to read trace data from ring-buffers and
+ write the data to virtio-serial.
+ 6) If the controller receives a stop order from a host, the read/write threads
+ stop to read trace data.
+
+
+Files
+=====
+
+README: this file
+Makefile: Makefile of trace agent for virtio-trace
+trace-agent.c: includes main function, sets up for operating trace agent
+trace-agent.h: includes all structures and some macros
+trace-agent-ctl.c: includes controller function for read/write threads
+trace-agent-rw.c: includes read/write threads function
+
+
+Setup
+=====
+
+To use this trace agent for virtio-trace, we need to prepare some virtio-serial
+I/Fs.
+
+1) Make FIFO in a host
+ virtio-trace uses one virtio-serial pipe per CPU as a trace data path, plus
+one more as a control path, so FIFOs (named pipes) should be created as follows:
+ # mkdir /tmp/virtio-trace/
+ # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
+ # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
+
+For example, if a guest uses three CPUs, the names are
+ trace-path-cpu{0,1,2}.{in,out}
+and
+ agent-ctl-path.{in,out}.
+
+2) Set up of virtio-serial pipe in a host
+ Add qemu option to use virtio-serial pipe.
+
+ ##virtio-serial device##
+ -device virtio-serial-pci,id=virtio-serial0\
+ ##control path##
+ -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
+ -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
+ id=channel0,name=agent-ctl-path\
+ ##data path##
+ -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
+ -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
+ id=channel1,name=trace-path-cpu0\
+ ...
+
+If you manage guests with libvirt, add the following tags to domain XML files.
+Then, libvirt passes the same command option to qemu.
+
+ <channel type='pipe'>
+ <source path='/tmp/virtio-trace/agent-ctl-path'/>
+ <target type='virtio' name='agent-ctl-path'/>
+ <address type='virtio-serial' controller='0' bus='0' port='0'/>
+ </channel>
+ <channel type='pipe'>
+ <source path='/tmp/virtio-trace/trace-path-cpu0'/>
+ <target type='virtio' name='trace-path-cpu0'/>
+ <address type='virtio-serial' controller='0' bus='0' port='1'/>
+ </channel>
+ ...
+Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For
+example, if a guest uses three CPUs, chardev names should be trace-path-cpu0,
+trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
+
+3) Boot the guest
+ You can find some chardev in /dev/virtio-ports/ in the guest.
+
+
+Run
+===
+
+0) Build trace agent in a guest
+ $ make
+
+1) Enable ftrace in the guest
+ <Example>
+ # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+2) Run trace agent in the guest
+ This agent must be operated as root.
+ # ./trace-agent
+read/write threads in the agent wait for start order from host. If you add -o
+option, trace data are output via stdout in the guest.
+
+3) Open FIFO in a host
+ # cat /tmp/virtio-trace/trace-path-cpu0.out
+If a host does not open these, trace data get stuck in buffers of virtio. Then,
+the guest will stop by specification of chardev in QEMU. This blocking mode may
+be solved in the future.
+
+4) Start to read trace data by ordering from a host
+ A host injects read start order to the guest via virtio-serial.
+ # echo 1 > /tmp/virtio-trace/agent-ctl-path.in
+
+5) Stop to read trace data by ordering from a host
+ A host injects read stop order to the guest via virtio-serial.
+ # echo 0 > /tmp/virtio-trace/agent-ctl-path.in
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c
new file mode 100644
index 00000000000..a2d0403c4f9
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -0,0 +1,137 @@
+/*
+ * Controller of read/write threads for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define HOST_MSG_SIZE 256
+#define EVENT_WAIT_MSEC 100
+
+static volatile sig_atomic_t global_signal_val;
+bool global_sig_receive; /* default false */
+bool global_run_operation; /* default false*/
+
+/*
+ * Handle SIGTERM/SIGINT/SIGQUIT to exit: the handler only records the signal
+ * number in global_signal_val, which wait_order() checks after each poll().
+ */
+static void signal_handler(int sig)
+{
+ global_signal_val = sig;
+}
+
+/*
+ * Open the control path read-only and return its file descriptor.
+ * If the open fails, an error is logged and the process exits with
+ * EXIT_FAILURE.
+ */
+int rw_ctl_init(const char *ctl_path)
+{
+	int fd = open(ctl_path, O_RDONLY);
+
+	if (fd != -1)
+		return fd;
+
+	pr_err("Cannot open ctl_fd\n");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Poll ctl_fd for input in EVENT_WAIT_MSEC slices until something arrives
+ * or a signal has been recorded by signal_handler().
+ *
+ * Returns the positive poll() result when ctl_fd is ready, or -1 once a
+ * signal has been seen (global_sig_receive is then set and sleeping
+ * rw-threads are woken). Returns the last poll() result, initially 0, if
+ * global_sig_receive is found set by the loop condition. A poll() failure
+ * is logged and terminates the process.
+ */
+static int wait_order(int ctl_fd)
+{
+	struct pollfd ctl_poll;
+	int n_ready = 0;
+
+	ctl_poll.fd = ctl_fd;
+	ctl_poll.events = POLLIN;
+
+	while (!global_sig_receive) {
+		n_ready = poll(&ctl_poll, 1, EVENT_WAIT_MSEC);
+
+		/* A pending signal takes priority over the poll() result */
+		if (global_signal_val) {
+			global_sig_receive = true;
+			pr_info("Receive interrupt %d\n", global_signal_val);
+
+			/* Wakes rw-threads when they are sleeping */
+			if (!global_run_operation)
+				pthread_cond_broadcast(&cond_wakeup);
+
+			return -1;
+		}
+
+		if (n_ready < 0) {
+			pr_err("Polling error\n");
+			exit(EXIT_FAILURE);
+		}
+
+		if (n_ready > 0)
+			break;
+	}
+
+	return n_ready;
+}
+
+/*
+ * control read/write threads by handling global_run_operation
+ *
+ * Installs the SIGTERM/SIGINT/SIGQUIT handlers, then repeatedly waits for a
+ * message on ctl_fd. A two-byte message starting with '1' sets
+ * global_run_operation and wakes the read/write threads; one starting with
+ * '0' clears global_run_operation. Anything else is reported as invalid.
+ * Returns NULL once a signal has been received; a read error terminates the
+ * process.
+ */
+void *rw_ctl_loop(int ctl_fd)
+{
+	ssize_t rlen;
+	char buf[HOST_MSG_SIZE];
+	int ret;
+
+	/* Setup signal handlers */
+	signal(SIGTERM, signal_handler);
+	signal(SIGINT, signal_handler);
+	signal(SIGQUIT, signal_handler);
+
+	while (!global_sig_receive) {
+
+		ret = wait_order(ctl_fd);
+		if (ret < 0)
+			break;
+
+		/*
+		 * Keep one byte free for a terminator: buf is printed with
+		 * %s below and read() does not NUL-terminate.
+		 */
+		rlen = read(ctl_fd, buf, sizeof(buf) - 1);
+		if (rlen < 0) {
+			pr_err("read data error in ctl thread\n");
+			goto error;
+		}
+		buf[rlen] = '\0';
+
+		if (rlen == 2 && buf[0] == '1') {
+			/*
+			 * If host writes '1' to a control path,
+			 * this controller wakes all read/write threads.
+			 */
+			global_run_operation = true;
+			pthread_cond_broadcast(&cond_wakeup);
+			pr_debug("Wake up all read/write threads\n");
+		} else if (rlen == 2 && buf[0] == '0') {
+			/*
+			 * If host writes '0' to a control path, read/write
+			 * threads will wait for notification from Host.
+			 */
+			global_run_operation = false;
+			pr_debug("Stop all read/write threads\n");
+		} else
+			pr_info("Invalid host notification: %s\n", buf);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c
new file mode 100644
index 00000000000..3aace5ea484
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-rw.c
@@ -0,0 +1,192 @@
+/*
+ * Read/write thread of a guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "trace-agent.h"
+
+#define READ_WAIT_USEC 100000 /* usleep() time before retrying a read that returned no data */
+
+/*
+ * Allocate the bookkeeping structure of one read/write thread.
+ *
+ * The cpu number and every fd start out as -1 and the pipe size as
+ * PIPE_INIT.  Returns the new structure; an allocation failure terminates
+ * the process.
+ */
+void *rw_thread_info_new(void)
+{
+	struct rw_thread_info *info = zalloc(sizeof(*info));
+
+	if (!info) {
+		pr_err("rw_thread_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Nothing is opened or assigned yet */
+	info->in_fd = info->out_fd = -1;
+	info->read_pipe = info->write_pipe = -1;
+	info->cpu_num = -1;
+	info->pipe_size = PIPE_INIT;
+
+	return info;
+}
+
+/*
+ * Open everything one read/write thread needs.
+ *
+ * @cpu: cpu number served by the thread
+ * @in_path: path the trace data is read from
+ * @out_path: path the trace data is written to (unused in stdout mode)
+ * @stdout_flag: true to write to stdout instead of out_path
+ * @pipe_size: size requested for the intermediate pipe
+ * @rw_ti: structure receiving the opened fds
+ *
+ * Always returns NULL; any failure terminates the process.
+ */
+void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+				bool stdout_flag, unsigned long pipe_size,
+				struct rw_thread_info *rw_ti)
+{
+	int fds[2];
+
+	rw_ti->cpu_num = cpu;
+
+	/* Source of the trace data */
+	rw_ti->in_fd = open(in_path, O_RDONLY);
+	if (rw_ti->in_fd == -1) {
+		pr_err("Could not open in_fd (CPU:%d)\n", cpu);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Destination: stdout, or the virtio-serial port at out_path */
+	if (stdout_flag) {
+		rw_ti->out_fd = STDOUT_FILENO;
+	} else {
+		rw_ti->out_fd = open(out_path, O_WRONLY);
+		if (rw_ti->out_fd == -1) {
+			pr_err("Could not open out_fd (CPU:%d)\n", cpu);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	if (pipe2(fds, O_NONBLOCK) < 0) {
+		pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * The default pipe size is 64kB (see fs/pipe.c); it is changed so
+	 * that trace data can be moved quickly.
+	 */
+	if (fcntl(fds[0], F_SETPIPE_SZ, pipe_size) < 0) {
+		pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * read_pipe is where data read from in_fd is put (the pipe's write
+	 * end); write_pipe is where data for out_fd is taken from.
+	 */
+	rw_ti->write_pipe = fds[0];
+	rw_ti->read_pipe = fds[1];
+	rw_ti->pipe_size = pipe_size;
+
+	return NULL;
+}
+
+/* Pin the calling thread to cpu_num; a failure is reported but not fatal */
+static void bind_cpu(int cpu_num)
+{
+	cpu_set_t cpus;
+	int err;
+
+	CPU_ZERO(&cpus);
+	CPU_SET(cpu_num, &cpus);
+
+	/* Zero as the first argument selects the calling thread */
+	err = sched_setaffinity(0, sizeof(cpus), &cpus);
+	if (err == -1)
+		pr_err("Could not set CPU#%d affinity\n", cpu_num);
+}
+
+/*
+ * Body of one per-cpu read/write thread.
+ *
+ * @thread_info: the struct rw_thread_info of this thread
+ *
+ * After binding itself to ts->cpu_num the thread repeatedly splices trace
+ * data from in_fd into the pipe and from the pipe to out_fd.  It sleeps on
+ * cond_wakeup while global_run_operation is false and returns NULL once
+ * global_sig_receive is set.  A splice() failure terminates the process.
+ */
+static void *rw_thread_main(void *thread_info)
+{
+	ssize_t rlen, wlen;
+	ssize_t ret;
+	struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+	bind_cpu(ts->cpu_num);
+
+	while (1) {
+		/*
+		 * Wait for a read order of trace data by Host OS.
+		 *
+		 * The wait sits in a predicate loop: a spurious wakeup goes
+		 * back to sleep, and a thread arriving here after the
+		 * shutdown broadcast was already sent does not block (it
+		 * would never be woken again and could not be joined).
+		 */
+		if (!global_run_operation) {
+			pthread_mutex_lock(&mutex_notify);
+			while (!global_run_operation && !global_sig_receive)
+				pthread_cond_wait(&cond_wakeup, &mutex_notify);
+			pthread_mutex_unlock(&mutex_notify);
+		}
+
+		if (global_sig_receive)
+			break;
+
+		/*
+		 * Each thread read trace_pipe_raw of each cpu bounding the
+		 * thread, so contention of multi-threads does not occur.
+		 */
+		rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+				ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+		if (rlen < 0) {
+			pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+			exit(EXIT_FAILURE);
+		} else if (rlen == 0) {
+			/*
+			 * If no trace data exists, or less than a page of
+			 * it is available, splice_read returns 0.  Then this
+			 * waits for the ring-buffer to be filled.
+			 */
+			usleep(READ_WAIT_USEC);
+			pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+			continue;
+		}
+
+		/* Drain exactly the rlen bytes that were just queued */
+		wlen = 0;
+
+		do {
+			ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+					rlen - wlen,
+					SPLICE_F_MOVE | SPLICE_F_MORE);
+
+			if (ret < 0) {
+				pr_err("Splice_write in rw-thread(%d)\n",
+								ts->cpu_num);
+				exit(EXIT_FAILURE);
+			} else if (ret == 0) {
+				/*
+				 * When host reader is not in time for reading
+				 * trace data, guest will be stopped. This is
+				 * because char dev in QEMU is not supported
+				 * non-blocking mode. Then, writer might be
+				 * sleep in that case.
+				 * This sleep will be removed by supporting
+				 * non-blocking mode.
+				 */
+				sleep(1);
+			}
+			wlen += ret;
+		} while (wlen < rlen);
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Start the read/write thread described by rw_ti.
+ *
+ * Returns the id of the new thread; a pthread_create() failure terminates
+ * the process.
+ */
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+	pthread_t tid;
+
+	if (pthread_create(&tid, NULL, rw_thread_main, rw_ti) != 0) {
+		pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+		exit(EXIT_FAILURE);
+	}
+
+	return tid;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 00000000000..0a0a7dd4eff
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,270 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) /* expands to a sysconf() call, so it is queried at each use */
+#define PIPE_DEF_BUFS 16
+#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) /* smallest pipe size parse_size() accepts */
+#define PIPE_MAX_SIZE (1024*1024) /* largest pipe size parse_size() accepts */
+#define READ_PATH_FMT \
+ "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" /* per-cpu input path, %d is the cpu number */
+#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" /* per-cpu output path, %d is the cpu number */
+#define CTL_PATH "/dev/virtio-ports/agent-ctl-path" /* control path handed to rw_ctl_init() */
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER; /* held by rw threads around pthread_cond_wait() */
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER; /* broadcast to wake sleeping rw threads */
+
+/*
+ * Return the number of configured cpus.
+ *
+ * The process is terminated when the number cannot be read or exceeds
+ * MAX_CPUS.
+ */
+static int get_total_cpus(void)
+{
+	int total = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+	if (total <= 0) {
+		pr_err("Could not read cpus\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (total > MAX_CPUS) {
+		pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+		exit(EXIT_FAILURE);
+	}
+
+	return total;
+}
+
+/*
+ * Allocate the agent state with its defaults and one rw_thread_info per
+ * configured cpu.  An allocation failure terminates the process.
+ */
+static void *agent_info_new(void)
+{
+	struct agent_info *info = zalloc(sizeof(*info));
+	int cpu;
+
+	if (!info) {
+		pr_err("agent_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	info->pipe_size = PIPE_INIT;
+	info->use_stdout = false;
+	info->cpus = get_total_cpus();
+	info->ctl_fd = -1;
+
+	/* One read/write thread descriptor per cpu */
+	for (cpu = 0; cpu < info->cpus; cpu++)
+		info->rw_ti[cpu] = rw_thread_info_new();
+
+	return info;
+}
+
+/*
+ * Parse the argument of the -s option.
+ *
+ * @arg: decimal number, optionally followed by K/k or M/m
+ *
+ * Returns the size rounded down to a multiple of the page size, or 0 (after
+ * printing a message) when the size lies outside
+ * [PIPE_MIN_SIZE, PIPE_MAX_SIZE].
+ */
+static unsigned long parse_size(const char *arg)
+{
+	unsigned long value, round;
+	unsigned int shift = 0;
+	char *ptr;
+
+	value = strtoul(arg, &ptr, 10);
+	switch (*ptr) {
+	case 'K': case 'k':
+		shift = 10;
+		break;
+	case 'M': case 'm':
+		shift = 20;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Compare against the limit scaled down instead of scaling the value
+	 * up first: shifting a huge value left wraps around and could land
+	 * inside the accepted range.
+	 */
+	if (value > ((unsigned long)PIPE_MAX_SIZE >> shift)) {
+		pr_err("Pipe size must be less than 1MB\n");
+		goto error;
+	}
+	value <<= shift;
+
+	if (value < PIPE_MIN_SIZE) {
+		pr_err("Pipe size must be over 64KB\n");
+		goto error;
+	}
+
+	/* Align buffer size with page unit */
+	round = value & (PAGE_SIZE - 1);
+	value = value - round;
+
+	return value;
+error:
+	return 0;
+}
+
+/* Print the command line synopsis of program prg to stderr */
+static void usage(char const *prg)
+{
+	pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
+
+/*
+ * Build the per-cpu input or output path.
+ *
+ * @cpu_num: cpu number inserted into the path
+ * @this_is_write_path: true for the output path (WRITE_PATH_FMT), false for
+ *                      the input path (READ_PATH_FMT)
+ *
+ * Returns a PATH_MAX sized buffer owned by the caller, or NULL when the
+ * buffer cannot be allocated or the path cannot be generated.
+ */
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+	int ret;
+	char *buf;
+
+	buf = zalloc(PATH_MAX);
+	if (buf == NULL) {
+		pr_err("Could not allocate buffer\n");
+		goto error;
+	}
+
+	if (this_is_write_path)
+		/* write(output) path */
+		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+	else
+		/* read(input) path */
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+	/* A result of PATH_MAX or more means the path was truncated */
+	if (ret <= 0 || ret >= PATH_MAX) {
+		pr_err("Failed to generate %s path(CPU#%d):%d\n",
+			this_is_write_path ? "write" : "read", cpu_num, ret);
+		goto error;
+	}
+
+	return buf;
+
+error:
+	free(buf);
+	return NULL;
+}
+
+/* Build the read(input) path of cpu_num; NULL on failure */
+static const char *make_input_path(int cpu_num)
+{
+	return make_path(cpu_num, false);
+}
+
+/* Build the write(output) path of cpu_num; NULL on failure */
+static const char *make_output_path(int cpu_num)
+{
+	return make_path(cpu_num, true);
+}
+
+/*
+ * Open the per-cpu trace paths and the control path.
+ *
+ * @s: agent state whose cpus, use_stdout and pipe_size are already set
+ *
+ * Always returns NULL; a path that cannot be generated terminates the
+ * process.
+ */
+static void *agent_info_init(struct agent_info *s)
+{
+	int cpu;
+
+	/* init read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		const char *in_path;
+		const char *out_path = NULL;
+
+		/* set read(input) path per read/write thread */
+		in_path = make_input_path(cpu);
+		if (in_path == NULL)
+			goto error;
+
+		/* set write(output) path per read/write thread*/
+		if (!s->use_stdout) {
+			out_path = make_output_path(cpu);
+			if (out_path == NULL)
+				goto error;
+		} else
+			/* stdout mode */
+			pr_debug("stdout mode\n");
+
+		rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+				s->pipe_size, s->rw_ti[cpu]);
+
+		/*
+		 * rw_thread_init() only open()s the paths and keeps no
+		 * reference to them, so the buffers are released here.
+		 */
+		free((void *)in_path);
+		free((void *)out_path);
+	}
+
+	/* init controller of read/write threads */
+	s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Apply the command line options to s, then initialise the agent.
+ *
+ * -o selects stdout mode and -s <size> sets the pipe size.  -h, an unknown
+ * option or a rejected size terminates the process.  Always returns NULL.
+ */
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "hos:")) != -1) {
+		if (opt == 'o') {
+			/* stdout mode */
+			s->use_stdout = true;
+		} else if (opt == 's') {
+			/* size of pipe */
+			unsigned long size = parse_size(optarg);
+
+			if (size == 0)
+				exit(EXIT_FAILURE);
+			s->pipe_size = size;
+		} else {
+			/* 'h' and everything getopt() did not recognise */
+			usage(argv[0]);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	agent_info_init(s);
+
+	return NULL;
+}
+
+/*
+ * Run the agent: start one read/write thread per cpu, serve the control
+ * path until rw_ctl_loop() returns, then join every thread.  A failing
+ * pthread_join() terminates the process.
+ */
+static void agent_main_loop(struct agent_info *s)
+{
+	pthread_t workers[MAX_CPUS];
+	int i, err;
+
+	/* Start all read/write threads */
+	for (i = 0; i < s->cpus; i++)
+		workers[i] = rw_thread_run(s->rw_ti[i]);
+
+	rw_ctl_loop(s->ctl_fd);
+
+	/* Finish all read/write threads */
+	for (i = 0; i < s->cpus; i++) {
+		err = pthread_join(workers[i], NULL);
+		if (err == 0)
+			continue;
+
+		pr_err("pthread_join() error:%d (cpu %d)\n", err, i);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Close every fd held by the agent and release its memory.
+ *
+ * @s: agent state; must not be used afterwards
+ */
+static void agent_info_free(struct agent_info *s)
+{
+	int i;
+
+	close(s->ctl_fd);
+	for (i = 0; i < s->cpus; i++) {
+		close(s->rw_ti[i]->in_fd);
+		/*
+		 * In stdout mode out_fd is STDOUT_FILENO, which the agent
+		 * did not open; closing it would discard whatever stdio
+		 * still has buffered for stdout when the process exits.
+		 */
+		if (s->rw_ti[i]->out_fd != STDOUT_FILENO)
+			close(s->rw_ti[i]->out_fd);
+		close(s->rw_ti[i]->read_pipe);
+		close(s->rw_ti[i]->write_pipe);
+		free(s->rw_ti[i]);
+	}
+	free(s);
+}
+
+/* Entry point: build the agent state, parse options, run, clean up */
+int main(int argc, char *argv[])
+{
+	struct agent_info *agent = agent_info_new();
+
+	parse_args(argc, argv, agent);
+	agent_main_loop(agent);
+	agent_info_free(agent);
+
+	return 0;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 00000000000..8de79bfeaa7
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,75 @@
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+
+#define MAX_CPUS 256
+#define PIPE_INIT (1024*1024)
+
+/*
+ * agent_info - top-level state of the guest agent
+ * @pipe_size: size requested for each per-cpu pipe (default 1MB, PIPE_INIT;
+ *             replaced by the argument of the -s option)
+ * @use_stdout: set to true when o option is added, so that trace data is
+ *              written to stdout instead of the output paths (default false)
+ * @cpus: total number of configured CPUs, at most MAX_CPUS
+ * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
+ *          (-1 until the controller is initialised)
+ * @rw_ti: per-cpu information of the read/write threads; only the first
+ *         @cpus entries are allocated
+ */
+struct agent_info {
+ unsigned long pipe_size;
+ bool use_stdout;
+ int cpus;
+ int ctl_fd;
+ struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - state of the read/write thread serving one cpu
+ * @cpu_num: cpu number operating this read/write thread
+ * @in_fd: fd of reading trace data path in cpu_num
+ * @out_fd: fd of writing trace data path in cpu_num (STDOUT_FILENO in
+ *          stdout mode)
+ * @read_pipe: pipe fd that data read from in_fd is spliced into
+ * @write_pipe: pipe fd that data is spliced out of towards out_fd
+ * @pipe_size: size of pipe (default 1MB)
+ *
+ * cpu_num and every fd hold -1 until rw_thread_init() fills them in.
+ */
+struct rw_thread_info {
+ int cpu_num;
+ int in_fd;
+ int out_fd;
+ int read_pipe;
+ int write_pipe;
+ unsigned long pipe_size;
+};
+
+/* set once a termination signal has been noticed; rw threads stop when they see it */
+extern bool global_sig_receive;
+
+/*
+ * notification from the controller to the rw threads: global_run_operation
+ * is true while the host wants trace data, cond_wakeup (with mutex_notify)
+ * wakes threads sleeping while it is false
+ */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads (rw_ctl_loop returns after a signal) */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread: allocate, open the fds, start the thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+ bool stdout_flag, unsigned long pipe_size,
+ struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+/* Allocate size bytes of zero-filled memory; NULL when calloc() fails */
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__) /* error messages go to stderr */
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__) /* informational messages go to stdout */
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__) /* debug messages go to stderr */
+#else
+#define pr_debug(format, ...) do {} while (0) /* without DEBUG the arguments are discarded unevaluated */
+#endif
+
+#endif /*__TRACE_AGENT_H__*/