diff options
Diffstat (limited to 'tools')
25 files changed, 3085 insertions, 184 deletions
diff --git a/tools/hv/hv_get_dhcp_info.sh b/tools/hv/hv_get_dhcp_info.sh new file mode 100755 index 00000000000..ccd3e953276 --- /dev/null +++ b/tools/hv/hv_get_dhcp_info.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# This example script retrieves the DHCP state of a given interface. +# In the interest of keeping the KVP daemon code free of distro specific +# information; the kvp daemon code invokes this external script to gather +# DHCP setting for the specific interface. +# +# Input: Name of the interface +# +# Output: The script prints the string "Enabled" to stdout to indicate +# that DHCP is enabled on the interface. If DHCP is not enabled, +# the script prints the string "Disabled" to stdout. +# +# Each Distro is expected to implement this script in a distro specific +# fashion. For instance on Distros that ship with Network Manager enabled, +# this script can be based on the Network Manager APIs for retrieving DHCP +# information. + +if_file="/etc/sysconfig/network-scripts/ifcfg-"$1 + +dhcp=$(grep "dhcp" $if_file 2>/dev/null) + +if [ "$dhcp" != "" ]; +then +echo "Enabled" +else +echo "Disabled" +fi diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh new file mode 100755 index 00000000000..058c17b46ff --- /dev/null +++ b/tools/hv/hv_get_dns_info.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# This example script parses /etc/resolv.conf to retrieve DNS information. +# In the interest of keeping the KVP daemon code free of distro specific +# information; the kvp daemon code invokes this external script to gather +# DNS information. +# This script is expected to print the nameserver values to stdout. +# Each Distro is expected to implement this script in a distro specific +# fashion. For instance on Distros that ship with Network Manager enabled, +# this script can be based on the Network Manager APIs for retrieving DNS +# entries. 
+ +cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }' diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index d9834b36294..5959affd882 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -31,6 +31,7 @@ #include <stdlib.h> #include <unistd.h> #include <string.h> +#include <ctype.h> #include <errno.h> #include <arpa/inet.h> #include <linux/connector.h> @@ -41,6 +42,7 @@ #include <syslog.h> #include <sys/stat.h> #include <fcntl.h> +#include <dirent.h> /* * KVP protocol: The user mode component first registers with the @@ -68,25 +70,39 @@ enum key_index { ProcessorArchitecture }; + +enum { + IPADDR = 0, + NETMASK, + GATEWAY, + DNS +}; + static char kvp_send_buffer[4096]; -static char kvp_recv_buffer[4096]; +static char kvp_recv_buffer[4096 * 2]; static struct sockaddr_nl addr; +static int in_hand_shake = 1; static char *os_name = ""; static char *os_major = ""; static char *os_minor = ""; static char *processor_arch; static char *os_build; -static char *lic_version; +static char *lic_version = "Unknown version"; static struct utsname uts_buf; +/* + * The location of the interface configuration file. 
+ */ + +#define KVP_CONFIG_LOC "/var/opt/" #define MAX_FILE_NAME 100 #define ENTRIES_PER_BLOCK 50 struct kvp_record { - __u8 key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; - __u8 value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; + char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE]; + char value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE]; }; struct kvp_file_state { @@ -94,7 +110,7 @@ struct kvp_file_state { int num_blocks; struct kvp_record *records; int num_records; - __u8 fname[MAX_FILE_NAME]; + char fname[MAX_FILE_NAME]; }; static struct kvp_file_state kvp_file_info[KVP_POOL_COUNT]; @@ -106,7 +122,7 @@ static void kvp_acquire_lock(int pool) if (fcntl(kvp_file_info[pool].fd, F_SETLKW, &fl) == -1) { syslog(LOG_ERR, "Failed to acquire the lock pool: %d", pool); - exit(-1); + exit(EXIT_FAILURE); } } @@ -118,7 +134,7 @@ static void kvp_release_lock(int pool) if (fcntl(kvp_file_info[pool].fd, F_SETLK, &fl) == -1) { perror("fcntl"); syslog(LOG_ERR, "Failed to release the lock pool: %d", pool); - exit(-1); + exit(EXIT_FAILURE); } } @@ -137,14 +153,19 @@ static void kvp_update_file(int pool) if (!filep) { kvp_release_lock(pool); syslog(LOG_ERR, "Failed to open file, pool: %d", pool); - exit(-1); + exit(EXIT_FAILURE); } bytes_written = fwrite(kvp_file_info[pool].records, sizeof(struct kvp_record), kvp_file_info[pool].num_records, filep); - fflush(filep); + if (ferror(filep) || fclose(filep)) { + kvp_release_lock(pool); + syslog(LOG_ERR, "Failed to write file, pool: %d", pool); + exit(EXIT_FAILURE); + } + kvp_release_lock(pool); } @@ -163,14 +184,19 @@ static void kvp_update_mem_state(int pool) if (!filep) { kvp_release_lock(pool); syslog(LOG_ERR, "Failed to open file, pool: %d", pool); - exit(-1); + exit(EXIT_FAILURE); } - while (!feof(filep)) { + for (;;) { readp = &record[records_read]; records_read += fread(readp, sizeof(struct kvp_record), ENTRIES_PER_BLOCK * num_blocks, filep); + if (ferror(filep)) { + syslog(LOG_ERR, "Failed to read file, pool: %d", pool); + exit(EXIT_FAILURE); + } + if (!feof(filep)) { /* * We 
have more data to read. @@ -180,7 +206,7 @@ static void kvp_update_mem_state(int pool) if (record == NULL) { syslog(LOG_ERR, "malloc failed"); - exit(-1); + exit(EXIT_FAILURE); } continue; } @@ -191,14 +217,15 @@ static void kvp_update_mem_state(int pool) kvp_file_info[pool].records = record; kvp_file_info[pool].num_records = records_read; + fclose(filep); kvp_release_lock(pool); } static int kvp_file_init(void) { - int ret, fd; + int fd; FILE *filep; size_t records_read; - __u8 *fname; + char *fname; struct kvp_record *record; struct kvp_record *readp; int num_blocks; @@ -208,7 +235,7 @@ static int kvp_file_init(void) if (access("/var/opt/hyperv", F_OK)) { if (mkdir("/var/opt/hyperv", S_IRUSR | S_IWUSR | S_IROTH)) { syslog(LOG_ERR, " Failed to create /var/opt/hyperv"); - exit(-1); + exit(EXIT_FAILURE); } } @@ -232,12 +259,18 @@ static int kvp_file_init(void) fclose(filep); return 1; } - while (!feof(filep)) { + for (;;) { readp = &record[records_read]; records_read += fread(readp, sizeof(struct kvp_record), ENTRIES_PER_BLOCK, filep); + if (ferror(filep)) { + syslog(LOG_ERR, "Failed to read file, pool: %d", + i); + exit(EXIT_FAILURE); + } + if (!feof(filep)) { /* * We have more data to read. 
@@ -311,7 +344,6 @@ static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value, int value_size) { int i; - int j, k; int num_records; struct kvp_record *record; int num_blocks; @@ -394,7 +426,7 @@ static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, return 1; } -static void kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, +static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, __u8 *value, int value_size) { struct kvp_record *record; @@ -406,16 +438,12 @@ static void kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, record = kvp_file_info[pool].records; if (index >= kvp_file_info[pool].num_records) { - /* - * This is an invalid index; terminate enumeration; - * - a NULL value will do the trick. - */ - strcpy(value, ""); - return; + return 1; } memcpy(key, record[index].key, key_size); memcpy(value, record[index].value, value_size); + return 0; } @@ -426,6 +454,7 @@ void kvp_get_os_info(void) uname(&uts_buf); os_build = uts_buf.release; + os_name = uts_buf.sysname; processor_arch = uts_buf.machine; /* @@ -437,20 +466,70 @@ void kvp_get_os_info(void) if (p) *p = '\0'; + /* + * Parse the /etc/os-release file if present: + * http://www.freedesktop.org/software/systemd/man/os-release.html + */ + file = fopen("/etc/os-release", "r"); + if (file != NULL) { + while (fgets(buf, sizeof(buf), file)) { + char *value, *q; + + /* Ignore comments */ + if (buf[0] == '#') + continue; + + /* Split into name=value */ + p = strchr(buf, '='); + if (!p) + continue; + *p++ = 0; + + /* Remove quotes and newline; un-escape */ + value = p; + q = p; + while (*p) { + if (*p == '\\') { + ++p; + if (!*p) + break; + *q++ = *p++; + } else if (*p == '\'' || *p == '"' || + *p == '\n') { + ++p; + } else { + *q++ = *p++; + } + } + *q = 0; + + if (!strcmp(buf, "NAME")) { + p = strdup(value); + if (!p) + break; + os_name = p; + } else if (!strcmp(buf, "VERSION_ID")) { + p = strdup(value); + if (!p) + break; + 
os_major = p; + } + } + fclose(file); + return; + } + + /* Fallback for older RH/SUSE releases */ file = fopen("/etc/SuSE-release", "r"); if (file != NULL) goto kvp_osinfo_found; file = fopen("/etc/redhat-release", "r"); if (file != NULL) goto kvp_osinfo_found; - /* - * Add code for other supported platforms. - */ /* * We don't have information about the os. */ - os_name = uts_buf.sysname; return; kvp_osinfo_found: @@ -494,82 +573,458 @@ done: return; } + + +/* + * Retrieve an interface name corresponding to the specified guid. + * If there is a match, the function returns a pointer + * to the interface name and if not, a NULL is returned. + * If a match is found, the caller is responsible for + * freeing the memory. + */ + +static char *kvp_get_if_name(char *guid) +{ + DIR *dir; + struct dirent *entry; + FILE *file; + char *p, *q, *x; + char *if_name = NULL; + char buf[256]; + char *kvp_net_dir = "/sys/class/net/"; + char dev_id[256]; + + dir = opendir(kvp_net_dir); + if (dir == NULL) + return NULL; + + snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir); + q = dev_id + strlen(kvp_net_dir); + + while ((entry = readdir(dir)) != NULL) { + /* + * Set the state for the next pass. + */ + *q = '\0'; + strcat(dev_id, entry->d_name); + strcat(dev_id, "/device/device_id"); + + file = fopen(dev_id, "r"); + if (file == NULL) + continue; + + p = fgets(buf, sizeof(buf), file); + if (p) { + x = strchr(p, '\n'); + if (x) + *x = '\0'; + + if (!strcmp(p, guid)) { + /* + * Found the guid match; return the interface + * name. The caller will free the memory. + */ + if_name = strdup(entry->d_name); + fclose(file); + break; + } + } + fclose(file); + } + + closedir(dir); + return if_name; +} + +/* + * Retrieve the MAC address given the interface name. 
+ */ + +static char *kvp_if_name_to_mac(char *if_name) +{ + FILE *file; + char *p, *x; + char buf[256]; + char addr_file[256]; + int i; + char *mac_addr = NULL; + + snprintf(addr_file, sizeof(addr_file), "%s%s%s", "/sys/class/net/", + if_name, "/address"); + + file = fopen(addr_file, "r"); + if (file == NULL) + return NULL; + + p = fgets(buf, sizeof(buf), file); + if (p) { + x = strchr(p, '\n'); + if (x) + *x = '\0'; + for (i = 0; i < strlen(p); i++) + p[i] = toupper(p[i]); + mac_addr = strdup(p); + } + + fclose(file); + return mac_addr; +} + + +/* + * Retrieve the interface name given the MAC address. + */ + +static char *kvp_mac_to_if_name(char *mac) +{ + DIR *dir; + struct dirent *entry; + FILE *file; + char *p, *q, *x; + char *if_name = NULL; + char buf[256]; + char *kvp_net_dir = "/sys/class/net/"; + char dev_id[256]; + int i; + + dir = opendir(kvp_net_dir); + if (dir == NULL) + return NULL; + + snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir); + q = dev_id + strlen(kvp_net_dir); + + while ((entry = readdir(dir)) != NULL) { + /* + * Set the state for the next pass. + */ + *q = '\0'; + + strcat(dev_id, entry->d_name); + strcat(dev_id, "/address"); + + file = fopen(dev_id, "r"); + if (file == NULL) + continue; + + p = fgets(buf, sizeof(buf), file); + if (p) { + x = strchr(p, '\n'); + if (x) + *x = '\0'; + + for (i = 0; i < strlen(p); i++) + p[i] = toupper(p[i]); + + if (!strcmp(p, mac)) { + /* + * Found the MAC match; return the interface + * name. The caller will free the memory. + */ + if_name = strdup(entry->d_name); + fclose(file); + break; + } + } + fclose(file); + } + + closedir(dir); + return if_name; +} + + +static void kvp_process_ipconfig_file(char *cmd, + char *config_buf, int len, + int element_size, int offset) +{ + char buf[256]; + char *p; + char *x; + FILE *file; + + /* + * First execute the command. 
+ */ + file = popen(cmd, "r"); + if (file == NULL) + return; + + if (offset == 0) + memset(config_buf, 0, len); + while ((p = fgets(buf, sizeof(buf), file)) != NULL) { + if ((len - strlen(config_buf)) < (element_size + 1)) + break; + + x = strchr(p, '\n'); + *x = '\0'; + strcat(config_buf, p); + strcat(config_buf, ";"); + } + pclose(file); +} + +static void kvp_get_ipconfig_info(char *if_name, + struct hv_kvp_ipaddr_value *buffer) +{ + char cmd[512]; + char dhcp_info[128]; + char *p; + FILE *file; + + /* + * Get the address of default gateway (ipv4). + */ + sprintf(cmd, "%s %s", "ip route show dev", if_name); + strcat(cmd, " | awk '/default/ {print $3 }'"); + + /* + * Execute the command to gather gateway info. + */ + kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, + (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); + + /* + * Get the address of default gateway (ipv6). + */ + sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); + strcat(cmd, " | awk '/default/ {print $3 }'"); + + /* + * Execute the command to gather gateway info (ipv6). + */ + kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, + (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); + + + /* + * Gather the DNS state. + * Since there is no standard way to get this information + * across various distributions of interest; we just invoke + * an external script that needs to be ported across distros + * of interest. + * + * Following is the expected format of the information from the script: + * + * ipaddr1 (nameserver1) + * ipaddr2 (nameserver2) + * . + * . + */ + + sprintf(cmd, "%s", "hv_get_dns_info"); + + /* + * Execute the command to gather DNS info. + */ + kvp_process_ipconfig_file(cmd, (char *)buffer->dns_addr, + (MAX_IP_ADDR_SIZE * 2), INET_ADDRSTRLEN, 0); + + /* + * Gather the DHCP state. + * We will gather this state by invoking an external script. + * The parameter to the script is the interface name. + * Here is the expected output: + * + * Enabled: DHCP enabled. 
+ */ + + sprintf(cmd, "%s %s", "hv_get_dhcp_info", if_name); + + file = popen(cmd, "r"); + if (file == NULL) + return; + + p = fgets(dhcp_info, sizeof(dhcp_info), file); + if (p == NULL) { + pclose(file); + return; + } + + if (!strncmp(p, "Enabled", 7)) + buffer->dhcp_enabled = 1; + else + buffer->dhcp_enabled = 0; + + pclose(file); +} + + +static unsigned int hweight32(unsigned int *w) +{ + unsigned int res = *w - ((*w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +} + +static int kvp_process_ip_address(void *addrp, + int family, char *buffer, + int length, int *offset) +{ + struct sockaddr_in *addr; + struct sockaddr_in6 *addr6; + int addr_length; + char tmp[50]; + const char *str; + + if (family == AF_INET) { + addr = (struct sockaddr_in *)addrp; + str = inet_ntop(family, &addr->sin_addr, tmp, 50); + addr_length = INET_ADDRSTRLEN; + } else { + addr6 = (struct sockaddr_in6 *)addrp; + str = inet_ntop(family, &addr6->sin6_addr.s6_addr, tmp, 50); + addr_length = INET6_ADDRSTRLEN; + } + + if ((length - *offset) < addr_length + 1) + return HV_E_FAIL; + if (str == NULL) { + strcpy(buffer, "inet_ntop failed\n"); + return HV_E_FAIL; + } + if (*offset == 0) + strcpy(buffer, tmp); + else + strcat(buffer, tmp); + strcat(buffer, ";"); + + *offset += strlen(str) + 1; + return 0; +} + static int -kvp_get_ip_address(int family, char *buffer, int length) +kvp_get_ip_info(int family, char *if_name, int op, + void *out_buffer, int length) { struct ifaddrs *ifap; struct ifaddrs *curp; - int ipv4_len = strlen("255.255.255.255") + 1; - int ipv6_len = strlen("ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff")+1; int offset = 0; - const char *str; - char tmp[50]; + int sn_offset = 0; int error = 0; - + char *buffer; + struct hv_kvp_ipaddr_value *ip_buffer; + char cidr_mask[5]; /* /xyz */ + int weight; + int i; + unsigned int *w; + char *sn_str; + struct 
sockaddr_in6 *addr6; + + if (op == KVP_OP_ENUMERATE) { + buffer = out_buffer; + } else { + ip_buffer = out_buffer; + buffer = (char *)ip_buffer->ip_addr; + ip_buffer->addr_family = 0; + } /* * On entry into this function, the buffer is capable of holding the - * maximum key value (2048 bytes). + * maximum key value. */ if (getifaddrs(&ifap)) { strcpy(buffer, "getifaddrs failed\n"); - return 1; + return HV_E_FAIL; } curp = ifap; while (curp != NULL) { - if ((curp->ifa_addr != NULL) && - (curp->ifa_addr->sa_family == family)) { - if (family == AF_INET) { - struct sockaddr_in *addr = - (struct sockaddr_in *) curp->ifa_addr; - - str = inet_ntop(family, &addr->sin_addr, - tmp, 50); - if (str == NULL) { - strcpy(buffer, "inet_ntop failed\n"); - error = 1; - goto getaddr_done; - } - if (offset == 0) - strcpy(buffer, tmp); - else - strcat(buffer, tmp); - strcat(buffer, ";"); + if (curp->ifa_addr == NULL) { + curp = curp->ifa_next; + continue; + } - offset += strlen(str) + 1; - if ((length - offset) < (ipv4_len + 1)) - goto getaddr_done; + if ((if_name != NULL) && + (strncmp(curp->ifa_name, if_name, strlen(if_name)))) { + /* + * We want info about a specific interface; + * just continue. + */ + curp = curp->ifa_next; + continue; + } - } else { + /* + * We only support two address families: AF_INET and AF_INET6. + * If a family value of 0 is specified, we collect both + * supported address families; if not we gather info on + * the specified address family. + */ + if ((family != 0) && (curp->ifa_addr->sa_family != family)) { + curp = curp->ifa_next; + continue; + } + if ((curp->ifa_addr->sa_family != AF_INET) && + (curp->ifa_addr->sa_family != AF_INET6)) { + curp = curp->ifa_next; + continue; + } + if (op == KVP_OP_GET_IP_INFO) { /* - * We only support AF_INET and AF_INET6 - * and the list of addresses is separated by a ";". + * Gather info other than the IP address. + * IP address info will be gathered later. 
*/ - struct sockaddr_in6 *addr = - (struct sockaddr_in6 *) curp->ifa_addr; - - str = inet_ntop(family, - &addr->sin6_addr.s6_addr, - tmp, 50); - if (str == NULL) { - strcpy(buffer, "inet_ntop failed\n"); - error = 1; - goto getaddr_done; - } - if (offset == 0) - strcpy(buffer, tmp); - else - strcat(buffer, tmp); - strcat(buffer, ";"); - offset += strlen(str) + 1; - if ((length - offset) < (ipv6_len + 1)) - goto getaddr_done; + if (curp->ifa_addr->sa_family == AF_INET) { + ip_buffer->addr_family |= ADDR_FAMILY_IPV4; + /* + * Get subnet info. + */ + error = kvp_process_ip_address( + curp->ifa_netmask, + AF_INET, + (char *) + ip_buffer->sub_net, + length, + &sn_offset); + if (error) + goto gather_ipaddr; + } else { + ip_buffer->addr_family |= ADDR_FAMILY_IPV6; + /* + * Get subnet info in CIDR format. + */ + weight = 0; + sn_str = (char *)ip_buffer->sub_net; + addr6 = (struct sockaddr_in6 *) + curp->ifa_netmask; + w = addr6->sin6_addr.s6_addr32; + + for (i = 0; i < 4; i++) + weight += hweight32(&w[i]); + + sprintf(cidr_mask, "/%d", weight); + if ((length - sn_offset) < + (strlen(cidr_mask) + 1)) + goto gather_ipaddr; + + if (sn_offset == 0) + strcpy(sn_str, cidr_mask); + else + strcat(sn_str, cidr_mask); + strcat((char *)ip_buffer->sub_net, ";"); + sn_offset += strlen(sn_str) + 1; } + /* + * Collect other ip related configuration info. 
+ */ + + kvp_get_ipconfig_info(if_name, ip_buffer); } + +gather_ipaddr: + error = kvp_process_ip_address(curp->ifa_addr, + curp->ifa_addr->sa_family, + buffer, + length, &offset); + if (error) + goto getaddr_done; + curp = curp->ifa_next; } @@ -579,6 +1034,315 @@ getaddr_done: } +static int expand_ipv6(char *addr, int type) +{ + int ret; + struct in6_addr v6_addr; + + ret = inet_pton(AF_INET6, addr, &v6_addr); + + if (ret != 1) { + if (type == NETMASK) + return 1; + return 0; + } + + sprintf(addr, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:" + "%02x%02x:%02x%02x:%02x%02x", + (int)v6_addr.s6_addr[0], (int)v6_addr.s6_addr[1], + (int)v6_addr.s6_addr[2], (int)v6_addr.s6_addr[3], + (int)v6_addr.s6_addr[4], (int)v6_addr.s6_addr[5], + (int)v6_addr.s6_addr[6], (int)v6_addr.s6_addr[7], + (int)v6_addr.s6_addr[8], (int)v6_addr.s6_addr[9], + (int)v6_addr.s6_addr[10], (int)v6_addr.s6_addr[11], + (int)v6_addr.s6_addr[12], (int)v6_addr.s6_addr[13], + (int)v6_addr.s6_addr[14], (int)v6_addr.s6_addr[15]); + + return 1; + +} + +static int is_ipv4(char *addr) +{ + int ret; + struct in_addr ipv4_addr; + + ret = inet_pton(AF_INET, addr, &ipv4_addr); + + if (ret == 1) + return 1; + return 0; +} + +static int parse_ip_val_buffer(char *in_buf, int *offset, + char *out_buf, int out_len) +{ + char *x; + char *start; + + /* + * in_buf has sequence of characters that are separated by + * the character ';'. The last sequence does not have the + * terminating ";" character. + */ + start = in_buf + *offset; + + x = strchr(start, ';'); + if (x) + *x = 0; + else + x = start + strlen(start); + + if (strlen(start) != 0) { + int i = 0; + /* + * Get rid of leading spaces. 
+ */ + while (start[i] == ' ') + i++; + + if ((x - start) <= out_len) { + strcpy(out_buf, (start + i)); + *offset += (x - start) + 1; + return 1; + } + } + return 0; +} + +static int kvp_write_file(FILE *f, char *s1, char *s2, char *s3) +{ + int ret; + + ret = fprintf(f, "%s%s%s%s\n", s1, s2, "=", s3); + + if (ret < 0) + return HV_E_FAIL; + + return 0; +} + + +static int process_ip_string(FILE *f, char *ip_string, int type) +{ + int error = 0; + char addr[INET6_ADDRSTRLEN]; + int i = 0; + int j = 0; + char str[256]; + char sub_str[10]; + int offset = 0; + + memset(addr, 0, sizeof(addr)); + + while (parse_ip_val_buffer(ip_string, &offset, addr, + (MAX_IP_ADDR_SIZE * 2))) { + + sub_str[0] = 0; + if (is_ipv4(addr)) { + switch (type) { + case IPADDR: + snprintf(str, sizeof(str), "%s", "IPADDR"); + break; + case NETMASK: + snprintf(str, sizeof(str), "%s", "NETMASK"); + break; + case GATEWAY: + snprintf(str, sizeof(str), "%s", "GATEWAY"); + break; + case DNS: + snprintf(str, sizeof(str), "%s", "DNS"); + break; + } + if (i != 0) { + if (type != DNS) { + snprintf(sub_str, sizeof(sub_str), + "_%d", i++); + } else { + snprintf(sub_str, sizeof(sub_str), + "%d", ++i); + } + } else if (type == DNS) { + snprintf(sub_str, sizeof(sub_str), "%d", ++i); + } + + + } else if (expand_ipv6(addr, type)) { + switch (type) { + case IPADDR: + snprintf(str, sizeof(str), "%s", "IPV6ADDR"); + break; + case NETMASK: + snprintf(str, sizeof(str), "%s", "IPV6NETMASK"); + break; + case GATEWAY: + snprintf(str, sizeof(str), "%s", + "IPV6_DEFAULTGW"); + break; + case DNS: + snprintf(str, sizeof(str), "%s", "DNS"); + break; + } + if ((j != 0) || (type == DNS)) { + if (type != DNS) { + snprintf(sub_str, sizeof(sub_str), + "_%d", j++); + } else { + snprintf(sub_str, sizeof(sub_str), + "%d", ++i); + } + } else if (type == DNS) { + snprintf(sub_str, sizeof(sub_str), + "%d", ++i); + } + } else { + return HV_INVALIDARG; + } + + error = kvp_write_file(f, str, sub_str, addr); + if (error) + return error; + 
memset(addr, 0, sizeof(addr)); + } + + return 0; +} + +static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) +{ + int error = 0; + char if_file[128]; + FILE *file; + char cmd[512]; + char *mac_addr; + + /* + * Set the configuration for the specified interface with + * the information provided. Since there is no standard + * way to configure an interface, we will have an external + * script that does the job of configuring the interface and + * flushing the configuration. + * + * The parameters passed to this external script are: + * 1. A configuration file that has the specified configuration. + * + * We will embed the name of the interface in the configuration + * file: ifcfg-ethx (where ethx is the interface name). + * + * The information provided here may be more than what is needed + * in a given distro to configure the interface and so distros are + * free to ignore information that may not be relevant. + * + * Here is the format of the ip configuration file: + * + * HWADDR=macaddr + * IF_NAME=interface name + * DHCP=yes (This is optional; if yes, DHCP is configured) + * + * IPADDR=ipaddr1 + * IPADDR_1=ipaddr2 + * IPADDR_x=ipaddry (where y = x + 1) + * + * NETMASK=netmask1 + * NETMASK_x=netmasky (where y = x + 1) + * + * GATEWAY=ipaddr1 + * GATEWAY_x=ipaddry (where y = x + 1) + * + * DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc) + * + * IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be + * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as + * IPV6NETMASK. + * + * The host can specify multiple ipv4 and ipv6 addresses to be + * configured for the interface. Furthermore, the configuration + * needs to be persistent. A subsequent GET call on the interface + * is expected to return the configuration that is set via the SET + * call. 
+ */ + + snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC, + "hyperv/ifcfg-", if_name); + + file = fopen(if_file, "w"); + + if (file == NULL) { + syslog(LOG_ERR, "Failed to open config file"); + return HV_E_FAIL; + } + + /* + * First write out the MAC address. + */ + + mac_addr = kvp_if_name_to_mac(if_name); + if (mac_addr == NULL) { + error = HV_E_FAIL; + goto setval_error; + } + + error = kvp_write_file(file, "HWADDR", "", mac_addr); + if (error) + goto setval_error; + + error = kvp_write_file(file, "IF_NAME", "", if_name); + if (error) + goto setval_error; + + if (new_val->dhcp_enabled) { + error = kvp_write_file(file, "DHCP", "", "yes"); + if (error) + goto setval_error; + + /* + * We are done!. + */ + goto setval_done; + } + + /* + * Write the configuration for ipaddress, netmask, gateway and + * name servers. + */ + + error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR); + if (error) + goto setval_error; + + error = process_ip_string(file, (char *)new_val->sub_net, NETMASK); + if (error) + goto setval_error; + + error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY); + if (error) + goto setval_error; + + error = process_ip_string(file, (char *)new_val->dns_addr, DNS); + if (error) + goto setval_error; + +setval_done: + free(mac_addr); + fclose(file); + + /* + * Now that we have populated the configuration file, + * invoke the external script to do its magic. 
+ */ + + snprintf(cmd, sizeof(cmd), "%s %s", "hv_set_ifconfig", if_file); + system(cmd); + return 0; + +setval_error: + syslog(LOG_ERR, "Failed to write config file"); + free(mac_addr); + fclose(file); + return error; +} + + static int kvp_get_domain_name(char *buffer, int length) { @@ -646,6 +1410,10 @@ int main(void) char *p; char *key_value; char *key_name; + int op; + int pool; + char *if_name; + struct hv_kvp_ipaddr_value *kvp_ip_val; daemon(1, 0); openlog("KVP", 0, LOG_USER); @@ -657,13 +1425,13 @@ int main(void) if (kvp_file_init()) { syslog(LOG_ERR, "Failed to initialize the pools"); - exit(-1); + exit(EXIT_FAILURE); } fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); if (fd < 0) { syslog(LOG_ERR, "netlink socket creation failed; error:%d", fd); - exit(-1); + exit(EXIT_FAILURE); } addr.nl_family = AF_NETLINK; addr.nl_pad = 0; @@ -675,7 +1443,7 @@ int main(void) if (error < 0) { syslog(LOG_ERR, "bind failed; error:%d", error); close(fd); - exit(-1); + exit(EXIT_FAILURE); } sock_opt = addr.nl_groups; setsockopt(fd, 270, 1, &sock_opt, sizeof(sock_opt)); @@ -687,7 +1455,7 @@ int main(void) message->id.val = CN_KVP_VAL; hv_msg = (struct hv_kvp_msg *)message->data; - hv_msg->kvp_hdr.operation = KVP_OP_REGISTER; + hv_msg->kvp_hdr.operation = KVP_OP_REGISTER1; message->ack = 0; message->len = sizeof(struct hv_kvp_msg); @@ -695,7 +1463,7 @@ int main(void) if (len < 0) { syslog(LOG_ERR, "netlink_send failed; error:%d", len); close(fd); - exit(-1); + exit(EXIT_FAILURE); } pfd.fd = fd; @@ -721,12 +1489,21 @@ int main(void) incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; - switch (hv_msg->kvp_hdr.operation) { - case KVP_OP_REGISTER: + /* + * We will use the KVP header information to pass back + * the error from this daemon. So, first copy the state + * and set the error code to success. 
+ */ + op = hv_msg->kvp_hdr.operation; + pool = hv_msg->kvp_hdr.pool; + hv_msg->error = HV_S_OK; + + if ((in_hand_shake) && (op == KVP_OP_REGISTER1)) { /* * Driver is registering with us; stash away the version * information. */ + in_hand_shake = 0; p = (char *)hv_msg->body.kvp_register.version; lic_version = malloc(strlen(p) + 1); if (lic_version) { @@ -737,44 +1514,82 @@ int main(void) syslog(LOG_ERR, "malloc failed"); } continue; + } - /* - * The current protocol with the kernel component uses a - * NULL key name to pass an error condition. - * For the SET, GET and DELETE operations, - * use the existing protocol to pass back error. - */ + switch (op) { + case KVP_OP_GET_IP_INFO: + kvp_ip_val = &hv_msg->body.kvp_ip_val; + if_name = + kvp_mac_to_if_name((char *)kvp_ip_val->adapter_id); + + if (if_name == NULL) { + /* + * We could not map the mac address to an + * interface name; return error. + */ + hv_msg->error = HV_E_FAIL; + break; + } + error = kvp_get_ip_info( + 0, if_name, KVP_OP_GET_IP_INFO, + kvp_ip_val, + (MAX_IP_ADDR_SIZE * 2)); + + if (error) + hv_msg->error = error; + + free(if_name); + break; + + case KVP_OP_SET_IP_INFO: + kvp_ip_val = &hv_msg->body.kvp_ip_val; + if_name = kvp_get_if_name( + (char *)kvp_ip_val->adapter_id); + if (if_name == NULL) { + /* + * We could not map the guid to an + * interface name; return error. 
+ */ + hv_msg->error = HV_GUID_NOTFOUND; + break; + } + error = kvp_set_ip_info(if_name, kvp_ip_val); + if (error) + hv_msg->error = error; + + free(if_name); + break; case KVP_OP_SET: - if (kvp_key_add_or_modify(hv_msg->kvp_hdr.pool, + if (kvp_key_add_or_modify(pool, hv_msg->body.kvp_set.data.key, hv_msg->body.kvp_set.data.key_size, hv_msg->body.kvp_set.data.value, hv_msg->body.kvp_set.data.value_size)) - strcpy(hv_msg->body.kvp_set.data.key, ""); + hv_msg->error = HV_S_CONT; break; case KVP_OP_GET: - if (kvp_get_value(hv_msg->kvp_hdr.pool, + if (kvp_get_value(pool, hv_msg->body.kvp_set.data.key, hv_msg->body.kvp_set.data.key_size, hv_msg->body.kvp_set.data.value, hv_msg->body.kvp_set.data.value_size)) - strcpy(hv_msg->body.kvp_set.data.key, ""); + hv_msg->error = HV_S_CONT; break; case KVP_OP_DELETE: - if (kvp_key_delete(hv_msg->kvp_hdr.pool, + if (kvp_key_delete(pool, hv_msg->body.kvp_delete.key, hv_msg->body.kvp_delete.key_size)) - strcpy(hv_msg->body.kvp_delete.key, ""); + hv_msg->error = HV_S_CONT; break; default: break; } - if (hv_msg->kvp_hdr.operation != KVP_OP_ENUMERATE) + if (op != KVP_OP_ENUMERATE) goto kvp_done; /* @@ -782,13 +1597,14 @@ int main(void) * both the key and the value; if not read from the * appropriate pool. 
*/ - if (hv_msg->kvp_hdr.pool != KVP_POOL_AUTO) { - kvp_pool_enumerate(hv_msg->kvp_hdr.pool, + if (pool != KVP_POOL_AUTO) { + if (kvp_pool_enumerate(pool, hv_msg->body.kvp_enum_data.index, hv_msg->body.kvp_enum_data.data.key, HV_KVP_EXCHANGE_MAX_KEY_SIZE, hv_msg->body.kvp_enum_data.data.value, - HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + hv_msg->error = HV_S_CONT; goto kvp_done; } @@ -807,13 +1623,13 @@ int main(void) strcpy(key_value, lic_version); break; case NetworkAddressIPv4: - kvp_get_ip_address(AF_INET, key_value, - HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + kvp_get_ip_info(AF_INET, NULL, KVP_OP_ENUMERATE, + key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "NetworkAddressIPv4"); break; case NetworkAddressIPv6: - kvp_get_ip_address(AF_INET6, key_value, - HV_KVP_EXCHANGE_MAX_VALUE_SIZE); + kvp_get_ip_info(AF_INET6, NULL, KVP_OP_ENUMERATE, + key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE); strcpy(key_name, "NetworkAddressIPv6"); break; case OSBuildNumber: @@ -841,11 +1657,7 @@ int main(void) strcpy(key_name, "ProcessorArchitecture"); break; default: - strcpy(key_value, "Unknown Key"); - /* - * We use a null key name to terminate enumeration. - */ - strcpy(key_name, ""); + hv_msg->error = HV_S_CONT; break; } /* @@ -863,7 +1675,7 @@ kvp_done: len = netlink_send(fd, incoming_cn_msg); if (len < 0) { syslog(LOG_ERR, "net_link send failed; error:%d", len); - exit(-1); + exit(EXIT_FAILURE); } } diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh new file mode 100755 index 00000000000..3e9427e08d8 --- /dev/null +++ b/tools/hv/hv_set_ifconfig.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# This example script activates an interface based on the specified +# configuration. +# +# In the interest of keeping the KVP daemon code free of distro specific +# information; the kvp daemon code invokes this external script to configure +# the interface. 
+# +# The only argument to this script is the configuration file that is to +# be used to configure the interface. +# +# Each Distro is expected to implement this script in a distro specific +# fashion. For instance on Distros that ship with Network Manager enabled, +# this script can be based on the Network Manager APIs for configuring the +# interface. +# +# This example script is based on a RHEL environment. +# +# Here is the format of the ip configuration file: +# +# HWADDR=macaddr +# IF_NAME=interface name +# DHCP=yes (This is optional; if yes, DHCP is configured) +# +# IPADDR=ipaddr1 +# IPADDR_1=ipaddr2 +# IPADDR_x=ipaddry (where y = x + 1) +# +# NETMASK=netmask1 +# NETMASK_x=netmasky (where y = x + 1) +# +# GATEWAY=ipaddr1 +# GATEWAY_x=ipaddry (where y = x + 1) +# +# DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc) +# +# IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be +# tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as +# IPV6NETMASK. +# +# The host can specify multiple ipv4 and ipv6 addresses to be +# configured for the interface. Furthermore, the configuration +# needs to be persistent. A subsequent GET call on the interface +# is expected to return the configuration that is set via the SET +# call. +# + + + +echo "IPV6INIT=yes" >> $1 +echo "NM_CONTROLLED=no" >> $1 +echo "PEERDNS=yes" >> $1 +echo "ONBOOT=yes" >> $1 + +dhcp=$(grep "DHCP" $1 2>/dev/null) +if [ "$dhcp" != "" ]; +then +echo "BOOTPROTO=dhcp" >> $1; +fi + +cp $1 /etc/sysconfig/network-scripts/ + + +interface=$(echo $1 | awk -F - '{ print $2 }') + +/sbin/ifdown $interface 2>/dev/null +/sbin/ifup $interface 2>/dev/null diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index f759f4f097c..fd2f9221b24 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type) dev->feature_len = 0; dev->num_vq = 0; dev->running = false; + dev->next = NULL; /* * Append to device list. 
Prepending to a single-linked list is diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f9126f89efe..247264502fb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -74,7 +74,7 @@ ifeq ($(ARCH),x86_64) override ARCH := x86 IS_X86_64 := 0 ifeq (, $(findstring m32,$(EXTRA_CFLAGS))) - IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1) + IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1) endif ifeq (${IS_X86_64}, 1) RAW_ARCH := x86_64 @@ -270,6 +270,7 @@ LIB_H += util/include/linux/magic.h LIB_H += util/include/linux/poison.h LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h +LIB_H += util/include/linux/rbtree_augmented.h LIB_H += util/include/linux/string.h LIB_H += util/include/linux/types.h LIB_H += util/include/linux/linkage.h @@ -881,7 +882,7 @@ $(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $< $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-redundant-decls $< diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 87f4ec6d1f3..a89cbbb6180 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -88,6 +88,12 @@ void get_term_dimensions(struct winsize *ws); #define CPUINFO_PROC "Processor" #endif +#ifdef __aarch64__ +#include "../../arch/arm64/include/asm/unistd.h" +#define rmb() asm volatile("dmb ld" ::: "memory") +#define cpu_relax() asm volatile("yield" ::: "memory") +#endif + #ifdef __mips__ #include "../../arch/mips/include/asm/unistd.h" #define rmb() asm volatile( \ diff --git a/tools/perf/util/include/linux/rbtree_augmented.h 
b/tools/perf/util/include/linux/rbtree_augmented.h new file mode 100644 index 00000000000..9d6fcdf1788 --- /dev/null +++ b/tools/perf/util/include/linux/rbtree_augmented.h @@ -0,0 +1,2 @@ +#include <stdbool.h> +#include "../../../../include/linux/rbtree_augmented.h" diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile new file mode 100644 index 00000000000..6b9cf7a987c --- /dev/null +++ b/tools/power/acpi/Makefile @@ -0,0 +1,18 @@ +PROG= acpidump +SRCS= acpidump.c +KERNEL_INCLUDE := ../../../include +CFLAGS += -Wall -Wstrict-prototypes -Wdeclaration-after-statement -Os -s -D_LINUX -DDEFINE_ALTERNATE_TYPES -I$(KERNEL_INCLUDE) + +all: acpidump +$(PROG) : $(SRCS) + $(CC) $(CFLAGS) $(SRCS) -o $(PROG) + +CLEANFILES= $(PROG) + +clean : + rm -f $(CLEANFILES) $(patsubst %.c,%.o, $(SRCS)) *~ + +install : + install acpidump /usr/bin/acpidump + install acpidump.8 /usr/share/man/man8 + diff --git a/tools/power/acpi/acpidump.8 b/tools/power/acpi/acpidump.8 new file mode 100644 index 00000000000..adfa99166e5 --- /dev/null +++ b/tools/power/acpi/acpidump.8 @@ -0,0 +1,59 @@ +.TH ACPIDUMP 8 +.SH NAME +acpidump \- Dump system's ACPI tables to an ASCII file. +.SH SYNOPSIS +.ft B +.B acpidump > acpidump.out +.SH DESCRIPTION +\fBacpidump \fP dumps the system's ACPI tables to an ASCII file +appropriate for attaching to a bug report. + +Subsequently, they can be processed by utilities in the ACPICA package. +.SS Options +no options worth worrying about. 
+.PP +.SH EXAMPLE + +.nf +# acpidump > acpidump.out + +$ acpixtract -a acpidump.out + Acpi table [DSDT] - 15974 bytes written to DSDT.dat + Acpi table [FACS] - 64 bytes written to FACS.dat + Acpi table [FACP] - 116 bytes written to FACP.dat + Acpi table [APIC] - 120 bytes written to APIC.dat + Acpi table [MCFG] - 60 bytes written to MCFG.dat + Acpi table [SSDT] - 444 bytes written to SSDT1.dat + Acpi table [SSDT] - 439 bytes written to SSDT2.dat + Acpi table [SSDT] - 439 bytes written to SSDT3.dat + Acpi table [SSDT] - 439 bytes written to SSDT4.dat + Acpi table [SSDT] - 439 bytes written to SSDT5.dat + Acpi table [RSDT] - 76 bytes written to RSDT.dat + Acpi table [RSDP] - 20 bytes written to RSDP.dat + +$ iasl -d *.dat +... +.fi +creates *.dsl, a human readable form which can be edited +and compiled using iasl. + + +.SH NOTES + +.B "acpidump " +must be run as root. + +.SH REFERENCES +ACPICA: https://acpica.org/ + +.SH FILES +.ta +.nf +/dev/mem +/sys/firmware/acpi/tables/dynamic/* +.fi + +.PP +.SH AUTHOR +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/acpi/acpidump.c b/tools/power/acpi/acpidump.c new file mode 100644 index 00000000000..07779871421 --- /dev/null +++ b/tools/power/acpi/acpidump.c @@ -0,0 +1,560 @@ +/* + * (c) Alexey Starikovskiy, Intel, 2005-2006. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. 
Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. 
+ */ + +#ifdef DEFINE_ALTERNATE_TYPES +/* hack to enable building old application with new headers -lenb */ +#define acpi_fadt_descriptor acpi_table_fadt +#define acpi_rsdp_descriptor acpi_table_rsdp +#define DSDT_SIG ACPI_SIG_DSDT +#define FACS_SIG ACPI_SIG_FACS +#define FADT_SIG ACPI_SIG_FADT +#define xfirmware_ctrl Xfacs +#define firmware_ctrl facs + +typedef int s32; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef long long s64; +#endif + +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <getopt.h> + +#include <sys/types.h> +#include <dirent.h> + +#include <acpi/acconfig.h> +#include <acpi/platform/acenv.h> +#include <acpi/actypes.h> +#include <acpi/actbl.h> + +static inline u8 checksum(u8 * buffer, u32 length) +{ + u8 sum = 0, *i = buffer; + buffer += length; + for (; i < buffer; sum += *(i++)); + return sum; +} + +static unsigned long psz, addr, length; +static int print, connect, skip; +static u8 select_sig[4]; + +static unsigned long read_efi_systab( void ) +{ + char buffer[80]; + unsigned long addr; + FILE *f = fopen("/sys/firmware/efi/systab", "r"); + if (f) { + while (fgets(buffer, 80, f)) { + if (sscanf(buffer, "ACPI20=0x%lx", &addr) == 1) + return addr; + } + fclose(f); + } + return 0; +} + +static u8 *acpi_map_memory(unsigned long where, unsigned length) +{ + unsigned long offset; + u8 *there; + int fd = open("/dev/mem", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "acpi_os_map_memory: cannot open /dev/mem\n"); + exit(1); + } + offset = where % psz; + there = mmap(NULL, length + offset, PROT_READ, MAP_PRIVATE, + fd, where - offset); + close(fd); + if (there == MAP_FAILED) return 0; + return (there + offset); +} + +static void acpi_unmap_memory(u8 * there, unsigned length) +{ + unsigned long offset = (unsigned long)there % psz; + munmap(there - offset, length + 
offset); +} + +static struct acpi_table_header *acpi_map_table(unsigned long where, char *sig) +{ + unsigned size; + struct acpi_table_header *tbl = (struct acpi_table_header *) + acpi_map_memory(where, sizeof(struct acpi_table_header)); + if (!tbl || (sig && memcmp(sig, tbl->signature, 4))) return 0; + size = tbl->length; + acpi_unmap_memory((u8 *) tbl, sizeof(struct acpi_table_header)); + return (struct acpi_table_header *)acpi_map_memory(where, size); +} + +static void acpi_unmap_table(struct acpi_table_header *tbl) +{ + acpi_unmap_memory((u8 *)tbl, tbl->length); +} + +static struct acpi_rsdp_descriptor *acpi_scan_for_rsdp(u8 *begin, u32 length) +{ + struct acpi_rsdp_descriptor *rsdp; + u8 *i, *end = begin + length; + /* Search from given start address for the requested length */ + for (i = begin; i < end; i += ACPI_RSDP_SCAN_STEP) { + /* The signature and checksum must both be correct */ + if (memcmp((char *)i, "RSD PTR ", 8)) continue; + rsdp = (struct acpi_rsdp_descriptor *)i; + /* Signature matches, check the appropriate checksum */ + if (!checksum((u8 *) rsdp, (rsdp->revision < 2) ? + ACPI_RSDP_CHECKSUM_LENGTH : + ACPI_RSDP_XCHECKSUM_LENGTH)) + /* Checksum valid, we have found a valid RSDP */ + return rsdp; + } + /* Searched entire block, no RSDP was found */ + return 0; +} + +/* + * Output data + */ +static void acpi_show_data(int fd, u8 * data, int size) +{ + char buffer[256]; + int len; + int i, remain = size; + while (remain > 0) { + len = snprintf(buffer, 256, " %04x:", size - remain); + for (i = 0; i < 16 && i < remain; i++) { + len += + snprintf(&buffer[len], 256 - len, " %02x", data[i]); + } + for (; i < 16; i++) { + len += snprintf(&buffer[len], 256 - len, " "); + } + len += snprintf(&buffer[len], 256 - len, " "); + for (i = 0; i < 16 && i < remain; i++) { + buffer[len++] = (isprint(data[i])) ? 
data[i] : '.'; + } + buffer[len++] = '\n'; + write(fd, buffer, len); + data += 16; + remain -= 16; + } +} + +/* + * Output ACPI table + */ +static void acpi_show_table(int fd, struct acpi_table_header *table, unsigned long addr) +{ + char buff[80]; + int len = snprintf(buff, 80, "%.4s @ %p\n", table->signature, (void *)addr); + write(fd, buff, len); + acpi_show_data(fd, (u8 *) table, table->length); + buff[0] = '\n'; + write(fd, buff, 1); +} + +static void write_table(int fd, struct acpi_table_header *tbl, unsigned long addr) +{ + static int select_done = 0; + if (!select_sig[0]) { + if (print) { + acpi_show_table(fd, tbl, addr); + } else { + write(fd, tbl, tbl->length); + } + } else if (!select_done && !memcmp(select_sig, tbl->signature, 4)) { + if (skip > 0) { + --skip; + return; + } + if (print) { + acpi_show_table(fd, tbl, addr); + } else { + write(fd, tbl, tbl->length); + } + select_done = 1; + } +} + +static void acpi_dump_FADT(int fd, struct acpi_table_header *tbl, unsigned long xaddr) { + struct acpi_fadt_descriptor x; + unsigned long addr; + size_t len = sizeof(struct acpi_fadt_descriptor); + if (len > tbl->length) len = tbl->length; + memcpy(&x, tbl, len); + x.header.length = len; + if (checksum((u8 *)tbl, len)) { + fprintf(stderr, "Wrong checksum for FADT!\n"); + } + if (x.header.length >= 148 && x.Xdsdt) { + addr = (unsigned long)x.Xdsdt; + if (connect) { + x.Xdsdt = lseek(fd, 0, SEEK_CUR); + } + } else if (x.header.length >= 44 && x.dsdt) { + addr = (unsigned long)x.dsdt; + if (connect) { + x.dsdt = lseek(fd, 0, SEEK_CUR); + } + } else { + fprintf(stderr, "No DSDT in FADT!\n"); + goto no_dsdt; + } + tbl = acpi_map_table(addr, DSDT_SIG); + if (!tbl) goto no_dsdt; + if (checksum((u8 *)tbl, tbl->length)) + fprintf(stderr, "Wrong checksum for DSDT!\n"); + write_table(fd, tbl, addr); + acpi_unmap_table(tbl); +no_dsdt: + if (x.header.length >= 140 && x.xfirmware_ctrl) { + addr = (unsigned long)x.xfirmware_ctrl; + if (connect) { + x.xfirmware_ctrl = lseek(fd, 
0, SEEK_CUR); + } + } else if (x.header.length >= 40 && x.firmware_ctrl) { + addr = (unsigned long)x.firmware_ctrl; + if (connect) { + x.firmware_ctrl = lseek(fd, 0, SEEK_CUR); + } + } else { + fprintf(stderr, "No FACS in FADT!\n"); + goto no_facs; + } + tbl = acpi_map_table(addr, FACS_SIG); + if (!tbl) goto no_facs; + /* do not checksum FACS */ + write_table(fd, tbl, addr); + acpi_unmap_table(tbl); +no_facs: + write_table(fd, (struct acpi_table_header *)&x, xaddr); +} + +static int acpi_dump_SDT(int fd, struct acpi_rsdp_descriptor *rsdp) +{ + struct acpi_table_header *sdt, *tbl = 0; + int xsdt = 1, i, num; + char *offset; + unsigned long addr; + if (rsdp->revision > 1 && rsdp->xsdt_physical_address) { + tbl = acpi_map_table(rsdp->xsdt_physical_address, "XSDT"); + } + if (!tbl && rsdp->rsdt_physical_address) { + xsdt = 0; + tbl = acpi_map_table(rsdp->rsdt_physical_address, "RSDT"); + } + if (!tbl) return 0; + sdt = malloc(tbl->length); + memcpy(sdt, tbl, tbl->length); + acpi_unmap_table(tbl); + if (checksum((u8 *)sdt, sdt->length)) + fprintf(stderr, "Wrong checksum for %s!\n", (xsdt)?"XSDT":"RSDT"); + num = (sdt->length - sizeof(struct acpi_table_header))/((xsdt)?sizeof(u64):sizeof(u32)); + offset = (char *)sdt + sizeof(struct acpi_table_header); + for (i = 0; i < num; ++i, offset += ((xsdt) ? sizeof(u64) : sizeof(u32))) { + addr = (xsdt) ? 
(unsigned long)(*(u64 *)offset): + (unsigned long)(*(u32 *)offset); + if (!addr) continue; + tbl = acpi_map_table(addr, 0); + if (!tbl) continue; + if (!memcmp(tbl->signature, FADT_SIG, 4)) { + acpi_dump_FADT(fd, tbl, addr); + } else { + if (checksum((u8 *)tbl, tbl->length)) + fprintf(stderr, "Wrong checksum for generic table!\n"); + write_table(fd, tbl, addr); + } + acpi_unmap_table(tbl); + if (connect) { + if (xsdt) + (*(u64*)offset) = lseek(fd, 0, SEEK_CUR); + else + (*(u32*)offset) = lseek(fd, 0, SEEK_CUR); + } + } + if (xsdt) { + addr = (unsigned long)rsdp->xsdt_physical_address; + if (connect) { + rsdp->xsdt_physical_address = lseek(fd, 0, SEEK_CUR); + } + } else { + addr = (unsigned long)rsdp->rsdt_physical_address; + if (connect) { + rsdp->rsdt_physical_address = lseek(fd, 0, SEEK_CUR); + } + } + write_table(fd, sdt, addr); + free (sdt); + return 1; +} + +#define DYNAMIC_SSDT "/sys/firmware/acpi/tables/dynamic" + +static void acpi_dump_dynamic_SSDT(int fd) +{ + struct stat file_stat; + char filename[256], *ptr; + DIR *tabledir; + struct dirent *entry; + FILE *fp; + int count, readcount, length; + struct acpi_table_header table_header, *ptable; + + if (stat(DYNAMIC_SSDT, &file_stat) == -1) { + /* The directory doesn't exist */ + return; + } + tabledir = opendir(DYNAMIC_SSDT); + if(!tabledir){ + /*can't open the directory */ + return; + } + + while ((entry = readdir(tabledir)) != 0){ + /* skip the file of . /.. */ + if (entry->d_name[0] == '.') + continue; + + sprintf(filename, "%s/%s", DYNAMIC_SSDT, entry->d_name); + fp = fopen(filename, "r"); + if (fp == NULL) { + fprintf(stderr, "Can't open the file of %s\n", + filename); + continue; + } + /* Read the Table header to parse the table length */ + count = fread(&table_header, 1, sizeof(struct acpi_table_header), fp); + if (count < sizeof(table_header)) { + /* the length is lessn than ACPI table header. 
skip it */ + fclose(fp); + continue; + } + length = table_header.length; + ptr = malloc(table_header.length); + fseek(fp, 0, SEEK_SET); + readcount = 0; + while(!feof(fp) && readcount < length) { + count = fread(ptr + readcount, 1, 256, fp); + readcount += count; + } + fclose(fp); + ptable = (struct acpi_table_header *) ptr; + if (checksum((u8 *) ptable, ptable->length)) + fprintf(stderr, "Wrong checksum " + "for dynamic SSDT table!\n"); + write_table(fd, ptable, 0); + free(ptr); + } + closedir(tabledir); + return; +} + +static void usage(const char *progname) +{ + puts("Usage:"); + printf("%s [--addr 0x1234][--table DSDT][--output filename]" + "[--binary][--length 0x456][--help]\n", progname); + puts("\t--addr 0x1234 or -a 0x1234 -- look for tables at this physical address"); + puts("\t--table DSDT or -t DSDT -- only dump table with DSDT signature"); + puts("\t--output filename or -o filename -- redirect output from stdin to filename"); + puts("\t--binary or -b -- dump data in binary form rather than in hex-dump format"); + puts("\t--length 0x456 or -l 0x456 -- works only with --addr, dump physical memory" + "\n\t\tregion without trying to understand it's contents"); + puts("\t--skip 2 or -s 2 -- skip 2 tables of the given name and output only 3rd one"); + puts("\t--help or -h -- this help message"); + exit(0); +} + +static struct option long_options[] = { + {"addr", 1, 0, 0}, + {"table", 1, 0, 0}, + {"output", 1, 0, 0}, + {"binary", 0, 0, 0}, + {"length", 1, 0, 0}, + {"skip", 1, 0, 0}, + {"help", 0, 0, 0}, + {0, 0, 0, 0} +}; +int main(int argc, char **argv) +{ + int option_index, c, fd; + u8 *raw; + struct acpi_rsdp_descriptor rsdpx, *x = 0; + char *filename = 0; + char buff[80]; + memset(select_sig, 0, 4); + print = 1; + connect = 0; + addr = length = 0; + skip = 0; + while (1) { + option_index = 0; + c = getopt_long(argc, argv, "a:t:o:bl:s:h", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 0: + switch (option_index) { + case 0: 
+ addr = strtoul(optarg, (char **)NULL, 16); + break; + case 1: + memcpy(select_sig, optarg, 4); + break; + case 2: + filename = optarg; + break; + case 3: + print = 0; + break; + case 4: + length = strtoul(optarg, (char **)NULL, 16); + break; + case 5: + skip = strtoul(optarg, (char **)NULL, 10); + break; + case 6: + usage(argv[0]); + exit(0); + } + break; + case 'a': + addr = strtoul(optarg, (char **)NULL, 16); + break; + case 't': + memcpy(select_sig, optarg, 4); + break; + case 'o': + filename = optarg; + break; + case 'b': + print = 0; + break; + case 'l': + length = strtoul(optarg, (char **)NULL, 16); + break; + case 's': + skip = strtoul(optarg, (char **)NULL, 10); + break; + case 'h': + usage(argv[0]); + exit(0); + default: + printf("Unknown option!\n"); + usage(argv[0]); + exit(0); + } + } + + fd = STDOUT_FILENO; + if (filename) { + fd = creat(filename, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); + if (fd < 0) + return fd; + } + + if (!select_sig[0] && !print) { + connect = 1; + } + + psz = sysconf(_SC_PAGESIZE); + if (length && addr) { + /* We know length and address, it means we just want a memory dump */ + if (!(raw = acpi_map_memory(addr, length))) + goto not_found; + write(fd, raw, length); + acpi_unmap_memory(raw, length); + close(fd); + return 0; + } + + length = sizeof(struct acpi_rsdp_descriptor); + if (!addr) { + addr = read_efi_systab(); + if (!addr) { + addr = ACPI_HI_RSDP_WINDOW_BASE; + length = ACPI_HI_RSDP_WINDOW_SIZE; + } + } + + if (!(raw = acpi_map_memory(addr, length)) || + !(x = acpi_scan_for_rsdp(raw, length))) + goto not_found; + + /* Find RSDP and print all found tables */ + memcpy(&rsdpx, x, sizeof(struct acpi_rsdp_descriptor)); + acpi_unmap_memory(raw, length); + if (connect) { + lseek(fd, sizeof(struct acpi_rsdp_descriptor), SEEK_SET); + } + if (!acpi_dump_SDT(fd, &rsdpx)) + goto not_found; + if (connect) { + lseek(fd, 0, SEEK_SET); + write(fd, x, (rsdpx.revision < 2) ? 
+ ACPI_RSDP_CHECKSUM_LENGTH : ACPI_RSDP_XCHECKSUM_LENGTH); + } else if (!select_sig[0] || !memcmp("RSD PTR ", select_sig, 4)) { + addr += (long)x - (long)raw; + length = snprintf(buff, 80, "RSD PTR @ %p\n", (void *)addr); + write(fd, buff, length); + acpi_show_data(fd, (u8 *) & rsdpx, (rsdpx.revision < 2) ? + ACPI_RSDP_CHECKSUM_LENGTH : ACPI_RSDP_XCHECKSUM_LENGTH); + buff[0] = '\n'; + write(fd, buff, 1); + } + acpi_dump_dynamic_SSDT(fd); + close(fd); + return 0; +not_found: + close(fd); + fprintf(stderr, "ACPI tables were not found. If you know location " + "of RSD PTR table (from dmesg, etc), " + "supply it with either --addr or -a option\n"); + return 1; +} diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index a93e06cfcc2..cf397bd26d0 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -111,7 +111,7 @@ GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo; export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS # check if compiler option is supported -cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} +cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} # use '-Os' optimization if available, else use -O2 OPTIMIZATION := $(call cc-supports,-Os,-O2) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 74e44507dfe..e4d0690cccf 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -4,15 +4,11 @@ turbostat \- Report processor frequency and idle statistics .SH SYNOPSIS .ft B .B turbostat -.RB [ "\-s" ] -.RB [ "\-v" ] -.RB [ "\-M MSR#" ] +.RB [ Options ] .RB command .br .B turbostat -.RB [ "\-s" ] -.RB [ "\-v" ] -.RB [ "\-M MSR#" ] +.RB [ Options ] .RB [ "\-i interval_sec" ] .SH DESCRIPTION \fBturbostat \fP reports processor topology, frequency @@ -27,16 +23,23 @@ supports an "invariant" TSC, 
plus the APERF and MPERF MSRs. on processors that additionally support C-state residency counters. .SS Options -The \fB-s\fP option limits output to a 1-line system summary for each interval. +The \fB-p\fP option limits output to the 1st thread in 1st core of each package. .PP -The \fB-c\fP option limits output to the 1st thread in each core. +The \fB-P\fP option limits output to the 1st thread in each Package. .PP -The \fB-p\fP option limits output to the 1st thread in each package. +The \fB-S\fP option limits output to a 1-line System Summary for each interval. .PP The \fB-v\fP option increases verbosity. .PP -The \fB-M MSR#\fP option dumps the specified MSR, -in addition to the usual frequency and idle statistics. +The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34" +.PP +The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. +.PP +The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. +.PP +The \fB-m MSR#\fP option includes the specified 32-bit MSR value. +.PP +The \fB-M MSR#\fP option includes the specified 64-bit MSR value. .PP The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds. The default is 5 seconds. @@ -150,6 +153,29 @@ Note that turbostat reports average GHz of 3.63, while the arithmetic average of the GHz column above is lower. This is a weighted average, where the weight is %c0. ie. it is the total number of un-halted cycles elapsed per time divided by the number of CPUs. +.SH SMI COUNTING EXAMPLE +On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter. +Using the -m option, you can display how many SMIs have fired since reset, or if there +are SMIs during the measurement interval, you can display the delta using the -c option. 
+.nf +[root@x980 ~]# turbostat -m 0x34 +cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6 + 1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55 + 0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55 + 0 6 0.14 1.63 3.38 0x00000056 5.30 + 1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52 + 1 8 0.10 1.65 3.38 0x00000056 18.05 + 2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50 + 2 10 0.10 1.63 3.38 0x00000056 6.93 + 8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16 + 8 7 0.08 1.64 3.38 0x00000056 5.12 + 9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38 + 9 9 0.09 1.68 3.38 0x00000056 9.32 + 10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23 + 10 11 1.72 1.65 3.38 0x00000056 15.05 +^C +[root@x980 ~]# +.fi .SH NOTES .B "turbostat " @@ -165,6 +191,13 @@ may work poorly on Linux-2.6.20 through 2.6.29, as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF in those kernels. +If the TSC column does not make sense, then +the other numbers will also make no sense. +Turbostat is lightweight, and its data collection is not atomic. +These issues are usually caused by an extremely short measurement +interval (much less than 1 second), or system activity that prevents +turbostat from being able to run on all CPUS to quickly collect data. + The APERF, MPERF MSRs are defined to count non-halted cycles. Although it is not guaranteed by the architecture, turbostat assumes that they count at TSC rate, which is true on all processors tested to date. 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 861d7719020..2655ae9a3ad 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -35,9 +35,9 @@ #include <ctype.h> #include <sched.h> -#define MSR_TSC 0x10 #define MSR_NEHALEM_PLATFORM_INFO 0xCE #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD +#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE #define MSR_APERF 0xE8 #define MSR_MPERF 0xE7 #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ @@ -62,7 +62,11 @@ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nehalem_platform_info; unsigned int do_nehalem_turbo_ratio_limit; -unsigned int extra_msr_offset; +unsigned int do_ivt_turbo_ratio_limit; +unsigned int extra_msr_offset32; +unsigned int extra_msr_offset64; +unsigned int extra_delta_offset32; +unsigned int extra_delta_offset64; double bclk; unsigned int show_pkg; unsigned int show_core; @@ -83,7 +87,10 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; /* derived */ - unsigned long long extra_msr; + unsigned long long extra_msr64; + unsigned long long extra_delta64; + unsigned long long extra_msr32; + unsigned long long extra_delta32; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 @@ -222,6 +229,14 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " GHz"); outp += sprintf(outp, " TSC"); + if (extra_delta_offset32) + outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); + if (extra_delta_offset64) + outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); + if (extra_msr_offset32) + outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); + if (extra_msr_offset64) + outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); if (do_nhm_cstates) outp += sprintf(outp, " %%c1"); if (do_nhm_cstates) @@ -238,8 +253,6 @@ void print_header(void) outp += sprintf(outp, " %%pc6"); if (do_snb_cstates) outp += sprintf(outp, 
" %%pc7"); - if (extra_msr_offset) - outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); outp += sprintf(outp, "\n"); } @@ -255,8 +268,14 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "aperf: %016llX\n", t->aperf); fprintf(stderr, "mperf: %016llX\n", t->mperf); fprintf(stderr, "c1: %016llX\n", t->c1); + fprintf(stderr, "msr0x%x: %08llX\n", + extra_delta_offset32, t->extra_delta32); fprintf(stderr, "msr0x%x: %016llX\n", - extra_msr_offset, t->extra_msr); + extra_delta_offset64, t->extra_delta64); + fprintf(stderr, "msr0x%x: %08llX\n", + extra_msr_offset32, t->extra_msr32); + fprintf(stderr, "msr0x%x: %016llX\n", + extra_msr_offset64, t->extra_msr64); } if (c) { @@ -360,6 +379,21 @@ int format_counters(struct thread_data *t, struct core_data *c, /* TSC */ outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); + /* delta */ + if (extra_delta_offset32) + outp += sprintf(outp, " %11llu", t->extra_delta32); + + /* DELTA */ + if (extra_delta_offset64) + outp += sprintf(outp, " %11llu", t->extra_delta64); + /* msr */ + if (extra_msr_offset32) + outp += sprintf(outp, " 0x%08llx", t->extra_msr32); + + /* MSR */ + if (extra_msr_offset64) + outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + if (do_nhm_cstates) { if (!skip_c1) outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); @@ -391,8 +425,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); done: - if (extra_msr_offset) - outp += sprintf(outp, " 0x%016llx", t->extra_msr); outp += sprintf(outp, "\n"); return 0; @@ -502,10 +534,16 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } + old->extra_delta32 = new->extra_delta32 - old->extra_delta32; + old->extra_delta32 &= 0xFFFFFFFF; + + old->extra_delta64 = new->extra_delta64 - old->extra_delta64; + /* - * for "extra msr", just copy the latest w/o subtracting + * Extra MSR 
is just a snapshot, simply copy latest w/o subtracting */ - old->extra_msr = new->extra_msr; + old->extra_msr32 = new->extra_msr32; + old->extra_msr64 = new->extra_msr64; } int delta_cpu(struct thread_data *t, struct core_data *c, @@ -533,6 +571,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; + t->extra_delta32 = 0; + t->extra_delta64 = 0; + /* tells format_counters to dump all fields from this set */ t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; @@ -553,6 +594,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.threads.mperf += t->mperf; average.threads.c1 += t->c1; + average.threads.extra_delta32 += t->extra_delta32; + average.threads.extra_delta64 += t->extra_delta64; + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -588,6 +632,11 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; + average.threads.extra_delta32 /= topo.num_cpus; + average.threads.extra_delta32 &= 0xFFFFFFFF; + + average.threads.extra_delta64 /= topo.num_cpus; + average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -629,8 +678,24 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -4; } - if (extra_msr_offset) - if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) + if (extra_delta_offset32) { + if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) + return -5; + t->extra_delta32 &= 0xFFFFFFFF; + } + + if (extra_delta_offset64) + if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) + return -5; + + if (extra_msr_offset32) { + if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) + return -5; + t->extra_msr32 &= 0xFFFFFFFF; + } + + if (extra_msr_offset64) + if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) return -5; /* 
collect core counters only for 1st thread in core */ @@ -677,6 +742,9 @@ void print_verbose_header(void) get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", ratio, bclk, ratio * bclk); @@ -685,14 +753,84 @@ void print_verbose_header(void) fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", ratio, bclk, ratio * bclk); + if (!do_ivt_turbo_ratio_limit) + goto print_nhm_turbo_ratio_limits; + + get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 24) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + ratio, bclk, ratio * bclk); + +print_nhm_turbo_ratio_limits: if (!do_nehalem_turbo_ratio_limit) return; get_msr(0, 
MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + ratio, bclk, ratio * bclk); + ratio = (msr >> 24) & 0xFF; if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", @@ -712,7 +850,6 @@ void print_verbose_header(void) if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); - } void free_all_buffers(void) @@ -1038,7 +1175,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ - case 0x3D: /* IVB Xeon */ + case 0x3E: /* IVB Xeon */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1046,6 +1183,22 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3E: /* IVB Xeon */ + return 1; + default: + return 0; + } +} + int is_snb(unsigned int family, unsigned int model) { @@ -1056,7 +1209,7 @@ int is_snb(unsigned int family, unsigned int model) case 0x2A: case 0x2D: case 0x3A: /* IVB */ - case 0x3D: /* IVB Xeon */ + case 0x3E: /* IVB Xeon */ return 1; } return 0; @@ -1145,12 +1298,13 @@ void check_cpuid() bclk = discover_bclk(family, model); 
do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); + do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); } void usage() { - fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n", + fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", progname); exit(1); } @@ -1440,15 +1594,15 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { + while ((opt = getopt(argc, argv, "+pPSvisc:sC:m:M:")) != -1) { switch (opt) { - case 'c': + case 'p': show_core_only++; break; - case 'p': + case 'P': show_pkg_only++; break; - case 's': + case 'S': summary_only++; break; case 'v': @@ -1457,10 +1611,20 @@ void cmdline(int argc, char **argv) case 'i': interval_sec = atoi(optarg); break; + case 'c': + sscanf(optarg, "%x", &extra_delta_offset32); + break; + case 's': + extra_delta_offset32 = 0x34; /* SMI counter */ + break; + case 'C': + sscanf(optarg, "%x", &extra_delta_offset64); + break; + case 'm': + sscanf(optarg, "%x", &extra_msr_offset32); + break; case 'M': - sscanf(optarg, "%x", &extra_msr_offset); - if (verbose > 1) - fprintf(stderr, "MSR 0x%X\n", extra_msr_offset); + sscanf(optarg, "%x", &extra_msr_offset64); break; default: usage(); @@ -1473,7 +1637,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose > 1) - fprintf(stderr, "turbostat v2.0 May 16, 2012" + fprintf(stderr, "turbostat v2.1 October 6, 2012" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index c05bcd293d8..b51d787176d 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1873,10 +1873,10 @@ sub make_oldconfig { apply_min_config; } - if (!run_command "$make oldnoconfig") { - # Perhaps oldnoconfig doesn't exist in this version of the kernel + if (!run_command "$make olddefconfig") { + # Perhaps olddefconfig doesn't 
exist in this version of the kernel # try a yes '' | oldconfig - doprint "oldnoconfig failed, trying yes '' | make oldconfig\n"; + doprint "olddefconfig failed, trying yes '' | make oldconfig\n"; run_command "yes '' | $make oldconfig" or dodie "failed make config oldconfig"; } @@ -1929,7 +1929,7 @@ sub build { # old config can ask questions if ($type eq "oldconfig") { - $type = "oldnoconfig"; + $type = "olddefconfig"; # allow for empty configs run_command "touch $output_config"; @@ -1959,7 +1959,7 @@ sub build { load_force_config($minconfig); } - if ($type ne "oldnoconfig") { + if ($type ne "olddefconfig") { run_command "$make $type" or dodie "failed make config"; } @@ -2458,8 +2458,7 @@ my %config_set; # config_off holds the set of configs that the bad config had disabled. # We need to record them and set them in the .config when running -# oldnoconfig, because oldnoconfig does not turn off new symbols, but -# instead just keeps the defaults. +# olddefconfig, because olddefconfig keeps the defaults. 
my %config_off; # config_off_tmp holds a set of configs to turn off for now @@ -3250,7 +3249,7 @@ sub test_this_config { } # Remove this config from the list of configs - # do a make oldnoconfig and then read the resulting + # do a make olddefconfig and then read the resulting # .config to make sure it is missing the config that # we had before my %configs = %min_configs; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 85baf11e2ac..43480149119 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile new file mode 100644 index 00000000000..19806ed62f5 --- /dev/null +++ b/tools/testing/selftests/epoll/Makefile @@ -0,0 +1,11 @@ +# Makefile for epoll selftests + +all: test_epoll +%: %.c + gcc -pthread -g -o $@ $^ + +run_tests: all + ./test_epoll + +clean: + $(RM) test_epoll diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c new file mode 100644 index 00000000000..e0fcff1e833 --- /dev/null +++ b/tools/testing/selftests/epoll/test_epoll.c @@ -0,0 +1,344 @@ +/* + * tools/testing/selftests/epoll/test_epoll.c + * + * Copyright 2012 Adobe Systems Incorporated + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Paton J. 
Lewis <palewis@adobe.com> + * + */ + +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/epoll.h> +#include <sys/socket.h> + +/* + * A pointer to an epoll_item_private structure will be stored in the epoll + * item's event structure so that we can get access to the epoll_item_private + * data after calling epoll_wait: + */ +struct epoll_item_private { + int index; /* Position of this struct within the epoll_items array. */ + int fd; + uint32_t events; + pthread_mutex_t mutex; /* Guards the following variables... */ + int stop; + int status; /* Stores any error encountered while handling item. */ + /* The following variable allows us to test whether we have encountered + a problem while attempting to cancel and delete the associated + event. When the test program exits, 'deleted' should be exactly + one. If it is greater than one, then the failed test reflects a real + world situation where we would have tried to access the epoll item's + private data after deleting it: */ + int deleted; +}; + +struct epoll_item_private *epoll_items; + +/* + * Delete the specified item from the epoll set. In a real-world secneario this + * is where we would free the associated data structure, but in this testing + * environment we retain the structure so that we can test for double-deletion: + */ +void delete_item(int index) +{ + __sync_fetch_and_add(&epoll_items[index].deleted, 1); +} + +/* + * A pointer to a read_thread_data structure will be passed as the argument to + * each read thread: + */ +struct read_thread_data { + int stop; + int status; /* Indicates any error encountered by the read thread. 
*/ + int epoll_set; +}; + +/* + * The function executed by the read threads: + */ +void *read_thread_function(void *function_data) +{ + struct read_thread_data *thread_data = + (struct read_thread_data *)function_data; + struct epoll_event event_data; + struct epoll_item_private *item_data; + char socket_data; + + /* Handle events until we encounter an error or this thread's 'stop' + condition is set: */ + while (1) { + int result = epoll_wait(thread_data->epoll_set, + &event_data, + 1, /* Number of desired events */ + 1000); /* Timeout in ms */ + if (result < 0) { + /* Breakpoints signal all threads. Ignore that while + debugging: */ + if (errno == EINTR) + continue; + thread_data->status = errno; + return 0; + } else if (thread_data->stop) + return 0; + else if (result == 0) /* Timeout */ + continue; + + /* We need the mutex here because checking for the stop + condition and re-enabling the epoll item need to be done + together as one atomic operation when EPOLL_CTL_DISABLE is + available: */ + item_data = (struct epoll_item_private *)event_data.data.ptr; + pthread_mutex_lock(&item_data->mutex); + + /* Remove the item from the epoll set if we want to stop + handling that event: */ + if (item_data->stop) + delete_item(item_data->index); + else { + /* Clear the data that was written to the other end of + our non-blocking socket: */ + do { + if (read(item_data->fd, &socket_data, 1) < 1) { + if ((errno == EAGAIN) || + (errno == EWOULDBLOCK)) + break; + else + goto error_unlock; + } + } while (item_data->events & EPOLLET); + + /* The item was one-shot, so re-enable it: */ + event_data.events = item_data->events; + if (epoll_ctl(thread_data->epoll_set, + EPOLL_CTL_MOD, + item_data->fd, + &event_data) < 0) + goto error_unlock; + } + + pthread_mutex_unlock(&item_data->mutex); + } + +error_unlock: + thread_data->status = item_data->status = errno; + pthread_mutex_unlock(&item_data->mutex); + return 0; +} + +/* + * A pointer to a write_thread_data structure will be passed 
as the argument to + * the write thread: + */ +struct write_thread_data { + int stop; + int status; /* Indicates any error encountered by the write thread. */ + int n_fds; + int *fds; +}; + +/* + * The function executed by the write thread. It writes a single byte to each + * socket in turn until the stop condition for this thread is set. If writing to + * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for + * the moment and just move on to the next socket in the list. We don't care + * about the order in which we deliver events to the epoll set. In fact we don't + * care about the data we're writing to the pipes at all; we just want to + * trigger epoll events: + */ +void *write_thread_function(void *function_data) +{ + const char data = 'X'; + int index; + struct write_thread_data *thread_data = + (struct write_thread_data *)function_data; + while (!write_thread_data->stop) + for (index = 0; + !thread_data->stop && (index < thread_data->n_fds); + ++index) + if ((write(thread_data->fds[index], &data, 1) < 1) && + (errno != EAGAIN) && + (errno != EWOULDBLOCK)) { + write_thread_data->status = errno; + return; + } +} + +/* + * Arguments are currently ignored: + */ +int main(int argc, char **argv) +{ + const int n_read_threads = 100; + const int n_epoll_items = 500; + int index; + int epoll_set = epoll_create1(0); + struct write_thread_data write_thread_data = { + 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) + }; + struct read_thread_data *read_thread_data = + malloc(n_read_threads * sizeof(struct read_thread_data)); + pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); + pthread_t write_thread; + + printf("-----------------\n"); + printf("Runing test_epoll\n"); + printf("-----------------\n"); + + epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); + + if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || + read_thread_data == 0 || read_threads == 0) + goto error; + + if 
(sysconf(_SC_NPROCESSORS_ONLN) < 2) { + printf("Error: please run this test on a multi-core system.\n"); + goto error; + } + + /* Create the socket pairs and epoll items: */ + for (index = 0; index < n_epoll_items; ++index) { + int socket_pair[2]; + struct epoll_event event_data; + if (socketpair(AF_UNIX, + SOCK_STREAM | SOCK_NONBLOCK, + 0, + socket_pair) < 0) + goto error; + write_thread_data.fds[index] = socket_pair[0]; + epoll_items[index].index = index; + epoll_items[index].fd = socket_pair[1]; + if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) + goto error; + /* We always use EPOLLONESHOT because this test is currently + structured to demonstrate the need for EPOLL_CTL_DISABLE, + which only produces useful information in the EPOLLONESHOT + case (without EPOLLONESHOT, calling epoll_ctl with + EPOLL_CTL_DISABLE will never return EBUSY). If support for + testing events without EPOLLONESHOT is desired, it should + probably be implemented in a separate unit test. */ + epoll_items[index].events = EPOLLIN | EPOLLONESHOT; + if (index < n_epoll_items / 2) + epoll_items[index].events |= EPOLLET; + epoll_items[index].stop = 0; + epoll_items[index].status = 0; + epoll_items[index].deleted = 0; + event_data.events = epoll_items[index].events; + event_data.data.ptr = &epoll_items[index]; + if (epoll_ctl(epoll_set, + EPOLL_CTL_ADD, + epoll_items[index].fd, + &event_data) < 0) + goto error; + } + + /* Create and start the read threads: */ + for (index = 0; index < n_read_threads; ++index) { + read_thread_data[index].stop = 0; + read_thread_data[index].status = 0; + read_thread_data[index].epoll_set = epoll_set; + if (pthread_create(&read_threads[index], + NULL, + read_thread_function, + &read_thread_data[index]) != 0) + goto error; + } + + if (pthread_create(&write_thread, + NULL, + write_thread_function, + &write_thread_data) != 0) + goto error; + + /* Cancel all event pollers: */ +#ifdef EPOLL_CTL_DISABLE + for (index = 0; index < n_epoll_items; ++index) { + 
pthread_mutex_lock(&epoll_items[index].mutex); + ++epoll_items[index].stop; + if (epoll_ctl(epoll_set, + EPOLL_CTL_DISABLE, + epoll_items[index].fd, + NULL) == 0) + delete_item(index); + else if (errno != EBUSY) { + pthread_mutex_unlock(&epoll_items[index].mutex); + goto error; + } + /* EBUSY means events were being handled; allow the other thread + to delete the item. */ + pthread_mutex_unlock(&epoll_items[index].mutex); + } +#else + for (index = 0; index < n_epoll_items; ++index) { + pthread_mutex_lock(&epoll_items[index].mutex); + ++epoll_items[index].stop; + pthread_mutex_unlock(&epoll_items[index].mutex); + /* Wait in case a thread running read_thread_function is + currently executing code between epoll_wait and + pthread_mutex_lock with this item. Note that a longer delay + would make double-deletion less likely (at the expense of + performance), but there is no guarantee that any delay would + ever be sufficient. Note also that we delete all event + pollers at once for testing purposes, but in a real-world + environment we are likely to want to be able to cancel event + pollers at arbitrary times. Therefore we can't improve this + situation by just splitting this loop into two loops + (i.e. signal 'stop' for all items, sleep, and then delete all + items). 
We also can't fix the problem via EPOLL_CTL_DEL + because that command can't prevent the case where some other + thread is executing read_thread_function within the region + mentioned above: */ + usleep(1); + pthread_mutex_lock(&epoll_items[index].mutex); + if (!epoll_items[index].deleted) + delete_item(index); + pthread_mutex_unlock(&epoll_items[index].mutex); + } +#endif + + /* Shut down the read threads: */ + for (index = 0; index < n_read_threads; ++index) + __sync_fetch_and_add(&read_thread_data[index].stop, 1); + for (index = 0; index < n_read_threads; ++index) { + if (pthread_join(read_threads[index], NULL) != 0) + goto error; + if (read_thread_data[index].status) + goto error; + } + + /* Shut down the write thread: */ + __sync_fetch_and_add(&write_thread_data.stop, 1); + if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) + goto error; + + /* Check for final error conditions: */ + for (index = 0; index < n_epoll_items; ++index) { + if (epoll_items[index].status != 0) + goto error; + if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) + goto error; + } + for (index = 0; index < n_epoll_items; ++index) + if (epoll_items[index].deleted != 1) { + printf("Error: item data deleted %1d times.\n", + epoll_items[index].deleted); + goto error; + } + + printf("[PASS]\n"); + return 0; + + error: + printf("[FAIL]\n"); + return errno; +} diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index b0adb2710c0..68d0734b208 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -253,9 +253,6 @@ static int find_testdev(const char *name, const struct stat *sb, int flag) if (flag != FTW_F) return 0; - /* ignore /proc/bus/usb/{devices,drivers} */ - if (strrchr(name, '/')[1] == 'd') - return 0; fd = fopen(name, "rb"); if (!fd) { @@ -356,28 +353,8 @@ restart: static const char *usbfs_dir_find(void) { - static char usbfs_path_0[] = "/dev/usb/devices"; - static char usbfs_path_1[] = "/proc/bus/usb/devices"; static char udev_usb_path[] = 
"/dev/bus/usb"; - static char *const usbfs_paths[] = { - usbfs_path_0, usbfs_path_1 - }; - - static char *const * - end = usbfs_paths + sizeof usbfs_paths / sizeof *usbfs_paths; - - char *const *it = usbfs_paths; - do { - int fd = open(*it, O_RDONLY); - close(fd); - if (fd >= 0) { - strrchr(*it, '/')[0] = '\0'; - return *it; - } - } while (++it != end); - - /* real device-nodes managed by udev */ if (access(udev_usb_path, F_OK) == 0) return udev_usb_path; @@ -489,7 +466,7 @@ usage: goto usage; if (!all && !device) { fprintf (stderr, "must specify '-a' or '-D dev', " - "or DEVICE=/proc/bus/usb/BBB/DDD in env\n"); + "or DEVICE=/dev/bus/usb/BBB/DDD in env\n"); goto usage; } diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile new file mode 100644 index 00000000000..0d238163347 --- /dev/null +++ b/tools/virtio/virtio-trace/Makefile @@ -0,0 +1,13 @@ +CC = gcc +CFLAGS = -O2 -Wall -pthread + +all: trace-agent + +.c.o: + $(CC) $(CFLAGS) -c $^ -o $@ + +trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o + $(CC) $(CFLAGS) -o $@ $^ + +clean: + rm -f *.o trace-agent diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README new file mode 100644 index 00000000000..b64845b823a --- /dev/null +++ b/tools/virtio/virtio-trace/README @@ -0,0 +1,118 @@ +Trace Agent for virtio-trace +============================ + +Trace agent is a user tool for sending trace data of a guest to a Host in low +overhead. Trace agent has the following functions: + - splice a page of ring-buffer to read_pipe without memory copying + - splice the page from write_pipe to virtio-console without memory copying + - write trace data to stdout by using -o option + - controlled by start/stop orders from a Host + +The trace agent operates as follows: + 1) Initialize all structures. + 2) Create a read/write thread per CPU. Each thread is bound to a CPU. + The read/write threads hold it. + 3) A controller thread does poll() for a start order of a host. 
+ 4) After the controller of the trace agent receives a start order from a host, + the controller wakes read/write threads. + 5) The read/write threads start to read trace data from ring-buffers and + write the data to virtio-serial. + 6) If the controller receives a stop order from a host, the read/write threads + stop reading trace data. + + +Files +===== + +README: this file +Makefile: Makefile of trace agent for virtio-trace +trace-agent.c: includes main function, sets up for operating trace agent +trace-agent.h: includes all structures and some macros +trace-agent-ctl.c: includes controller function for read/write threads +trace-agent-rw.c: includes read/write threads function + + +Setup +===== + +To use this trace agent for virtio-trace, we need to prepare some virtio-serial +I/Fs. + +1) Make FIFO in a host + virtio-trace uses one virtio-serial pipe per CPU as a trace data path, plus +one pipe as a control path, so FIFOs (named pipes) should be created as follows: + # mkdir /tmp/virtio-trace/ + # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out} + # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out} + +For example, if a guest uses three CPUs, the names are + trace-path-cpu{0,1,2}.{in,out} +and + agent-ctl-path.{in,out}. + +2) Set up of virtio-serial pipe in a host + Add qemu option to use virtio-serial pipe. + + ##virtio-serial device## + -device virtio-serial-pci,id=virtio-serial0\ + ##control path## + -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\ + -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\ + id=channel0,name=agent-ctl-path\ + ##data path## + -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\ + -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\ + id=channel1,name=trace-path-cpu0\ + ... + +If you manage guests with libvirt, add the following tags to domain XML files. +Then, libvirt passes the same command option to qemu. 
+ + <channel type='pipe'> + <source path='/tmp/virtio-trace/agent-ctl-path'/> + <target type='virtio' name='agent-ctl-path'/> + <address type='virtio-serial' controller='0' bus='0' port='0'/> + </channel> + <channel type='pipe'> + <source path='/tmp/virtio-trace/trace-path-cpu0'/> + <target type='virtio' name='trace-path-cpu0'/> + <address type='virtio-serial' controller='0' bus='0' port='1'/> + </channel> + ... +Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For +example, if a guest uses three CPUs, chardev names should be trace-path-cpu0, +trace-path-cpu1, trace-path-cpu2, and agent-ctl-path. + +3) Boot the guest + You can find the chardevs in /dev/virtio-ports/ in the guest. + + +Run +=== + +0) Build trace agent in a guest + $ make + +1) Enable ftrace in the guest + <Example> + # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + +2) Run trace agent in the guest + This agent must be operated as root. + # ./trace-agent +The read/write threads in the agent wait for a start order from the host. If you +add the -o option, trace data are output via stdout in the guest. + +3) Open FIFO in a host + # cat /tmp/virtio-trace/trace-path-cpu0.out +If a host does not open these, trace data get stuck in buffers of virtio. Then, +the guest will stop, because chardev in QEMU is blocking. This blocking mode may +be solved in the future. + +4) Start to read trace data by ordering from a host + A host injects a read start order to the guest via virtio-serial. + # echo 1 > /tmp/virtio-trace/agent-ctl-path.in + +5) Stop to read trace data by ordering from a host + A host injects a read stop order to the guest via virtio-serial. 
+ # echo 0 > /tmp/virtio-trace/agent-ctl-path.in diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c new file mode 100644 index 00000000000..a2d0403c4f9 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent-ctl.c @@ -0,0 +1,137 @@ +/* + * Controller of read/write threads for virtio-trace + * + * Copyright (C) 2012 Hitachi, Ltd. + * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Licensed under GPL version 2 only. + * + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "trace-agent.h" + +#define HOST_MSG_SIZE 256 +#define EVENT_WAIT_MSEC 100 + +static volatile sig_atomic_t global_signal_val; +bool global_sig_receive; /* default false */ +bool global_run_operation; /* default false*/ + +/* Handle SIGTERM/SIGINT/SIGQUIT to exit */ +static void signal_handler(int sig) +{ + global_signal_val = sig; +} + +int rw_ctl_init(const char *ctl_path) +{ + int ctl_fd; + + ctl_fd = open(ctl_path, O_RDONLY); + if (ctl_fd == -1) { + pr_err("Cannot open ctl_fd\n"); + goto error; + } + + return ctl_fd; + +error: + exit(EXIT_FAILURE); +} + +static int wait_order(int ctl_fd) +{ + struct pollfd poll_fd; + int ret = 0; + + while (!global_sig_receive) { + poll_fd.fd = ctl_fd; + poll_fd.events = POLLIN; + + ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC); + + if (global_signal_val) { + global_sig_receive = true; + pr_info("Receive interrupt %d\n", global_signal_val); + + /* Wakes rw-threads when they are sleeping */ + if (!global_run_operation) + pthread_cond_broadcast(&cond_wakeup); + + ret = -1; + break; + } + + if (ret < 0) { + pr_err("Polling error\n"); + goto error; + } + + if (ret) + break; + }; + + return ret; + +error: + exit(EXIT_FAILURE); +} + +/* + * contol read/write threads by handling global_run_operation + */ +void *rw_ctl_loop(int ctl_fd) +{ + 
ssize_t rlen; + char buf[HOST_MSG_SIZE]; + int ret; + + /* Setup signal handlers */ + signal(SIGTERM, signal_handler); + signal(SIGINT, signal_handler); + signal(SIGQUIT, signal_handler); + + while (!global_sig_receive) { + + ret = wait_order(ctl_fd); + if (ret < 0) + break; + + rlen = read(ctl_fd, buf, sizeof(buf)); + if (rlen < 0) { + pr_err("read data error in ctl thread\n"); + goto error; + } + + if (rlen == 2 && buf[0] == '1') { + /* + * If host writes '1' to a control path, + * this controller wakes all read/write threads. + */ + global_run_operation = true; + pthread_cond_broadcast(&cond_wakeup); + pr_debug("Wake up all read/write threads\n"); + } else if (rlen == 2 && buf[0] == '0') { + /* + * If host writes '0' to a control path, read/write + * threads will wait for notification from Host. + */ + global_run_operation = false; + pr_debug("Stop all read/write threads\n"); + } else + pr_info("Invalid host notification: %s\n", buf); + } + + return NULL; + +error: + exit(EXIT_FAILURE); +} diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c new file mode 100644 index 00000000000..3aace5ea484 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent-rw.c @@ -0,0 +1,192 @@ +/* + * Read/write thread of a guest agent for virtio-trace + * + * Copyright (C) 2012 Hitachi, Ltd. + * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Licensed under GPL version 2 only. 
+ * + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/syscall.h> +#include "trace-agent.h" + +#define READ_WAIT_USEC 100000 + +void *rw_thread_info_new(void) +{ + struct rw_thread_info *rw_ti; + + rw_ti = zalloc(sizeof(struct rw_thread_info)); + if (rw_ti == NULL) { + pr_err("rw_thread_info zalloc error\n"); + exit(EXIT_FAILURE); + } + + rw_ti->cpu_num = -1; + rw_ti->in_fd = -1; + rw_ti->out_fd = -1; + rw_ti->read_pipe = -1; + rw_ti->write_pipe = -1; + rw_ti->pipe_size = PIPE_INIT; + + return rw_ti; +} + +void *rw_thread_init(int cpu, const char *in_path, const char *out_path, + bool stdout_flag, unsigned long pipe_size, + struct rw_thread_info *rw_ti) +{ + int data_pipe[2]; + + rw_ti->cpu_num = cpu; + + /* set read(input) fd */ + rw_ti->in_fd = open(in_path, O_RDONLY); + if (rw_ti->in_fd == -1) { + pr_err("Could not open in_fd (CPU:%d)\n", cpu); + goto error; + } + + /* set write(output) fd */ + if (!stdout_flag) { + /* virtio-serial output mode */ + rw_ti->out_fd = open(out_path, O_WRONLY); + if (rw_ti->out_fd == -1) { + pr_err("Could not open out_fd (CPU:%d)\n", cpu); + goto error; + } + } else + /* stdout mode */ + rw_ti->out_fd = STDOUT_FILENO; + + if (pipe2(data_pipe, O_NONBLOCK) < 0) { + pr_err("Could not create pipe in rw-thread(%d)\n", cpu); + goto error; + } + + /* + * Size of pipe is 64kB in default based on fs/pipe.c. + * To read/write trace data speedy, pipe size is changed. 
+	 */
+	if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
+		pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+		goto error;
+	}
+
+	rw_ti->read_pipe = data_pipe[1];
+	rw_ti->write_pipe = data_pipe[0];
+	rw_ti->pipe_size = pipe_size;
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/* Pin the calling thread to cpu_num; a failure is only reported */
+static void bind_cpu(int cpu_num)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu_num, &mask);
+
+	/* a pid of zero makes sched_setaffinity() act on the calling thread */
+	if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
+		pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
+}
+
+static void *rw_thread_main(void *thread_info)
+{
+	ssize_t rlen, wlen;
+	ssize_t ret;
+	struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+	bind_cpu(ts->cpu_num);
+
+	while (1) {
+		/* Wait until the host OS orders a read of trace data */
+		if (!global_run_operation) {
+			pthread_mutex_lock(&mutex_notify);
+			pthread_cond_wait(&cond_wakeup, &mutex_notify);
+			pthread_mutex_unlock(&mutex_notify);
+		}
+
+		if (global_sig_receive)
+			break;
+
+		/*
+		 * Each thread reads only the trace_pipe_raw of the cpu it is
+		 * bound to, so the threads never contend for the same file.
+		 */
+		rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+				ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+		if (rlen < 0) {
+			pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+			goto error;
+		} else if (rlen == 0) {
+			/*
+			 * splice() returned 0: there is no trace data, or
+			 * what is buffered does not yet fill a page and so
+			 * cannot be read. Sleep briefly to let the
+			 * ring-buffer fill, then retry.
+			 */
+			usleep(READ_WAIT_USEC);
+			pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+			continue;
+		}
+
+		wlen = 0;
+
+		do {
+			ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+					rlen - wlen,
+					SPLICE_F_MOVE | SPLICE_F_MORE);
+
+			if (ret < 0) {
+				pr_err("Splice_write in rw-thread(%d)\n",
+								ts->cpu_num);
+				goto error;
+			} else if (ret == 0)
+				/*
+				 * If the host reader cannot keep up with the
+				 * trace data, the guest would be stalled,
+				 * because the QEMU char device does not
+				 * support non-blocking mode. The writer
+				 * therefore sleeps here instead.
+				 * This sleep can be removed once non-blocking
+				 * mode is supported.
+				 */
+				sleep(1);
+			wlen += ret;
+		} while (wlen < rlen);
+	}
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+	int ret;
+	pthread_t rw_thread_per_cpu;
+
+	ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
+	if (ret != 0) {
+		pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+		exit(EXIT_FAILURE);
+	}
+
+	return rw_thread_per_cpu;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 00000000000..0a0a7dd4eff
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,318 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ *            Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE		(sysconf(_SC_PAGE_SIZE))
+#define PIPE_DEF_BUFS		16
+#define PIPE_MIN_SIZE		(PAGE_SIZE*PIPE_DEF_BUFS)
+#define PIPE_MAX_SIZE		(1024*1024)
+#define READ_PATH_FMT	\
+		"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define WRITE_PATH_FMT		"/dev/virtio-ports/trace-path-cpu%d"
+#define CTL_PATH		"/dev/virtio-ports/agent-ctl-path"
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
+
+/*
+ * Return the number of configured CPUs. Exits if the count cannot be
+ * read or exceeds MAX_CPUS.
+ */
+static int get_total_cpus(void)
+{
+	int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+	if (nr_cpus <= 0) {
+		pr_err("Could not read cpus\n");
+		goto error;
+	} else if (nr_cpus > MAX_CPUS) {
+		pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+		goto error;
+	}
+
+	return nr_cpus;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Allocate an agent_info with default settings and one rw_thread_info
+ * per CPU. Exits if the agent_info allocation fails.
+ */
+static void *agent_info_new(void)
+{
+	struct agent_info *s;
+	int i;
+
+	s = zalloc(sizeof(struct agent_info));
+	if (s == NULL) {
+		pr_err("agent_info zalloc error\n");
+		exit(EXIT_FAILURE);
+	}
+
+	s->pipe_size = PIPE_INIT;
+	s->use_stdout = false;
+	s->cpus = get_total_cpus();
+	s->ctl_fd = -1;
+
+	/* read/write threads init */
+	for (i = 0; i < s->cpus; i++)
+		s->rw_ti[i] = rw_thread_info_new();
+
+	return s;
+}
+
+/*
+ * Parse a pipe size given as a decimal number with an optional K/k or
+ * M/m suffix, e.g. "128K" or "1M".
+ *
+ * Returns the size rounded down to a multiple of the page size, or 0 if
+ * it is outside the range PIPE_MIN_SIZE..PIPE_MAX_SIZE.
+ */
+static unsigned long parse_size(const char *arg)
+{
+	unsigned long value, round;
+	unsigned int shift = 0;
+	char *ptr;
+
+	value = strtoul(arg, &ptr, 10);
+	switch (*ptr) {
+	case 'K': case 'k':
+		shift = 10;
+		break;
+	case 'M': case 'm':
+		shift = 20;
+		break;
+	default:
+		break;
+	}
+
+	/* Compare before shifting so that a huge value cannot wrap around */
+	if (value > (PIPE_MAX_SIZE >> shift)) {
+		pr_err("Pipe size must be less than 1MB\n");
+		goto error;
+	}
+	value <<= shift;
+
+	if (value < PIPE_MIN_SIZE) {
+		pr_err("Pipe size must be over 64KB\n");
+		goto error;
+	}
+
+	/* Align buffer size with page unit */
+	round = value & (PAGE_SIZE - 1);
+	value = value - round;
+
+	return value;
+error:
+	return 0;
+}
+
+/* Print the command-line synopsis to stderr */
+static void usage(char const *prg)
+{
+	pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
+
+/*
+ * Build the per-CPU trace input path or virtio-port output path in a
+ * newly allocated PATH_MAX buffer.
+ *
+ * Returns the buffer, which the caller owns, or NULL on failure.
+ */
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+	int ret;
+	char *buf;
+
+	buf = zalloc(PATH_MAX);
+	if (buf == NULL) {
+		pr_err("Could not allocate buffer\n");
+		goto error;
+	}
+
+	if (this_is_write_path)
+		/* write(output) path */
+		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+	else
+		/* read(input) path */
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+	/* snprintf() returns >= PATH_MAX when the path was truncated */
+	if (ret <= 0 || ret >= PATH_MAX) {
+		pr_err("Failed to generate %s path(CPU#%d):%d\n",
+			this_is_write_path ? "write" : "read", cpu_num, ret);
+		goto error;
+	}
+
+	return buf;
+
+error:
+	free(buf);
+	return NULL;
+}
+
+/* Build the trace input path (trace_pipe_raw) for cpu_num */
+static const char *make_input_path(int cpu_num)
+{
+	return make_path(cpu_num, false);
+}
+
+/* Build the virtio-port output path for cpu_num */
+static const char *make_output_path(int cpu_num)
+{
+	return make_path(cpu_num, true);
+}
+
+/*
+ * Initialise every read/write thread with its input and output path and
+ * open the control path. Exits if a path cannot be built; otherwise
+ * always returns NULL.
+ *
+ * NOTE(review): the path buffers are not freed here; presumably
+ * rw_thread_init() keeps or releases them - confirm.
+ */
+static void *agent_info_init(struct agent_info *s)
+{
+	int cpu;
+	const char *in_path = NULL;
+	const char *out_path = NULL;
+
+	/* init read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		/* set read(input) path per read/write thread */
+		in_path = make_input_path(cpu);
+		if (in_path == NULL)
+			goto error;
+
+		/* set write(output) path per read/write thread*/
+		if (!s->use_stdout) {
+			out_path = make_output_path(cpu);
+			if (out_path == NULL)
+				goto error;
+		} else
+			/* stdout mode */
+			pr_debug("stdout mode\n");
+
+		rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+						s->pipe_size, s->rw_ti[cpu]);
+	}
+
+	/* init controller of read/write threads */
+	s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Apply the command-line options to s and then initialise the agent.
+ * Exits on -h, an unknown option or an invalid pipe size; otherwise
+ * always returns NULL.
+ */
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+	int cmd;
+	unsigned long size;
+
+	while ((cmd = getopt(argc, argv, "hos:")) != -1) {
+		switch (cmd) {
+		/* stdout mode */
+		case 'o':
+			s->use_stdout = true;
+			break;
+		/* size of pipe */
+		case 's':
+			size = parse_size(optarg);
+			if (size == 0)
+				goto error;
+			s->pipe_size = size;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+			goto error;
+		}
+	}
+
+	agent_info_init(s);
+
+	return NULL;
+
+error:
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Start one read/write thread per CPU, run the control loop, then join
+ * all threads. Exits if a join fails.
+ */
+static void agent_main_loop(struct agent_info *s)
+{
+	int cpu;
+	pthread_t rw_thread_per_cpu[MAX_CPUS];
+
+	/* Start all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++)
+		rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
+
+	rw_ctl_loop(s->ctl_fd);
+
+	/* Finish all read/write threads */
+	for (cpu = 0; cpu < s->cpus; cpu++) {
+		int ret;
+
+		ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
+		if (ret != 0) {
+			pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+/* Close every descriptor held by the agent and free its memory */
+static void agent_info_free(struct agent_info *s)
+{
+	int i;
+
+	close(s->ctl_fd);
+	for (i = 0; i < s->cpus; i++) {
+		close(s->rw_ti[i]->in_fd);
+		close(s->rw_ti[i]->out_fd);
+		close(s->rw_ti[i]->read_pipe);
+		close(s->rw_ti[i]->write_pipe);
+		free(s->rw_ti[i]);
+	}
+	free(s);
+}
+
+int main(int argc, char *argv[])
+{
+	struct agent_info *s = NULL;
+
+	s = agent_info_new();
+	parse_args(argc, argv, s);
+
+	agent_main_loop(s);
+
+	agent_info_free(s);
+
+	return 0;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 00000000000..8de79bfeaa7
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,78 @@
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define MAX_CPUS	256
+#define PIPE_INIT	(1024*1024)
+
+/*
+ * agent_info - structure managing total information of guest agent
+ * @pipe_size:	size of pipe (default 1MB)
+ * @use_stdout:	set to true when o option is added (default false)
+ * @cpus:	total number of CPUs
+ * @ctl_fd:	fd of control path, /dev/virtio-ports/agent-ctl-path
+ * @rw_ti:	structure managing information of read/write threads
+ */
+struct agent_info {
+	unsigned long pipe_size;
+	bool use_stdout;
+	int cpus;
+	int ctl_fd;
+	struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - structure managing the read/write thread of a cpu
+ * @cpu_num:	cpu number operating this read/write thread
+ * @in_fd:	fd of reading trace data path in cpu_num
+ * @out_fd:	fd of writing trace data path in cpu_num
+ * @read_pipe:	fd of read pipe
+ * @write_pipe:	fd of write pipe
+ * @pipe_size:	size of pipe (default 1MB)
+ */
+struct rw_thread_info {
+	int cpu_num;
+	int in_fd;
+	int out_fd;
+	int read_pipe;
+	int write_pipe;
+	unsigned long pipe_size;
+};
+
+/* use for stopping rw threads */
+extern bool global_sig_receive;
+
+/* use for notification */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+			bool stdout_flag, unsigned long pipe_size,
+			struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+/* Allocate size bytes of zeroed memory; returns NULL on failure */
+static inline void *zalloc(size_t size)
+{
+	return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+
+#endif /*__TRACE_AGENT_H__*/