aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2014-02-18 10:31:31 -0800
committerAlon Zakai <alonzakai@gmail.com>2014-02-18 13:59:02 -0800
commit5daa907cbbc25bf30cca147cf30d419a4786d3b9 (patch)
tree71e9506b34494356771d77eb22e8c8e830585275
parent1ac7a54cd6a138e24b7e932c0768be89a3a65a82 (diff)
guarantee that sanitize() returns a proper non-colliding value for each input
-rw-r--r--lib/Target/JSBackend/JSBackend.cpp65
1 files changed, 60 insertions, 5 deletions
diff --git a/lib/Target/JSBackend/JSBackend.cpp b/lib/Target/JSBackend/JSBackend.cpp
index 613c7a501b..7222c28992 100644
--- a/lib/Target/JSBackend/JSBackend.cpp
+++ b/lib/Target/JSBackend/JSBackend.cpp
@@ -402,10 +402,64 @@ formatted_raw_ostream &JSWriter::nl(formatted_raw_ostream &Out, int delta) {
return Out;
}
-static inline void sanitize(std::string& str) {
- for (size_t i = 1; i < str.length(); ++i)
- if (!isalnum(str[i]) && str[i] != '_' && str[i] != '$')
- str[i] = '_';
+static inline char halfCharToHex(unsigned char half) { // Maps a 4-bit value (0..15) to its hex digit character, using uppercase A-F.
+ assert(half <= 15); // callers must pass a single nibble
+ if (half <= 9) {
+ return '0' + half; // 0..9 -> '0'..'9'
+ } else {
+ return 'A' + half - 10; // 10..15 -> 'A'..'F'
+ }
+}
+
+static inline void sanitizeGlobal(std::string& str) { // Rewrites str in place; the string length never changes.
+ // Functions and globals should already be valid C-style identifiers,
+ // except that llvm intrinsics may additionally contain '.', which is
+ // rewritten to '_' here. That rewrite could collide with a name that already
+ // has '_' in the same position, but this should not happen in practice.
+ size_t OriginalSize = str.size();
+ for (size_t i = 1; i < OriginalSize; ++i) { // index 0 is skipped (getJSName puts a '_' prefix there before calling)
+ unsigned char c = str[i];
+ if (c == '.') str[i] = '_';
+ assert(isalnum(c) || c == '_' || c == '.'); // checks the original character; anything else (including '$') is unexpected here
+ }
+}
+
+static inline void sanitizeLocal(std::string& str) { // Rewrites str in place and may grow it; str[0] is never examined.
+ // We need to convert every string that is not a valid JS identifier into
+ // a valid one, without collisions - we cannot turn "x.a" into "x_a" while
+ // also leaving "x_a" as is, for example.
+ //
+ // We leave valid characters 0-9a-zA-Z and _ unchanged. Anything else
+ // we replace with $ and append two uppercase hex digits of that value at the
+ // end of the string, so for example x#a turns into x$a23, x##a turns into x$$a2323.
+ //
+ // As an optimization, we replace . with $ without appending anything,
+ // unless there is another illegal character. The reason is that . is
+ // a common illegal character, and we want to avoid resizing strings
+ // for perf reasons. If we do see that we need to append something, then
+ // for each . seen so far we just append Z (one character, instead of the hex code).
+ // NOTE(review): dots that occur after the last non-'.' illegal character stay queued and never get their Z, so e.g. "x#a." and "x.a.23" both come out as x$a$23 - confirm this cannot matter in practice.
+
+ size_t OriginalSize = str.size(); // only the original characters are scanned; appended ones are not revisited
+ int Queued = 0; // number of '.' characters replaced so far whose 'Z' marker has not been appended yet
+ for (size_t i = 1; i < OriginalSize; ++i) { // index 0 is skipped (getJSName puts a '$' prefix there before calling)
+ unsigned char c = str[i];
+ if (!isalnum(c) && c != '_') {
+ str[i] = '$';
+ if (c == '.') {
+ Queued++; // defer: a string whose only illegal characters are '.' is never resized
+ } else {
+ size_t s = str.size();
+ str.resize(s+2+Queued); // room for one 'Z' per queued '.' plus two hex digits
+ for (int i = 0; i < Queued; i++) { // NOTE(review): this 'i' shadows the outer loop index; harmless here but easy to misread
+ str[s++] = 'Z';
+ }
+ Queued = 0;
+ str[s] = halfCharToHex(c >> 4); // high nibble
+ str[s+1] = halfCharToHex(c & 0xf); // low nibble
+ }
+ }
+ }
}
static inline std::string ensureFloat(const std::string &S, Type *T) {
@@ -496,10 +550,11 @@ const std::string &JSWriter::getJSName(const Value* val) {
if (val->hasName()) {
if (isa<Function>(val) || isa<Constant>(val)) {
name = std::string("_") + val->getName().str();
+ sanitizeGlobal(name);
} else {
name = std::string("$") + val->getName().str();
+ sanitizeLocal(name);
}
- sanitize(name);
} else {
name = "u$" + utostr(UniqueNum++);
}