Contrary to what some of the comments suggest, the unicorn symbol 🦄 (U+1F984) is part of the contract's symbol name. You can check this by running the following in a web3 browser's console:
// ABI description of the "human standard token" contract interface.
// Taken from https://github.com/flyswatter/human-standard-token-abi/blob/master/index.js
// Each entry describes one function, the constructor, the fallback, or an event.
var tokenAbi = [
  // --- functions ---
  { constant: true, inputs: [], name: "name", outputs: [{ name: "", type: "string" }], payable: false, type: "function" },
  {
    constant: false,
    inputs: [
      { name: "_spender", type: "address" },
      { name: "_value", type: "uint256" },
    ],
    name: "approve",
    outputs: [{ name: "success", type: "bool" }],
    payable: false,
    type: "function",
  },
  { constant: true, inputs: [], name: "totalSupply", outputs: [{ name: "", type: "uint256" }], payable: false, type: "function" },
  {
    constant: false,
    inputs: [
      { name: "_from", type: "address" },
      { name: "_to", type: "address" },
      { name: "_value", type: "uint256" },
    ],
    name: "transferFrom",
    outputs: [{ name: "success", type: "bool" }],
    payable: false,
    type: "function",
  },
  { constant: true, inputs: [], name: "decimals", outputs: [{ name: "", type: "uint8" }], payable: false, type: "function" },
  { constant: true, inputs: [], name: "version", outputs: [{ name: "", type: "string" }], payable: false, type: "function" },
  {
    constant: true,
    inputs: [{ name: "_owner", type: "address" }],
    name: "balanceOf",
    outputs: [{ name: "balance", type: "uint256" }],
    payable: false,
    type: "function",
  },
  { constant: true, inputs: [], name: "symbol", outputs: [{ name: "", type: "string" }], payable: false, type: "function" },
  {
    constant: false,
    inputs: [
      { name: "_to", type: "address" },
      { name: "_value", type: "uint256" },
    ],
    name: "transfer",
    outputs: [{ name: "success", type: "bool" }],
    payable: false,
    type: "function",
  },
  {
    constant: false,
    inputs: [
      { name: "_spender", type: "address" },
      { name: "_value", type: "uint256" },
      { name: "_extraData", type: "bytes" },
    ],
    name: "approveAndCall",
    outputs: [{ name: "success", type: "bool" }],
    payable: false,
    type: "function",
  },
  {
    constant: true,
    inputs: [
      { name: "_owner", type: "address" },
      { name: "_spender", type: "address" },
    ],
    name: "allowance",
    outputs: [{ name: "remaining", type: "uint256" }],
    payable: false,
    type: "function",
  },
  // --- constructor and fallback ---
  {
    inputs: [
      { name: "_initialAmount", type: "uint256" },
      { name: "_tokenName", type: "string" },
      { name: "_decimalUnits", type: "uint8" },
      { name: "_tokenSymbol", type: "string" },
    ],
    type: "constructor",
  },
  { payable: false, type: "fallback" },
  // --- events ---
  {
    anonymous: false,
    inputs: [
      { indexed: true, name: "_from", type: "address" },
      { indexed: true, name: "_to", type: "address" },
      { indexed: false, name: "_value", type: "uint256" },
    ],
    name: "Transfer",
    type: "event",
  },
  {
    anonymous: false,
    inputs: [
      { indexed: true, name: "_owner", type: "address" },
      { indexed: true, name: "_spender", type: "address" },
      { indexed: false, name: "_value", type: "uint256" },
    ],
    name: "Approval",
    type: "event",
  },
];
// Get the token's symbol from the deployed contract and store it in a global
// named `symbol` so it can be inspected from the console afterwards.
var tokenContract = web3.eth.contract(tokenAbi);
var instance = tokenContract.at("0x89205a3a3b2a69de6dbf7f01ed13b2108b2c43e7");
// The callback receives (error, result). Report a failed call instead of
// silently storing `undefined` as the symbol.
instance.symbol.call((err, result) => {
  if (err) {
    console.error(err);
    return;
  }
  global.symbol = result;
});
Let's investigate this string:
// Length of the string as reported by JavaScript.
symbol.length; // Output: 2
// Code point read at index 0.
symbol.codePointAt(0); // Output: 129412 (0x1F984)
// charCodeAt(0) of each element produced by split("").
symbol.split("").map((unit) => unit.charCodeAt(0)); // Output: [55358, 56708]
Let's talk about UTF-8, the character encoding Mist uses. According to FileFormat.info:
UTF-8 is a compromise character encoding that can be as compact as ASCII (if the file is just plain English text) but can also contain any unicode characters (with some increase in file size).
UTF stands for Unicode Transformation Format. The '8' means it uses 8-bit blocks to represent a character. The number of blocks needed to represent a character varies from 1 to 4.
What we have here is the Unicode code point 129412 (0x1F984). It looks like Solidity only supports encoding code points between 0x0000 (0) and 0xFFFF (65535) using the \uNNNN syntax. Languages typically allow encoding code points above this range using surrogate pairs; Solidity doesn't seem to.
No worries, we should be able to just put the hex encoding of the correct unicode sequence and the correct text should be rendered.
Contract:
// Minimal contract declaring two public string constants, each written with a
// different literal syntax for a character outside the \uNNNN range.
pragma solidity ^0.4.11;
contract EmojiCon {
// Hex literal with the raw bytes F0 9F A6 84, i.e. the UTF-8 encoding of U+1F984.
string public constant working = hex"F09FA684";
// String literal built from two \uNNNN escapes that form a UTF-16 surrogate pair.
// NOTE(review): D83D/DC36 decodes to U+1F436, while the surrogate pair for
// U+1F984 would be \uD83E\uDD84 — confirm which character was intended.
string public constant broken = "\uD83D\uDC36";
}
Test (truffle):
// Truffle test suite: reads each public string constant from the deployed
// EmojiCon contract and prints it to the console.
var EmojiCon = artifacts.require("./EmojiCon.sol");

contract('EmojiCon', (accounts) => {
  // Prints the value returned by the `working` getter.
  it("should match the string", async () => {
    const deployed = await EmojiCon.deployed();
    const value = await deployed.working.call();
    console.log(value);
  });

  // Prints the value returned by the `broken` getter.
  it("shouldn't match the string", async () => {
    const deployed = await EmojiCon.deployed();
    const value = await deployed.broken.call();
    console.log(value);
  });
});
Unfortunately it won't compile:
Compiling ./contracts/EmojiCon.sol...
Compiling ./contracts/Migrations.sol...
/home/vm/ethereum/unicode/contracts/EmojiCon.sol:4:35: : Type literal_string (contains invalid UTF-8 sequence at position 3) is not implicitly convertible to
expected type string memory.
string public constant working = hex"F09FA68421";
^-------------^
Compiliation failed. See above.
The error comes from here. It seems the validator chooses an incorrect value for count and stops too early (isolation (original)). This seems to be a bug or missing feature in Solidity. Hope this helps.
Best Answer
This is very inefficient, but it should work.
In any case, I'd recommend converting your hexadecimal data outside Solidity.