Contrary to what some of the comments suggest, the unicorn symbol 🦄 (U+1F984) is located in the contract's symbol name. You can check this by running the following in a web3 browser's console:
// Human Standard Token ABI, copied from
// https://github.com/flyswatter/human-standard-token-abi/blob/master/index.js
// One entry per function, plus the constructor, the fallback and the two events.
var tokenAbi = [
  {
    "constant": true,
    "inputs": [],
    "name": "name",
    "outputs": [{ "name": "", "type": "string" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": false,
    "inputs": [
      { "name": "_spender", "type": "address" },
      { "name": "_value", "type": "uint256" }
    ],
    "name": "approve",
    "outputs": [{ "name": "success", "type": "bool" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "totalSupply",
    "outputs": [{ "name": "", "type": "uint256" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": false,
    "inputs": [
      { "name": "_from", "type": "address" },
      { "name": "_to", "type": "address" },
      { "name": "_value", "type": "uint256" }
    ],
    "name": "transferFrom",
    "outputs": [{ "name": "success", "type": "bool" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "decimals",
    "outputs": [{ "name": "", "type": "uint8" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "version",
    "outputs": [{ "name": "", "type": "string" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [{ "name": "_owner", "type": "address" }],
    "name": "balanceOf",
    "outputs": [{ "name": "balance", "type": "uint256" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [],
    "name": "symbol",
    "outputs": [{ "name": "", "type": "string" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": false,
    "inputs": [
      { "name": "_to", "type": "address" },
      { "name": "_value", "type": "uint256" }
    ],
    "name": "transfer",
    "outputs": [{ "name": "success", "type": "bool" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": false,
    "inputs": [
      { "name": "_spender", "type": "address" },
      { "name": "_value", "type": "uint256" },
      { "name": "_extraData", "type": "bytes" }
    ],
    "name": "approveAndCall",
    "outputs": [{ "name": "success", "type": "bool" }],
    "payable": false,
    "type": "function"
  },
  {
    "constant": true,
    "inputs": [
      { "name": "_owner", "type": "address" },
      { "name": "_spender", "type": "address" }
    ],
    "name": "allowance",
    "outputs": [{ "name": "remaining", "type": "uint256" }],
    "payable": false,
    "type": "function"
  },
  {
    "inputs": [
      { "name": "_initialAmount", "type": "uint256" },
      { "name": "_tokenName", "type": "string" },
      { "name": "_decimalUnits", "type": "uint8" },
      { "name": "_tokenSymbol", "type": "string" }
    ],
    "type": "constructor"
  },
  {
    "payable": false,
    "type": "fallback"
  },
  {
    "anonymous": false,
    "inputs": [
      { "indexed": true, "name": "_from", "type": "address" },
      { "indexed": true, "name": "_to", "type": "address" },
      { "indexed": false, "name": "_value", "type": "uint256" }
    ],
    "name": "Transfer",
    "type": "event"
  },
  {
    "anonymous": false,
    "inputs": [
      { "indexed": true, "name": "_owner", "type": "address" },
      { "indexed": true, "name": "_spender", "type": "address" },
      { "indexed": false, "name": "_value", "type": "uint256" }
    ],
    "name": "Approval",
    "type": "event"
  }
];
// Read the token's `symbol` and store it on the global object so it can be
// inspected from the console afterwards.
var tokenContract = web3.eth.contract(tokenAbi);
var instance = tokenContract.at("0x89205a3a3b2a69de6dbf7f01ed13b2108b2c43e7");
instance.symbol.call(function (err, result) {
  if (err) {
    // Surface RPC/ABI failures instead of silently storing `undefined`.
    console.error("Failed to read token symbol:", err);
    return;
  }
  // `global` only exists in Node-like contexts; a plain browser console
  // exposes `window` instead, so pick whichever is available.
  var root = typeof window !== "undefined" ? window : global;
  root.symbol = result;
});
Let's investigate this string:
// `length` counts UTF-16 code units, so this single emoji reports 2.
symbol.length // Output: 2
// `codePointAt(0)` reads the whole code point starting at index 0.
symbol.codePointAt(0) // Output: 129412 (0x1F984)
// Splitting on "" yields the individual UTF-16 code units: the high and low
// surrogates 0xD83E and 0xDD84.
symbol.split("").map(char => char.charCodeAt(0)); // Output: [55358, 56708]
Let's talk about UTF-8, the character encoding Mist uses. According to FileFormat.info:
UTF-8 is a compromise character encoding that can be as compact as ASCII (if the file is just plain English text) but can also contain any unicode characters (with some increase in file size).
UTF stands for Unicode Transformation Format. The '8' means it uses 8-bit blocks to represent a character. The number of blocks needed to represent a character varies from 1 to 4.
What we have here is the Unicode code point 129412 (0x1F984). It looks like Solidity only supports encoding code points between 0x0000 (0) and 0xFFFF (65535) using the \uNNNN syntax. Typically, languages allow encoding code points above this range using surrogate pairs. Solidity doesn't seem to.
No worries, we should be able to just put the hex encoding of the correct unicode sequence and the correct text should be rendered.
Contract:
pragma solidity ^0.4.11;
// Minimal contract exposing two emoji string constants: one written as raw
// hex bytes, the other written with \u escape sequences.
contract EmojiCon {
// F0 9F A6 84 is the UTF-8 encoding of U+1F984 (unicorn).
string public constant working = hex"F09FA684";
// NOTE(review): \uD83D\uDC36 is the surrogate pair for U+1F436, not the unicorn;
// the unicorn's pair would be \uD83E\uDD84 (55358, 56708) -- confirm which was intended.
string public constant broken = "\uD83D\uDC36";
}
Test (truffle):
// Truffle test for EmojiCon: reads each public string constant from the
// deployed contract and prints it so the rendered text can be inspected.
var EmojiCon = artifacts.require("./EmojiCon.sol");

contract('EmojiCon', (accounts) => {
  it("should match the string", async () => {
    const deployed = await EmojiCon.deployed();
    // Value of the constant declared with a hex literal.
    console.log(await deployed.working.call());
  });

  it("shouldn't match the string", async () => {
    const deployed = await EmojiCon.deployed();
    // Value of the constant declared with \u escapes.
    console.log(await deployed.broken.call());
  });
});
Unfortunately it won't compile:
Compiling ./contracts/EmojiCon.sol...
Compiling ./contracts/Migrations.sol...
/home/vm/ethereum/unicode/contracts/EmojiCon.sol:4:35: : Type literal_string (contains invalid UTF-8 sequence at position 3) is not implicitly convertible to
expected type string memory.
string public constant working = hex"F09FA68421";
^-------------^
Compiliation failed. See above.
The error comes from here. It seems the validator chooses an incorrect value for `count` and stops too early (isolation (original)). This seems to be a bug/missing feature in Solidity. Hope this helps.
Best Answer
If it is you who is packing those two strings, `s1` and `s2`, into one `bytes` value, then you may do the following.
At the source (in JavaScript):
const data = abiCoder.encode(["string", "string"], [s1, s2])
Pass `data` via the contract function.
At the receiving end (in Solidity):
(string memory s1, string memory s2) = abi.decode(data, (string, string))
Doing it this way avoids string operations altogether.
P.S. Keep in mind that `data` is not intended for consumption by the `transfer` function itself; it is intended for consumption by the recipient of the transfer (if that recipient is a compliant `ERC1155TokenReceiver` contract).