diff --git a/writerune.go b/writerune.go new file mode 100644 index 0000000..d7bf9a2 --- /dev/null +++ b/writerune.go @@ -0,0 +1,58 @@ +package utf8s + +import ( + "io" +) + +// WriteRune writes a single UTF-8 encoded Unicode character and returns the number of bytes written. +func WriteRune(w io.Writer, r rune) (int, error) { + + + switch { + case 127 >= r: + var buffer [1]byte + + buffer[0] = byte(r) + + p := buffer[:] + + return w.Write(p) + + case 0x7FF >= r: + var buffer [2]byte + + buffer[0] = 0xC0 | byte((0x000007C0 & r) >> 6) + buffer[1] = 0x80 | byte( 0x0000003F & r) + + p := buffer[:] + + return w.Write(p) + + case 0xFFFF >= r: + var buffer [3]byte + + buffer[0] = 0xE0 | byte((0x0000F000 & r) >> 12) + buffer[1] = 0x80 | byte((0x00000FC0 & r) >> 6) + buffer[2] = 0x80 | byte( 0x0000003F & r) + + p := buffer[:] + + return w.Write(p) + + case 0x10FFFF >= r: + var buffer [4]byte + + buffer[0] = 0xF0 | byte((0x001C0000 & r) >> 18) + buffer[1] = 0x80 | byte((0x0003F000 & r) >> 12) + buffer[2] = 0x80 | byte((0x00000FC0 & r) >> 6) + buffer[3] = 0x80 | byte( 0x0000003F & r) + + p := buffer[:] + + return w.Write(p) + + default: + return 0, errInternalError + } + +} diff --git a/writerune_test.go b/writerune_test.go new file mode 100644 index 0000000..4e7b7c3 --- /dev/null +++ b/writerune_test.go @@ -0,0 +1,224 @@ +package utf8s + +import ( + "bytes" + + "testing" +) + +func TestWriteRune(t *testing.T) { + + tests := []struct{ + Rune rune + ExpectedInt int + ExpectedBytes []byte + }{ + { + Rune: 'a', + ExpectedInt: 1, + ExpectedBytes: []byte{'a'}, + }, + + + + { + Rune: 'b', + ExpectedInt: 1, + ExpectedBytes: []byte{'b'}, + }, + + + + { + Rune: 'c', + ExpectedInt: 1, + ExpectedBytes: []byte{'c'}, + }, + + + + { + Rune: 'A', + ExpectedInt: 1, + ExpectedBytes: []byte{'A'}, + }, + + + + { + Rune: 'r', + ExpectedInt: 1, + ExpectedBytes: []byte{'r'}, + }, + + + + { + Rune: '¡', + ExpectedInt: 2, + ExpectedBytes: []byte{0xC2, 0xA1}, + }, + + + + { + Rune: '۵', + ExpectedInt: 2, + ExpectedBytes: []byte{0xDB, 0xB5}, + }, + + + + { + Rune: '‱', + ExpectedInt: 3, + ExpectedBytes: []byte{0xe2, 0x80, 0xb1}, + }, + + + + { + Rune: '≡', + ExpectedInt: 3, + ExpectedBytes: []byte{0xE2, 0x89, 0xA1}, + }, + + + + { + Rune: '𐏕', + ExpectedInt: 4, + ExpectedBytes: []byte{0xf0, 0x90, 0x8f, 0x95}, + }, + + + + { + Rune: '🙂', + ExpectedInt: 4, + ExpectedBytes: []byte{0xf0, 0x9f, 0x99, 0x82}, + }, + + + + { + Rune : 0x0000, + ExpectedInt: 1, + ExpectedBytes: []byte{0x00}, + }, + { + Rune: 0x0001, + ExpectedInt: 1, + ExpectedBytes: []byte{0x01}, + }, + { + Rune: 0x007e, + ExpectedInt: 1, + ExpectedBytes: []byte{0x7e}, + }, + { + Rune: 0x007f, + ExpectedInt: 1, + ExpectedBytes: []byte{0x7f}, + }, + + + + { + Rune: 0x0080, // 0b0000,1000,0000 + ExpectedInt: 2, + ExpectedBytes: []byte{0xC2, 0x80}, // <<0b11000010 ; 0b1000,0000>> + }, + { + Rune: 0x0081, // 0b0000,1000,0000 + ExpectedInt: 2, + ExpectedBytes: []byte{0xC2, 0x81}, // <<0b11000010 ; 0b1000,0001>> + }, + { + Rune: 0x07fe, // 0b0111,1111,1110 + ExpectedInt: 2, + ExpectedBytes: []byte{0xDF, 0xBE}, // <<0b1101,1111 ; 0b1011,1110>> + }, + { + Rune: 0x07ff, // 0b0111,1111,1111 + ExpectedInt: 2, + ExpectedBytes: []byte{0xDF, 0xBF}, // <<0b1101,1111 ; 0b1011,1111>> + }, + + + + { + Rune: 0x0800, // 0b1000,0000,0000 + ExpectedInt: 3, + ExpectedBytes: []byte{0xe0, 0xa0, 0x80}, // <<0b111,00000 ; 0b1010,0000 ; 0b1000,0000>> + }, + { + Rune: 0x0801, // 0b1000,0000,0001 + ExpectedInt: 3, + ExpectedBytes: []byte{0xe0, 0xa0, 0x81}, // <<0b111,00000 ; 0b1010,0000 ; 0b1000,0001>> + }, + { + Rune: 0xfffe, // 0b1111,1111,1111,1110 + ExpectedInt: 3, + ExpectedBytes: []byte{0xEF, 0xBF, 0xBE}, // <<0b11101111 ; 0b1011,1111 ; 0b1011,1110>> + }, + { + Rune: 0xffff, // 0b1111,1111,1111,1111 + ExpectedInt: 3, + ExpectedBytes: []byte{0xEF, 0xBF, 0xBF}, // <<0b11101111 ; 0b1011,1111 ; 0b1011,1111>> + }, + + + + { + Rune: 0x010000, // 0b0001,0000,0000,0000,0000 + ExpectedInt: 4, + ExpectedBytes: []byte{0xF0, 0x90, 0x80, 0x80}, // <<0b1111,0000 ; 0b10010000 ; 0b1000,0000 ; 0b1000,0000>> + }, + { + Rune: 0x010001, // 0b0001,0000,0000,0000,0001 + ExpectedInt: 4, + ExpectedBytes: []byte{0xF0, 0x90, 0x80, 0x81}, // <<0b1111,0000 ; 0b10010000 ; 0b1000,0000 ; 0b1000,0001>> + }, + { + Rune: 0x10fffe, // 0b0001,0000,1111,1111,1111,1110 + ExpectedInt: 4, + ExpectedBytes: []byte{0xF4, 0x8F, 0xBF, 0xBE}, // <<0b1111,0100 ; 0b10001111 ; 0b1011,1111 ; 0b1011,1110>> + }, + { + Rune: 0x10ffff, // 0b0001,0000,1111,1111,1111,1111 + ExpectedInt: 4, + ExpectedBytes: []byte{0xF4, 0x8F, 0xBF, 0xBF}, // <<0b1111,0100 ; 0b10001111 ; 0b1011,1111 ; 0b1011,1111>> + }, + } + + + for testNumber, test := range tests { + + var buffer bytes.Buffer + + actualInt, err := WriteRune(&buffer, test.Rune) + if nil != err { + t.Errorf("For test #%d, did not expect an error, but actually got one: (%T) %q", testNumber, err, err) + t.Errorf("\trune = %q (%x)", test.Rune, test.Rune) + continue + } + if expected, actual := test.ExpectedInt, actualInt; expected != actual { + t.Errorf("For test #%d, expected %d, but actually got %d.", testNumber, expected, actual) + t.Errorf("\trune = %q (%x)", test.Rune, test.Rune) + continue + } + if expected, actual := test.ExpectedInt, buffer.Len(); expected != actual { + t.Errorf("For test #%d, expected %d, but actually got %d.", testNumber, expected, actual) + t.Errorf("\trune = %q (%x)", test.Rune, test.Rune) + continue + } + for byteNumber, expected := range test.ExpectedBytes { + if actual := buffer.Bytes()[byteNumber]; expected != actual { + t.Errorf("For test #%d and byte #%d, expected %q (%X), but actually got %q (%X).", testNumber, byteNumber, expected, expected, actual, actual) + t.Errorf("\trune = %q (%x)", test.Rune, test.Rune) + continue + } + } + } +}