// ---- len.go ----

// Len returns the number of bytes needed to encode the Unicode code point r
// in UTF-8.
//
// It returns 0 when r has no UTF-8 encoding: negative values, the surrogate
// range U+D800 through U+DFFF, and values above U+10FFFF.
func Len(r rune) int {
	const (
		maxOneByte   = 0x7F     // last code point encoded in 1 byte (ASCII, including DEL)
		maxTwoByte   = 0x7FF    // last code point encoded in 2 bytes
		maxThreeByte = 0xFFFF   // last code point encoded in 3 bytes
		maxFourByte  = 0x10FFFF // last valid Unicode code point
		surrogateMin = 0xD800   // first UTF-16 surrogate
		surrogateMax = 0xDFFF   // last UTF-16 surrogate
	)

	switch {
	case r < 0:
		// rune is a signed type; negative values are not code points.
		return 0
	case r <= maxOneByte:
		return 1
	case r <= maxTwoByte:
		return 2
	case surrogateMin <= r && r <= surrogateMax:
		// Surrogates are reserved for UTF-16 and cannot be encoded in UTF-8.
		// This case must precede the 3-byte case, whose range contains it.
		return 0
	case r <= maxThreeByte:
		return 3
	case r <= maxFourByte:
		return 4
	default:
		return 0
	}
}

// ---- len_test.go ----

// TestLen checks Len against representative code points of every encoded
// width, the boundaries between widths, and inputs that cannot be encoded.
func TestLen(t *testing.T) {
	tests := []struct {
		Datum    rune
		Expected int
	}{
		// Representative characters, one pair per encoded width.
		{Datum: 'A', Expected: 1},
		{Datum: 'r', Expected: 1},
		{Datum: '¡', Expected: 2},
		{Datum: '۵', Expected: 2},
		{Datum: '‱', Expected: 3},
		{Datum: '≡', Expected: 3},
		{Datum: '𐏕', Expected: 4},
		{Datum: '🙂', Expected: 4},

		// Boundaries between encoded widths.
		{Datum: 0x00, Expected: 1},
		{Datum: 0x7F, Expected: 1},
		{Datum: 0x80, Expected: 2},
		{Datum: 0x7FF, Expected: 2},
		{Datum: 0x800, Expected: 3},
		{Datum: 0xD7FF, Expected: 3},
		{Datum: 0xE000, Expected: 3},
		{Datum: 0xFFFF, Expected: 3},
		{Datum: 0x10000, Expected: 4},
		{Datum: 0x10FFFF, Expected: 4},

		// Values that have no UTF-8 encoding.
		{Datum: -1, Expected: 0},
		{Datum: 0xD800, Expected: 0},
		{Datum: 0xDFFF, Expected: 0},
		{Datum: 0x110000, Expected: 0},
	}

	for testNumber, test := range tests {
		actual := Len(test.Datum)
		if expected := test.Expected; expected != actual {
			t.Errorf("For test #%d (%U), expected %d, but actually got %d.", testNumber, test.Datum, expected, actual)
		}
	}
}