// Len returns the number of bytes needed to encode the Unicode code point r
// in UTF-8.
//
// The result follows the UTF-8 length boundaries:
//
//	U+0000  .. U+007F   -> 1
//	U+0080  .. U+07FF   -> 2
//	U+0800  .. U+FFFF   -> 3
//	U+10000 .. U+10FFFF -> 4
//
// Len returns 0 when r lies outside the Unicode code space, that is, when r is
// negative or greater than U+10FFFF. Surrogate code points (U+D800 .. U+DFFF)
// are not treated specially and yield 3.
func Len(r rune) int {
	switch {
	case r < 0:
		// rune is a signed type; negative values are not code points.
		return 0
	case r <= 0x7F:
		// Inclusive bound: U+007F (DEL) is the last single-byte code point.
		return 1
	case r <= 0x7FF:
		return 2
	case r <= 0xFFFF:
		return 3
	case r <= 0x10FFFF:
		return 4
	default:
		// Beyond the last Unicode code point.
		return 0
	}
}

// TestLen checks Len against representative characters of every encoded
// length, against the first and last code point of each length range, and
// against values outside the Unicode code space.
func TestLen(t *testing.T) {
	tests := []struct {
		Datum    rune
		Expected int
	}{
		// Representative characters.
		{Datum: 'A', Expected: 1},
		{Datum: 'r', Expected: 1},
		{Datum: '¡', Expected: 2},
		{Datum: '۵', Expected: 2},
		{Datum: '‱', Expected: 3},
		{Datum: '≡', Expected: 3},
		{Datum: '𐏕', Expected: 4},
		{Datum: '🙂', Expected: 4},

		// First and last code point of each length range.
		{Datum: 0x00, Expected: 1},
		{Datum: 0x7F, Expected: 1},
		{Datum: 0x80, Expected: 2},
		{Datum: 0x7FF, Expected: 2},
		{Datum: 0x800, Expected: 3},
		{Datum: 0xFFFF, Expected: 3},
		{Datum: 0x10000, Expected: 4},
		{Datum: 0x10FFFF, Expected: 4},

		// Values outside the Unicode code space.
		{Datum: -1, Expected: 0},
		{Datum: 0x110000, Expected: 0},
	}

	for testNumber, test := range tests {
		actual := Len(test.Datum)
		if expected := test.Expected; expected != actual {
			t.Errorf("For test #%d, expected %d, but actually got %d.", testNumber, expected, actual)
		}
	}
}