diff --git a/readeol.go b/readeol.go new file mode 100644 index 0000000..9228373 --- /dev/null +++ b/readeol.go @@ -0,0 +1,83 @@ +package eol + +import ( + "io" +) + +// ReadEOL tries to read and end-of-line character. +// +// The end-of-line sequences it supports are: +// +// line-feed (LF) (U+000A) ('\n') +// carriage-return (CR) (U+000D) ('\r') +// carriage-return, line-feed ("\r\n") +// new-line (NL) (U+0085) +// line-separator (LS) (U+2028) +// +// If successful, ReadEOL return the end-of-line sequence and the number-of-bytes read. +func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error) { + if nil == runescanner { + return "", 0, errNilRuneScanner + } + + var r0 rune + var size0 int + { + var err error + + r0, size0, err = runescanner.ReadRune() + if nil != err { + const runeNumber = 1 + return "", size0, errProblemReadingRune(err, runeNumber) + } + } + + switch r0 { + case lf: + return LF, size0, nil + case cr: + // Nothing here. + case nl: + return NL, size0, nil + case ls: + return LS, size0, nil + default: + err := runescanner.UnreadRune() + if nil != err { + const runeNumber = 1 + return "", size0, errProblemUnreadingRune(err, runeNumber, r0) + } + + return "", 0, errNotEOL(r0) + } + + // if we got here, then we had a CR + + var r1 rune + var size1 int + { + var err error + + r1, size1, err = runescanner.ReadRune() + if io.EOF == err { + return CR, size0, nil + } + if nil != err { + const runeNumber = 2 + return "", size1+size0, errProblemReadingRune(err, runeNumber) + } + } + + switch r1 { + case lf: + return CRLF, size1+size0, nil + default: + err := runescanner.UnreadRune() + if nil != err { + const runeNumber = 2 + return "", size1+size0, errProblemUnreadingRune(err, runeNumber, r1) + } + + return CR, size0, nil + } +} diff --git a/readeol_test.go b/readeol_test.go new file mode 100644 index 0000000..67bdd2f --- /dev/null +++ b/readeol_test.go @@ -0,0 +1,203 @@ +package eol_test + +import ( + "testing" + + "io" + "strings" + + "sourcecode.social/reiver/go-utf8" + + "sourcecode.social/reiver/go-eol" +) + +func TestReadEOL(t *testing.T) { + + tests := []struct{ + Value string + ExpectedEOL string + ExpectedSize int + }{ + { + Value: "\n", + ExpectedEOL: eol.LF, + ExpectedSize: 1, + }, + { + Value: "\r", + ExpectedEOL: eol.CR, + ExpectedSize: 1, + }, + { + Value: "\r\n", + ExpectedEOL: eol.CRLF, + ExpectedSize: 2, + }, + { + Value: "\u0085", + ExpectedEOL: eol.NL, + ExpectedSize: 2, + }, + { + Value: "\u2028", + ExpectedEOL: eol.LS, + ExpectedSize: 3, + }, + + + + { + Value: "\napple banana cherry", + ExpectedEOL: eol.LF, + ExpectedSize: 1, + }, + { + Value: "\rapple banana cherr", + ExpectedEOL: eol.CR, + ExpectedSize: 1, + }, + { + Value: "\r\napple banana cherr", + ExpectedEOL: eol.CRLF, + ExpectedSize: 2, + }, + { + Value: "\u0085apple banana cherr", + ExpectedEOL: eol.NL, + ExpectedSize: 2, + }, + { + Value: "\u2028apple banana cherr", + ExpectedEOL: eol.LS, + ExpectedSize: 3, + }, + } + + for testNumber, test := range tests { + + var reader io.Reader = strings.NewReader(test.Value) + var runescanner io.RuneScanner = utf8.NewRuneScanner(reader) + + actualEOL, actualSize, err := eol.ReadEOL(runescanner) + if nil != err { + t.Errorf("For test #%d, did not expect an error but actually got one." , testNumber) + t.Logf("ERROR: (%T) %s", err, err) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL) + t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize) + continue + } + + { + expected := test.ExpectedEOL + actual := actualEOL + + if expected != actual { + t.Errorf("For test #%d, the actual end-of-line sequence is not what was expected.", testNumber) + t.Logf("EXPECTED: %q", expected) + t.Logf("ACTUAL: %q", actual) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize) + continue + } + } + + { + expected := test.ExpectedSize + actual := actualSize + + if expected != actual { + t.Errorf("For test #%d, the actual size is not what was expected.", testNumber) + t.Logf("EXPECTED: %d", expected) + t.Logf("ACTUAL: %d", actual) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL) + continue + } + } + } +} + +func TestReadEOL_fail(t *testing.T) { + + tests := []struct{ + Value string + ExpectedEOL string + ExpectedSize int + ExpectedError string + }{ + { + Value: "apple", + ExpectedError: "eol: 'a' (U+0061) is not an end-of-line character", + }, + { + Value: "banana", + ExpectedError: "eol: 'b' (U+0062) is not an end-of-line character", + }, + { + Value: "cherry", + ExpectedError: "eol: 'c' (U+0063) is not an end-of-line character", + }, + } + + for testNumber, test := range tests { + + var reader io.Reader = strings.NewReader(test.Value) + var runescanner io.RuneScanner = utf8.NewRuneScanner(reader) + + actualEOL, actualSize, err := eol.ReadEOL(runescanner) + if nil == err { + t.Errorf("For test #%d, expected an error but did not actually get one." , testNumber) + t.Logf("EXPECTED-ERROR: %q", test.ExpectedError) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL) + t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize) + continue + } + + { + expected := test.ExpectedError + actual := err.Error() + + if expected != actual { + t.Errorf("For test %d, the actual error is not what was expected.", testNumber) + t.Logf("EXPECTED: %q", expected) + t.Logf("ACTUAL: %q", actual) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL) + t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize) + continue + } + } + + { + expected := test.ExpectedEOL + actual := actualEOL + + if expected != actual { + t.Errorf("For test %d, the actual end-of-line sequence is not what was expected.", testNumber) + t.Logf("EXPECTED: %q", expected) + t.Logf("ACTUAL: %q", actual) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize) + t.Logf("EXPECTED-ERROR: %q", test.ExpectedError) + continue + } + } + + { + expected := test.ExpectedSize + actual := actualSize + + if expected != actual { + t.Errorf("For test %d, the actual size is not what was expected.", testNumber) + t.Logf("EXPECTED: %d", expected) + t.Logf("ACTUAL: %d", actual) + t.Logf("VALUE: %q", test.Value) + t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL) + t.Logf("EXPECTED-ERROR: %q", test.ExpectedError) + continue + } + } + } +}