initial commits

master
Charles Iliya Krempeaux 2023-11-27 07:13:38 -08:00
parent efcad45cd1
commit db2133ea53
2 changed files with 286 additions and 0 deletions

83
readeol.go 100644
View File

@ -0,0 +1,83 @@
package eol
import (
"io"
)
// ReadEOL attempts to consume exactly one end-of-line sequence from runescanner.
//
// The end-of-line sequences it recognizes are:
//
//	line-feed (LF)             (U+000A) ('\n')
//	carriage-return (CR)       (U+000D) ('\r')
//	carriage-return, line-feed ("\r\n")
//	new-line (NL)              (U+0085)
//	line-separator (LS)        (U+2028)
//
// On success, ReadEOL returns the end-of-line sequence that was read along with the number of bytes read for it.
//
// If the first rune read is not an end-of-line character, ReadEOL unreads it and returns an error.
// If a carriage-return is followed by a rune other than a line-feed, ReadEOL unreads that second rune and reports a lone carriage-return.
// If reading the rune after a carriage-return yields io.EOF, ReadEOL reports a lone carriage-return.
func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error) {
	if nil == runescanner {
		return "", 0, errNilRuneScanner
	}

	// Positions (1-based) of the runes within the sequence; used when reporting errors.
	const (
		firstRune  = 1
		secondRune = 2
	)

	first, firstSize, firstErr := runescanner.ReadRune()
	if nil != firstErr {
		return "", firstSize, errProblemReadingRune(firstErr, firstRune)
	}

	// Everything except a carriage-return can be decided from the first rune alone.
	if cr != first {
		switch first {
		case lf:
			return LF, firstSize, nil
		case nl:
			return NL, firstSize, nil
		case ls:
			return LS, firstSize, nil
		}

		// Not an end-of-line character: put it back before reporting the failure.
		if unreadErr := runescanner.UnreadRune(); nil != unreadErr {
			return "", firstSize, errProblemUnreadingRune(unreadErr, firstRune, first)
		}
		return "", 0, errNotEOL(first)
	}

	// A carriage-return was read; a line-feed may follow it.
	second, secondSize, secondErr := runescanner.ReadRune()
	switch {
	case io.EOF == secondErr:
		// Nothing follows the carriage-return, so it stands on its own.
		return CR, firstSize, nil
	case nil != secondErr:
		return "", secondSize+firstSize, errProblemReadingRune(secondErr, secondRune)
	case lf == second:
		return CRLF, secondSize+firstSize, nil
	}

	// The second rune is not part of this end-of-line sequence: put it back.
	if unreadErr := runescanner.UnreadRune(); nil != unreadErr {
		return "", secondSize+firstSize, errProblemUnreadingRune(unreadErr, secondRune, second)
	}
	return CR, firstSize, nil
}

203
readeol_test.go 100644
View File

@ -0,0 +1,203 @@
package eol_test
import (
"testing"
"io"
"strings"
"sourcecode.social/reiver/go-utf8"
"sourcecode.social/reiver/go-eol"
)
// TestReadEOL checks that eol.ReadEOL returns the expected end-of-line sequence
// and size for inputs that begin with an end-of-line sequence.
func TestReadEOL(t *testing.T) {

	type testCase struct {
		Value        string
		ExpectedEOL  string
		ExpectedSize int
	}

	testCases := []testCase{
		// The end-of-line sequence is the whole input.
		{Value: "\n", ExpectedEOL: eol.LF, ExpectedSize: 1},
		{Value: "\r", ExpectedEOL: eol.CR, ExpectedSize: 1},
		{Value: "\r\n", ExpectedEOL: eol.CRLF, ExpectedSize: 2},
		{Value: "\u0085", ExpectedEOL: eol.NL, ExpectedSize: 2},
		{Value: "\u2028", ExpectedEOL: eol.LS, ExpectedSize: 3},

		// The end-of-line sequence is followed by other text.
		{Value: "\napple banana cherry", ExpectedEOL: eol.LF, ExpectedSize: 1},
		{Value: "\rapple banana cherr", ExpectedEOL: eol.CR, ExpectedSize: 1},
		{Value: "\r\napple banana cherr", ExpectedEOL: eol.CRLF, ExpectedSize: 2},
		{Value: "\u0085apple banana cherr", ExpectedEOL: eol.NL, ExpectedSize: 2},
		{Value: "\u2028apple banana cherr", ExpectedEOL: eol.LS, ExpectedSize: 3},
	}

	for testNumber, test := range testCases {

		var source io.Reader = strings.NewReader(test.Value)
		var scanner io.RuneScanner = utf8.NewRuneScanner(source)

		actualEOL, actualSize, err := eol.ReadEOL(scanner)
		if nil != err {
			t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
			t.Logf("ERROR: (%T) %s", err, err)
			t.Logf("VALUE: %q", test.Value)
			t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL)
			t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize)
			continue
		}

		if expected, actual := test.ExpectedEOL, actualEOL; expected != actual {
			t.Errorf("For test #%d, the actual end-of-line sequence is not what was expected.", testNumber)
			t.Logf("EXPECTED: %q", expected)
			t.Logf("ACTUAL: %q", actual)
			t.Logf("VALUE: %q", test.Value)
			t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize)
			continue
		}

		if expected, actual := test.ExpectedSize, actualSize; expected != actual {
			t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
			t.Logf("EXPECTED: %d", expected)
			t.Logf("ACTUAL: %d", actual)
			t.Logf("VALUE: %q", test.Value)
			t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL)
		}
	}
}
// TestReadEOL_fail checks that eol.ReadEOL reports an error, and returns no
// end-of-line sequence and no size, when the input does not begin with an
// end-of-line character.
func TestReadEOL_fail(t *testing.T) {

	// ExpectedEOL and ExpectedSize are deliberately left at their zero values
	// in every test case: on failure the returned end-of-line sequence is
	// expected to be empty and the returned size is expected to be zero.
	tests := []struct {
		Value         string
		ExpectedEOL   string
		ExpectedSize  int
		ExpectedError string
	}{
		{
			Value:         "apple",
			ExpectedError: "eol: 'a' (U+0061) is not an end-of-line character",
		},
		{
			Value:         "banana",
			ExpectedError: "eol: 'b' (U+0062) is not an end-of-line character",
		},
		{
			Value:         "cherry",
			ExpectedError: "eol: 'c' (U+0063) is not an end-of-line character",
		},
	}

	for testNumber, test := range tests {

		var reader io.Reader = strings.NewReader(test.Value)
		var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)

		actualEOL, actualSize, err := eol.ReadEOL(runescanner)
		if nil == err {
			t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
			t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
			t.Logf("VALUE: %q", test.Value)
			t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL)
			t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize)
			continue
		}

		// The error text must match exactly.
		{
			expected := test.ExpectedError
			actual := err.Error()

			if expected != actual {
				t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
				t.Logf("EXPECTED: %q", expected)
				t.Logf("ACTUAL: %q", actual)
				t.Logf("VALUE: %q", test.Value)
				t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL)
				t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize)
				continue
			}
		}

		// Even on failure, the returned end-of-line sequence is checked.
		{
			expected := test.ExpectedEOL
			actual := actualEOL

			if expected != actual {
				t.Errorf("For test #%d, the actual end-of-line sequence is not what was expected.", testNumber)
				t.Logf("EXPECTED: %q", expected)
				t.Logf("ACTUAL: %q", actual)
				t.Logf("VALUE: %q", test.Value)
				t.Logf("EXPECTED-SIZE: %d", test.ExpectedSize)
				t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
				continue
			}
		}

		// Even on failure, the returned size is checked.
		{
			expected := test.ExpectedSize
			actual := actualSize

			if expected != actual {
				t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
				t.Logf("EXPECTED: %d", expected)
				t.Logf("ACTUAL: %d", actual)
				t.Logf("VALUE: %q", test.Value)
				t.Logf("EXPECTED-EOL: %q", test.ExpectedEOL)
				t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
				continue
			}
		}
	}
}