initial commits

master
Charles Iliya Krempeaux 2023-11-29 10:18:09 -08:00
parent 18aff7a790
commit 95a67a7fe7
3 changed files with 387 additions and 0 deletions

41
readthiseol.go 100644
View File

@ -0,0 +1,41 @@
package eol
import (
"io"
)
// ReadThisEOL attempts to read one specific end-of-line sequence, chosen by
// the caller, from runescanner.
//
// The value of endofline selects which sequence is expected. The recognized
// end-of-line sequences are:
//
//	line-feed (LF)             (U+000A) ('\n')
//	carriage-return (CR)       (U+000D) ('\r')
//	carriage-return, line-feed ("\r\n")
//	next-line (NEL)            (U+0085)
//	line-separator (LS)        (U+2028)
//
// On success, ReadThisEOL returns the number of bytes that were read in order
// to consume the requested end-of-line sequence.
//
// An error is returned if runescanner is nil, if endofline is not one of the
// sequences listed above, or if the reader selected for that sequence reports
// an error.
//
// Example usage:
//
//	size, err := eol.ReadThisEOL(runescanner, eol.CRLF)
func ReadThisEOL(runescanner io.RuneScanner, endofline string) (size int, err error) {
	// Guard: there is nothing to read from.
	if runescanner == nil {
		return 0, errNilRuneScanner
	}

	// Hand off to the reader dedicated to the requested sequence; the named
	// results carry whatever that reader reports.
	switch endofline {
	case LF:
		size, err = ReadLF(runescanner)
	case CR:
		size, err = ReadCR(runescanner)
	case CRLF:
		size, err = ReadCRLF(runescanner)
	case NEL:
		size, err = ReadNEL(runescanner)
	case LS:
		size, err = ReadLS(runescanner)
	default:
		// size is still zero here.
		err = errUnrecognizedEOL(endofline)
	}

	return size, err
}

321
readthiseol_test.go 100644
View File

@ -0,0 +1,321 @@
package eol_test
import (
"testing"
"io"
"strings"
"sourcecode.social/reiver/go-utf8"
"sourcecode.social/reiver/go-eol"
)
// TestReadThisEOL exercises the success path of eol.ReadThisEOL.
//
// Every case feeds ReadThisEOL an input that begins with the requested
// end-of-line sequence (alone, followed by other text, or followed by other
// text and a second end-of-line sequence) and checks that no error is
// returned and that the reported number of bytes read equals the byte length
// of the requested sequence.
func TestReadThisEOL(t *testing.T) {

	tests := []struct {
		Value string
		EOL   string
	}{
		// The input is exactly the end-of-line sequence.
		{Value: "\n", EOL: "\n"},
		{Value: "\r", EOL: "\r"},
		{Value: "\r\n", EOL: "\r\n"},
		{Value: "\u0085", EOL: "\u0085"},
		{Value: "\u2028", EOL: "\u2028"},

		// The end-of-line sequence is followed by other characters.
		{Value: "\n" + "12345", EOL: "\n"},
		{Value: "\r" + "12345", EOL: "\r"},
		{Value: "\r\n" + "12345", EOL: "\r\n"},
		{Value: "\u0085" + "12345", EOL: "\u0085"},
		{Value: "\u2028" + "12345", EOL: "\u2028"},

		// The end-of-line sequence appears both before and after other characters.
		{Value: "\n" + "12345" + "\n", EOL: "\n"},
		{Value: "\r" + "12345" + "\r", EOL: "\r"},
		{Value: "\r\n" + "12345" + "\r\n", EOL: "\r\n"},
		{Value: "\u0085" + "12345" + "\u0085", EOL: "\u0085"},
		{Value: "\u2028" + "12345" + "\u2028", EOL: "\u2028"},
	}

	for testNumber, test := range tests {

		var reader io.Reader = strings.NewReader(test.Value)
		var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)

		actualNumRead, err := eol.ReadThisEOL(runescanner, test.EOL)
		if err != nil {
			t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
			t.Logf("ERROR: (%T) %s", err, err)
			t.Logf("EOL: %q", test.EOL)
			t.Logf("VALUE: %q", test.Value)
			continue
		}

		// The count is in bytes, so the expectation is the byte length of the sequence.
		expected := len(test.EOL)
		actual := actualNumRead

		if expected != actual {
			t.Errorf("For test #%d, the actual number-of-bytes-read is not what was expected.", testNumber)
			t.Logf("EXPECTED: %d", expected)
			t.Logf("ACTUAL: %d", actual)
			t.Logf("EOL: %q", test.EOL)
			t.Logf("VALUE: %q", test.Value)
		}
	}
}
// TestReadThisEOL_fail exercises the failure path of eol.ReadThisEOL.
//
// Every case feeds ReadThisEOL an input that does not begin with the
// requested end-of-line sequence (empty input, a different end-of-line
// sequence, or a partially matching sequence) and checks that an error is
// returned, that the error message is exactly the expected one, and that the
// reported number of bytes read is the expected one.
func TestReadThisEOL_fail(t *testing.T) {

	tests := []struct {
		Value           string
		EOL             string
		ExpectedError   string
		ExpectedNumRead int
	}{
		// Empty input.
		{Value: "", EOL: "\n", ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\n": EOF`},
		{Value: "", EOL: "\r", ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\r": EOF`},
		{Value: "", EOL: "\r\n", ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\r\n": EOF`},
		{Value: "", EOL: "\u0085", ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u0085": EOF`},
		{Value: "", EOL: "\u2028", ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2028": EOF`},

		// Input is a line-feed.
		{Value: "\n", EOL: "\r", ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\n' (U+000A)`},
		{Value: "\n", EOL: "\r\n", ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\n' (U+000A)`},
		{Value: "\n", EOL: "\u0085", ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\n' (U+000A)`},
		{Value: "\n", EOL: "\u2028", ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\n' (U+000A)`},

		// Input is a carriage-return.
		{Value: "\r", EOL: "\n", ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\r' (U+000D)`},
		{Value: "\r", EOL: "\r\n", ExpectedError: `eol: problem reading character №2 of end-of-line sequence "\r\n": EOF`, ExpectedNumRead: 1},
		{Value: "\r", EOL: "\u0085", ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\r' (U+000D)`},
		{Value: "\r", EOL: "\u2028", ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\r' (U+000D)`},

		// Input is a carriage-return, line-feed.
		{Value: "\r\n", EOL: "\n", ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\r' (U+000D)`},
		{Value: "\r\n", EOL: "\u0085", ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\r' (U+000D)`},
		{Value: "\r\n", EOL: "\u2028", ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\r' (U+000D)`},

		// Input is a next-line.
		{Value: "\u0085", EOL: "\n", ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\u0085' (U+0085)`},
		{Value: "\u0085", EOL: "\r", ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\u0085' (U+0085)`},
		{Value: "\u0085", EOL: "\r\n", ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\u0085' (U+0085)`},
		{Value: "\u0085", EOL: "\u2028", ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\u0085' (U+0085)`},

		// Input is a line-separator.
		{Value: "\u2028", EOL: "\n", ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\u2028' (U+2028)`},
		{Value: "\u2028", EOL: "\r", ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\u2028' (U+2028)`},
		{Value: "\u2028", EOL: "\r\n", ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\u2028' (U+2028)`},
		{Value: "\u2028", EOL: "\u0085", ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\u2028' (U+2028)`},

		// Input starts with a carriage-return that is not followed by a line-feed.
		{Value: "\rapple banana cherry", EOL: "\r\n", ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\r\n" character №2 — instead found 'a' (U+0061)`, ExpectedNumRead: 1},
	}

	for testNumber, test := range tests {

		source := io.Reader(strings.NewReader(test.Value))
		scanner := io.RuneScanner(utf8.NewRuneScanner(source))

		numRead, err := eol.ReadThisEOL(scanner, test.EOL)

		// Every case in this table is expected to fail.
		if err == nil {
			t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
			t.Logf("EXPECTED-ERROR: %s", test.ExpectedError)
			t.Logf("EOL: %q", test.EOL)
			t.Logf("VALUE: %q", test.Value)
			continue
		}

		// The error message must match exactly.
		if want, got := test.ExpectedError, err.Error(); want != got {
			t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
			t.Logf("EXPECTED: %q", want)
			t.Logf("ACTUAL: %q", got)
			t.Logf("EOL: %q", test.EOL)
			t.Logf("VALUE: %q", test.Value)
			continue
		}

		// The number of bytes read is checked even though an error was returned.
		if want, got := test.ExpectedNumRead, numRead; want != got {
			t.Errorf("For test #%d, the actual number-of-bytes-read is not what was expected.", testNumber)
			t.Logf("EXPECTED: %d", want)
			t.Logf("ACTUAL: %d", got)
			t.Logf("EOL: %q", test.EOL)
			t.Logf("VALUE: %q", test.Value)
		}
	}
}

25
unrecognizedeol.go 100644
View File

@ -0,0 +1,25 @@
package eol
import (
"fmt"
)
// internalUnrecognizedEOLError is the error value produced for an end-of-line
// sequence that is not recognized. It records the offending sequence.
type internalUnrecognizedEOLError struct {
	value string
}

// Compile-time assertion that internalUnrecognizedEOLError implements error.
var _ error = internalUnrecognizedEOLError{}

// errUnrecognizedEOL returns an error reporting that value is an unrecognized
// end-of-line sequence.
func errUnrecognizedEOL(value string) error {
	var e internalUnrecognizedEOLError
	e.value = value
	return e
}

// Error returns the error message, with the unrecognized sequence rendered as
// a quoted string.
func (e internalUnrecognizedEOLError) Error() string {
	const format = "eol: %q is an unrecognized end-of-line (EOL) sequence"

	return fmt.Sprintf(format, e.value)
}

// UnrecognizedEOL returns the end-of-line sequence that was not recognized.
func (e internalUnrecognizedEOLError) UnrecognizedEOL() string {
	return e.value
}