initial commits
parent
18aff7a790
commit
95a67a7fe7
|
@ -0,0 +1,41 @@
|
|||
package eol
|
||||
|
||||
import (
|
||||
"io"
|
||||
)
|
||||
|
||||
// ReadThisEOL tries to read the specified end-of-line sequence.
|
||||
//
|
||||
// The end-of-line sequences it supports are:
|
||||
//
|
||||
// line-feed (LF) (U+000A) ('\n')
|
||||
// carriage-return (CR) (U+000D) ('\r')
|
||||
// carriage-return, line-feed ("\r\n")
|
||||
// next-line (NEL) (U+0085)
|
||||
// line-separator (LS) (U+2028)
|
||||
//
|
||||
// If successful, ReadThisEOL return the number-of-bytes read (to read in the specified end-of-line sequence).
|
||||
//
|
||||
// Example usage:
|
||||
///
|
||||
// size, err := eol.ReadThisEOL(runescanner, eol.CRLF)
|
||||
func ReadThisEOL(runescanner io.RuneScanner, endofline string) (size int, err error) {
|
||||
if nil == runescanner {
|
||||
return 0, errNilRuneScanner
|
||||
}
|
||||
|
||||
switch endofline {
|
||||
case LF:
|
||||
return ReadLF(runescanner)
|
||||
case CR:
|
||||
return ReadCR(runescanner)
|
||||
case CRLF:
|
||||
return ReadCRLF(runescanner)
|
||||
case NEL:
|
||||
return ReadNEL(runescanner)
|
||||
case LS:
|
||||
return ReadLS(runescanner)
|
||||
default:
|
||||
return 0, errUnrecognizedEOL(endofline)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,321 @@
|
|||
package eol_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"sourcecode.social/reiver/go-utf8"
|
||||
|
||||
"sourcecode.social/reiver/go-eol"
|
||||
)
|
||||
|
||||
func TestReadThisEOL(t *testing.T) {
|
||||
|
||||
tests := []struct{
|
||||
Value string
|
||||
EOL string
|
||||
}{
|
||||
{
|
||||
Value: "\n",
|
||||
EOL: "\n",
|
||||
},
|
||||
{
|
||||
Value: "\r",
|
||||
EOL: "\r",
|
||||
},
|
||||
{
|
||||
Value: "\r\n",
|
||||
EOL: "\r\n",
|
||||
},
|
||||
{
|
||||
Value: "\u0085",
|
||||
EOL: "\u0085",
|
||||
},
|
||||
{
|
||||
Value: "\u2028",
|
||||
EOL: "\u2028",
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\n" + "12345",
|
||||
EOL: "\n",
|
||||
},
|
||||
{
|
||||
Value: "\r" + "12345",
|
||||
EOL: "\r",
|
||||
},
|
||||
{
|
||||
Value: "\r\n" + "12345",
|
||||
EOL: "\r\n",
|
||||
},
|
||||
{
|
||||
Value: "\u0085" + "12345",
|
||||
EOL: "\u0085",
|
||||
},
|
||||
{
|
||||
Value: "\u2028" + "12345",
|
||||
EOL: "\u2028",
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\n" + "12345" + "\n",
|
||||
EOL: "\n",
|
||||
},
|
||||
{
|
||||
Value: "\r" + "12345" + "\r",
|
||||
EOL: "\r",
|
||||
},
|
||||
{
|
||||
Value: "\r\n" + "12345" + "\r\n",
|
||||
EOL: "\r\n",
|
||||
},
|
||||
{
|
||||
Value: "\u0085" + "12345" + "\u0085",
|
||||
EOL: "\u0085",
|
||||
},
|
||||
{
|
||||
Value: "\u2028" + "12345" + "\u2028",
|
||||
EOL: "\u2028",
|
||||
},
|
||||
}
|
||||
|
||||
for testNumber, test := range tests {
|
||||
|
||||
var reader io.Reader = strings.NewReader(test.Value)
|
||||
var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
|
||||
|
||||
actualNumRead, err := eol.ReadThisEOL(runescanner, test.EOL)
|
||||
if nil != err {
|
||||
t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
|
||||
t.Logf("ERROR: (%T) %s", err, err)
|
||||
t.Logf("EOL: %q", test.EOL)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
|
||||
{
|
||||
var expected int = len(test.EOL)
|
||||
var actual int = actualNumRead
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("For tst #%d, the actual number-of-bytes-read is not what was expected." , testNumber)
|
||||
t.Logf("EXPECTED: %d", expected)
|
||||
t.Logf("ACTUAL: %d", actual)
|
||||
t.Logf("EOL: %q", test.EOL)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadThisEOL_fail(t *testing.T) {
|
||||
|
||||
tests := []struct{
|
||||
Value string
|
||||
EOL string
|
||||
ExpectedError string
|
||||
ExpectedNumRead int
|
||||
}{
|
||||
{
|
||||
Value: "",
|
||||
EOL: "\n",
|
||||
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\n": EOF`,
|
||||
},
|
||||
{
|
||||
Value: "",
|
||||
EOL: "\r",
|
||||
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\r": EOF`,
|
||||
},
|
||||
{
|
||||
Value: "",
|
||||
EOL: "\r\n",
|
||||
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\r\n": EOF`,
|
||||
},
|
||||
{
|
||||
Value: "",
|
||||
EOL: "\u0085",
|
||||
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u0085": EOF`,
|
||||
},
|
||||
{
|
||||
Value: "",
|
||||
EOL: "\u2028",
|
||||
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2028": EOF`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\n",
|
||||
EOL: "\r",
|
||||
ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\n' (U+000A)`,
|
||||
},
|
||||
{
|
||||
Value: "\n",
|
||||
EOL: "\r\n",
|
||||
ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\n' (U+000A)`,
|
||||
},
|
||||
{
|
||||
Value: "\n",
|
||||
EOL: "\u0085",
|
||||
ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\n' (U+000A)`,
|
||||
},
|
||||
{
|
||||
Value: "\n",
|
||||
EOL: "\u2028",
|
||||
ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\n' (U+000A)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\r",
|
||||
EOL: "\n",
|
||||
ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
{
|
||||
Value: "\r",
|
||||
EOL: "\r\n",
|
||||
ExpectedError: `eol: problem reading character №2 of end-of-line sequence "\r\n": EOF`,
|
||||
ExpectedNumRead: 1,
|
||||
},
|
||||
{
|
||||
Value: "\r",
|
||||
EOL: "\u0085",
|
||||
ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
{
|
||||
Value: "\r",
|
||||
EOL: "\u2028",
|
||||
ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\r\n",
|
||||
EOL: "\n",
|
||||
ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
{
|
||||
Value: "\r\n",
|
||||
EOL: "\u0085",
|
||||
ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
{
|
||||
Value: "\r\n",
|
||||
EOL: "\u2028",
|
||||
ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\u0085",
|
||||
EOL: "\n",
|
||||
ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\u0085' (U+0085)`,
|
||||
},
|
||||
{
|
||||
Value: "\u0085",
|
||||
EOL: "\r",
|
||||
ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\u0085' (U+0085)`,
|
||||
},
|
||||
{
|
||||
Value: "\u0085",
|
||||
EOL: "\r\n",
|
||||
ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\u0085' (U+0085)`,
|
||||
},
|
||||
{
|
||||
Value: "\u0085",
|
||||
EOL: "\u2028",
|
||||
ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\u0085' (U+0085)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\u2028",
|
||||
EOL: "\n",
|
||||
ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\u2028' (U+2028)`,
|
||||
},
|
||||
{
|
||||
Value: "\u2028",
|
||||
EOL: "\r",
|
||||
ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\u2028' (U+2028)`,
|
||||
},
|
||||
{
|
||||
Value: "\u2028",
|
||||
EOL: "\r\n",
|
||||
ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\u2028' (U+2028)`,
|
||||
},
|
||||
{
|
||||
Value: "\u2028",
|
||||
EOL: "\u0085",
|
||||
ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\u2028' (U+2028)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\rapple banana cherry",
|
||||
EOL: "\r\n",
|
||||
ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\r\n" character №2 — instead found 'a' (U+0061)`,
|
||||
ExpectedNumRead: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for testNumber, test := range tests {
|
||||
|
||||
var reader io.Reader = strings.NewReader(test.Value)
|
||||
var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
|
||||
|
||||
actualNumRead, err := eol.ReadThisEOL(runescanner, test.EOL)
|
||||
if nil == err {
|
||||
t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
|
||||
t.Logf("EXPECTED-ERROR: %s", test.ExpectedError)
|
||||
t.Logf("EOL: %q", test.EOL)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
|
||||
{
|
||||
expected := test.ExpectedError
|
||||
actual := err.Error()
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
|
||||
t.Logf("EXPECTED: %q", expected)
|
||||
t.Logf("ACTUAL: %q", actual)
|
||||
t.Logf("EOL: %q", test.EOL)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
expected := test.ExpectedNumRead
|
||||
actual := actualNumRead
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("For test #%d, the actual number-of-bytes-read is not what was expected.", testNumber)
|
||||
t.Logf("EXPECTED: %d", expected)
|
||||
t.Logf("ACTUAL: %d", actual)
|
||||
t.Logf("EOL: %q", test.EOL)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package eol
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
var _ error = internalUnrecognizedEOLError{}
|
||||
|
||||
type internalUnrecognizedEOLError struct {
|
||||
value string
|
||||
}
|
||||
|
||||
func errUnrecognizedEOL(value string) error {
|
||||
return internalUnrecognizedEOLError{
|
||||
value:value,
|
||||
}
|
||||
}
|
||||
|
||||
func (receiver internalUnrecognizedEOLError) Error() string {
|
||||
return fmt.Sprintf("eol: %q is an unrecognized end-of-line (EOL) sequence", receiver.value)
|
||||
}
|
||||
|
||||
func (receiver internalUnrecognizedEOLError) UnrecognizedEOL() string {
|
||||
return receiver.value
|
||||
}
|
Loading…
Reference in New Issue