From 95a67a7fe7d73f27238cdfacaa01716601b29c30 Mon Sep 17 00:00:00 2001
From: Charles Iliya Krempeaux
Date: Wed, 29 Nov 2023 10:18:09 -0800
Subject: [PATCH] initial commits

---
 readthiseol.go      |  41 ++++++
 readthiseol_test.go | 321 ++++++++++++++++++++++++++++++++++++++++++++
 unrecognizedeol.go  |  30 +++++
 3 files changed, 392 insertions(+)
 create mode 100644 readthiseol.go
 create mode 100644 readthiseol_test.go
 create mode 100644 unrecognizedeol.go

diff --git a/readthiseol.go b/readthiseol.go
new file mode 100644
index 0000000..a271c42
--- /dev/null
+++ b/readthiseol.go
@@ -0,0 +1,41 @@
+package eol
+
+import (
+	"io"
+)
+
+// ReadThisEOL tries to read the specified end-of-line sequence.
+//
+// The end-of-line sequences it supports are:
+//
+//	line-feed (LF) (U+000A) ('\n')
+//	carriage-return (CR) (U+000D) ('\r')
+//	carriage-return, line-feed ("\r\n")
+//	next-line (NEL) (U+0085)
+//	line-separator (LS) (U+2028)
+//
+// If successful, ReadThisEOL returns the number-of-bytes read (to read in the specified end-of-line sequence). It returns an error if runescanner is nil or if endofline is not one of the sequences listed above.
+//
+// Example usage:
+//
+//	size, err := eol.ReadThisEOL(runescanner, eol.CRLF)
+func ReadThisEOL(runescanner io.RuneScanner, endofline string) (size int, err error) {
+	if nil == runescanner {
+		return 0, errNilRuneScanner
+	}
+
+	switch endofline {
+	case LF:
+		return ReadLF(runescanner)
+	case CR:
+		return ReadCR(runescanner)
+	case CRLF:
+		return ReadCRLF(runescanner)
+	case NEL:
+		return ReadNEL(runescanner)
+	case LS:
+		return ReadLS(runescanner)
+	default:
+		return 0, errUnrecognizedEOL(endofline)
+	}
+}
diff --git a/readthiseol_test.go b/readthiseol_test.go
new file mode 100644
index 0000000..e582cd9
--- /dev/null
+++ b/readthiseol_test.go
@@ -0,0 +1,321 @@
+package eol_test
+
+import (
+	"testing"
+
+	"io"
+	"strings"
+
+	"sourcecode.social/reiver/go-utf8"
+
+	"sourcecode.social/reiver/go-eol"
+)
+
+func TestReadThisEOL(t *testing.T) {
+
+	tests := []struct {
+		Value string
+		EOL   string
+	}{
+		{
+			Value: "\n",
+			EOL:   "\n",
+		},
+		{
+			Value: "\r",
+			EOL:   "\r",
+		},
+		{
+			Value: "\r\n",
+			EOL:   "\r\n",
+		},
+		{
+			Value: "\u0085",
+			EOL:   "\u0085",
+		},
+		{
+			Value: "\u2028",
+			EOL:   "\u2028",
+		},
+
+
+
+		{
+			Value: "\n" + "12345",
+			EOL:   "\n",
+		},
+		{
+			Value: "\r" + "12345",
+			EOL:   "\r",
+		},
+		{
+			Value: "\r\n" + "12345",
+			EOL:   "\r\n",
+		},
+		{
+			Value: "\u0085" + "12345",
+			EOL:   "\u0085",
+		},
+		{
+			Value: "\u2028" + "12345",
+			EOL:   "\u2028",
+		},
+
+
+
+		{
+			Value: "\n" + "12345" + "\n",
+			EOL:   "\n",
+		},
+		{
+			Value: "\r" + "12345" + "\r",
+			EOL:   "\r",
+		},
+		{
+			Value: "\r\n" + "12345" + "\r\n",
+			EOL:   "\r\n",
+		},
+		{
+			Value: "\u0085" + "12345" + "\u0085",
+			EOL:   "\u0085",
+		},
+		{
+			Value: "\u2028" + "12345" + "\u2028",
+			EOL:   "\u2028",
+		},
+	}
+
+	for testNumber, test := range tests {
+
+		var reader io.Reader = strings.NewReader(test.Value)
+		var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
+
+		actualNumRead, err := eol.ReadThisEOL(runescanner, test.EOL)
+		if nil != err {
+			t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
+			t.Logf("ERROR: (%T) %s", err, err)
+			t.Logf("EOL: %q", test.EOL)
+			t.Logf("VALUE: %q", test.Value)
+			continue
+		}
+
+		{
+			var expected int = len(test.EOL) // len counts bytes, so a multi-byte sequence such as "\u2028" expects more than 1
+			var actual int = actualNumRead
+
+			if expected != actual {
+				t.Errorf("For test #%d, the actual number-of-bytes-read is not what was expected.", testNumber)
+				t.Logf("EXPECTED: %d", expected)
+				t.Logf("ACTUAL: %d", actual)
+				t.Logf("EOL: %q", test.EOL)
+				t.Logf("VALUE: %q", test.Value)
+				continue
+			}
+		}
+	}
+}
+
+func TestReadThisEOL_fail(t *testing.T) {
+
+	tests := []struct {
+		Value           string
+		EOL             string
+		ExpectedError   string
+		ExpectedNumRead int // number-of-bytes expected alongside the error; zero when omitted
+	}{
+		{
+			Value:         "",
+			EOL:           "\n",
+			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\n": EOF`,
+		},
+		{
+			Value:         "",
+			EOL:           "\r",
+			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\r": EOF`,
+		},
+		{
+			Value:         "",
+			EOL:           "\r\n",
+			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\r\n": EOF`,
+		},
+		{
+			Value:         "",
+			EOL:           "\u0085",
+			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u0085": EOF`,
+		},
+		{
+			Value:         "",
+			EOL:           "\u2028",
+			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2028": EOF`,
+		},
+
+
+
+		{
+			Value:         "\n",
+			EOL:           "\r",
+			ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\n' (U+000A)`,
+		},
+		{
+			Value:         "\n",
+			EOL:           "\r\n",
+			ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\n' (U+000A)`,
+		},
+		{
+			Value:         "\n",
+			EOL:           "\u0085",
+			ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\n' (U+000A)`,
+		},
+		{
+			Value:         "\n",
+			EOL:           "\u2028",
+			ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\n' (U+000A)`,
+		},
+
+
+
+		{
+			Value:         "\r",
+			EOL:           "\n",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\r' (U+000D)`,
+		},
+		{
+			Value:           "\r",
+			EOL:             "\r\n",
+			ExpectedError:   `eol: problem reading character №2 of end-of-line sequence "\r\n": EOF`,
+			ExpectedNumRead: 1, // the "\r" (1 byte) is reported as read before character №2 fails
+		},
+		{
+			Value:         "\r",
+			EOL:           "\u0085",
+			ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\r' (U+000D)`,
+		},
+		{
+			Value:         "\r",
+			EOL:           "\u2028",
+			ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\r' (U+000D)`,
+		},
+
+
+
+		{
+			Value:         "\r\n",
+			EOL:           "\n",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\r' (U+000D)`,
+		},
+		{
+			Value:         "\r\n",
+			EOL:           "\u0085",
+			ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\r' (U+000D)`,
+		},
+		{
+			Value:         "\r\n",
+			EOL:           "\u2028",
+			ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\r' (U+000D)`,
+		},
+
+
+
+		{
+			Value:         "\u0085",
+			EOL:           "\n",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\u0085' (U+0085)`,
+		},
+		{
+			Value:         "\u0085",
+			EOL:           "\r",
+			ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\u0085' (U+0085)`,
+		},
+		{
+			Value:         "\u0085",
+			EOL:           "\r\n",
+			ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\u0085' (U+0085)`,
+		},
+		{
+			Value:         "\u0085",
+			EOL:           "\u2028",
+			ExpectedError: `eol: line-separator (LS) character (U+2028) not found for end-of-line sequence "\u2028" character №1 — instead found '\u0085' (U+0085)`,
+		},
+
+
+
+		{
+			Value:         "\u2028",
+			EOL:           "\n",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n" character №1 — instead found '\u2028' (U+2028)`,
+		},
+		{
+			Value:         "\u2028",
+			EOL:           "\r",
+			ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r" character №1 — instead found '\u2028' (U+2028)`,
+		},
+		{
+			Value:         "\u2028",
+			EOL:           "\r\n",
+			ExpectedError: `eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence "\r\n" character №1 — instead found '\u2028' (U+2028)`,
+		},
+		{
+			Value:         "\u2028",
+			EOL:           "\u0085",
+			ExpectedError: `eol: next-line (NEL) character (U+0085) not found for end-of-line sequence "\u0085" character №1 — instead found '\u2028' (U+2028)`,
+		},
+
+
+
+
+
+
+
+
+
+		{
+			Value:           "\rapple banana cherry",
+			EOL:             "\r\n",
+			ExpectedError:   `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\r\n" character №2 — instead found 'a' (U+0061)`,
+			ExpectedNumRead: 1, // the "\r" (1 byte) is reported as read before character №2 fails
+		},
+	}
+
+	for testNumber, test := range tests {
+
+		var reader io.Reader = strings.NewReader(test.Value)
+		var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
+
+		actualNumRead, err := eol.ReadThisEOL(runescanner, test.EOL)
+		if nil == err {
+			t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
+			t.Logf("EXPECTED-ERROR: %s", test.ExpectedError)
+			t.Logf("EOL: %q", test.EOL)
+			t.Logf("VALUE: %q", test.Value)
+			continue
+		}
+
+		{
+			expected := test.ExpectedError
+			actual := err.Error()
+
+			if expected != actual {
+				t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
+				t.Logf("EXPECTED: %q", expected)
+				t.Logf("ACTUAL: %q", actual)
+				t.Logf("EOL: %q", test.EOL)
+				t.Logf("VALUE: %q", test.Value)
+				continue
+			}
+		}
+
+		{
+			expected := test.ExpectedNumRead
+			actual := actualNumRead
+
+			if expected != actual {
+				t.Errorf("For test #%d, the actual number-of-bytes-read is not what was expected.", testNumber)
+				t.Logf("EXPECTED: %d", expected)
+				t.Logf("ACTUAL: %d", actual)
+				t.Logf("EOL: %q", test.EOL)
+				t.Logf("VALUE: %q", test.Value)
+				continue
+			}
+		}
+	}
+}
diff --git a/unrecognizedeol.go b/unrecognizedeol.go
new file mode 100644
index 0000000..a3fda4d
--- /dev/null
+++ b/unrecognizedeol.go
@@ -0,0 +1,30 @@
+package eol
+
+import (
+	"fmt"
+)
+
+// Compile-time check that internalUnrecognizedEOLError satisfies the error interface.
+var _ error = internalUnrecognizedEOLError{}
+
+// internalUnrecognizedEOLError reports an end-of-line sequence that is not recognized.
+type internalUnrecognizedEOLError struct {
+	value string // the unrecognized end-of-line sequence
+}
+
+// errUnrecognizedEOL returns an error reporting value as an unrecognized end-of-line sequence.
+func errUnrecognizedEOL(value string) error {
+	return internalUnrecognizedEOLError{
+		value: value,
+	}
+}
+
+// Error returns the error message, with the unrecognized sequence quoted via %q.
+func (receiver internalUnrecognizedEOLError) Error() string {
+	return fmt.Sprintf("eol: %q is an unrecognized end-of-line (EOL) sequence", receiver.value)
+}
+
+// UnrecognizedEOL returns the end-of-line sequence that was not recognized.
+func (receiver internalUnrecognizedEOLError) UnrecognizedEOL() string {
+	return receiver.value
+}