From 0c561bbb23ccce36fc7e9e511d776fb14c58b941 Mon Sep 17 00:00:00 2001
From: Charles Iliya Krempeaux
Date: Sun, 18 Feb 2024 10:07:49 -0800
Subject: [PATCH] lf cr

---
 readlfcr.go      |  50 +++++++++++
 readlfcr_test.go | 217 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 267 insertions(+)
 create mode 100644 readlfcr.go
 create mode 100644 readlfcr_test.go

diff --git a/readlfcr.go b/readlfcr.go
new file mode 100644
index 0000000..ae3adf1
--- /dev/null
+++ b/readlfcr.go
@@ -0,0 +1,50 @@
+package eol
+
+import (
+	"io"
+
+	"sourcecode.social/reiver/go-opt"
+
+	"sourcecode.social/reiver/go-eol/cr"
+	"sourcecode.social/reiver/go-eol/lf"
+	"sourcecode.social/reiver/go-eol/lfcr"
+)
+
+// ReadLFCR tries to read the "\n\r" (i.e., line-feed carriage-return) end-of-line sequence.
+//
+// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\n\r"). If it fails, it returns the error along with the number-of-bytes it had read before failing.
+//
+// If the first character read is not a '\n', then ReadLFCR will try to unread the character.
+// If the second character read is not a '\r', then ReadLFCR will also try to unread the second character, but will not be able to unread the first character (i.e., '\n') it already read.
+//
+// Example usage:
+//
+//	size, err := eol.ReadLFCR(runescanner)
+func ReadLFCR(runescanner io.RuneScanner) (size int, err error) {
+
+	var size0 int
+	{
+		var err error
+
+		const characterNumber uint64 = 1
+		var circumstance internalCircumstance = specifyCircumstance(opt.Something(lfcr.String), characterNumber)
+		size0, err = readthisrune(circumstance, runescanner, lf.Rune)
+		if nil != err {
+			return size0, err
+		}
+	}
+
+	var size1 int
+	{
+		var err error
+
+		const characterNumber uint64 = 2
+		var circumstance internalCircumstance = specifyCircumstance(opt.Something(lfcr.String), characterNumber)
+		size1, err = readthisrune(circumstance, runescanner, cr.Rune)
+		if nil != err {
+			return size1+size0, err
+		}
+	}
+
+	return size1+size0, nil
+}
diff --git a/readlfcr_test.go b/readlfcr_test.go
new file mode 100644
index 0000000..f1fba2c
--- /dev/null
+++ b/readlfcr_test.go
@@ -0,0 +1,217 @@
+package eol_test
+
+import (
+	"testing"
+
+	"io"
+	"strings"
+
+	"sourcecode.social/reiver/go-utf8"
+
+	"sourcecode.social/reiver/go-eol"
+)
+
+// TestReadLFCR checks that eol.ReadLFCR returns a size of 2 and no error for inputs that begin with "\n\r".
+func TestReadLFCR(t *testing.T) {
+
+	tests := []struct{
+		Value        string
+		ExpectedSize int
+	}{
+		{
+			Value:        "\n\r",
+			ExpectedSize: 2,
+		},
+
+
+
+		{
+			Value:        "\n\rapple banana cherry",
+			ExpectedSize: 2,
+		},
+	}
+
+	for testNumber, test := range tests {
+
+		var reader io.Reader = strings.NewReader(test.Value)
+		var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
+
+		actualSize, err := eol.ReadLFCR(runescanner)
+		if nil != err {
+			t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
+			t.Logf("ERROR: (%T) %s", err, err)
+			t.Logf("VALUE: %q", test.Value)
+			continue
+		}
+
+		{
+			expected := test.ExpectedSize
+			actual := actualSize
+
+			if expected != actual {
+				t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
+				t.Logf("EXPECTED: %d", expected)
+				t.Logf("ACTUAL: %d", actual)
+				t.Logf("VALUE: %q", test.Value)
+				continue
+			}
+		}
+
+	}
+}
+
+// TestReadLFCR_fail checks the error message and the size returned by eol.ReadLFCR for inputs that do not begin with "\n\r".
+func TestReadLFCR_fail(t *testing.T) {
+
+	tests := []struct{
+		Value         string
+		ExpectedError string
+		ExpectedSize  int
+	}{
+		{
+			Value:         "",
+			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\n\r": EOF`,
+		},
+
+
+
+		{
+			Value:         "\r",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '\r' (U+000D)`,
+		},
+		{
+			Value:         "\n",
+			ExpectedError: `eol: problem reading character №2 of end-of-line sequence "\n\r": EOF`,
+			ExpectedSize:  1,
+		},
+		{
+			Value:         "\u0085",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '\u0085' (U+0085)`,
+		},
+		{
+			Value:         "\u2028",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '\u2028' (U+2028)`,
+		},
+
+
+
+		{
+			Value:         "😈",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '😈' (U+1F608)`,
+		},
+
+
+
+		{
+			Value:         "\rapple banana cherry",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '\r' (U+000D)`,
+		},
+		{
+			Value:         "\u0085apple banana cherry",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '\u0085' (U+0085)`,
+		},
+		{
+			Value:         "\u2028apple banana cherry",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '\u2028' (U+2028)`,
+		},
+
+
+
+		{
+			Value:         "😈apple banana cherry",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '😈' (U+1F608)`,
+		},
+
+
+
+		{
+			Value:         " \r",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found ' ' (U+0020)`,
+		},
+		{
+			Value:         " \n",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found ' ' (U+0020)`,
+		},
+		{
+			Value:         " \u0085",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found ' ' (U+0020)`,
+		},
+		{
+			Value:         " \u2028",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found ' ' (U+0020)`,
+		},
+
+
+
+		{
+			Value:         " 😈",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found ' ' (U+0020)`,
+		},
+
+
+
+		{
+			Value:         ".\n",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '.' (U+002E)`,
+		},
+		{
+			Value:         ".\r",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '.' (U+002E)`,
+		},
+		{
+			Value:         ".\u0085",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '.' (U+002E)`,
+		},
+		{
+			Value:         ".\u2028",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '.' (U+002E)`,
+		},
+
+
+
+		{
+			Value:         ".😈",
+			ExpectedError: `eol: line-feed (LF) character ('\n') (U+000A) not found for end-of-line sequence "\n\r" character №1 — instead found '.' (U+002E)`,
+		},
+	}
+
+	for testNumber, test := range tests {
+
+		var reader io.Reader = strings.NewReader(test.Value)
+		var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
+
+		actualSize, err := eol.ReadLFCR(runescanner)
+		if nil == err {
+			t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
+			t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
+			t.Logf("VALUE: %q", test.Value)
+			continue
+		}
+
+		{
+			expected := test.ExpectedError
+			actual := err.Error()
+
+			if expected != actual {
+				t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
+				t.Logf("EXPECTED: %q", expected)
+				t.Logf("ACTUAL: %q", actual)
+				t.Logf("VALUE: %q", test.Value)
+				continue
+			}
+		}
+
+		{
+			expected := test.ExpectedSize
+			actual := actualSize
+
+			if expected != actual {
+				t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
+				t.Logf("EXPECTED: %d", expected)
+				t.Logf("ACTUAL: %d", actual)
+				t.Logf("VALUE: %q", test.Value)
+				continue
+			}
+		}
+	}
+}