From 4bdb41c618755e13199a94927c364191cf7cd823 Mon Sep 17 00:00:00 2001 From: Charles Iliya Krempeaux Date: Mon, 25 Mar 2024 23:24:08 -0700 Subject: [PATCH] form-feed, etc --- README.md | 3 +- ff/byte.go | 3 + ff/rune.go | 3 + ff/string.go | 3 + notfound.go | 7 ++ readeol.go | 6 +- readeol_test.go | 25 ++++-- readff.go | 24 ++++++ readff_test.go | 209 ++++++++++++++++++++++++++++++++++++++++++++++++ readvt.go | 6 +- 10 files changed, 277 insertions(+), 12 deletions(-) create mode 100644 ff/byte.go create mode 100644 ff/rune.go create mode 100644 ff/string.go create mode 100644 readff.go create mode 100644 readff_test.go diff --git a/README.md b/README.md index cf70303..57be4e7 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ Package **eol** implements tools for working with end-of-line, for the Go progra The end-of-line sequences it supports is: * `"\n" // line-feed (LF)` -* `"\v" // vertical-tab (VT)` * `"\n\r" // line-feed (LF), carriage-return (CR)` +* `"\v" // vertical-tab (VT)` +* `"\f" // form-feed (FF)` * `"\r" // carriage-return (CR)` * `"\r\n" // carriage-return (CR), line-feed (LF)` * `"\u0085" // next-line (NEL)` diff --git a/ff/byte.go b/ff/byte.go new file mode 100644 index 0000000..bfe85d2 --- /dev/null +++ b/ff/byte.go @@ -0,0 +1,3 @@ +package ff + +const Byte byte = '\u000C' diff --git a/ff/rune.go b/ff/rune.go new file mode 100644 index 0000000..e750415 --- /dev/null +++ b/ff/rune.go @@ -0,0 +1,3 @@ +package ff + +const Rune rune = '\u000C' diff --git a/ff/string.go b/ff/string.go new file mode 100644 index 0000000..9234c73 --- /dev/null +++ b/ff/string.go @@ -0,0 +1,3 @@ +package ff + +const String string = string(Rune) diff --git a/notfound.go b/notfound.go index abb8a79..23d2769 100644 --- a/notfound.go +++ b/notfound.go @@ -6,6 +6,7 @@ import ( "sourcecode.social/reiver/go-opt" "sourcecode.social/reiver/go-eol/cr" + "sourcecode.social/reiver/go-eol/ff" "sourcecode.social/reiver/go-eol/lf" "sourcecode.social/reiver/go-eol/ls" 
"sourcecode.social/reiver/go-eol/nel" @@ -53,6 +54,12 @@ func (receiver internalNotFoundError) Error() string { s = fmt.Sprintf(`eol: vertical-tab (VT) character ('\v') (U+000B) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual) }) p = append(p, s...) + case ff.Rune: + var s string = fmt.Sprintf(`eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual) + eolSequence.WhenSomething(func(sequence string){ + s = fmt.Sprintf(`eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual) + }) + p = append(p, s...) case cr.Rune: var s string = fmt.Sprintf(`eol: carriage-return (CR) character ('\r') (U+000D) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual) eolSequence.WhenSomething(func(sequence string){ diff --git a/readeol.go b/readeol.go index ef2924b..0a413f7 100644 --- a/readeol.go +++ b/readeol.go @@ -7,6 +7,7 @@ import ( "sourcecode.social/reiver/go-eol/cr" "sourcecode.social/reiver/go-eol/crlf" + "sourcecode.social/reiver/go-eol/ff" "sourcecode.social/reiver/go-eol/lf" "sourcecode.social/reiver/go-eol/lfcr" "sourcecode.social/reiver/go-eol/ls" @@ -19,8 +20,9 @@ import ( // The end-of-line sequences it supports are: // // line-feed (LF) (U+000A) ('\n') -// vertical-tab (VT) (U+000B) ('\v') // line-feed, carriage-return ("\n\r") +// vertical-tab (VT) (U+000B) ('\v') +// form-feed (FF) (U+000C) ('\f') // carriage-return (CR) (U+000D) ('\r') // carriage-return, line-feed ("\r\n") // next-line (NEL) (U+0085) @@ -56,6 +58,8 @@ func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error) // Nothing here. case vt.Rune: return vt.String, size0, nil + case ff.Rune: + return ff.String, size0, nil case cr.Rune: // Nothing here. 
case nel.Rune: diff --git a/readeol_test.go b/readeol_test.go index b652083..d944083 100644 --- a/readeol_test.go +++ b/readeol_test.go @@ -11,6 +11,7 @@ import ( "sourcecode.social/reiver/go-eol" "sourcecode.social/reiver/go-eol/cr" "sourcecode.social/reiver/go-eol/crlf" + "sourcecode.social/reiver/go-eol/ff" "sourcecode.social/reiver/go-eol/lf" "sourcecode.social/reiver/go-eol/lfcr" "sourcecode.social/reiver/go-eol/ls" @@ -40,6 +41,11 @@ func TestReadEOL(t *testing.T) { ExpectedEOL: vt.String, ExpectedSize: 1, }, + { + Value: "\f", + ExpectedEOL: ff.String, + ExpectedSize: 1, + }, { Value: "\r", ExpectedEOL: cr.String, @@ -68,33 +74,38 @@ func TestReadEOL(t *testing.T) { ExpectedEOL: lf.String, ExpectedSize: 1, }, + { + Value: "\n\rapple banana cherry", + ExpectedEOL: lfcr.String, + ExpectedSize: 2, + }, { Value: "\vapple banana cherry", ExpectedEOL: vt.String, ExpectedSize: 1, }, { - Value: "\n\rapple banana cherr", - ExpectedEOL: lfcr.String, - ExpectedSize: 2, + Value: "\fapple banana cherry", + ExpectedEOL: ff.String, + ExpectedSize: 1, }, { - Value: "\rapple banana cherr", + Value: "\rapple banana cherry", ExpectedEOL: cr.String, ExpectedSize: 1, }, { - Value: "\r\napple banana cherr", + Value: "\r\napple banana cherry", ExpectedEOL: crlf.String, ExpectedSize: 2, }, { - Value: "\u0085apple banana cherr", + Value: "\u0085apple banana cherry", ExpectedEOL: nel.String, ExpectedSize: 2, }, { - Value: "\u2028apple banana cherr", + Value: "\u2028apple banana cherry", ExpectedEOL: ls.String, ExpectedSize: 3, }, diff --git a/readff.go b/readff.go new file mode 100644 index 0000000..af2aa56 --- /dev/null +++ b/readff.go @@ -0,0 +1,24 @@ +package eol + +import ( + "io" + + "sourcecode.social/reiver/go-opt" + + "sourcecode.social/reiver/go-eol/ff" +) + +// ReadFF tries to read the "\f" (i.e., form-feed) end-of-line sequence. +// +// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\f"). 
+// +// If the character read is not a '\f', then ReadFF will try to unread the character. +// +// Example usage: +// +// size, err := eol.ReadFF(runescanner) +func ReadFF(runescanner io.RuneScanner) (size int, err error) { + const characterNumber uint64 = 1 + var circumstance internalCircumstance = specifyCircumstance(opt.Something(ff.String), characterNumber) + return readthisrune(circumstance, runescanner, ff.Rune) +} diff --git a/readff_test.go b/readff_test.go new file mode 100644 index 0000000..298a124 --- /dev/null +++ b/readff_test.go @@ -0,0 +1,209 @@ +package eol_test + +import ( + "testing" + + "io" + "strings" + + "sourcecode.social/reiver/go-utf8" + + "sourcecode.social/reiver/go-eol" +) + +func TestReadFF(t *testing.T) { + + tests := []struct{ + Value string + ExpectedSize int + }{ + { + Value: "\f", + ExpectedSize: 1, + }, + + + + { + Value: "\fapple banana cherry", + ExpectedSize: 1, + }, + } + + for testNumber, test := range tests { + + var reader io.Reader = strings.NewReader(test.Value) + var runescanner io.RuneScanner = utf8.NewRuneScanner(reader) + + actualSize, err := eol.ReadFF(runescanner) + if nil != err { + t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber) + t.Logf("ERROR: (%T) %s", err, err) + t.Logf("VALUE: %q", test.Value) + continue + } + + { + expected := test.ExpectedSize + actual := actualSize + + if expected != actual { + t.Errorf("For test #%d, the actual size is not what was expected.", testNumber) + t.Logf("EXPECTED: %d", expected) + t.Logf("ACTUAL: %d", actual) + t.Logf("VALUE: %q", test.Value) + continue + } + } + + } +} + +func TestReadFF_fail(t *testing.T) { + + tests := []struct{ + Value string + ExpectedError string + }{ + { + Value: "", + ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\f": EOF`, + }, + + + + { + Value: "\n", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '\n' 
(U+000A)`, + }, + { + Value: "\u0085", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '\u0085' (U+0085)`, + }, + { + Value: "\u2028", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '\u2028' (U+2028)`, + }, + + + + { + Value: "😈", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '😈' (U+1F608)`, + }, + + + + { + Value: "\napple banana cherry", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '\n' (U+000A)`, + }, + { + Value: "\u0085apple banana cherry", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '\u0085' (U+0085)`, + }, + { + Value: "\u2028apple banana cherry", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '\u2028' (U+2028)`, + }, + + + + { + Value: "😈apple banana cherry", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '😈' (U+1F608)`, + }, + + + + { + Value: " \n", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found ' ' (U+0020)`, + }, + { + Value: " \r", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found ' ' (U+0020)`, + }, + { + Value: " \u0085", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found ' ' (U+0020)`, + }, + { + Value: " \u2028", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line 
sequence "\f" character №1 — instead found ' ' (U+0020)`, + }, + + + + { + Value: " 😈", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found ' ' (U+0020)`, + }, + + + + { + Value: ".\n", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '.' (U+002E)`, + }, + { + Value: ".\r", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '.' (U+002E)`, + }, + { + Value: ".\u0085", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '.' (U+002E)`, + }, + { + Value: ".\u2028", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '.' (U+002E)`, + }, + + + + { + Value: ".😈", + ExpectedError: `eol: form-feed (FF) character ('\f') (U+000C) not found for end-of-line sequence "\f" character №1 — instead found '.' 
(U+002E)`, + }, + } + + for testNumber, test := range tests { + + var reader io.Reader = strings.NewReader(test.Value) + var runescanner io.RuneScanner = utf8.NewRuneScanner(reader) + + actualSize, err := eol.ReadFF(runescanner) + if nil == err { + t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber) + t.Logf("EXPECTED-ERROR: %q", test.ExpectedError) + t.Logf("VALUE: %q", test.Value) + continue + } + + { + expected := test.ExpectedError + actual := err.Error() + + if expected != actual { + t.Errorf("For test #%d, the actual error is not what was expected.", testNumber) + t.Logf("EXPECTED: %q", expected) + t.Logf("ACTUAL: %q", actual) + t.Logf("VALUE: %q", test.Value) + continue + } + } + + { + expected := 0 + actual := actualSize + + if expected != actual { + t.Errorf("For test #%d, the actual size is not what was expected.", testNumber) + t.Logf("EXPECTED: %d", expected) + t.Logf("ACTUAL: %d", actual) + t.Logf("VALUE: %q", test.Value) + continue + } + } + } +} diff --git a/readvt.go b/readvt.go index 3274729..b2ef3ec 100644 --- a/readvt.go +++ b/readvt.go @@ -8,11 +8,11 @@ import ( "sourcecode.social/reiver/go-eol/vt" ) -// ReadVT tries to read the "\r" (i.e., carriage-return) end-of-line sequence. +// ReadVT tries to read the "\v" (i.e., vertical-tab) end-of-line sequence. // -// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\r"). +// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\v"). // -// If the character read is not a '\r', then ReadVT will try to unread the character. +// If the character read is not a '\v', then ReadVT will try to unread the character. // // Example usage: //