From 2244b2cfeba9231941d2efa95ab74cc05083f323 Mon Sep 17 00:00:00 2001 From: Charles Iliya Krempeaux Date: Mon, 25 Mar 2024 23:36:06 -0700 Subject: [PATCH] paragraph-separator --- README.md | 17 ++-- circumstance.go | 6 ++ notfound.go | 7 ++ ps/rune.go | 3 + ps/string.go | 3 + readeol.go | 4 + readeol_test.go | 11 +++ readps.go | 24 ++++++ readps_test.go | 209 ++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 276 insertions(+), 8 deletions(-) create mode 100644 ps/rune.go create mode 100644 ps/string.go create mode 100644 readps.go create mode 100644 readps_test.go diff --git a/README.md b/README.md index 57be4e7..94a244d 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,15 @@ Package **eol** implements tools for working with end-of-line, for the Go progra The end-of-line sequences it supports is: -* `"\n" // line-feed (LF)` -* `"\n\r" // line-feed (LF), carriage-return (CR)` -* `"\v" // vertical-tab (VT)` -* `"\f" // form-feed (FF)` -* `"\r" // carriage-return (CR)` -* `"\r\n" // carriage-return (CR), line-feed (LF)` -* `"\u0085" // next-line (NEL)` -* `"\u2028" // line-separator (LS)` +* `"\n" // line-feed (LF)` +* `"\n\r" // line-feed (LF), carriage-return (CR)` +* `"\v" // vertical-tab (VT)` +* `"\f" // form-feed (FF)` +* `"\r" // carriage-return (CR)` +* `"\r\n" // carriage-return (CR), line-feed (LF)` +* `"\u0085" // next-line (NEL)` +* `"\u2028" // line-separator (LS)` +* `"\u2029" // paragraph-separator (PS)` ## Documention diff --git a/circumstance.go b/circumstance.go index ee87b3d..5da030d 100644 --- a/circumstance.go +++ b/circumstance.go @@ -11,10 +11,14 @@ import ( // Example end-of-line sequences are: // // • "\n" +// • "\n\r" +// • "\v" +// • "\f" // • "\r" // • "\r\n" // • "\u0085" // • "\u2028" +// • "\u2029" // // No end-of-line sequence can also be specified if it is unknown. // For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time. 
@@ -30,6 +34,8 @@ import ( // // var circumstance internalCircumstance = specifyCircumstance(opt.Something]("\v"), 1) // +// var circumstance internalCircumstance = specifyCircumstance(opt.Something]("\f"), 1) +// // var circumstance internalCircumstance = specifyCircumstance(opt.Something]("\r"), 1) // // var circumstance internalCircumstance = specifyCircumstance(opt.Something]("\r\n"), 1) diff --git a/notfound.go b/notfound.go index 23d2769..750c3e0 100644 --- a/notfound.go +++ b/notfound.go @@ -10,6 +10,7 @@ import ( "sourcecode.social/reiver/go-eol/lf" "sourcecode.social/reiver/go-eol/ls" "sourcecode.social/reiver/go-eol/nel" + "sourcecode.social/reiver/go-eol/ps" "sourcecode.social/reiver/go-eol/vt" ) @@ -78,6 +79,12 @@ func (receiver internalNotFoundError) Error() string { s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual) }) p = append(p, s...) + case ps.Rune: + var s string = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual) + eolSequence.WhenSomething(func(sequence string){ + s = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual) + }) + p = append(p, s...) 
default: var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual) eolSequence.WhenSomething(func(sequence string){ diff --git a/ps/rune.go b/ps/rune.go new file mode 100644 index 0000000..4fb6a85 --- /dev/null +++ b/ps/rune.go @@ -0,0 +1,3 @@ +package ps + +const Rune rune = '\u2029' diff --git a/ps/string.go b/ps/string.go new file mode 100644 index 0000000..b76c32c --- /dev/null +++ b/ps/string.go @@ -0,0 +1,3 @@ +package ps + +const String string = string(Rune) diff --git a/readeol.go b/readeol.go index 0a413f7..c93d95d 100644 --- a/readeol.go +++ b/readeol.go @@ -12,6 +12,7 @@ import ( "sourcecode.social/reiver/go-eol/lfcr" "sourcecode.social/reiver/go-eol/ls" "sourcecode.social/reiver/go-eol/nel" + "sourcecode.social/reiver/go-eol/ps" "sourcecode.social/reiver/go-eol/vt" ) @@ -27,6 +28,7 @@ import ( // carriage-return, line-feed ("\r\n") // next-line (NEL) (U+0085) // line-separator (LS) (U+2028) +// paragraph-separator (PS) (U+2029) // // If successful, ReadEOL return the end-of-line sequence it found and the number-of-bytes read (to read in end-of-line sequence it found). 
// @@ -66,6 +68,8 @@ func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error) return nel.String, size0, nil case ls.Rune: return ls.String, size0, nil + case ps.Rune: + return ps.String, size0, nil default: err := runescanner.UnreadRune() if nil != err { diff --git a/readeol_test.go b/readeol_test.go index d944083..42bb890 100644 --- a/readeol_test.go +++ b/readeol_test.go @@ -16,6 +16,7 @@ import ( "sourcecode.social/reiver/go-eol/lfcr" "sourcecode.social/reiver/go-eol/ls" "sourcecode.social/reiver/go-eol/nel" + "sourcecode.social/reiver/go-eol/ps" "sourcecode.social/reiver/go-eol/vt" ) @@ -66,6 +67,11 @@ func TestReadEOL(t *testing.T) { ExpectedEOL: ls.String, ExpectedSize: 3, }, + { + Value: "\u2029", + ExpectedEOL: ps.String, + ExpectedSize: 3, + }, @@ -109,6 +115,11 @@ func TestReadEOL(t *testing.T) { ExpectedEOL: ls.String, ExpectedSize: 3, }, + { + Value: "\u2029apple banana cherry", + ExpectedEOL: ps.String, + ExpectedSize: 3, + }, } for testNumber, test := range tests { diff --git a/readps.go b/readps.go new file mode 100644 index 0000000..0d72121 --- /dev/null +++ b/readps.go @@ -0,0 +1,24 @@ +package eol + +import ( + "io" + + "sourcecode.social/reiver/go-opt" + + "sourcecode.social/reiver/go-eol/ps" +) + +// ReadPS tries to read the "\u2029" (i.e., paragraph-separator) end-of-line sequence. +// +// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\u2029"). +// +// If the character read is not a '\u2029', then ReadPS will try to unread the character. 
+// +// Example usage: +// +// size, err := eol.ReadPS(runescanner) +func ReadPS(runescanner io.RuneScanner) (size int, err error) { + const characterNumber uint64 = 1 + var circumstance internalCircumstance = specifyCircumstance(opt.Something(ps.String), characterNumber) + return readthisrune(circumstance, runescanner, ps.Rune) +} diff --git a/readps_test.go b/readps_test.go new file mode 100644 index 0000000..79c7eab --- /dev/null +++ b/readps_test.go @@ -0,0 +1,209 @@ +package eol_test + +import ( + "testing" + + "io" + "strings" + + "sourcecode.social/reiver/go-utf8" + + "sourcecode.social/reiver/go-eol" +) + +func TestReadPS(t *testing.T) { + + tests := []struct{ + Value string + ExpectedSize int + }{ + { + Value: "\u2029", + ExpectedSize: 3, + }, + + + + { + Value: "\u2029apple banana cherry", + ExpectedSize: 3, + }, + } + + for testNumber, test := range tests { + + var reader io.Reader = strings.NewReader(test.Value) + var runescanner io.RuneScanner = utf8.NewRuneScanner(reader) + + actualSize, err := eol.ReadPS(runescanner) + if nil != err { + t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber) + t.Logf("ERROR: (%T) %s", err, err) + t.Logf("VALUE: %q", test.Value) + continue + } + + { + expected := test.ExpectedSize + actual := actualSize + + if expected != actual { + t.Errorf("For test #%d, the actual size is not what was expected.", testNumber) + t.Logf("EXPECTED: %d", expected) + t.Logf("ACTUAL: %d", actual) + t.Logf("VALUE: %q", test.Value) + continue + } + } + + } +} + +func TestReadPS_fail(t *testing.T) { + + tests := []struct{ + Value string + ExpectedError string + }{ + { + Value: "", + ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2029": EOF`, + }, + + + + { + Value: "\n", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`, + }, + { + Value: "\r", + ExpectedError: `eol: 
paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`, + }, + { + Value: "\u0085", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`, + }, + + + + { + Value: "😈", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`, + }, + + + + { + Value: "\napple banana cherry", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`, + }, + { + Value: "\rapple banana cherry", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`, + }, + { + Value: "\u0085apple banana cherry", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`, + }, + + + + { + Value: "😈apple banana cherry", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`, + }, + + + + { + Value: " \n", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`, + }, + { + Value: " \r", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`, + }, + { + Value: " \u0085", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`, + }, + { + Value: " \u2028", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line 
sequence "\u2029" character №1 — instead found ' ' (U+0020)`, + }, + + + + { + Value: " 😈", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`, + }, + + + + { + Value: ".\n", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`, + }, + { + Value: ".\r", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`, + }, + { + Value: ".\u0085", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`, + }, + { + Value: ".\u2028", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`, + }, + + + + { + Value: ".😈", + ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' 
(U+002E)`, + }, + } + + for testNumber, test := range tests { + + var reader io.Reader = strings.NewReader(test.Value) + var runescanner io.RuneScanner = utf8.NewRuneScanner(reader) + + actualSize, err := eol.ReadPS(runescanner) + if nil == err { + t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber) + t.Logf("EXPECTED-ERROR: %q", test.ExpectedError) + t.Logf("VALUE: %q", test.Value) + continue + } + + { + expected := test.ExpectedError + actual := err.Error() + + if expected != actual { + t.Errorf("For test #%d, the actual error is not what was expected.", testNumber) + t.Logf("EXPECTED: %q", expected) + t.Logf("ACTUAL: %q", actual) + t.Logf("VALUE: %q", test.Value) + continue + } + } + + { + expected := 0 + actual := actualSize + + if expected != actual { + t.Errorf("For test #%d, the actual size is not what was expected.", testNumber) + t.Logf("EXPECTED: %d", expected) + t.Logf("ACTUAL: %d", actual) + t.Logf("VALUE: %q", test.Value) + continue + } + } + } +}