paragraph-separator

master
Charles Iliya Krempeaux 2024-03-25 23:36:06 -07:00
parent 4bdb41c618
commit 2244b2cfeb
9 changed files with 276 additions and 8 deletions

View File

@ -4,14 +4,15 @@ Package **eol** implements tools for working with end-of-line, for the Go progra
The end-of-line sequences it supports are:
* `"\n" // line-feed (LF)`
* `"\n\r" // line-feed (LF), carriage-return (CR)`
* `"\v" // vertical-tab (VT)`
* `"\f" // form-feed (FF)`
* `"\r" // carriage-return (CR)`
* `"\r\n" // carriage-return (CR), line-feed (LF)`
* `"\u0085" // next-line (NEL)`
* `"\u2028" // line-separator (LS)`
* `"\n" // line-feed (LF)`
* `"\n\r" // line-feed (LF), carriage-return (CR)`
* `"\v" // vertical-tab (VT)`
* `"\f" // form-feed (FF)`
* `"\r" // carriage-return (CR)`
* `"\r\n" // carriage-return (CR), line-feed (LF)`
* `"\u0085" // next-line (NEL)`
* `"\u2028" // line-separator (LS)`
* `"\u2029" // paragraph-separator (PS)`
## Documentation

View File

@ -11,10 +11,14 @@ import (
// Example end-of-line sequences are:
//
// • "\n"
// • "\n\r"
// • "\v"
// • "\f"
// • "\r"
// • "\r\n"
// • "\u0085"
// • "\u2028"
// • "\u2029"
//
// No end-of-line sequence can also be specified if it is unknown.
// For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time.
@ -30,6 +34,8 @@ import (
//
// var circumstance internalCircumstance = specifyCircumstance(opt.Something[string]("\v"), 1)
//
// var circumstance internalCircumstance = specifyCircumstance(opt.Something[string]("\f"), 1)
//
// var circumstance internalCircumstance = specifyCircumstance(opt.Something[string]("\r"), 1)
//
// var circumstance internalCircumstance = specifyCircumstance(opt.Something[string]("\r\n"), 1)

View File

@ -10,6 +10,7 @@ import (
"sourcecode.social/reiver/go-eol/lf"
"sourcecode.social/reiver/go-eol/ls"
"sourcecode.social/reiver/go-eol/nel"
"sourcecode.social/reiver/go-eol/ps"
"sourcecode.social/reiver/go-eol/vt"
)
@ -78,6 +79,12 @@ func (receiver internalNotFoundError) Error() string {
s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
})
p = append(p, s...)
case ps.Rune:
var s string = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual)
eolSequence.WhenSomething(func(sequence string){
s = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
})
p = append(p, s...)
default:
var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual)
eolSequence.WhenSomething(func(sequence string){

3
ps/rune.go 100644
View File

@ -0,0 +1,3 @@
package ps
// Rune is the paragraph-separator (PS) character, U+2029.
const Rune rune = '\u2029'

3
ps/string.go 100644
View File

@ -0,0 +1,3 @@
package ps
// String is the paragraph-separator (PS) end-of-line sequence in string form;
// it is the single character Rune converted to a string.
const String string = string(Rune)

View File

@ -12,6 +12,7 @@ import (
"sourcecode.social/reiver/go-eol/lfcr"
"sourcecode.social/reiver/go-eol/ls"
"sourcecode.social/reiver/go-eol/nel"
"sourcecode.social/reiver/go-eol/ps"
"sourcecode.social/reiver/go-eol/vt"
)
@ -27,6 +28,7 @@ import (
// carriage-return, line-feed ("\r\n")
// next-line (NEL) (U+0085)
// line-separator (LS) (U+2028)
// paragraph-separator (PS) (U+2029)
//
// If successful, ReadEOL returns the end-of-line sequence it found and the number-of-bytes read (to read in the end-of-line sequence it found).
//
@ -66,6 +68,8 @@ func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error)
return nel.String, size0, nil
case ls.Rune:
return ls.String, size0, nil
case ps.Rune:
return ps.String, size0, nil
default:
err := runescanner.UnreadRune()
if nil != err {

View File

@ -16,6 +16,7 @@ import (
"sourcecode.social/reiver/go-eol/lfcr"
"sourcecode.social/reiver/go-eol/ls"
"sourcecode.social/reiver/go-eol/nel"
"sourcecode.social/reiver/go-eol/ps"
"sourcecode.social/reiver/go-eol/vt"
)
@ -66,6 +67,11 @@ func TestReadEOL(t *testing.T) {
ExpectedEOL: ls.String,
ExpectedSize: 3,
},
{
Value: "\u2029",
ExpectedEOL: ps.String,
ExpectedSize: 3,
},
@ -109,6 +115,11 @@ func TestReadEOL(t *testing.T) {
ExpectedEOL: ls.String,
ExpectedSize: 3,
},
{
Value: "\u2029apple banana cherry",
ExpectedEOL: ps.String,
ExpectedSize: 3,
},
}
for testNumber, test := range tests {

24
readps.go 100644
View File

@ -0,0 +1,24 @@
package eol
import (
"io"
"sourcecode.social/reiver/go-opt"
"sourcecode.social/reiver/go-eol/ps"
)
// ReadPS tries to read the "\u2029" (i.e., paragraph-separator) end-of-line sequence.
//
// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\u2029").
//
// If the character read is not a '\u2029', then ReadPS will try to unread the character.
//
// Example usage:
//
//	size, err := eol.ReadPS(runescanner)
func ReadPS(runescanner io.RuneScanner) (size int, err error) {
	// The paragraph-separator sequence is a single character,
	// so the character being read is always character №1 of the sequence.
	const characterNumber uint64 = 1

	// The circumstance carries the expected sequence and character position
	// so that any resulting error message can name them.
	var circumstance internalCircumstance = specifyCircumstance(opt.Something(ps.String), characterNumber)

	return readthisrune(circumstance, runescanner, ps.Rune)
}

209
readps_test.go 100644
View File

@ -0,0 +1,209 @@
package eol_test
import (
"testing"
"io"
"strings"
"sourcecode.social/reiver/go-utf8"
"sourcecode.social/reiver/go-eol"
)
// TestReadPS checks that eol.ReadPS succeeds, and reports the expected
// number of bytes read, for inputs that begin with a paragraph-separator (U+2029).
func TestReadPS(t *testing.T) {

	type testCase struct {
		Value        string
		ExpectedSize int
	}

	var tests = []testCase{
		{Value: "\u2029", ExpectedSize: 3},
		{Value: "\u2029apple banana cherry", ExpectedSize: 3},
	}

	for testNumber := range tests {
		test := tests[testNumber]

		var source io.Reader = strings.NewReader(test.Value)
		var runescanner io.RuneScanner = utf8.NewRuneScanner(source)

		actualSize, err := eol.ReadPS(runescanner)
		if err != nil {
			t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
			t.Logf("ERROR: (%T) %s", err, err)
			t.Logf("VALUE: %q", test.Value)
			continue
		}

		// Success path: only the reported size remains to be verified.
		if actualSize == test.ExpectedSize {
			continue
		}

		t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
		t.Logf("EXPECTED: %d", test.ExpectedSize)
		t.Logf("ACTUAL: %d", actualSize)
		t.Logf("VALUE: %q", test.Value)
	}
}
// TestReadPS_fail checks that eol.ReadPS returns the expected error message,
// and a size of zero, for inputs that do not begin with a paragraph-separator (U+2029).
func TestReadPS_fail(t *testing.T) {

	type testCase struct {
		Value         string
		ExpectedError string
	}

	var tests = []testCase{
		// Empty input.
		{
			Value:         "",
			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2029": EOF`,
		},

		// A single character that is not the paragraph-separator.
		{
			Value:         "\n",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
		},
		{
			Value:         "\r",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
		},
		{
			Value:         "\u0085",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
		},
		{
			Value:         "😈",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
		},

		// The same leading characters, followed by more text.
		{
			Value:         "\napple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
		},
		{
			Value:         "\rapple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
		},
		{
			Value:         "\u0085apple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
		},
		{
			Value:         "😈apple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
		},

		// A leading space before some other character.
		{
			Value:         " \n",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " \r",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " \u0085",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " \u2028",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " 😈",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},

		// A leading period before some other character.
		{
			Value:         ".\n",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".\r",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".\u0085",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".\u2028",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".😈",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
	}

	// On failure, no bytes are expected to be reported as read.
	const expectedSize int = 0

	for testNumber := range tests {
		test := tests[testNumber]

		var source io.Reader = strings.NewReader(test.Value)
		var runescanner io.RuneScanner = utf8.NewRuneScanner(source)

		actualSize, err := eol.ReadPS(runescanner)

		if err == nil {
			t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
			t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
			t.Logf("VALUE: %q", test.Value)
			continue
		}

		if actualError := err.Error(); actualError != test.ExpectedError {
			t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
			t.Logf("EXPECTED: %q", test.ExpectedError)
			t.Logf("ACTUAL: %q", actualError)
			t.Logf("VALUE: %q", test.Value)
			continue
		}

		if actualSize != expectedSize {
			t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
			t.Logf("EXPECTED: %d", expectedSize)
			t.Logf("ACTUAL: %d", actualSize)
			t.Logf("VALUE: %q", test.Value)
			continue
		}
	}
}