paragraph-separator

master
Charles Iliya Krempeaux 2024-03-25 23:36:06 -07:00
parent 4bdb41c618
commit 2244b2cfeb
9 changed files with 276 additions and 8 deletions

View File

@ -12,6 +12,7 @@ The end-of-line sequences it supports is:
* `"\r\n" // carriage-return (CR), line-feed (LF)` * `"\r\n" // carriage-return (CR), line-feed (LF)`
* `"\u0085" // next-line (NEL)` * `"\u0085" // next-line (NEL)`
* `"\u2028" // line-separator (LS)` * `"\u2028" // line-separator (LS)`
* `"\u2029" // paragraph-separator (PS)`
## Documentation ## Documentation

View File

@ -11,10 +11,14 @@ import (
// Example end-of-line sequences are: // Example end-of-line sequences are:
// //
// • "\n" // • "\n"
// • "\n\r"
// • "\v"
// • "\f"
// • "\r" // • "\r"
// • "\r\n" // • "\r\n"
// • "\u0085" // • "\u0085"
// • "\u2028" // • "\u2028"
// • "\u2029"
// //
// No end-of-line sequence can also be specified if it is unknown. // No end-of-line sequence can also be specified if it is unknown.
// For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time. // For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time.
@ -30,6 +34,8 @@ import (
// //
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\v"), 1) // var circumstance internalCircumstance = specifyCircumstance(opt.Something("\v"), 1)
// //
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\f"), 1)
//
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r"), 1) // var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r"), 1)
// //
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r\n"), 1) // var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r\n"), 1)

View File

@ -10,6 +10,7 @@ import (
"sourcecode.social/reiver/go-eol/lf" "sourcecode.social/reiver/go-eol/lf"
"sourcecode.social/reiver/go-eol/ls" "sourcecode.social/reiver/go-eol/ls"
"sourcecode.social/reiver/go-eol/nel" "sourcecode.social/reiver/go-eol/nel"
"sourcecode.social/reiver/go-eol/ps"
"sourcecode.social/reiver/go-eol/vt" "sourcecode.social/reiver/go-eol/vt"
) )
@ -78,6 +79,12 @@ func (receiver internalNotFoundError) Error() string {
s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual) s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
}) })
p = append(p, s...) p = append(p, s...)
case ps.Rune:
var s string = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual)
eolSequence.WhenSomething(func(sequence string){
s = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
})
p = append(p, s...)
default: default:
var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual) var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual)
eolSequence.WhenSomething(func(sequence string){ eolSequence.WhenSomething(func(sequence string){

3
ps/rune.go 100644
View File

@ -0,0 +1,3 @@
package ps
// Rune is the paragraph-separator (PS) character (U+2029).
const Rune rune = '\u2029'

3
ps/string.go 100644
View File

@ -0,0 +1,3 @@
package ps
// String is the paragraph-separator (PS) end-of-line sequence as a string.
// It holds exactly one character: Rune.
const String string = string(Rune)

View File

@ -12,6 +12,7 @@ import (
"sourcecode.social/reiver/go-eol/lfcr" "sourcecode.social/reiver/go-eol/lfcr"
"sourcecode.social/reiver/go-eol/ls" "sourcecode.social/reiver/go-eol/ls"
"sourcecode.social/reiver/go-eol/nel" "sourcecode.social/reiver/go-eol/nel"
"sourcecode.social/reiver/go-eol/ps"
"sourcecode.social/reiver/go-eol/vt" "sourcecode.social/reiver/go-eol/vt"
) )
@ -27,6 +28,7 @@ import (
// carriage-return, line-feed ("\r\n") // carriage-return, line-feed ("\r\n")
// next-line (NEL) (U+0085) // next-line (NEL) (U+0085)
// line-separator (LS) (U+2028) // line-separator (LS) (U+2028)
// paragraph-separator (PS) (U+2029)
// //
// If successful, ReadEOL returns the end-of-line sequence it found and the number-of-bytes read (to read in end-of-line sequence it found). // If successful, ReadEOL returns the end-of-line sequence it found and the number-of-bytes read (to read in end-of-line sequence it found).
// //
@ -66,6 +68,8 @@ func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error)
return nel.String, size0, nil return nel.String, size0, nil
case ls.Rune: case ls.Rune:
return ls.String, size0, nil return ls.String, size0, nil
case ps.Rune:
return ps.String, size0, nil
default: default:
err := runescanner.UnreadRune() err := runescanner.UnreadRune()
if nil != err { if nil != err {

View File

@ -16,6 +16,7 @@ import (
"sourcecode.social/reiver/go-eol/lfcr" "sourcecode.social/reiver/go-eol/lfcr"
"sourcecode.social/reiver/go-eol/ls" "sourcecode.social/reiver/go-eol/ls"
"sourcecode.social/reiver/go-eol/nel" "sourcecode.social/reiver/go-eol/nel"
"sourcecode.social/reiver/go-eol/ps"
"sourcecode.social/reiver/go-eol/vt" "sourcecode.social/reiver/go-eol/vt"
) )
@ -66,6 +67,11 @@ func TestReadEOL(t *testing.T) {
ExpectedEOL: ls.String, ExpectedEOL: ls.String,
ExpectedSize: 3, ExpectedSize: 3,
}, },
{
Value: "\u2029",
ExpectedEOL: ps.String,
ExpectedSize: 3,
},
@ -109,6 +115,11 @@ func TestReadEOL(t *testing.T) {
ExpectedEOL: ls.String, ExpectedEOL: ls.String,
ExpectedSize: 3, ExpectedSize: 3,
}, },
{
Value: "\u2029apple banana cherry",
ExpectedEOL: ps.String,
ExpectedSize: 3,
},
} }
for testNumber, test := range tests { for testNumber, test := range tests {

24
readps.go 100644
View File

@ -0,0 +1,24 @@
package eol
import (
"io"
"sourcecode.social/reiver/go-opt"
"sourcecode.social/reiver/go-eol/ps"
)
// ReadPS tries to read the "\u2029" (i.e., paragraph-separator (PS)) end-of-line sequence.
//
// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\u2029").
//
// If the character read is not a '\u2029', then ReadPS will try to unread the character.
//
// Example usage:
//
//	size, err := eol.ReadPS(runescanner)
func ReadPS(runescanner io.RuneScanner) (size int, err error) {
	// The paragraph-separator sequence is a single character,
	// so the character being read is always character №1 of the sequence.
	const characterNumber uint64 = 1

	// The circumstance carries the expected sequence and character position
	// into readthisrune.
	var circumstance internalCircumstance = specifyCircumstance(opt.Something(ps.String), characterNumber)

	return readthisrune(circumstance, runescanner, ps.Rune)
}

209
readps_test.go 100644
View File

@ -0,0 +1,209 @@
package eol_test
import (
"testing"
"io"
"strings"
"sourcecode.social/reiver/go-utf8"
"sourcecode.social/reiver/go-eol"
)
// TestReadPS feeds eol.ReadPS inputs that begin with the paragraph-separator
// character (U+2029) and checks that no error is returned and that the
// reported size matches the expected size for each input.
func TestReadPS(t *testing.T) {

	type testCase struct {
		Value        string
		ExpectedSize int
	}

	testCases := []testCase{
		{Value: "\u2029", ExpectedSize: 3},
		{Value: "\u2029apple banana cherry", ExpectedSize: 3},
	}

	for index, tc := range testCases {

		var source io.Reader = strings.NewReader(tc.Value)
		var runescanner io.RuneScanner = utf8.NewRuneScanner(source)

		size, err := eol.ReadPS(runescanner)

		switch {
		case err != nil:
			// Any error is a failure here; the size is not inspected in that case.
			t.Errorf("For test #%d, did not expect an error but actually got one.", index)
			t.Logf("ERROR: (%T) %s", err, err)
			t.Logf("VALUE: %q", tc.Value)
		case size != tc.ExpectedSize:
			t.Errorf("For test #%d, the actual size is not what was expected.", index)
			t.Logf("EXPECTED: %d", tc.ExpectedSize)
			t.Logf("ACTUAL: %d", size)
			t.Logf("VALUE: %q", tc.Value)
		}
	}
}
// TestReadPS_fail feeds eol.ReadPS inputs that do NOT begin with the
// paragraph-separator character (U+2029) — including empty input — and checks
// that an error with the exact expected message is returned and that the
// reported size is zero.
func TestReadPS_fail(t *testing.T) {

	type testCase struct {
		Value         string
		ExpectedError string
	}

	testCases := []testCase{
		// Empty input.
		{
			Value:         "",
			ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2029": EOF`,
		},

		// A single character that is not the paragraph-separator.
		{
			Value:         "\n",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
		},
		{
			Value:         "\r",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
		},
		{
			Value:         "\u0085",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
		},
		{
			Value:         "😈",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
		},

		// The same leading characters, followed by more text.
		{
			Value:         "\napple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
		},
		{
			Value:         "\rapple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
		},
		{
			Value:         "\u0085apple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
		},
		{
			Value:         "😈apple banana cherry",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
		},

		// A leading space before another character.
		{
			Value:         " \n",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " \r",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " \u0085",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " \u2028",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},
		{
			Value:         " 😈",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
		},

		// A leading period before another character.
		{
			Value:         ".\n",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".\r",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".\u0085",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".\u2028",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
		{
			Value:         ".😈",
			ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
		},
	}

	// On failure the reported size is expected to be zero.
	const expectedSize int = 0

	for index, tc := range testCases {

		var source io.Reader = strings.NewReader(tc.Value)
		var runescanner io.RuneScanner = utf8.NewRuneScanner(source)

		size, err := eol.ReadPS(runescanner)

		if err == nil {
			t.Errorf("For test #%d, expected an error but did not actually get one.", index)
			t.Logf("EXPECTED-ERROR: %q", tc.ExpectedError)
			t.Logf("VALUE: %q", tc.Value)
			continue
		}

		if message := err.Error(); message != tc.ExpectedError {
			t.Errorf("For test #%d, the actual error is not what was expected.", index)
			t.Logf("EXPECTED: %q", tc.ExpectedError)
			t.Logf("ACTUAL: %q", message)
			t.Logf("VALUE: %q", tc.Value)
			continue
		}

		if size != expectedSize {
			t.Errorf("For test #%d, the actual size is not what was expected.", index)
			t.Logf("EXPECTED: %d", expectedSize)
			t.Logf("ACTUAL: %d", size)
			t.Logf("VALUE: %q", tc.Value)
			continue
		}
	}
}