paragraph-separator
parent
4bdb41c618
commit
2244b2cfeb
17
README.md
17
README.md
|
@ -4,14 +4,15 @@ Package **eol** implements tools for working with end-of-line, for the Go progra
|
||||||
|
|
||||||
The end-of-line sequences it supports are:
|
The end-of-line sequences it supports are:
|
||||||
|
|
||||||
* `"\n" // line-feed (LF)`
|
* `"\n" // line-feed (LF)`
|
||||||
* `"\n\r" // line-feed (LF), carriage-return (CR)`
|
* `"\n\r" // line-feed (LF), carriage-return (CR)`
|
||||||
* `"\v" // vertical-tab (VT)`
|
* `"\v" // vertical-tab (VT)`
|
||||||
* `"\f" // form-feed (FF)`
|
* `"\f" // form-feed (FF)`
|
||||||
* `"\r" // carriage-return (CR)`
|
* `"\r" // carriage-return (CR)`
|
||||||
* `"\r\n" // carriage-return (CR), line-feed (LF)`
|
* `"\r\n" // carriage-return (CR), line-feed (LF)`
|
||||||
* `"\u0085" // next-line (NEL)`
|
* `"\u0085" // next-line (NEL)`
|
||||||
* `"\u2028" // line-separator (LS)`
|
* `"\u2028" // line-separator (LS)`
|
||||||
|
* `"\u2029" // paragraph-separator (PS)`
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
|
|
|
@ -11,10 +11,14 @@ import (
|
||||||
// Example end-of-line sequences are:
|
// Example end-of-line sequences are:
|
||||||
//
|
//
|
||||||
// • "\n"
|
// • "\n"
|
||||||
|
// • "\n\r"
|
||||||
|
// • "\v"
|
||||||
|
// • "\f"
|
||||||
// • "\r"
|
// • "\r"
|
||||||
// • "\r\n"
|
// • "\r\n"
|
||||||
// • "\u0085"
|
// • "\u0085"
|
||||||
// • "\u2028"
|
// • "\u2028"
|
||||||
|
// • "\u2029"
|
||||||
//
|
//
|
||||||
// No end-of-line sequence can also be specified if it is unknown.
|
// No end-of-line sequence can also be specified if it is unknown.
|
||||||
// For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time.
|
// For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time.
|
||||||
|
@ -30,6 +34,8 @@ import (
|
||||||
//
|
//
|
||||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\v"), 1)
|
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\v"), 1)
|
||||||
//
|
//
|
||||||
|
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\f"), 1)
|
||||||
|
//
|
||||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r"), 1)
|
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r"), 1)
|
||||||
//
|
//
|
||||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r\n"), 1)
|
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r\n"), 1)
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"sourcecode.social/reiver/go-eol/lf"
|
"sourcecode.social/reiver/go-eol/lf"
|
||||||
"sourcecode.social/reiver/go-eol/ls"
|
"sourcecode.social/reiver/go-eol/ls"
|
||||||
"sourcecode.social/reiver/go-eol/nel"
|
"sourcecode.social/reiver/go-eol/nel"
|
||||||
|
"sourcecode.social/reiver/go-eol/ps"
|
||||||
"sourcecode.social/reiver/go-eol/vt"
|
"sourcecode.social/reiver/go-eol/vt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -78,6 +79,12 @@ func (receiver internalNotFoundError) Error() string {
|
||||||
s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
|
s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
|
||||||
})
|
})
|
||||||
p = append(p, s...)
|
p = append(p, s...)
|
||||||
|
case ps.Rune:
|
||||||
|
var s string = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual)
|
||||||
|
eolSequence.WhenSomething(func(sequence string){
|
||||||
|
s = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
|
||||||
|
})
|
||||||
|
p = append(p, s...)
|
||||||
default:
|
default:
|
||||||
var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual)
|
var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual)
|
||||||
eolSequence.WhenSomething(func(sequence string){
|
eolSequence.WhenSomething(func(sequence string){
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
package ps
|
||||||
|
|
||||||
|
const Rune rune = '\u2029'
|
|
@ -0,0 +1,3 @@
|
||||||
|
package ps
|
||||||
|
|
||||||
|
const String string = string(Rune)
|
|
@ -12,6 +12,7 @@ import (
|
||||||
"sourcecode.social/reiver/go-eol/lfcr"
|
"sourcecode.social/reiver/go-eol/lfcr"
|
||||||
"sourcecode.social/reiver/go-eol/ls"
|
"sourcecode.social/reiver/go-eol/ls"
|
||||||
"sourcecode.social/reiver/go-eol/nel"
|
"sourcecode.social/reiver/go-eol/nel"
|
||||||
|
"sourcecode.social/reiver/go-eol/ps"
|
||||||
"sourcecode.social/reiver/go-eol/vt"
|
"sourcecode.social/reiver/go-eol/vt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -27,6 +28,7 @@ import (
|
||||||
// carriage-return, line-feed ("\r\n")
|
// carriage-return, line-feed ("\r\n")
|
||||||
// next-line (NEL) (U+0085)
|
// next-line (NEL) (U+0085)
|
||||||
// line-separator (LS) (U+2028)
|
// line-separator (LS) (U+2028)
|
||||||
|
// paragraph-separator (PS) (U+2029)
|
||||||
//
|
//
|
||||||
// If successful, ReadEOL returns the end-of-line sequence it found and the number-of-bytes read (to read in end-of-line sequence it found).
|
// If successful, ReadEOL returns the end-of-line sequence it found and the number-of-bytes read (to read in end-of-line sequence it found).
|
||||||
//
|
//
|
||||||
|
@ -66,6 +68,8 @@ func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error)
|
||||||
return nel.String, size0, nil
|
return nel.String, size0, nil
|
||||||
case ls.Rune:
|
case ls.Rune:
|
||||||
return ls.String, size0, nil
|
return ls.String, size0, nil
|
||||||
|
case ps.Rune:
|
||||||
|
return ps.String, size0, nil
|
||||||
default:
|
default:
|
||||||
err := runescanner.UnreadRune()
|
err := runescanner.UnreadRune()
|
||||||
if nil != err {
|
if nil != err {
|
||||||
|
|
|
@ -16,6 +16,7 @@ import (
|
||||||
"sourcecode.social/reiver/go-eol/lfcr"
|
"sourcecode.social/reiver/go-eol/lfcr"
|
||||||
"sourcecode.social/reiver/go-eol/ls"
|
"sourcecode.social/reiver/go-eol/ls"
|
||||||
"sourcecode.social/reiver/go-eol/nel"
|
"sourcecode.social/reiver/go-eol/nel"
|
||||||
|
"sourcecode.social/reiver/go-eol/ps"
|
||||||
"sourcecode.social/reiver/go-eol/vt"
|
"sourcecode.social/reiver/go-eol/vt"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -66,6 +67,11 @@ func TestReadEOL(t *testing.T) {
|
||||||
ExpectedEOL: ls.String,
|
ExpectedEOL: ls.String,
|
||||||
ExpectedSize: 3,
|
ExpectedSize: 3,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Value: "\u2029",
|
||||||
|
ExpectedEOL: ps.String,
|
||||||
|
ExpectedSize: 3,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -109,6 +115,11 @@ func TestReadEOL(t *testing.T) {
|
||||||
ExpectedEOL: ls.String,
|
ExpectedEOL: ls.String,
|
||||||
ExpectedSize: 3,
|
ExpectedSize: 3,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Value: "\u2029apple banana cherry",
|
||||||
|
ExpectedEOL: ps.String,
|
||||||
|
ExpectedSize: 3,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for testNumber, test := range tests {
|
for testNumber, test := range tests {
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
package eol
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"sourcecode.social/reiver/go-opt"
|
||||||
|
|
||||||
|
"sourcecode.social/reiver/go-eol/ps"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ReadPS tries to read the "\u2029" (i.e., paragraph-separator) end-of-line sequence.
|
||||||
|
//
|
||||||
|
// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\u2029").
|
||||||
|
//
|
||||||
|
// If the character read is not a '\u2029', then ReadPS will try to unread the character.
|
||||||
|
//
|
||||||
|
// Example usage:
|
||||||
|
//
|
||||||
|
// size, err := eol.ReadPS(runescanner)
|
||||||
|
func ReadPS(runescanner io.RuneScanner) (size int, err error) {
|
||||||
|
const characterNumber uint64 = 1
|
||||||
|
var circumstance internalCircumstance = specifyCircumstance(opt.Something(ps.String), characterNumber)
|
||||||
|
return readthisrune(circumstance, runescanner, ps.Rune)
|
||||||
|
}
|
|
@ -0,0 +1,209 @@
|
||||||
|
package eol_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"sourcecode.social/reiver/go-utf8"
|
||||||
|
|
||||||
|
"sourcecode.social/reiver/go-eol"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestReadPS(t *testing.T) {
|
||||||
|
|
||||||
|
tests := []struct{
|
||||||
|
Value string
|
||||||
|
ExpectedSize int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Value: "\u2029",
|
||||||
|
ExpectedSize: 3,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: "\u2029apple banana cherry",
|
||||||
|
ExpectedSize: 3,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for testNumber, test := range tests {
|
||||||
|
|
||||||
|
var reader io.Reader = strings.NewReader(test.Value)
|
||||||
|
var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
|
||||||
|
|
||||||
|
actualSize, err := eol.ReadPS(runescanner)
|
||||||
|
if nil != err {
|
||||||
|
t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
|
||||||
|
t.Logf("ERROR: (%T) %s", err, err)
|
||||||
|
t.Logf("VALUE: %q", test.Value)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
expected := test.ExpectedSize
|
||||||
|
actual := actualSize
|
||||||
|
|
||||||
|
if expected != actual {
|
||||||
|
t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
|
||||||
|
t.Logf("EXPECTED: %d", expected)
|
||||||
|
t.Logf("ACTUAL: %d", actual)
|
||||||
|
t.Logf("VALUE: %q", test.Value)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadPS_fail(t *testing.T) {
|
||||||
|
|
||||||
|
tests := []struct{
|
||||||
|
Value string
|
||||||
|
ExpectedError string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Value: "",
|
||||||
|
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2029": EOF`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: "\n",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: "\r",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: "\u0085",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: "😈",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: "\napple banana cherry",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: "\rapple banana cherry",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: "\u0085apple banana cherry",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: "😈apple banana cherry",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: " \n",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: " \r",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: " \u0085",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: " \u2028",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: " 😈",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: ".\n",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: ".\r",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: ".\u0085",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: ".\u2028",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
Value: ".😈",
|
||||||
|
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for testNumber, test := range tests {
|
||||||
|
|
||||||
|
var reader io.Reader = strings.NewReader(test.Value)
|
||||||
|
var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
|
||||||
|
|
||||||
|
actualSize, err := eol.ReadPS(runescanner)
|
||||||
|
if nil == err {
|
||||||
|
t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
|
||||||
|
t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
|
||||||
|
t.Logf("VALUE: %q", test.Value)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
expected := test.ExpectedError
|
||||||
|
actual := err.Error()
|
||||||
|
|
||||||
|
if expected != actual {
|
||||||
|
t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
|
||||||
|
t.Logf("EXPECTED: %q", expected)
|
||||||
|
t.Logf("ACTUAL: %q", actual)
|
||||||
|
t.Logf("VALUE: %q", test.Value)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
expected := 0
|
||||||
|
actual := actualSize
|
||||||
|
|
||||||
|
if expected != actual {
|
||||||
|
t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
|
||||||
|
t.Logf("EXPECTED: %d", expected)
|
||||||
|
t.Logf("ACTUAL: %d", actual)
|
||||||
|
t.Logf("VALUE: %q", test.Value)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue