paragraph-separator
parent
4bdb41c618
commit
2244b2cfeb
17
README.md
17
README.md
|
@ -4,14 +4,15 @@ Package **eol** implements tools for working with end-of-line, for the Go progra
|
|||
|
||||
The end-of-line sequences it supports are:
|
||||
|
||||
* `"\n" // line-feed (LF)`
|
||||
* `"\n\r" // line-feed (LF), carriage-return (CR)`
|
||||
* `"\v" // vertical-tab (VT)`
|
||||
* `"\f" // form-feed (FF)`
|
||||
* `"\r" // carriage-return (CR)`
|
||||
* `"\r\n" // carriage-return (CR), line-feed (LF)`
|
||||
* `"\u0085" // next-line (NEL)`
|
||||
* `"\u2028" // line-separator (LS)`
|
||||
* `"\n" // line-feed (LF)`
|
||||
* `"\n\r" // line-feed (LF), carriage-return (CR)`
|
||||
* `"\v" // vertical-tab (VT)`
|
||||
* `"\f" // form-feed (FF)`
|
||||
* `"\r" // carriage-return (CR)`
|
||||
* `"\r\n" // carriage-return (CR), line-feed (LF)`
|
||||
* `"\u0085" // next-line (NEL)`
|
||||
* `"\u2028" // line-separator (LS)`
|
||||
* `"\u2029" // paragraph-separator (PS)`
|
||||
|
||||
## Documentation
|
||||
|
||||
|
|
|
@ -11,10 +11,14 @@ import (
|
|||
// Example end-of-line sequences are:
|
||||
//
|
||||
// • "\n"
|
||||
// • "\n\r"
|
||||
// • "\v"
|
||||
// • "\f"
|
||||
// • "\r"
|
||||
// • "\r\n"
|
||||
// • "\u0085"
|
||||
// • "\u2028"
|
||||
// • "\u2029"
|
||||
//
|
||||
// No end-of-line sequence can also be specified if it is unknown.
|
||||
// For example eol.ReadEOL() does NOT know the end-of-line sequence ahead of time.
|
||||
|
@ -30,6 +34,8 @@ import (
|
|||
//
|
||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\v"), 1)
|
||||
//
|
||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\f"), 1)
|
||||
//
|
||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r"), 1)
|
||||
//
|
||||
// var circumstance internalCircumstance = specifyCircumstance(opt.Something("\r\n"), 1)
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"sourcecode.social/reiver/go-eol/lf"
|
||||
"sourcecode.social/reiver/go-eol/ls"
|
||||
"sourcecode.social/reiver/go-eol/nel"
|
||||
"sourcecode.social/reiver/go-eol/ps"
|
||||
"sourcecode.social/reiver/go-eol/vt"
|
||||
)
|
||||
|
||||
|
@ -78,6 +79,12 @@ func (receiver internalNotFoundError) Error() string {
|
|||
s = fmt.Sprintf(`eol: line-separator (LS) character (U+2028) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
|
||||
})
|
||||
p = append(p, s...)
|
||||
case ps.Rune:
|
||||
var s string = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence character №%d — instead found %q (%U)`, characterNumber, actual, actual)
|
||||
eolSequence.WhenSomething(func(sequence string){
|
||||
s = fmt.Sprintf(`eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence %q character №%d — instead found %q (%U)`, sequence, characterNumber, actual, actual)
|
||||
})
|
||||
p = append(p, s...)
|
||||
default:
|
||||
var s string = fmt.Sprintf(`eol: %q character (%U) not found for sequence character №%d — instead found %q (%U)`, expected, expected, characterNumber, actual, actual)
|
||||
eolSequence.WhenSomething(func(sequence string){
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
package ps
|
||||
|
||||
const Rune rune = '\u2029'
|
|
@ -0,0 +1,3 @@
|
|||
package ps
|
||||
|
||||
const String string = string(Rune)
|
|
@ -12,6 +12,7 @@ import (
|
|||
"sourcecode.social/reiver/go-eol/lfcr"
|
||||
"sourcecode.social/reiver/go-eol/ls"
|
||||
"sourcecode.social/reiver/go-eol/nel"
|
||||
"sourcecode.social/reiver/go-eol/ps"
|
||||
"sourcecode.social/reiver/go-eol/vt"
|
||||
)
|
||||
|
||||
|
@ -27,6 +28,7 @@ import (
|
|||
// carriage-return, line-feed ("\r\n")
|
||||
// next-line (NEL) (U+0085)
|
||||
// line-separator (LS) (U+2028)
|
||||
// paragraph-separator (PS) (U+2029)
|
||||
//
|
||||
// If successful, ReadEOL returns the end-of-line sequence it found and the number-of-bytes read (to read in the end-of-line sequence it found).
|
||||
//
|
||||
|
@ -66,6 +68,8 @@ func ReadEOL(runescanner io.RuneScanner) (endofline string, size int, err error)
|
|||
return nel.String, size0, nil
|
||||
case ls.Rune:
|
||||
return ls.String, size0, nil
|
||||
case ps.Rune:
|
||||
return ps.String, size0, nil
|
||||
default:
|
||||
err := runescanner.UnreadRune()
|
||||
if nil != err {
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"sourcecode.social/reiver/go-eol/lfcr"
|
||||
"sourcecode.social/reiver/go-eol/ls"
|
||||
"sourcecode.social/reiver/go-eol/nel"
|
||||
"sourcecode.social/reiver/go-eol/ps"
|
||||
"sourcecode.social/reiver/go-eol/vt"
|
||||
)
|
||||
|
||||
|
@ -66,6 +67,11 @@ func TestReadEOL(t *testing.T) {
|
|||
ExpectedEOL: ls.String,
|
||||
ExpectedSize: 3,
|
||||
},
|
||||
{
|
||||
Value: "\u2029",
|
||||
ExpectedEOL: ps.String,
|
||||
ExpectedSize: 3,
|
||||
},
|
||||
|
||||
|
||||
|
||||
|
@ -109,6 +115,11 @@ func TestReadEOL(t *testing.T) {
|
|||
ExpectedEOL: ls.String,
|
||||
ExpectedSize: 3,
|
||||
},
|
||||
{
|
||||
Value: "\u2029apple banana cherry",
|
||||
ExpectedEOL: ps.String,
|
||||
ExpectedSize: 3,
|
||||
},
|
||||
}
|
||||
|
||||
for testNumber, test := range tests {
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
package eol
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"sourcecode.social/reiver/go-opt"
|
||||
|
||||
"sourcecode.social/reiver/go-eol/ps"
|
||||
)
|
||||
|
||||
// ReadPS tries to read the "\u2029" (i.e., paragraph-separator) end-of-line sequence.
|
||||
//
|
||||
// If successful, it returns the number-of-bytes read (to read in end-of-line sequence "\u2029").
|
||||
//
|
||||
// If the character read is not a '\u2029', then ReadPS will try to unread the character.
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// size, err := eol.ReadPS(runescanner)
|
||||
func ReadPS(runescanner io.RuneScanner) (size int, err error) {
|
||||
const characterNumber uint64 = 1
|
||||
var circumstance internalCircumstance = specifyCircumstance(opt.Something(ps.String), characterNumber)
|
||||
return readthisrune(circumstance, runescanner, ps.Rune)
|
||||
}
|
|
@ -0,0 +1,209 @@
|
|||
package eol_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"sourcecode.social/reiver/go-utf8"
|
||||
|
||||
"sourcecode.social/reiver/go-eol"
|
||||
)
|
||||
|
||||
func TestReadPS(t *testing.T) {
|
||||
|
||||
tests := []struct{
|
||||
Value string
|
||||
ExpectedSize int
|
||||
}{
|
||||
{
|
||||
Value: "\u2029",
|
||||
ExpectedSize: 3,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\u2029apple banana cherry",
|
||||
ExpectedSize: 3,
|
||||
},
|
||||
}
|
||||
|
||||
for testNumber, test := range tests {
|
||||
|
||||
var reader io.Reader = strings.NewReader(test.Value)
|
||||
var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
|
||||
|
||||
actualSize, err := eol.ReadPS(runescanner)
|
||||
if nil != err {
|
||||
t.Errorf("For test #%d, did not expect an error but actually got one.", testNumber)
|
||||
t.Logf("ERROR: (%T) %s", err, err)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
|
||||
{
|
||||
expected := test.ExpectedSize
|
||||
actual := actualSize
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
|
||||
t.Logf("EXPECTED: %d", expected)
|
||||
t.Logf("ACTUAL: %d", actual)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadPS_fail(t *testing.T) {
|
||||
|
||||
tests := []struct{
|
||||
Value string
|
||||
ExpectedError string
|
||||
}{
|
||||
{
|
||||
Value: "",
|
||||
ExpectedError: `eol: problem reading character №1 of end-of-line sequence "\u2029": EOF`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\n",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
|
||||
},
|
||||
{
|
||||
Value: "\r",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
{
|
||||
Value: "\u0085",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "😈",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "\napple banana cherry",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\n' (U+000A)`,
|
||||
},
|
||||
{
|
||||
Value: "\rapple banana cherry",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\r' (U+000D)`,
|
||||
},
|
||||
{
|
||||
Value: "\u0085apple banana cherry",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '\u0085' (U+0085)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: "😈apple banana cherry",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '😈' (U+1F608)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: " \n",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||
},
|
||||
{
|
||||
Value: " \r",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||
},
|
||||
{
|
||||
Value: " \u0085",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||
},
|
||||
{
|
||||
Value: " \u2028",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: " 😈",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found ' ' (U+0020)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: ".\n",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||
},
|
||||
{
|
||||
Value: ".\r",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||
},
|
||||
{
|
||||
Value: ".\u0085",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||
},
|
||||
{
|
||||
Value: ".\u2028",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||
},
|
||||
|
||||
|
||||
|
||||
{
|
||||
Value: ".😈",
|
||||
ExpectedError: `eol: paragraph-separator (PS) character (U+2029) not found for end-of-line sequence "\u2029" character №1 — instead found '.' (U+002E)`,
|
||||
},
|
||||
}
|
||||
|
||||
for testNumber, test := range tests {
|
||||
|
||||
var reader io.Reader = strings.NewReader(test.Value)
|
||||
var runescanner io.RuneScanner = utf8.NewRuneScanner(reader)
|
||||
|
||||
actualSize, err := eol.ReadPS(runescanner)
|
||||
if nil == err {
|
||||
t.Errorf("For test #%d, expected an error but did not actually get one.", testNumber)
|
||||
t.Logf("EXPECTED-ERROR: %q", test.ExpectedError)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
|
||||
{
|
||||
expected := test.ExpectedError
|
||||
actual := err.Error()
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("For test #%d, the actual error is not what was expected.", testNumber)
|
||||
t.Logf("EXPECTED: %q", expected)
|
||||
t.Logf("ACTUAL: %q", actual)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
expected := 0
|
||||
actual := actualSize
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("For test #%d, the actual size is not what was expected.", testNumber)
|
||||
t.Logf("EXPECTED: %d", expected)
|
||||
t.Logf("ACTUAL: %d", actual)
|
||||
t.Logf("VALUE: %q", test.Value)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue