go-httpbearer/parse.go

181 lines
4.7 KiB
Go
Raw Normal View History

2023-09-22 09:25:02 +00:00
package httpbearer
import (
"strings"
)
// Parse parses the value of an HTTP "Authorization" header and returns a bearer token, if there was one.
// If there was no bearer token in the HTTP "Authorization" header, then the returned value of successful will be false.
//
// The full HTTP request header will look something like this:
//
// "Authorization: Bearer WW91IGFyZSBub3QgYSBkcm9wIGluIHRoZSBvY2Vhbi4gWW91IGFyZSB0aGUgZW50aXJlIG9jZWFuLCBpbiBhIGRyb3Au\r\n"
//
// This function expects to receive just the value. So, with our previous example, that would be:
//
// "Bearer WW91IGFyZSBub3QgYSBkcm9wIGluIHRoZSBvY2Vhbi4gWW91IGFyZSB0aGUgZW50aXJlIG9jZWFuLCBpbiBhIGRyb3Au"
//
// Note that HTTP headers allow for extra "\t" and " " to be put in any place where a "\t" or " " already is.
//
// So, for example, this:
//
// "Authorization: Bearer abcde12345\r\n"
//
// And these:
//
// "Authorization: Bearer  abcde12345\r\n"
//
// "Authorization: Bearer   abcde12345\r\n"
//
// "Authorization: Bearer    abcde12345\r\n"
//
// "Authorization: Bearer     abcde12345\r\n"
//
// "Authorization: Bearer      abcde12345\r\n"
//
// "Authorization: Bearer\tabcde12345\r\n"
//
// "Authorization: Bearer\t\tabcde12345\r\n"
//
// "Authorization: Bearer\t\t\tabcde12345\r\n"
//
// "Authorization: Bearer\t\t\t\tabcde12345\r\n"
//
// "Authorization: Bearer\t\t\t\t\tabcde12345\r\n"
//
// "Authorization: Bearer\t\t\t\t\t\tabcde12345\r\n"
//
// "Authorization: Bearer \t \t abcde12345\r\n"
//
// Are all equivalent.
//
// Also note that HTTP headers also allow for the values to be broken up in multiple lines.
// The rule is that if a "\r\n" is followed by a " " or "\t" then that next line is part of the previous line.
//
// So, for example, this:
//
// "Authorization: Bearer abcde12345\r\n"
//
// And these:
//
// "Authorization:\r\n Bearer abcde12345\r\n"
//
// "Authorization:\r\n\tBearer abcde12345\r\n"
//
// "Authorization: Bearer\r\n abcde12345\r\n"
//
// "Authorization: Bearer\r\n\tabcde12345\r\n"
//
// "Authorization:\r\n \r\n\t Bearer \r\n\t \t\t\t abcde12345\r\n"
//
// Are all equivalent.
//
// Parse deals with all of these, too.
func Parse(value string) (bearerToken string, successful bool) {
	// Before the scheme name there could be zero or more of these:
	//
	//	• "\t"   i.e., horizontal tab (␉)
	//	• " "    i.e., space (␠)
	//	• "\r\n" i.e., carriage return (␍), line feed (␊)
	//
	// In IETF RFC822 LWSP-char is defined as a horizontal tab (␉) or space (␠).
	//
	// Technically "\r\n" should always be followed by a " " or a "\t".
	// But we don't have to worry about that here, as the parser for the
	// request already dealt with that.
	//
	// trimleft is defined elsewhere in this package; it is relied upon to
	// skip over these characters.
	value = trimleft(value)

	// The first important thing we should see is the scheme name "Bearer".
	//
	//@TODO: should this be case insensitive?
	const scheme = "Bearer"
	if !strings.HasPrefix(value, scheme) {
		return "", false
	}
	value = value[len(scheme):]

	// The scheme name must be followed by at least one separator, which is
	// one of these 3:
	//
	//	• "\t"   i.e., horizontal tab (␉)
	//	• " "    i.e., space (␠)
	//	• "\r\n" i.e., carriage return (␍), line feed (␊)
	//
	// Anything else (including nothing at all, or a "\r" that is not
	// followed by a "\n") means this is not a bearer credential; for
	// example "Bearerabc" is rejected here.
	//
	// The separator is consumed from value itself. Only single bytes are
	// compared against ASCII characters, so this is safe even if the rest
	// of the string is UTF-8 encoded.
	switch {
	case len(value) == 0:
		return "", false
	case value[0] == ' ', value[0] == '\t':
		value = value[1:]
	case strings.HasPrefix(value, "\r\n"):
		value = value[2:]
	default:
		return "", false
	}

	// There could be more LWSP-chars or "\r\n" before the token.
	// We consume them (and ignore them) if they are there.
	value = trimleft(value)

	// What should be left is the bearer token, possibly followed by some
	// LWSP-chars or a "\r\n". The token ends at the first " ", "\t" or "\r".
	//
	// NOTE(review): if nothing is left, an empty token is returned together
	// with successful == true. Callers that need a non-empty token must
	// check for that themselves.
	end := strings.IndexAny(value, " \t\r")
	if end < 0 {
		// No terminator at all: the rest of the value is the token.
		return value, true
	}

	// NOTE(review): a "\r" that is NOT followed by a "\n" is kept as the
	// last byte of the returned token, and everything after it is dropped.
	// That is the long-standing behavior of this function and is preserved
	// here; confirm whether it is intended.
	if value[end] == '\r' && !strings.HasPrefix(value[end:], "\r\n") {
		end++
	}

	return value[:end], true
}