go-httpbearer/parse.go

package httpbearer

import (
	"strings"
)

// Parse parses the value of an HTTP "Authorization" header and returns a bearer token, if there was one.
//
// The returned value of ‘successful’ is false when the value does not consist of (optional leading
// whitespace and then) the scheme name "Bearer" — matched case-sensitively — immediately followed by
// a " ", a "\t", or a "\r\n". In that case the returned ‘bearerToken’ is the empty string.
//
// If the scheme name and its separator are present but no token follows them, Parse returns an
// empty ‘bearerToken’ with ‘successful’ set to true.
//
// The full HTTP request header will look something like this:
//
//	"Authorization: Bearer WW91IGFyZSBub3QgYSBkcm9wIGluIHRoZSBvY2Vhbi4gWW91IGFyZSB0aGUgZW50aXJlIG9jZWFuLCBpbiBhIGRyb3Au\r\n"
//
// This function expects to receive just the value. So, with our previous example, that would be:
//
//	"Bearer WW91IGFyZSBub3QgYSBkcm9wIGluIHRoZSBvY2Vhbi4gWW91IGFyZSB0aGUgZW50aXJlIG9jZWFuLCBpbiBhIGRyb3Au"
//
// Note that HTTP headers allow for extra "\t" and " " to be put in any place where a "\t" or " " already is.
//
// So, for example, this:
//
//	"Authorization: Bearer abcde12345\r\n"
//
// And these:
//
//	"Authorization: Bearer  abcde12345\r\n"
//
//	"Authorization: Bearer   abcde12345\r\n"
//
//	"Authorization: Bearer    abcde12345\r\n"
//
//	"Authorization: Bearer     abcde12345\r\n"
//
//	"Authorization: Bearer      abcde12345\r\n"
//
//	"Authorization: Bearer\tabcde12345\r\n"
//
//	"Authorization: Bearer\t\tabcde12345\r\n"
//
//	"Authorization: Bearer\t\t\tabcde12345\r\n"
//
//	"Authorization: Bearer\t\t\t\tabcde12345\r\n"
//
//	"Authorization: Bearer\t\t\t\t\tabcde12345\r\n"
//
//	"Authorization: Bearer\t\t\t\t\t\tabcde12345\r\n"
//
//	"Authorization: Bearer \t  \t abcde12345\r\n"
//
// Are all equivalent.
//
// Also note that HTTP headers allow for the values to be broken up over multiple lines.
// The rule is that if a "\r\n" is followed by a " " or "\t" then that next line is part of the previous line.
//
// So, for example, this:
//
//	"Authorization: Bearer abcde12345\r\n"
//
// And these:
//
//	"Authorization:\r\n Bearer abcde12345\r\n"
//
//	"Authorization:\r\n\tBearer abcde12345\r\n"
//
//	"Authorization: Bearer\r\n abcde12345\r\n"
//
//	"Authorization: Bearer\r\n\tabcde12345\r\n"
//
//	"Authorization:\r\n \r\n\t      Bearer    \r\n\t \t\t\t  abcde12345\r\n"
//
// Are all equivalent.
//
// Parse deals with all of these, too.
func Parse(value string) (bearerToken string, successful bool) {

	// Although the first important thing we expect is the string "Bearer",
	// there could be zero or more of these characters in front of it:
	//
	// • "\t"   i.e., horizontal tab (␉)
	// • " "    i.e., space (␠)
	// • "\r\n" i.e., carriage return (␍), line feed (␊)
	//
	// In IETF RFC822 LWSP-char is defined as a horizontal tab (␉) or space (␠).
	//
	// Technically "\r\n" should always be followed by a " " or a "\t".
	// But we don't have to worry about that here. As the parser for the request
	// already dealt with that.
	//
	// trimleft is a helper declared elsewhere in this package; it is relied on
	// here to strip those leading characters.
	value = trimleft(value)

	// The first important thing we should see is "Bearer".
	//
	//@TODO: should this be case insensitive?
	//
	// NOTE(review): RFC 9110 §11.1 describes the authentication scheme as a
	// case-insensitive token; the match below is deliberately left case-sensitive
	// so existing behavior does not change.
	const scheme = "Bearer"

	if !strings.HasPrefix(value, scheme) {
		return "", false
	}
	value = value[len(scheme):]

	// The scheme name must be followed by at least one separator, which is one of these 3:
	//
	// • "\t"   i.e., horizontal tab (␉)
	// • " "    i.e., space (␠)
	// • "\r\n" i.e., carriage return (␍), line feed (␊)
	//
	// Without this check something like "Bearerabc" would be accepted.
	//
	// Indexing by byte is safe here, even with UTF-8 encoded input, because all
	// of the characters we look for are single-byte ASCII characters.
	//
	// Note that we assign to ‘value’ (rather than declaring a new variable with ‘:=’)
	// so that the separator is really consumed, and we check the length before every
	// index so that a truncated input such as "Bearer\r" or "Bearer\r\n" cannot panic.
	if len(value) == 0 {
		return "", false
	}

	switch value[0] {
	case ' ', '\t':
		// We got a LWSP-char.
		value = value[1:]
	case '\r':
		// A carriage return only counts as a separator as part of "\r\n".
		if len(value) < 2 || value[1] != '\n' {
			return "", false
		}
		value = value[2:]
	default:
		return "", false
	}

	// There could be more of these characters:
	//
	// • "\t"   i.e., horizontal tab (␉)
	// • " "    i.e., space (␠)
	// • "\r\n" i.e., carriage return (␍), line feed (␊)
	//
	// We will consume them (and ignore them) if they are there.
	value = trimleft(value)

	// What should be left is the bearer token (with possibly some LWSP-chars or "\r\n" after it).
	//
	// The token ends at the first " ", "\t" or "\r". Searching for these single-byte
	// ASCII characters is safe even if we are dealing with the UTF-8 Unicode encoding.
	//
	// If nothing is left, then an empty token is returned and reported as successful.
	end := strings.IndexAny(value, " \t\r")
	if end < 0 {
		return value, true
	}

	if value[end] != '\r' {
		return value[:end], true
	}

	// We stopped at a carriage return.
	//
	// NOTE(review): a "\r" that is not part of a "\r\n" is handled in a surprising
	// way: if it is the very last character, then it is returned as part of the token;
	// otherwise the token is cut off right after the "\r". This is kept exactly as it
	// was so callers see no change — confirm what the intended behavior is.
	if len(value) < end+2 {
		return value, true
	}
	if value[end+1] != '\n' {
		return value[:end+1], true
	}

	// We stopped at a "\r\n". The token is everything before it.
	return value[:end], true
}