Skip to content

Commit 77c5f02

Browse files
committed
Add support for accept-encoding header.
1 parent 9256375 commit 77c5f02

File tree

6 files changed

+155
-26
lines changed

6 files changed

+155
-26
lines changed

lib/protocol/http/header/accept_charset.rb

+8-15
Original file line numberDiff line numberDiff line change
@@ -15,38 +15,31 @@ module Header
1515
class AcceptCharset < Split
1616
ParseError = Class.new(Error)
1717

18-
# https://tools.ietf.org/html/rfc7231#section-5.3.1
19-
QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/
20-
2118
# https://tools.ietf.org/html/rfc7231#section-5.3.3
22-
CHARSETS = /\A(?<charset>#{TOKEN})(;q=(?<q>#{QVALUE}))?\z/
19+
CHARSET = /\A(?<name>#{TOKEN})(;q=(?<q>#{QVALUE}))?\z/
2320

24-
Charset = Struct.new(:charset, :q) do
21+
Charset = Struct.new(:name, :q) do
2522
def quality_factor
2623
(q || 1.0).to_f
2724
end
25+
26+
def <=> other
27+
other.quality_factor <=> self.quality_factor
28+
end
2829
end
2930

3031
# Parse the `accept-charset` header value into a list of character sets.
3132
#
3233
# @returns [Array(Charset)] the list of character sets and their associated quality factors.
3334
def charsets
3435
self.map do |value|
35-
if match = value.match(CHARSETS)
36-
Charset.new(match[:charset], match[:q])
36+
if match = value.match(CHARSET)
37+
Charset.new(match[:name], match[:q])
3738
else
3839
raise ParseError.new("Could not parse character set: #{value.inspect}")
3940
end
4041
end
4142
end
42-
43-
# Sort the character sets by quality factor, with the highest quality factor first.
44-
#
45-
# @returns [Array(Charset)] the list of character sets sorted by quality factor.
46-
def sorted_charsets
47-
# We do this to get a stable sort:
48-
self.charsets.sort_by.with_index{|object, index| [-object.quality_factor, index]}
49-
end
5043
end
5144
end
5245
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2020-2023, by Samuel Williams.
5+
# Copyright, 2023, by Thomas Morgan.
6+
7+
require_relative "split"
8+
require_relative "quoted_string"
9+
require_relative "../error"
10+
11+
module Protocol
12+
module HTTP
13+
module Header
14+
# The `accept-encoding` header represents a list of encodings that the client can accept.
15+
class AcceptEncoding < Split
  ParseError = Class.new(Error)

  # https://tools.ietf.org/html/rfc7231#section-5.3.1
  # Re-exported from the shared definition in quoted_string.rb so the pattern is
  # maintained in one place while `AcceptEncoding::QVALUE` keeps resolving.
  QVALUE = Header::QVALUE

  # https://tools.ietf.org/html/rfc7231#section-5.3.4
  # Matches one whole list element: a token name with an optional `;q=` weight.
  ENCODING = /\A(?<name>#{TOKEN})(;q=(?<q>#{QVALUE}))?\z/

  # A single encoding name together with its raw (string or nil) `q` parameter.
  Encoding = Struct.new(:name, :q) do
    # The weight of this encoding as a float.
    #
    # @returns [Float] the parsed `q` value, or 1.0 when no `q` was given.
    def quality_factor
      (q || 1.0).to_f
    end

    # Compare by quality factor in descending order, so that sorting puts the
    # most preferred encoding first.
    #
    # NOTE: `Array#sort` is not guaranteed to be stable, so entries with equal
    # quality factors are not guaranteed to keep their header order.
    def <=> other
      other.quality_factor <=> self.quality_factor
    end
  end

  # Parse the `accept-encoding` header value into a list of encodings.
  #
  # @returns [Array(Encoding)] the list of encodings and their associated quality factors, in header order.
  # @raises [ParseError] if any element does not match {ENCODING}.
  def encodings
    self.map do |value|
      match = value.match(ENCODING)

      raise ParseError, "Could not parse encoding: #{value.inspect}" unless match

      Encoding.new(match[:name], match[:q])
    end
  end
end
47+
end
48+
end
49+
end

lib/protocol/http/header/quoted_string.rb

+4
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,12 @@ module HTTP
88
module Header
99
# According to https://tools.ietf.org/html/rfc7231#appendix-C
1010
TOKEN = /[!#$%&'*+\-.^_`|~0-9A-Z]+/i
11+
1112
QUOTED_STRING = /"(?:.(?!(?<!\\)"))*.?"/
1213

14+
# https://tools.ietf.org/html/rfc7231#section-5.3.1
15+
QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/
16+
1317
module QuotedString
1418
# Unquote a "quoted-string" value according to https://tools.ietf.org/html/rfc7230#section-3.2.6
1519
# It should already match the QUOTED_STRING pattern above by the parser.

lib/protocol/http/headers.rb

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# Copyright, 2018-2024, by Samuel Williams.
55

66
require_relative "header/accept_charset"
7+
require_relative "header/accept_encoding"
78
require_relative "header/split"
89
require_relative "header/multiple"
910
require_relative "header/cookie"
@@ -280,6 +281,7 @@ def []= key, value
280281
"if-unmodified-since" => Header::Date,
281282

282283
"accept-charset" => Header::AcceptCharset,
284+
"accept-encoding" => Header::AcceptEncoding,
283285
}.tap{|hash| hash.default = Split}
284286

285287
# Delete all header values for the given key, and return the merged value.

test/protocol/http/header/accept_charset.rb

+11-11
Original file line numberDiff line numberDiff line change
@@ -15,47 +15,47 @@
1515

1616
describe Protocol::HTTP::Header::AcceptCharset do
1717
let(:header) {subject.new(description)}
18-
let(:charsets) {header.sorted_charsets}
18+
let(:charsets) {header.charsets.sort}
1919

2020
with "utf-8, iso-8859-1;q=0.5, windows-1252;q=0.25" do
2121
it "can parse charsets" do
2222
expect(header.length).to be == 3
2323

24-
expect(charsets[0].charset).to be == "utf-8"
24+
expect(charsets[0].name).to be == "utf-8"
2525
expect(charsets[0].quality_factor).to be == 1.0
2626

27-
expect(charsets[1].charset).to be == "iso-8859-1"
27+
expect(charsets[1].name).to be == "iso-8859-1"
2828
expect(charsets[1].quality_factor).to be == 0.5
2929

30-
expect(charsets[2].charset).to be == "windows-1252"
30+
expect(charsets[2].name).to be == "windows-1252"
3131
expect(charsets[2].quality_factor).to be == 0.25
3232
end
3333
end
3434

3535
with "windows-1252;q=0.25, iso-8859-1;q=0.5, utf-8" do
3636
it "should order based on quality factor" do
37-
expect(charsets.collect(&:charset)).to be == %w{utf-8 iso-8859-1 windows-1252}
37+
expect(charsets.collect(&:name)).to be == %w{utf-8 iso-8859-1 windows-1252}
3838
end
3939
end
4040

4141
with "us-ascii,iso-8859-1;q=0.8,windows-1252;q=0.6,utf-8" do
4242
it "should order based on quality factor" do
43-
expect(charsets.collect(&:charset)).to be == %w{us-ascii utf-8 iso-8859-1 windows-1252}
43+
expect(charsets.collect(&:name)).to be == %w{us-ascii utf-8 iso-8859-1 windows-1252}
4444
end
4545
end
4646

4747
with "*;q=0" do
4848
it "should accept wildcard charset" do
49-
expect(charsets[0].charset).to be == "*"
49+
expect(charsets[0].name).to be == "*"
5050
expect(charsets[0].quality_factor).to be == 0
5151
end
5252
end
5353

5454
with "utf-8, iso-8859-1;q=0.5, windows-1252;q=0.5" do
5555
it "should preserve relative order" do
56-
expect(charsets[0].charset).to be == "utf-8"
57-
expect(charsets[1].charset).to be == "iso-8859-1"
58-
expect(charsets[2].charset).to be == "windows-1252"
56+
expect(charsets[0].name).to be == "utf-8"
57+
expect(charsets[1].name).to be == "iso-8859-1"
58+
expect(charsets[2].name).to be == "windows-1252"
5959
end
6060
end
6161

@@ -64,7 +64,7 @@
6464
# Invalid quality factor:
6565
"utf-8;f=1",
6666

67-
# Invalid charset separator:
67+
# Invalid parameter:
6868
"us-ascii;utf-8",
6969

7070
# Invalid use of separator:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2016, by Matthew Kerwin.
5+
# Copyright, 2017-2024, by Samuel Williams.
6+
7+
require 'protocol/http/header/accept_encoding'
8+
9+
# An encoding built without an explicit `q` value must report the default weight.
describe Protocol::HTTP::Header::AcceptEncoding::Encoding do
  it "should have default quality_factor of 1.0" do
    expect(subject.new('utf-8', nil).quality_factor).to be == 1.0
  end
end
15+
16+
# Exercises parsing and quality-factor ordering of `accept-encoding` header values.
describe Protocol::HTTP::Header::AcceptEncoding do
  # NOTE(review): `description` is presumably the string given to the enclosing `with` — confirm against the test framework.
  let(:header) {subject.new(description)}
  # Parsed encodings, sorted with the comparison defined by the encoding struct.
  let(:encodings) {header.encodings.sort}

  with "gzip, deflate;q=0.5, identity;q=0.25" do
    it "can parse encodings" do
      expect(header.length).to be == 3

      expect(encodings[0].name).to be == "gzip"
      expect(encodings[0].quality_factor).to be == 1.0

      expect(encodings[1].name).to be == "deflate"
      expect(encodings[1].quality_factor).to be == 0.5

      expect(encodings[2].name).to be == "identity"
      expect(encodings[2].quality_factor).to be == 0.25
    end
  end

  with "identity;q=0.25, deflate;q=0.5, gzip" do
    it "should order based on quality factor" do
      expect(encodings.collect(&:name)).to be == %w{gzip deflate identity}
    end
  end

  with "br,deflate;q=0.8,identity;q=0.6,gzip" do
    it "should order based on quality factor" do
      expect(encodings.collect(&:name)).to be == %w{br gzip deflate identity}
    end
  end

  with "*;q=0" do
    it "should accept wildcard encoding" do
      expect(encodings[0].name).to be == "*"
      expect(encodings[0].quality_factor).to be == 0
    end
  end

  with "br, gzip;q=0.5, deflate;q=0.5" do
    # NOTE: `Array#sort` does not guarantee stability, so the relative order of
    # the two `q=0.5` entries depends on how the sort treats equal elements.
    it "should preserve relative order" do
      expect(encodings[0].name).to be == "br"
      expect(encodings[1].name).to be == "gzip"
      expect(encodings[2].name).to be == "deflate"
    end
  end

  it "should not accept invalid input" do
    bad_values = [
      # Invalid quality factor:
      "br;f=1",

      # Invalid parameter:
      "br;gzip",

      # Invalid use of separator:
      ";",

      # Empty (we ignore this one):
      # ","
    ]

    bad_values.each do |value|
      expect{subject.new(value).encodings}.to raise_exception(subject::ParseError)
    end
  end
end

0 commit comments

Comments
 (0)