Skip to content

Commit 9256375

Browse files
committed
Add support for accept-charset header.
1 parent 5dca581 commit 9256375

File tree

4 files changed

+177
-0
lines changed

4 files changed

+177
-0
lines changed
+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2020-2023, by Samuel Williams.
5+
# Copyright, 2023, by Thomas Morgan.
6+
7+
require_relative "split"
8+
require_relative "quoted_string"
9+
require_relative "../error"
10+
11+
module Protocol
12+
module HTTP
13+
module Header
14+
# The `accept-charset` header represents a list of character sets that the client can accept.
#
# Each element of the header is a charset token (or `*`), optionally followed by a `;q=` weight.
class AcceptCharset < Split
	# Raised when an element of the header is not a charset with an optional weight.
	ParseError = Class.new(Error)
	
	# https://tools.ietf.org/html/rfc7231#section-5.3.1
	QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/
	
	# https://tools.ietf.org/html/rfc7231#section-5.3.3
	#
	# The weight is defined as `OWS ";" OWS "q=" qvalue`, so optional spaces/tabs are permitted on either side of the `;`, and the `q` (an ABNF string literal) is case-insensitive.
	CHARSETS = /\A(?<charset>#{TOKEN})([ \t]*;[ \t]*[qQ]=(?<q>#{QVALUE}))?\z/
	
	# A single character set together with the raw text of its weight, if any.
	Charset = Struct.new(:charset, :q) do
		# The weight as a number.
		#
		# @returns [Float] the parsed `q` value, or 1.0 when no `q` was given.
		def quality_factor
			(q || 1.0).to_f
		end
	end
	
	# Parse the `accept-charset` header value into a list of character sets.
	#
	# @returns [Array(Charset)] the list of character sets and their associated quality factors.
	# @raises [ParseError] if any element does not match {CHARSETS}.
	def charsets
		self.map do |value|
			match = value.match(CHARSETS)
			
			raise ParseError, "Could not parse character set: #{value.inspect}" unless match
			
			Charset.new(match[:charset], match[:q])
		end
	end
	
	# Sort the character sets by quality factor, with the highest quality factor first.
	#
	# @returns [Array(Charset)] the list of character sets sorted by quality factor.
	def sorted_charsets
		# Including the original index in the sort key makes the sort stable, so entries with equal weight keep their header order:
		self.charsets.sort_by.with_index{|object, index| [-object.quality_factor, index]}
	end
end
51+
end
52+
end
53+
end
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2016-2024, by Samuel Williams.
5+
6+
module Protocol
	module HTTP
		module Header
			# According to https://tools.ietf.org/html/rfc7231#appendix-C
			TOKEN = /[!#$%&'*+\-.^_`|~0-9A-Z]+/i
			
			# A double-quoted string; the closing quote is the first `"` that is not preceded by a backslash.
			QUOTED_STRING = /"(?:.(?!(?<!\\)"))*.?"/
			
			# Helpers for converting between raw values and the "quoted-string" form used in header values.
			module QuotedString
				# Characters that cannot appear in a bare token (the RFC 7230 section 3.2.6 delimiters, plus space and tab); a value containing any of them must be quoted.
				QUOTES_REQUIRED = /[()<>@,;:\\"\/\[\]?={} \t]/
				
				# Unquote a "quoted-string" value according to https://tools.ietf.org/html/rfc7230#section-3.2.6
				# It should already match the QUOTED_STRING pattern above by the parser.
				#
				# @parameter value [String] the quoted string, including the surrounding quotes.
				# @parameter normalize_whitespace [Boolean] whether to collapse folded line breaks into a single space.
				# @returns [String] a new string with the quotes removed and backslash escapes resolved.
				def self.unquote(value, normalize_whitespace = true)
					# Slicing yields a new string, so the in-place substitutions below never modify the argument:
					value = value[1...-1]
					
					# Resolve quoted-pairs: a backslash followed by any character stands for that character.
					value.gsub!(/\\(.)/, '\1')
					
					if normalize_whitespace
						# LWS = [CRLF] 1*( SP | HT )
						value.gsub!(/[\r\n]+\s+/, ' ')
					end
					
					return value
				end
				
				# Quote a string if required. Doesn't handle newlines correctly currently.
				#
				# @parameter value [String] the raw value.
				# @parameter force [Boolean] quote the value even if it contains nothing that requires quoting.
				# @returns [String] the value unchanged, or wrapped in double quotes with `"` and `\` escaped.
				def self.quote(value, force = false)
					return value unless force || value.match?(QUOTES_REQUIRED)
					
					# Only the double quote and the backslash need escaping inside a quoted-string:
					escaped = value.gsub(/["\\]/) {|character| "\\#{character}"}
					
					return "\"#{escaped}\""
				end
			end
		end
	end
end

lib/protocol/http/headers.rb

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Released under the MIT License.
44
# Copyright, 2018-2024, by Samuel Williams.
55

6+
require_relative "header/accept_charset"
67
require_relative "header/split"
78
require_relative "header/multiple"
89
require_relative "header/cookie"
@@ -277,6 +278,8 @@ def []= key, value
277278
"last-modified" => Header::Date,
278279
"if-modified-since" => Header::Date,
279280
"if-unmodified-since" => Header::Date,
281+
282+
"accept-charset" => Header::AcceptCharset,
280283
}.tap{|hash| hash.default = Split}
281284

282285
# Delete all header values for the given key, and return the merged value.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# frozen_string_literal: true
2+
3+
# Released under the MIT License.
4+
# Copyright, 2016, by Matthew Kerwin.
5+
# Copyright, 2017-2024, by Samuel Williams.
6+
7+
require 'protocol/http/header/accept_charset'
8+
9+
describe Protocol::HTTP::Header::AcceptCharset::Charset do
	it "should have default quality_factor of 1.0" do
		# A nil `q` means no weight was supplied:
		unweighted = subject.new("utf-8", nil)
		
		expect(unweighted.quality_factor).to be == 1.0
	end
end
15+
16+
describe Protocol::HTTP::Header::AcceptCharset do
	# Each `with` context below supplies the raw header value as its description.
	let(:header) {subject.new(description)}
	let(:charsets) {header.sorted_charsets}
	
	with "utf-8, iso-8859-1;q=0.5, windows-1252;q=0.25" do
		it "can parse charsets" do
			expect(header.length).to be == 3
			
			first, second, third = charsets
			
			expect(first.charset).to be == "utf-8"
			expect(first.quality_factor).to be == 1.0
			
			expect(second.charset).to be == "iso-8859-1"
			expect(second.quality_factor).to be == 0.5
			
			expect(third.charset).to be == "windows-1252"
			expect(third.quality_factor).to be == 0.25
		end
	end
	
	with "windows-1252;q=0.25, iso-8859-1;q=0.5, utf-8" do
		it "should order based on quality factor" do
			names = charsets.map(&:charset)
			
			expect(names).to be == ["utf-8", "iso-8859-1", "windows-1252"]
		end
	end
	
	with "us-ascii,iso-8859-1;q=0.8,windows-1252;q=0.6,utf-8" do
		it "should order based on quality factor" do
			names = charsets.map(&:charset)
			
			expect(names).to be == ["us-ascii", "utf-8", "iso-8859-1", "windows-1252"]
		end
	end
	
	with "*;q=0" do
		it "should accept wildcard charset" do
			wildcard = charsets[0]
			
			expect(wildcard.charset).to be == "*"
			expect(wildcard.quality_factor).to be == 0
		end
	end
	
	with "utf-8, iso-8859-1;q=0.5, windows-1252;q=0.5" do
		it "should preserve relative order" do
			# The last two entries share a weight, so they must stay in header order:
			first, second, third = charsets
			
			expect(first.charset).to be == "utf-8"
			expect(second.charset).to be == "iso-8859-1"
			expect(third.charset).to be == "windows-1252"
		end
	end
	
	it "should not accept invalid input" do
		[
			# Invalid quality factor:
			"utf-8;f=1",
			
			# Invalid charset separator:
			"us-ascii;utf-8",
			
			# Invalid use of separator:
			";",
			
			# Empty charset (we ignore this one):
			# ","
		].each do |input|
			expect{subject.new(input).charsets}.to raise_exception(subject::ParseError)
		end
	end
end

0 commit comments

Comments
 (0)