Skip to content

Commit 532f0d0

Browse files
authored
Parse and format century numbers (go-chrono#68)
* Improve comment. Signed-off-by: joe-mann <[email protected]> * Formatting. Signed-off-by: joe-mann <[email protected]> * Formatting. Signed-off-by: joe-mann <[email protected]> * Require consistency. Signed-off-by: joe-mann <[email protected]> --------- Signed-off-by: joe-mann <[email protected]>
1 parent 75a1a72 commit 532f0d0

File tree

3 files changed

+201
-48
lines changed

3 files changed

+201
-48
lines changed

format.go

+86-36
Original file line numberDiff line numberDiff line change
@@ -9,39 +9,52 @@ import (
99
// These are predefined layouts used for the parsing and formatting of dates, times and date-times.
1010
// Additional layouts can be composed using the specifiers detailed below:
1111
//
12-
// %Y: The ISO 8601 year as a decimal number, padded to 4 digits with leading 0s.
13-
// %EY: The year in the era as a decimal number, padded to 4 digits with leading 0s.
14-
// %y: The ISO 8601 year without a century as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 99. See note (1).
15-
// %Ey: The year in the era without a century as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 99. See note (1).
16-
// %EC: The name of the era, either "CE" (for Common Era) "BCE" (for Before the Common Era).
17-
// %j: The day of the year as a decimal number, padded to 3 digits with leading 0s, in the range 001 to 366. See note (2).
18-
// %m: The month as a decimal number, padded to 2 digits with a leading 0, in the range 01 to 12.
19-
// %B: The full month name, e.g. January, February, etc.
20-
// %b: The abbreviated month name, e.g. Jan, Feb, etc.
21-
// %d: The day of the month as a decimal number, padded to 2 digits with a leading 0, in the range 01 to 31.
12+
// - %Y: The ISO 8601 year as a decimal number, padded to 4 digits with leading 0s.
13+
// - %EY: The year in the era as a decimal number, padded to 4 digits with leading 0s.
14+
// - %y: The ISO 8601 year without a century as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 99. See note (1).
15+
// - %Ey: The year in the era without a century as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 99. See notes (1) and (9).
16+
// - %C: The century as a decimal number, padded to 2 digits with a leading 0, e.g. 19 for 1980. See note (9).
17+
// - %EC: The name of the era, either "CE" (for Common Era) or "BCE" (for Before the Common Era).
18+
// - %j: The day of the year as a decimal number, padded to 3 digits with leading 0s, in the range 001 to 366. See note (2).
19+
// - %m: The month as a decimal number, padded to 2 digits with a leading 0, in the range 01 to 12.
20+
// - %B: The full month name, e.g. January, February, etc.
21+
// - %b: The abbreviated month name, e.g. Jan, Feb, etc.
22+
// - %d: The day of the month as a decimal number, padded to 2 digits with a leading 0, in the range 01 to 31.
2223
//
23-
// %u: The day of the week as a decimal number, e.g. 1 for Monday, 2 for Tuesday, etc. See note (3).
24-
// %A: The full name of the day of the week, e.g. Monday, Tuesday, etc. See note (3).
25-
// %a: The abbreviated name of the day of the week, e.g. Mon, Tue, etc. See note (3).
24+
// Days of week:
2625
//
27-
// %G: The ISO 8601 week-based year, padded to 4 digits with leading 0s. This may differ by ±1 to the actual calendar year. See note (2).
28-
// %V: The ISO week number, padded to 2 digits with a leading 0, in the range 01 to 53. See note (2).
26+
// - %u: The day of the week as a decimal number, e.g. 1 for Monday, 2 for Tuesday, etc. See note (3).
27+
// - %A: The full name of the day of the week, e.g. Monday, Tuesday, etc. See note (3).
28+
// - %a: The abbreviated name of the day of the week, e.g. Mon, Tue, etc. See note (3).
2929
//
30-
// %P: Either "am" or "pm", where noon is "pm" and midnight is "am".
31-
// %p: Either "AM" or "PM", where noon is "PM" and midnight is "AM".
32-
// %I: The hour of the day using the 12-hour clock as a decimal number, padded to 2 digits with a leading 0, in the range 01 to 12. See note (4).
30+
// Week numbers:
3331
//
34-
// %H: The hour of the day using the 24-hour clock as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 23. See note (5).
35-
// %M: The minute as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 59.
36-
// %S: The second as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 59.
32+
// - %G: The ISO 8601 week-based year, padded to 4 digits with leading 0s. This may differ by ±1 to the actual calendar year. See note (2).
33+
// - %V: The ISO week number, padded to 2 digits with a leading 0, in the range 01 to 53. See note (2).
3734
//
38-
// %f: Equivalent to %6f.
39-
// %3f: The millisecond offset within the represented second, rounded either up or down and padded to 3 digits with leading 0s.
40-
// %6f: The microsecond offset within the represented second, rounded either up or down and padded to 6 digits with leading 0s.
41-
// %9f: The nanosecond offset within the represented second, padded to 9 digits with leading 0s.
35+
// Times of day:
4236
//
43-
// %z: The UTC offset in the format ±HHMM, preceded always by the sign ('+' or '-'), and padded to 4 digits with leading zeros. See notes (6), (7), and (8).
44-
// %Ez: Equivalent to %z, except that an offset of +0000 is formatted at 'Z', and other offsets as ±HH:MM. See notes (6) and (7).
37+
// - %P: Either "am" or "pm", where noon is "pm" and midnight is "am".
38+
// - %p: Either "AM" or "PM", where noon is "PM" and midnight is "AM".
39+
// - %I: The hour of the day using the 12-hour clock as a decimal number, padded to 2 digits with a leading 0, in the range 01 to 12. See note (4).
40+
//
41+
// Time components:
42+
//
43+
// - %H: The hour of the day using the 24-hour clock as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 23. See note (5).
44+
// - %M: The minute as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 59.
45+
// - %S: The second as a decimal number, padded to 2 digits with a leading 0, in the range 00 to 59.
46+
//
47+
// Fractional seconds:
48+
//
49+
// - %f: Equivalent to %6f.
50+
// - %3f: The millisecond offset within the represented second, rounded either up or down and padded to 3 digits with leading 0s.
51+
// - %6f: The microsecond offset within the represented second, rounded either up or down and padded to 6 digits with leading 0s.
52+
// - %9f: The nanosecond offset within the represented second, padded to 9 digits with leading 0s.
53+
//
54+
// Time offsets:
55+
//
56+
// - %z: The UTC offset in the format ±HHMM, preceded always by the sign ('+' or '-'), and padded to 4 digits with leading zeros. See notes (6), (7), and (8).
57+
// - %Ez: Equivalent to %z, except that an offset of +0000 is formatted as 'Z', and other offsets as ±HH:MM. See notes (6) and (7).
4558
//
4659
// When formatting using specifiers that represent padded decimals, leading 0s can be omitted using the '-' character after the '%'.
4760
// For example, '%m' may produce the string '04' (for April), but '%-m' produces '4'.
@@ -66,7 +79,7 @@ import (
6679
//
6780
// Notes:
6881
//
69-
// 1. When 2-digit years are parsed, they are converted according to the POSIX and ISO C standards:
82+
// 1. When 2-digit years are parsed (%y or %Ey), they are converted according to the POSIX and ISO C standards:
7083
// values 69–99 are mapped to 1969–1999, and values 0–68 are mapped to 2000–2068.
7184
// 2. When a date is parsed in combination with a day of year (%j), and/or an ISO week-based date (%G and/or %V),
7285
// an error will be returned if the represented dates do not match.
@@ -75,15 +88,17 @@ import (
7588
// The day of the week is otherwise ignored - it does not have any effect on the result.
7689
// 4. When a time represented in the 12-hour clock format (%I) is parsed, and no time of day (%P or %p) is present,
7790
// the time of day is assumed to be before noon, i.e. am or AM.
78-
// 5. When a time is parsed that contains the time of day (%P or %p), any hour (%H) that is present must be valid
79-
// on the 12-hour clock.
91+
// 5. When a time is parsed that contains the time of day (%P or %p), any hour (%H) that is present
92+
// must be valid on the 12-hour clock.
8093
// 6. When UTC offsets are parsed (%z or %Ez) into a type which does not include a time offset element,
8194
// the offset present in the string is ignored.
8295
// When UTC offsets are formatted from a type which does not include a time offset element,
8396
// the offset will not be present in the returned string.
8497
// 7. When UTC offsets are parsed (%z or %Ez), the shortened form of ±HH is accepted.
8598
// However, when formatted, only the full forms are returned (either ±HHMM or ±HH:MM).
8699
// 8. When %z is used for parsing a UTC offset, 'Z' can be used to represent an offset of +0000.
100+
// 9. When parsing partial years (%Ey and %C) in combination with a full year (%Y or %EY),
101+
// an error will be returned if the represented years do not match.
87102
const (
88103
// ISO 8601.
89104
ISO8601 = ISO8601DateTimeExtended
@@ -172,7 +187,7 @@ NextChar:
172187
out = append(out, []rune("CE")...)
173188
}
174189
} else { // %C
175-
panic("unsupported specifier 'C'")
190+
out = append(out, []rune(fmt.Sprintf("%02d", year/100))...)
176191
}
177192
case date != nil && main == 'd': // %d
178193
out = append(out, []rune(decimal(day, 2))...)
@@ -308,8 +323,12 @@ func parseDateAndTime(layout, value string, date, time, offset *int64) error {
308323
haveGregorianYear bool
309324
isBCE bool
310325
year int
311-
month int
312-
day int
326+
yearCentury *int
327+
shortYear *int
328+
yearType int // -1 = short/century, 0 = none, 1 = full year
329+
330+
month int
331+
day int
313332

314333
dayOfWeek int
315334

@@ -509,7 +528,12 @@ func parseDateAndTime(layout, value string, date, time, offset *int64) error {
509528
return fmt.Errorf("unrecognized era %q", original)
510529
}
511530
} else { // %C
512-
return fmt.Errorf("unsupported specifier 'C'")
531+
var v int
532+
if v, err = integer(2); err != nil {
533+
return err
534+
}
535+
yearCentury = &v
536+
yearType = -1
513537
}
514538
case date != nil && main == 'd': // %d
515539
haveDate = true
@@ -602,10 +626,12 @@ func parseDateAndTime(layout, value string, date, time, offset *int64) error {
602626
haveGregorianYear = true
603627
}
604628

605-
if year, err = integer(2); err != nil {
629+
var v int
630+
if v, err = integer(2); err != nil {
606631
return err
607632
}
608-
year += getCentury(year)
633+
shortYear = &v
634+
yearType = -1
609635
case date != nil && main == 'Y': // %Y
610636
if localed { // %EY
611637
haveGregorianYear = true
@@ -614,6 +640,7 @@ func parseDateAndTime(layout, value string, date, time, offset *int64) error {
614640
if year, err = integer(4); err != nil {
615641
return err
616642
}
643+
yearType = 1
617644
case time != nil && main == 'z': // %z
618645
// If at end of input and no offset is requested, break.
619646
// But continue to parse in the case where offset is not requested, but may be present.
@@ -716,6 +743,29 @@ func parseDateAndTime(layout, value string, date, time, offset *int64) error {
716743
}
717744

718745
if date != nil {
746+
// Check century according to note (9).
747+
if yearCentury != nil {
748+
if yearType == 1 && year/100 != *yearCentury {
749+
return fmt.Errorf("year century %d does not agree with year %d", *yearCentury, year)
750+
} else if yearType != 1 {
751+
year = *yearCentury * 100
752+
}
753+
}
754+
755+
// Check 2-digit year according to note (9).
756+
if shortYear != nil {
757+
_year := getCentury(*shortYear) + *shortYear
758+
if yearCentury != nil {
759+
_year = *yearCentury*100 + *shortYear
760+
}
761+
762+
if yearType == 1 && year-(year/100*100) != *shortYear {
763+
return fmt.Errorf("short year %d (%d) does not agree with year %d", *shortYear, _year, year)
764+
} else if yearType != 1 {
765+
year = _year
766+
}
767+
}
768+
719769
if haveGregorianYear {
720770
if year, err = convertGregorianToISOYear(year, isBCE); err != nil {
721771
return err

0 commit comments

Comments
 (0)