Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ This library is based around the [XBRL 2.1 spec](https://www.xbrl.org/Specificat
It implements support for parsing basic facts (not tuples of facts), contexts and units through the `xml.Unmarshaler` interface.

See the package example in the godocs for how to unmarshal into the `XBRL` struct.
You can also use `Parse`, `ParseReader`, or `Decode` as small convenience helpers around the same `encoding/xml` path.

This library supports basic validation that checks for malformed facts and broken references between facts and contexts/units (see `XBRL.Validate()`),
This library supports structural validation that checks for malformed contexts, units, and facts, as well as duplicate IDs, unsupported scenarios, unsupported top-level base `item` and `tuple` elements, and broken references between facts and contexts/units (see `XBRL.Validate()`),
but it does _not_ implement full semantic validation of XBRL documents.

There are no abstractions added on top of the XBRL data structure, which makes this library flexible and simple,
but it also means you might have to read up a bit on how XBRL works to take full advantage of it.
The parser preserves lower-level XML details such as root attributes, XML names, raw link/reference elements, and generic segment content for callers that need them.

To give you a head start, here are some basics about XBRL:

Expand Down Expand Up @@ -52,7 +54,7 @@ The above fact doesn't directly tell us in which quarter EPS was `1.41`. That's
### Contexts

A [Context](https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7)
describes a business entity, period of time, and an optional scenario (this library doesn't currently support scenarios, so we're going to gloss over them).
describes a business entity, period of time, and an optional scenario (this library preserves scenario XML, but does not interpret scenario semantics).

When a fact references a context, it gives the fact more detail to help us understand what it means.

Expand Down
115 changes: 102 additions & 13 deletions context.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
package xbrl

import "encoding/xml"
import (
"encoding/xml"
"errors"
)

// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario (scenario is NOT implemented).
// Context contains information about the Entity being described, the reporting Period, and the reporting Scenario.
// All of which are necessary for understanding a business Fact captured as an XBRL item.
// Scenario is preserved as raw XML, but scenario validation and interpretation are not implemented.
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7
type Context struct {
ID string `xml:"id,attr"`

Period Period `xml:"period"`
Entity Entity `xml:"entity"`
Period Period `xml:"period"`
Entity Entity `xml:"entity"`
Scenario *RawElement `xml:"scenario"`
}

// Entity documents the business entity for a Context (business, government department, individual, etc.).
Expand All @@ -19,6 +24,23 @@ type Entity struct {
Segments Segments `xml:"segment"`
}

// Validate reports the first structural problem found in e's identifier.
// An empty identifier scheme is reported first, then an empty identifier
// value; nil is returned when both are present.
func (e Entity) Validate() error {
	switch id := e.Identifier; {
	case id.Scheme == "":
		return errors.New("entity identifier missing scheme")
	case id.Value == "":
		return errors.New("entity identifier missing value")
	default:
		return nil
	}
}

// IsValid reports whether e.Validate() returns a nil error.
func (e Entity) IsValid() bool {
	if err := e.Validate(); err != nil {
		return false
	}
	return true
}

// Identifier specifies a scheme for identifying business entities and an identifier that follows the scheme.
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.3.1
// For Example:
Expand All @@ -43,6 +65,7 @@ type Segment struct {
XMLName xml.Name
Attributes []xml.Attr `xml:",any,attr"`
Value string `xml:",chardata"`
InnerXML string `xml:",innerxml"`
}

// UnmarshalXML implements xml.Unmarshaler for Segments.
Expand All @@ -60,22 +83,27 @@ func (s *Segments) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
return nil
}

// PeriodType describes which supported shape a Period has.
// It is the result type of Period.Type(), and its defined values are the PeriodType* constants.
type PeriodType string

// All the supported PeriodType values. See Period.Type() for more information.
const (
// PeriodTypeDuration is a period with startDate and endDate.
PeriodTypeDuration PeriodType = "duration"
PeriodTypeInstant PeriodType = "instant"
PeriodTypeForever PeriodType = "forever"
PeriodTypeInvalid PeriodType = "invalid"
// PeriodTypeInstant is a period with instant.
PeriodTypeInstant PeriodType = "instant"
// PeriodTypeForever is a period with forever.
PeriodTypeForever PeriodType = "forever"
// PeriodTypeInvalid is a period that does not match exactly one supported shape.
PeriodTypeInvalid PeriodType = "invalid"
)

// Period contains an instant or interval of time for a Context.
// https://www.xbrl.org/Specification/XBRL-2.1/REC-2003-12-31/XBRL-2.1-REC-2003-12-31+corrected-errata-2013-02-20.html#_4.7.2
type Period struct {
// StartDate is non-nil and guaranteed to be before EndDate if Period.Type() returns Duration.
// StartDate is non-nil if Period.Type() returns Duration.
StartDate *string `xml:"startDate"`
// EndDate is non-nil and guaranteed to be after StartDate if Period.Type() returns Duration.
// EndDate is non-nil if Period.Type() returns Duration.
EndDate *string `xml:"endDate"`

// Instant is non-nil if Period.Type() returns Instant
Expand All @@ -90,17 +118,78 @@ type Period struct {
// Type returns the type of this period to help clarify what fields in the Period struct are non-nil and valid to use.
// The comments on the attributes inside the Period struct explain when they can be used depending on what this function returns.
func (p Period) Type() PeriodType {
periodType := PeriodTypeInvalid
matches := 0

if p.Forever != nil {
return PeriodTypeForever
periodType = PeriodTypeForever
matches++
}

if p.Instant != nil {
return PeriodTypeInstant
periodType = PeriodTypeInstant
matches++
}

if p.StartDate != nil && p.EndDate != nil {
return PeriodTypeDuration
periodType = PeriodTypeDuration
matches++
}

if matches != 1 {
return PeriodTypeInvalid
}

return periodType
}

// Validate checks that p has exactly one supported XBRL period shape.
func (p Period) Validate() error {
switch p.Type() {
case PeriodTypeDuration:
if *p.StartDate == "" {
return errors.New("duration period missing startDate")
}
if *p.EndDate == "" {
return errors.New("duration period missing endDate")
}
case PeriodTypeInstant:
if *p.Instant == "" {
return errors.New("instant period missing value")
}
case PeriodTypeForever:
return nil
default:
return errors.New("period must have exactly one of duration, instant, or forever")
}

return PeriodTypeInvalid
return nil
}

// IsValid reports whether p.Validate() returns a nil error.
func (p Period) IsValid() bool {
	err := p.Validate()
	return err == nil
}

// Validate reports the first structural problem found in c, or nil if none.
// Checks run in a fixed order: a non-empty ID, then the Entity, then the
// Period, and finally the absence of a Scenario, which validation rejects.
func (c Context) Validate() error {
	if c.ID == "" {
		return errors.New("context missing id")
	}

	// Entity is checked before Period, so an entity problem is reported first.
	checks := [...]func() error{c.Entity.Validate, c.Period.Validate}
	for _, check := range checks {
		if err := check(); err != nil {
			return err
		}
	}

	if c.Scenario != nil {
		return errors.New("scenario is not supported")
	}

	return nil
}

// IsValid reports whether c.Validate() returns a nil error.
func (c Context) IsValid() bool {
	problem := c.Validate()
	return problem == nil
}
92 changes: 92 additions & 0 deletions context_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,105 @@ func TestUnmarshalContext(t *testing.T) {
assert.Equal(t, xml.Name{Space: "xbrldi", Local: "explicitMember"}, context.Entity.Segments[0].XMLName)
assert.Equal(t, []xml.Attr{{Name: xml.Name{Local: "dimension"}, Value: "us-gaap:StatementClassOfStockAxis"}}, context.Entity.Segments[0].Attributes)
assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].Value)
assert.Equal(t, "us-gaap:CommonStockMember", context.Entity.Segments[0].InnerXML)

assert.Equal(t, xml.Name{Space: "myns", Local: "cool_segment"}, context.Entity.Segments[1].XMLName)
assert.Empty(t, context.Entity.Segments[1].Attributes)
assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].Value)
assert.Equal(t, "I follow my own rules", context.Entity.Segments[1].InnerXML)

assert.Equal(t, PeriodTypeDuration, context.Period.Type())
assert.Equal(t, "2020-09-27", *context.Period.StartDate)
assert.Equal(t, "2021-03-27", *context.Period.EndDate)
})

t.Run("segment preserves nested raw XML", func(t *testing.T) {
// language=xml
contextXML := `<context id="nested_segment">
<entity>
<identifier scheme="http://www.sec.gov/CIK">0000320193</identifier>
<segment>
<dim:typedMember dimension="custom:Axis"><custom:domain>value</custom:domain></dim:typedMember>
</segment>
</entity>
<period><forever/></period>
</context>`

var context Context
require.NoError(t, xml.Unmarshal([]byte(contextXML), &context))

require.Len(t, context.Entity.Segments, 1)
assert.Equal(t, xml.Name{Space: "dim", Local: "typedMember"}, context.Entity.Segments[0].XMLName)
assert.Contains(t, context.Entity.Segments[0].InnerXML, "<custom:domain>value</custom:domain>")
})
}

// TestContextValidation exercises the Validate and IsValid methods of Period,
// Entity, and Context, including the rejection of a parsed scenario element.
func TestContextValidation(t *testing.T) {
	t.Run("period must have exactly one shape", func(t *testing.T) {
		// Both a duration and an instant are populated, so no single shape applies.
		ambiguous := Period{
			StartDate: stringPtr("2020-09-27"),
			EndDate:   stringPtr("2021-03-27"),
			Instant:   stringPtr("2021-03-27"),
		}

		gotType := ambiguous.Type()
		assert.Equal(t, PeriodTypeInvalid, gotType)
		assert.False(t, ambiguous.IsValid())
	})

	t.Run("duration requires start and end values", func(t *testing.T) {
		// EndDate is present but empty, which still counts as a duration shape.
		emptyEnd := Period{
			StartDate: stringPtr("2020-09-27"),
			EndDate:   stringPtr(""),
		}

		err := emptyEnd.Validate()
		assert.EqualError(t, err, "duration period missing endDate")
	})

	t.Run("entity requires identifier scheme and value", func(t *testing.T) {
		schemeOnly := Entity{
			Identifier: Identifier{
				Scheme: "http://www.sec.gov/CIK",
			},
		}

		err := schemeOnly.Validate()
		assert.EqualError(t, err, "entity identifier missing value")
	})

	t.Run("context requires id and entity identifier", func(t *testing.T) {
		// Everything except the ID is filled in.
		withoutID := Context{
			Period: Period{Instant: stringPtr("2021-03-27")},
			Entity: Entity{
				Identifier: Identifier{
					Scheme: "http://www.sec.gov/CIK",
					Value:  "0000320193",
				},
			},
		}

		assert.EqualError(t, withoutID.Validate(), "context missing id")
		assert.False(t, withoutID.IsValid())
	})

	t.Run("scenario is preserved but unsupported by validation", func(t *testing.T) {
		// language=xml
		rawContext := `<context id="scenario_context">
<entity>
<identifier scheme="http://www.sec.gov/CIK">0000320193</identifier>
</entity>
<period>
<instant>2021-03-27</instant>
</period>
<scenario>
<myns:forecast>true</myns:forecast>
</scenario>
</context>`

		var parsed Context
		err := xml.Unmarshal([]byte(rawContext), &parsed)
		require.NoError(t, err)

		// The scenario element must survive parsing before validation rejects it.
		require.NotNil(t, parsed.Scenario)
		assert.Equal(t, xml.Name{Local: "scenario"}, parsed.Scenario.XMLName)
		assert.Contains(t, parsed.Scenario.InnerXML, "forecast")
		assert.EqualError(t, parsed.Validate(), "scenario is not supported")
	})
}
17 changes: 17 additions & 0 deletions doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Package xbrl parses XBRL 2.1 instance documents into simple Go data.
//
// The package preserves XBRL concepts such as facts, contexts, periods,
// entities, segments, units, XML names, attributes, and raw reference elements.
// It does not load taxonomies, resolve linkbases, normalize financial
// statements, transform Inline XBRL, or perform accounting-rule validation.
//
// XML unmarshalling is a first-class API:
//
// var doc xbrl.XBRL
// err := xml.Unmarshal(data, &doc)
//
// Parse, ParseReader, and Decode are convenience helpers around the same
// encoding/xml path. Parsing and validation are separate operations; call
// XBRL.Validate when you need structural checks for contexts, units, facts, and
// references.
package xbrl
31 changes: 24 additions & 7 deletions example_unmarshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ import (
"github.com/massive-com/xbrl-parser/v2"
)

const doc = `<xbrl>
const doc = `<xbrl
xmlns="http://www.xbrl.org/2003/instance"
xmlns:link="http://www.xbrl.org/2003/linkbase"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:ci="http://www.xbrl.org/us/gaap/ci/2003/usfr-ci-2003">
<link:schemaRef xlink:type="simple" xlink:href="http://www.xbrl.org/us/fr/ci/2000-07-31/usfr-ci-2003.xsd"/>

<context id="c1">
Expand All @@ -32,12 +36,11 @@ func Example() {
if err := xml.Unmarshal([]byte(doc), &processed); err != nil {
panic(err)
}

fact := processed.Facts[0]
if !fact.IsValid() {
panic("fact invalid!")
if err := processed.Validate(); err != nil {
panic(err)
}

fact := processed.Facts[0]
factType := fact.Type()
numericValue, err := fact.NumericValue()

Expand All @@ -48,9 +51,23 @@ func Example() {
panic(err)
}

fmt.Printf("Fact: %s:%s (type: %s)\n", fact.XMLName.Space, fact.XMLName.Local, factType)
fmt.Printf("Fact: %s (namespace: %s, type: %s)\n", fact.XMLName.Local, fact.XMLName.Space, factType)
fmt.Printf(" %.0f %s on %s\n", numericValue, factUnit.String(), *factContext.Period.Instant)

// Output: Fact: ci:assets (type: non_fraction)
// Output: Fact: assets (namespace: http://www.xbrl.org/us/gaap/ci/2003/usfr-ci-2003, type: non_fraction)
// 727 shares on 2021-04-16
}

// ExampleParse parses the sample document with xbrl.Parse, validates the
// result, and prints how many facts it contains.
func ExampleParse() {
	instance, parseErr := xbrl.Parse([]byte(doc))
	if parseErr != nil {
		panic(parseErr)
	}

	if validateErr := instance.Validate(); validateErr != nil {
		panic(validateErr)
	}

	factCount := len(instance.Facts)
	fmt.Println(factCount)

	// Output: 1
}
Loading