From bc35190d436738a0dcc9eab4ab77805c7869d8ba Mon Sep 17 00:00:00 2001 From: Thomas Naude Date: Mon, 15 Aug 2022 15:51:49 +0200 Subject: [PATCH 1/2] auto-correct style --- .byebug_history | 16 + .rubocop.yml | 3 + .rubocop_todo.yml | 512 +----------------------- lib/daru.rb | 36 +- lib/daru/accessors/array_wrapper.rb | 21 +- lib/daru/accessors/dataframe_by_row.rb | 6 +- lib/daru/category.rb | 190 ++++----- lib/daru/core/group_by.rb | 40 +- lib/daru/core/merge.rb | 52 +-- lib/daru/core/query.rb | 40 +- lib/daru/dataframe.rb | 528 +++++++++++++------------ lib/daru/date_time/index.rb | 122 +++--- lib/daru/date_time/offsets.rb | 164 ++++---- lib/daru/extensions/which_dsl.rb | 8 +- lib/daru/formatters/table.rb | 12 +- lib/daru/helpers/array.rb | 6 +- lib/daru/index/categorical_index.rb | 27 +- lib/daru/index/index.rb | 74 ++-- lib/daru/index/multi_index.rb | 96 ++--- lib/daru/io/io.rb | 68 ++-- lib/daru/iruby/helpers.rb | 8 +- lib/daru/maths/arithmetic/dataframe.rb | 26 +- lib/daru/maths/arithmetic/vector.rb | 30 +- lib/daru/maths/statistics/dataframe.rb | 33 +- lib/daru/maths/statistics/vector.rb | 205 +++++----- lib/daru/monkeys.rb | 18 +- lib/daru/plotting/gruff.rb | 6 +- lib/daru/plotting/gruff/category.rb | 10 +- lib/daru/plotting/gruff/dataframe.rb | 18 +- lib/daru/plotting/gruff/vector.rb | 14 +- lib/daru/vector.rb | 356 ++++++++--------- 31 files changed, 1153 insertions(+), 1592 deletions(-) create mode 100644 .byebug_history diff --git a/.byebug_history b/.byebug_history new file mode 100644 index 000000000..eef1105df --- /dev/null +++ b/.byebug_history @@ -0,0 +1,16 @@ +exit +rows.factors.to_h { |header| [header, nil] } +rows.factors.map { |header| [header, nil] } +rows.factors.each { |header| [header, nil] } +rows.factors.each { |header| p header.first } +rows.factors.each { |header| p header } +rows.factors.to_h { |(header, value)| [value, nil] } +rows.factors.map { |r| [r, nil] }.to_h +rows.factors.to_h { |header, value| [value, nil] } +rows.factors +rows.factors.to_s +rows.factors.map { |r| [r, nil] } +rows.factors.map { |r| [r, nil] }.to_h +rows.factors.to_h { |r| [r, nil] } +rows.factors.first +rows.factors diff --git a/.rubocop.yml b/.rubocop.yml index ed0776aa1..9038586fb 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -22,3 +22,6 @@ AllCops: require: - rubocop-performance - rubocop-rspec + +Style/FrozenStringLiteralComment: + EnforcedStyle: never diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 868bdaf37..7ef36258e 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,69 +1,11 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2022-08-15 13:16:52 UTC using RuboCop version 1.35.0. +# on 2022-08-16 13:18:56 UTC using RuboCop version 1.35.0. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new # versions of RuboCop, may require this file to be generated again. -# Offense count: 33 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle, IndentationWidth. 
-# SupportedStyles: with_first_argument, with_fixed_indentation -Layout/ArgumentAlignment: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/core/merge.rb' - - 'lib/daru/core/query.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/date_time/offsets.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/maths/arithmetic/vector.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 48 -# This cop supports safe autocorrection (--autocorrect). -Layout/EmptyLineAfterGuardClause: - Exclude: - - 'lib/daru/accessors/array_wrapper.rb' - - 'lib/daru/category.rb' - - 'lib/daru/core/group_by.rb' - - 'lib/daru/core/merge.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AllowAliasSyntax, AllowedMethods. -# AllowedMethods: alias_method, public, protected, private -Layout/EmptyLinesAroundAttributeAccessor: - Exclude: - - 'lib/daru/vector.rb' - -# Offense count: 18 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle. -# SupportedHashRocketStyles: key, separator, table -# SupportedColonStyles: key, separator, table -# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit -Layout/HashAlignment: - Exclude: - - 'lib/daru/core/merge.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/extensions/which_dsl.rb' - - 'lib/daru/vector.rb' - # Offense count: 3 # Configuration parameters: AutoCorrect, EnforcedStyle. # SupportedStyles: leading, trailing @@ -73,98 +15,6 @@ Layout/LineContinuationLeadingSpace: - 'lib/daru/index/categorical_index.rb' - 'lib/daru/io/io.rb' -# Offense count: 15 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AutoCorrect, EnforcedStyle. -# SupportedStyles: space, no_space -Layout/LineContinuationSpacing: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/vector.rb' - -# Offense count: 18 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle, IndentationWidth. -# SupportedStyles: aligned, indented -Layout/LineEndStringConcatenationIndentation: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/vector.rb' - -# Offense count: 49 -# This cop supports safe autocorrection (--autocorrect). -Layout/SpaceAfterComma: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/date_time/offsets.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/maths/statistics/dataframe.rb' - - 'lib/daru/maths/statistics/vector.rb' - -# Offense count: 176 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: . 
-# SupportedStyles: space, no_space -Layout/SpaceAroundEqualsInParameterDefault: - EnforcedStyle: no_space - -# Offense count: 79 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AllowForAlignment, EnforcedStyleForExponentOperator. -# SupportedStylesForExponentOperator: space, no_space -Layout/SpaceAroundOperators: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/core/group_by.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/offsets.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/maths/statistics/dataframe.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 24 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle, EnforcedStyleForEmptyBraces. -# SupportedStyles: space, no_space, compact -# SupportedStylesForEmptyBraces: space, no_space -Layout/SpaceInsideHashLiteralBraces: - Exclude: - - 'lib/daru/core/merge.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/plotting/gruff/category.rb' - - 'lib/daru/plotting/gruff/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 46 -# This cop supports safe autocorrection (--autocorrect). -Lint/AmbiguousOperatorPrecedence: - Exclude: - - 'lib/daru/core/merge.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/date_time/offsets.rb' - - 'lib/daru/formatters/table.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/maths/statistics/dataframe.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/plotting/gruff/dataframe.rb' - - 'lib/daru/vector.rb' - # Offense count: 2 # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches. Lint/DuplicateBranch: @@ -196,12 +46,6 @@ Lint/MissingSuper: - 'lib/daru/index/index.rb' - 'lib/daru/index/multi_index.rb' -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Lint/RedundantCopDisableDirective: - Exclude: - - 'lib/daru/date_time/offsets.rb' - # Offense count: 2 # Configuration parameters: IgnoreImplicitReferences. Lint/ShadowedArgument: @@ -213,6 +57,12 @@ Lint/ToEnumArguments: Exclude: - 'lib/daru/vector.rb' +# Offense count: 1 +# Configuration parameters: AllowKeywordBlockArguments. +Lint/UnderscorePrefixedVariableName: + Exclude: + - 'lib/daru/dataframe.rb' + # Offense count: 6 # Configuration parameters: CheckForMethodsWithNoSideEffects. Lint/Void: @@ -221,22 +71,22 @@ Lint/Void: - 'lib/daru/dataframe.rb' - 'lib/daru/vector.rb' -# Offense count: 41 +# Offense count: 40 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes. Metrics/AbcSize: - Max: 35 + Max: 34 # Offense count: 3 # Configuration parameters: CountComments, CountAsOne. Metrics/ClassLength: - Max: 190 + Max: 189 # Offense count: 6 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods. Metrics/CyclomaticComplexity: Max: 9 -# Offense count: 62 +# Offense count: 61 # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods. Metrics/MethodLength: Max: 15 @@ -251,13 +101,7 @@ Metrics/ModuleLength: Metrics/PerceivedComplexity: Max: 10 -# Offense count: 21 -# This cop supports safe autocorrection (--autocorrect). 
-Naming/BinaryOperatorParameterName: - Exclude: - - 'lib/daru/date_time/offsets.rb' - -# Offense count: 75 +# Offense count: 72 # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames. # AllowedNames: at, by, db, id, in, io, ip, of, on, os, pp, to Naming/MethodParameterName: @@ -288,15 +132,6 @@ Naming/PredicateName: - 'lib/daru/dataframe.rb' - 'lib/daru/vector.rb' -# Offense count: 25 -# This cop supports safe autocorrection (--autocorrect). -Performance/BlockGivenWithExplicitBlock: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - # Offense count: 4 # Configuration parameters: MinSize. Performance/CollectionLiteralInLoop: @@ -305,19 +140,6 @@ Performance/CollectionLiteralInLoop: - 'lib/daru/index/multi_index.rb' - 'lib/daru/iruby/helpers.rb' -# Offense count: 1 -# This cop supports unsafe autocorrection (--autocorrect-all). -Performance/Count: - Exclude: - - 'lib/daru/maths/statistics/vector.rb' - -# Offense count: 2 -# This cop supports unsafe autocorrection (--autocorrect-all). -Performance/MapCompact: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/dataframe.rb' - # Offense count: 4 Performance/MethodObjectAsBlock: Exclude: @@ -326,34 +148,6 @@ Performance/MethodObjectAsBlock: - 'lib/daru/index/index.rb' - 'lib/daru/index/multi_index.rb' -# Offense count: 1 -# This cop supports unsafe autocorrection (--autocorrect-all). -Performance/RedundantEqualityComparisonBlock: - Exclude: - - 'lib/daru/helpers/array.rb' - -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Performance/RegexpMatch: - Exclude: - - 'lib/daru/dataframe.rb' - -# Offense count: 1 -# This cop supports unsafe autocorrection (--autocorrect-all). -Performance/StringInclude: - Exclude: - - 'lib/daru/vector.rb' - -# Offense count: 6 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: OnlySumOrWithInitialValue. -Performance/Sum: - Exclude: - - 'lib/daru/accessors/array_wrapper.rb' - - 'lib/daru/category.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/vector.rb' - # Offense count: 5 Security/MarshalLoad: Exclude: @@ -368,124 +162,22 @@ Security/Open: Exclude: - 'lib/daru/io/io.rb' -# Offense count: 2 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle. -# SupportedStyles: separated, grouped -Style/AccessorGrouping: - Exclude: - - 'lib/daru/index/multi_index.rb' - -# Offense count: 47 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle. -# SupportedStyles: prefer_alias, prefer_alias_method -Style/Alias: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/core/group_by.rb' - - 'lib/daru/core/query.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/maths/statistics/dataframe.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/monkeys.rb' - - 'lib/daru/vector.rb' - -# Offense count: 8 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AllowOnlyRestArgument. -Style/ArgumentsForwarding: - Exclude: - - 'lib/daru/accessors/array_wrapper.rb' - - 'lib/daru/category.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/vector.rb' - -# Offense count: 11 -# This cop supports safe autocorrection (--autocorrect). 
-# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, IgnoredMethods, AllowBracesOnProceduralOneLiners, BracesRequiredMethods. -# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces -# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object -# FunctionalMethods: let, let!, subject, watch -# AllowedMethods: lambda, proc, it -Style/BlockDelimiters: - Exclude: - - 'lib/daru/core/merge.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/iruby/helpers.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 2 -# This cop supports unsafe autocorrection (--autocorrect-all). -Style/CaseLikeIf: - Exclude: - - 'lib/daru/vector.rb' - # Offense count: 2 Style/ClassVars: Exclude: - 'lib/daru.rb' -# Offense count: 3 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: Keywords, RequireColon. -# Keywords: TODO, FIXME, OPTIMIZE, HACK, REVIEW, NOTE -Style/CommentAnnotation: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/vector.rb' - -# Offense count: 1 -# This cop supports unsafe autocorrection (--autocorrect-all). -Style/CommentedKeyword: - Exclude: - - 'lib/daru/core/merge.rb' - # Offense count: 44 # Configuration parameters: AllowedConstants. Style/Documentation: Enabled: false -# Offense count: 9 -# This cop supports safe autocorrection (--autocorrect). -Style/EmptyCaseCondition: - Exclude: - - 'lib/daru/core/merge.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/vector.rb' - # Offense count: 5 # This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle, AllowComments. -# SupportedStyles: empty, nil, both -Style/EmptyElse: - Exclude: - - 'lib/daru/date_time/index.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 13 -# This cop supports safe autocorrection (--autocorrect). -Style/ExpandPathArguments: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/vector.rb' - -# Offense count: 7 -# This cop supports safe autocorrection (--autocorrect). # Configuration parameters: EnforcedStyle. # SupportedStyles: format, sprintf, percent Style/FormatString: Exclude: - - 'lib/daru/helpers/array.rb' - 'lib/daru/vector.rb' # Offense count: 2 @@ -495,108 +187,25 @@ Style/FormatString: Style/FormatStringToken: EnforcedStyle: unannotated -# Offense count: 34 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: EnforcedStyle. -# SupportedStyles: always, always_true, never -Style/FrozenStringLiteralComment: - Enabled: false - -# Offense count: 2 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AllowSplatArgument. -Style/HashConversion: - Exclude: - - 'lib/daru/vector.rb' - # Offense count: 1 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: AllowedReceivers. -Style/HashEachMethods: +# Configuration parameters: MinBodyLength, AllowConsecutiveConditionals. +Style/GuardClause: Exclude: - - 'lib/daru/core/group_by.rb' + - 'lib/daru/core/merge.rb' # Offense count: 7 # This cop supports unsafe autocorrection (--autocorrect-all). 
-Style/HashTransformValues: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/core/group_by.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/maths/statistics/vector.rb' - - 'lib/daru/vector.rb' - -# Offense count: 5 -# This cop supports safe autocorrection (--autocorrect). -Style/IfUnlessModifier: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/extensions/which_dsl.rb' - - 'lib/daru/vector.rb' - -# Offense count: 3 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: AllowedMethods. -# AllowedMethods: nonzero? -Style/IfWithBooleanLiteralBranches: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/vector.rb' - -# Offense count: 32 -# This cop supports unsafe autocorrection (--autocorrect-all). Style/MapToHash: Exclude: - 'lib/daru/category.rb' - 'lib/daru/core/group_by.rb' - - 'lib/daru/core/merge.rb' - 'lib/daru/dataframe.rb' - - 'lib/daru/helpers/array.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/maths/arithmetic/dataframe.rb' - - 'lib/daru/vector.rb' - -# Offense count: 461 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle. -# SupportedStyles: require_parentheses, require_no_parentheses, require_no_parentheses_except_multiline -Style/MethodDefParentheses: - Enabled: false # Offense count: 1 Style/MultilineBlockChain: Exclude: - 'lib/daru/formatters/table.rb' -# Offense count: 5 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: EnforcedStyle. -# SupportedStyles: literals, strict -Style/MutableConstant: - Exclude: - - 'lib/daru/date_time/index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/vector.rb' - -# Offense count: 2 -# This cop supports safe autocorrection (--autocorrect). -Style/NegatedIfElseCondition: - Exclude: - - 'lib/daru/maths/statistics/vector.rb' - -# Offense count: 4 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns, IgnoredMethods. -# SupportedStyles: predicate, comparison -Style/NumericPredicate: - Exclude: - - 'spec/**/*' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/io/io.rb' - # Offense count: 9 # Configuration parameters: AllowedMethods. # AllowedMethods: respond_to_missing? @@ -607,64 +216,6 @@ Style/OptionalBooleanParameter: - 'lib/daru/vector.rb' # Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Style/OrAssignment: - Exclude: - - 'lib/daru/dataframe.rb' - -# Offense count: 4 -# This cop supports safe autocorrection (--autocorrect). -Style/ParallelAssignment: - Exclude: - - 'lib/daru/core/group_by.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/maths/statistics/dataframe.rb' - - 'lib/daru/vector.rb' - -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Style/PerlBackrefs: - Exclude: - - 'lib/daru/vector.rb' - -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Style/RedundantCondition: - Exclude: - - 'lib/daru/dataframe.rb' - -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Style/RedundantConditional: - Exclude: - - 'lib/daru/dataframe.rb' - -# Offense count: 29 -# This cop supports safe autocorrection (--autocorrect). 
-Style/RedundantFileExtensionInRequire: - Exclude: - - 'lib/daru.rb' - - 'lib/daru/dataframe.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/plotting/gruff.rb' - - 'lib/daru/vector.rb' - -# Offense count: 4 -# This cop supports safe autocorrection (--autocorrect). -Style/RedundantRegexpEscape: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/index.rb' - - 'lib/daru/vector.rb' - -# Offense count: 2 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AllowMultipleReturnValues. -Style/RedundantReturn: - Exclude: - - 'lib/daru/index/multi_index.rb' - -# Offense count: 2 # This cop supports unsafe autocorrection (--autocorrect-all). Style/RedundantSelfAssignment: Exclude: @@ -672,40 +223,7 @@ Style/RedundantSelfAssignment: # Offense count: 1 # This cop supports unsafe autocorrection (--autocorrect-all). -Style/RedundantSort: - Exclude: - - 'lib/daru/date_time/index.rb' - -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -Style/RescueModifier: - Exclude: - - 'lib/daru/dataframe.rb' - -# Offense count: 4 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: ConvertCodeThatCanStartToReturnNil, AllowedMethods, MaxChainLength. -# AllowedMethods: present?, blank?, presence, try, try! -Style/SafeNavigation: - Exclude: - - 'lib/daru.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/maths/arithmetic/vector.rb' - -# Offense count: 4 -# This cop supports unsafe autocorrection (--autocorrect-all). -Style/SlicingWithRange: - Exclude: - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/iruby/helpers.rb' - -# Offense count: 7 -# This cop supports unsafe autocorrection (--autocorrect-all). # Configuration parameters: Mode. Style/StringConcatenation: Exclude: - 'lib/daru/dataframe.rb' - - 'lib/daru/date_time/offsets.rb' - - 'lib/daru/io/io.rb' - - 'lib/daru/vector.rb' diff --git a/lib/daru.rb b/lib/daru.rb index 2907fb1fe..52331abf0 100644 --- a/lib/daru.rb +++ b/lib/daru.rb @@ -65,7 +65,7 @@ def create_has_library(library) end end - def plotting_library= lib + def plotting_library=(lib) case lib when :gruff @plotting_library = lib @@ -74,8 +74,8 @@ def plotting_library= lib end end - def error msg - error_stream.puts msg if error_stream + def error(msg) + error_stream&.puts msg end end @@ -88,26 +88,26 @@ def error msg require 'erb' require 'date' -require 'daru/version.rb' +require 'daru/version' require 'open-uri' -require 'daru/index/index.rb' -require 'daru/index/multi_index.rb' -require 'daru/index/categorical_index.rb' +require 'daru/index/index' +require 'daru/index/multi_index' +require 'daru/index/categorical_index' -require 'daru/helpers/array.rb' -require 'daru/configuration.rb' -require 'daru/vector.rb' -require 'daru/dataframe.rb' -require 'daru/monkeys.rb' +require 'daru/helpers/array' +require 'daru/configuration' +require 'daru/vector' +require 'daru/dataframe' +require 'daru/monkeys' require 'daru/formatters/table' require 'daru/iruby/helpers' -require 'daru/exceptions.rb' +require 'daru/exceptions' -require 'daru/core/group_by.rb' -require 'daru/core/query.rb' -require 'daru/core/merge.rb' +require 'daru/core/group_by' +require 'daru/core/query' +require 'daru/core/merge' -require 'daru/date_time/offsets.rb' -require 'daru/date_time/index.rb' +require 'daru/date_time/offsets' +require 'daru/date_time/index' diff --git a/lib/daru/accessors/array_wrapper.rb b/lib/daru/accessors/array_wrapper.rb index a8b52037b..f49abcd82 100644 --- 
a/lib/daru/accessors/array_wrapper.rb +++ b/lib/daru/accessors/array_wrapper.rb @@ -20,36 +20,36 @@ def map!(&block) attr_accessor :size attr_reader :data - def initialize vector, context + def initialize(vector, context) @data = vector.to_a @context = context set_size end - def [] *index + def [](*index) @data[*index] end - def []= index, value + def []=(index, value) @data[index] = value set_size end - def == other + def ==(other) @data == other end - def delete_at index + def delete_at(index) @data.delete_at index set_size end - def index *args, &block - @data.index(*args, &block) + def index(...) + @data.index(...) end - def << element + def <<(element) @data << element set_size end @@ -78,7 +78,8 @@ def compact def mean values_to_sum = compact return nil if values_to_sum.empty? - sum = values_to_sum.inject :+ + + sum = values_to_sum.sum sum.quo(values_to_sum.size).to_f end @@ -95,7 +96,7 @@ def min end def sum - compact.inject :+ + compact.sum end private diff --git a/lib/daru/accessors/dataframe_by_row.rb b/lib/daru/accessors/dataframe_by_row.rb index a49e9634f..8b8ea3f4b 100644 --- a/lib/daru/accessors/dataframe_by_row.rb +++ b/lib/daru/accessors/dataframe_by_row.rb @@ -1,7 +1,7 @@ module Daru module Accessors class DataFrameByRow - def initialize data_frame + def initialize(data_frame) @data_frame = data_frame end @@ -13,11 +13,11 @@ def []=(*names, vector) @data_frame[*names, :row] = vector end - def at *positions + def at(*positions) @data_frame.row_at(*positions) end - def set_at positions, vector + def set_at(positions, vector) @data_frame.set_row_at(positions, vector) end end diff --git a/lib/daru/category.rb b/lib/daru/category.rb index 317f42205..dfd03df1e 100644 --- a/lib/daru/category.rb +++ b/lib/daru/category.rb @@ -25,7 +25,7 @@ module Category # rubocop:disable Metrics/ModuleLength # # 2 a # # 3 1 # # 4 c - def initialize_category data, opts={} + def initialize_category(data, opts = {}) @type = :category initialize_core_attributes data @@ -54,12 +54,12 @@ def initialize_category data, opts={} self end - def name= new_name + def name=(new_name) @name = new_name self end - def plotting_library= lib + def plotting_library=(lib) case lib when :gruff @plotting_library = lib @@ -69,24 +69,25 @@ def plotting_library= lib ) end else - raise ArgumentError, "Plotting library #{lib} not supported. "\ - 'Supported library is :gruff' + raise ArgumentError, "Plotting library #{lib} not supported. " \ + 'Supported library is :gruff' end end # this method is overwritten: see Daru::Category#plotting_library= - def plot(*args, **options, &b) + def plot(...) init_plotting_library - plot(*args, **options, &b) + plot(...) end - alias_method :rename, :name= + alias rename name= # Returns an enumerator that enumerates on categorical data # @return [Enumerator] an enumerator that enumerates over data stored in vector def each return enum_for(:each) unless block_given? + @array.each { |pos| yield cat_from_int pos } self end @@ -114,11 +115,11 @@ def to_a # # 4 c def dup Daru::Vector.new to_a.dup, - name: @name, - index: @index.dup, - type: :category, - categories: categories, - ordered: ordered? + name: @name, + index: @index.dup, + type: :category, + categories: categories, + ordered: ordered? end # Associates a category to the vector. 
@@ -142,8 +143,8 @@ def add_category(*new_categories) # # => 2 # dv.count # # => 5 - def count category=UNDEFINED - return @cat_hash.values.map(&:size).inject(&:+) if category == UNDEFINED # count all + def count(category = UNDEFINED) + return @cat_hash.values.sum(&:size) if category == UNDEFINED # count all raise ArgumentError, "Invalid category #{category}" unless categories.include?(category) @@ -161,7 +162,7 @@ def count category=UNDEFINED # # b 0 # # c 1 # # 1 2 - def frequencies type=:count + def frequencies(type = :count) counts = @cat_hash.values.map(&:size) values = case type @@ -172,8 +173,8 @@ def frequencies type=:count when :percentage counts.map { |c| c / size.to_f * 100 } else - raise ArgumentError, 'Type should be either :count, :fraction or'\ - " :percentage. #{type} not supported." + raise ArgumentError, 'Type should be either :count, :fraction or' \ + " :percentage. #{type} not supported." end Daru::Vector.new values, index: categories, name: name end @@ -195,16 +196,16 @@ def frequencies type=:count # # => # # # a a # # b 1 - def [] *indexes + def [](*indexes) positions = @index.pos(*indexes) return category_from_position(positions) if positions.is_a? Integer Daru::Vector.new positions.map { |pos| category_from_position pos }, - index: @index.subset(*indexes), - name: @name, - type: :category, - ordered: @ordered, - categories: categories + index: @index.subset(*indexes), + name: @name, + type: :category, + ordered: @ordered, + categories: categories end # Returns vector for positions specified. @@ -218,7 +219,7 @@ def [] *indexes # # 1 1 # # 2 a # # 3 1 - def at *positions + def at(*positions) original_positions = positions positions = coerce_positions(*positions) validate_positions(*positions) @@ -226,11 +227,11 @@ def at *positions return category_from_position(positions) if positions.is_a? Integer Daru::Vector.new positions.map { |pos| category_from_position(pos) }, - index: @index.at(*original_positions), - name: @name, - type: :category, - ordered: @ordered, - categories: categories + index: @index.at(*original_positions), + name: @name, + type: :category, + ordered: @ordered, + categories: categories end # Modifies values at specified indexes/positions. @@ -249,7 +250,7 @@ def at *positions # # 2 a # # 3 1 # # 4 c - def []= *indexes, val + def []=(*indexes, val) positions = @index.pos(*indexes) if positions.is_a? Numeric @@ -274,7 +275,7 @@ def []= *indexes, val # # 2 a # # 3 1 # # 4 c - def set_at positions, val + def set_at(positions, val) validate_positions(*positions) positions.map { |pos| modify_category_at pos, val } self @@ -307,7 +308,7 @@ def ordered? # dv.ordered = true # dv.ordered? # # => true - def ordered= bool + def ordered=(bool) @ordered = bool end @@ -323,7 +324,7 @@ def categories @cat_hash.keys end - alias_method :order, :categories + alias order categories # Sets order of the categories. # @note If extra categories are specified, they get added too. @@ -333,7 +334,7 @@ def categories # dv.categories = [:a, :b, :c, 1] # dv.categories # # => [:a, :b, :c, 1] - def categories= cat_with_order + def categories=(cat_with_order) validate_categories(cat_with_order) add_extra_categories(cat_with_order - categories) order_with cat_with_order @@ -355,7 +356,7 @@ def categories= cat_with_order # # 2 b # # 3 1 # # 4 c - def rename_categories old_to_new + def rename_categories(old_to_new) old_categories = categories data = to_a.map do |cat| old_to_new.include?(cat) ? old_to_new[cat] : cat @@ -448,8 +449,8 @@ def sort! 
@cat_hash = categories.inject([{}, 0]) do |acc, cat| hash, count = acc cat_count = @cat_hash[cat].size - cat_count.times { |i| @array[count+i] = int_from_cat(cat) } - hash[cat] = (count...(cat_count+count)).to_a + cat_count.times { |i| @array[count + i] = int_from_cat(cat) } + hash[cat] = (count...(cat_count + count)).to_a [hash, count + cat_count] end.first @@ -467,9 +468,10 @@ def sort # dv.coding_scheme = :deviation # dv.coding_scheme # # => :deviation - def coding_scheme= scheme + def coding_scheme=(scheme) raise ArgumentError, "Unknown or unsupported coding scheme #{scheme}." unless CODING_SCHEMES.include? scheme + @coding_scheme = scheme end @@ -491,7 +493,7 @@ def coding_scheme= scheme # # 2 0 0 # # 3 1 0 # # 4 0 1 - def contrast_code opts={} + def contrast_code(opts = {}) if opts[:user_defined] user_defined_coding(opts[:user_defined]) else @@ -509,7 +511,7 @@ def contrast_code opts={} # index: 1..5 # dv == other # # => false - def == other + def ==(other) size == other.size && to_a == other.to_a && index == other.index @@ -540,9 +542,10 @@ def to_ints # # a 1 # # b 2 # # c 3 - def reorder! order + def reorder!(order) raise ArgumentError, 'Invalid order specified' unless order.sort == size.times.to_a + # TODO: Room for optimization old_data = to_a new_data = order.map { |i| old_data[i] } @@ -562,7 +565,7 @@ def reorder! order # # a 1 # # b 2 # # c 3 - def reindex! idx + def reindex!(idx) idx = Daru::Index.new idx unless idx.is_a? Daru::Index raise ArgumentError, 'Invalid index specified' unless idx.to_a.sort == index.to_a.sort @@ -592,8 +595,8 @@ def reindex! idx end end end - alias :gt :mt - alias :gteq :mteq + alias gt mt + alias gteq mteq # For querying the data # @param bool_array [object] arel like query syntax @@ -607,7 +610,7 @@ def reindex! idx # # => # # # 1 II # # 5 II - def where bool_array + def where(bool_array) Daru::Core::Query.vector_where self, bool_array end @@ -665,7 +668,7 @@ def to_non_category # # a 1 # # b 2 # # c 3 - def index= idx + def index=(idx) @index = coerce_index idx end @@ -708,9 +711,8 @@ def reject_values(*values) # dv.count_values nil # # => 2 def count_values(*values) - values.map { |v| @cat_hash[v].size if @cat_hash.include? v } - .compact - .inject(0, :+) + values.filter_map { |v| @cat_hash[v].size if @cat_hash.include? v } + .sum end # Return indexes of values specified @@ -742,9 +744,9 @@ def indexes(*values) # # 1 2 # # 2 nil # # 3 nil - def replace_values old_values, new_value + def replace_values(old_values, new_value) old_values = [old_values] unless old_values.is_a? Array - rename_hash = old_values.map { |v| [v, new_value] }.to_h + rename_hash = old_values.to_h { |v| [v, new_value] } rename_categories rename_hash end @@ -760,19 +762,19 @@ def init_plotting_library self.plotting_library = Daru.plotting_library end - def validate_categories input_categories + def validate_categories(input_categories) raise ArgumentError, 'Input categories and speculated categories mismatch' unless (categories - input_categories).empty? 
end - def add_extra_categories extra_categories + def add_extra_categories(extra_categories) extra_categories.each { |cat| @cat_hash[cat] = [] } end - def initialize_core_attributes data + def initialize_core_attributes(data) # Create a hash to map each category to positional indexes categories = data.each_with_index.group_by(&:first) - @cat_hash = categories.map { |cat, group| [cat, group.map(&:last)] }.to_h + @cat_hash = categories.transform_values { |group| group.map(&:last) } # Map each category to a unique integer for effective storage in @array map_cat_int = categories.keys.each_with_index.to_h @@ -782,19 +784,19 @@ def initialize_core_attributes data @array = map_cat_int.values_at(*data) end - def category_from_position position + def category_from_position(position) cat_from_int @array[position] end - def assert_ordered operation + def assert_ordered(operation) # TODO: Change ArgumentError to something more expressive return if ordered? - raise ArgumentError, "Can not apply #{operation} when vector is unordered. "\ - 'To make the categorical data ordered, use #ordered = true'\ + raise ArgumentError, "Can not apply #{operation} when vector is unordered. " \ + 'To make the categorical data ordered, use #ordered = true' \ end - def dummy_coding full + def dummy_coding(full) categories = @cat_hash.keys categories.delete(base_category) unless full @@ -803,17 +805,17 @@ def dummy_coding full end Daru::DataFrame.new df, - index: @index, - order: create_names(categories) + index: @index, + order: create_names(categories) end - def dummy_code positions + def dummy_code(positions) code = Array.new(size, 0) positions.each { |pos| code[pos] = 1 } code end - def simple_coding full + def simple_coding(full) categories = @cat_hash.keys categories.delete(base_category) unless full @@ -822,14 +824,14 @@ def simple_coding full end Daru::DataFrame.new df, - index: @index, - order: create_names(categories) + index: @index, + order: create_names(categories) end - def simple_code positions + def simple_code(positions) n = @cat_hash.keys.size.to_f - code = Array.new(size, -1/n) - positions.each { |pos| code[pos] = (n-1)/n } + code = Array.new(size, -1 / n) + positions.each { |pos| code[pos] = (n - 1) / n } code end @@ -841,18 +843,18 @@ def helmert_coding(*) end Daru::DataFrame.new df, - index: @index, - order: create_names(categories) + index: @index, + order: create_names(categories) end - def helmert_code index + def helmert_code(index) n = (categories.size - index).to_f @array.map do |cat_index| if cat_index == index - (n-1)/n + (n - 1) / n elsif cat_index > index - -1/n + -1 / n else 0 end @@ -867,11 +869,11 @@ def deviation_coding(*) end Daru::DataFrame.new df, - index: @index, - order: create_names(categories) + index: @index, + order: create_names(categories) end - def deviation_code index + def deviation_code(index) last = categories.size - 1 @array.map do |cat_index| case cat_index @@ -882,19 +884,19 @@ def deviation_code index end end - def user_defined_coding df + def user_defined_coding(df) Daru::DataFrame.rows (Array.new(size) { |pos| df.row[at(pos)].to_a }), - index: @index, - order: df.vectors.to_a + index: @index, + order: df.vectors.to_a end - def create_names categories + def create_names(categories) categories.map do |cat| name.is_a?(Symbol) ? 
"#{name}_#{cat}".to_sym : "#{name}_#{cat}" end end - def coerce_index index + def coerce_index(index) index = case index when Daru::MultiIndex, Daru::CategoricalIndex, Daru::Index @@ -912,18 +914,18 @@ def coerce_index index index end - def validate_index index + def validate_index(index) # Change to SizeError return unless size != index.size - raise ArgumentError, "Size of index (#{index.size}) does not matches"\ - "size of vector (#{size})" + raise ArgumentError, "Size of index (#{index.size}) does not matches" \ + "size of vector (#{size})" end - def modify_category_at pos, category + def modify_category_at(pos, category) unless categories.include? category - raise ArgumentError, "Invalid category #{category}, "\ - 'to add a new category use #add_category' + raise ArgumentError, "Invalid category #{category}, " \ + 'to add a new category use #add_category' end old_category = category_from_position pos @array[pos] = int_from_cat category @@ -931,10 +933,8 @@ def modify_category_at pos, category @cat_hash[category] << pos end - def order_with new - if new.to_set != categories.to_set - raise ArgumentError, 'The contents of new and old order must be the same.' - end + def order_with(new) + raise ArgumentError, 'The contents of new and old order must be the same.' if new.to_set != categories.to_set @cat_hash = new.map { |cat| [cat, @cat_hash[cat]] }.to_h @@ -945,11 +945,11 @@ def order_with new end end - def cat_from_int int + def cat_from_int(int) @cat_hash.keys[int] end - def int_from_cat cat + def int_from_cat(cat) @cat_hash.keys.index cat end end diff --git a/lib/daru/core/group_by.rb b/lib/daru/core/group_by.rb index 78ef9cbf0..aa4cb66bd 100644 --- a/lib/daru/core/group_by.rb +++ b/lib/daru/core/group_by.rb @@ -14,7 +14,7 @@ def group_by_index_to_positions(indexes_with_positions, sort: false) if sort # TODO: maybe add a more "stable" sorting option? sorted_keys = index_to_positions.keys.sort(&Daru::Core::GroupBy::TUPLE_SORTER) - index_to_positions = sorted_keys.map { |k| [k, index_to_positions[k]] }.to_h + index_to_positions = sorted_keys.to_h { |k| [k, index_to_positions[k]] } end index_to_positions @@ -23,7 +23,7 @@ def group_by_index_to_positions(indexes_with_positions, sort: false) deprecate :get_positions_group_map_on, :group_by_index_to_positions, 2019, 10 # @private - def get_positions_group_for_aggregation(multi_index, level=-1) + def get_positions_group_for_aggregation(multi_index, level = -1) raise unless multi_index.is_a?(Daru::MultiIndex) new_index = multi_index.dup @@ -41,7 +41,7 @@ def get_positions_group_map_for_df(df, group_by_keys, sort: true) # @private def group_map_from_positions_to_indexes(positions_group_map, index) - positions_group_map.map { |k, positions| [k, positions.map { |pos| index.at(pos) }] }.to_h + positions_group_map.transform_values { |positions| positions.map { |pos| index.at(pos) } } end # @private @@ -68,20 +68,20 @@ def df_from_group_map(df, group_map, remaining_vectors, from_position: true) def groups @groups ||= GroupBy.group_map_from_positions_to_indexes(@groups_by_pos, @context.index) end - alias :groups_by_idx :groups + alias groups_by_idx groups # lazy accessor/attr_reader for the attribute df def df @df ||= GroupBy.df_from_group_map(@context, @groups_by_pos, @non_group_vectors) end - alias :grouped_df :df + alias grouped_df df # Iterate over each group created by group_by. A DataFrame is yielded in # block. def each_group return to_enum(:each_group) unless block_given? 
- groups.keys.each do |k| + groups.each_key do |k| yield get_group(k) end end @@ -93,10 +93,11 @@ def each_group left = left.compact right = right.compact return left <=> right || 0 if left.length == right.length + left.length <=> right.length end - def initialize context, names + def initialize(context, names) @group_vectors = names @non_group_vectors = context.vectors.to_a - names @@ -149,7 +150,7 @@ def last # # 0 foo one 1 11 # # 7 foo three 8 88 # # 2 foo two 3 33 - def head quantity=5 + def head(quantity = 5) select_groups_from :first, quantity end @@ -172,7 +173,7 @@ def head quantity=5 # # 6 foo one 3 77 # # 7 foo three 8 88 # # 4 foo two 3 55 - def tail quantity=5 + def tail(quantity = 5) select_groups_from :last, quantity end @@ -227,7 +228,7 @@ def sum # # ["foo", "two"] 2 2 def count width = @non_group_vectors.size - Daru::DataFrame.new([size]*width, order: @non_group_vectors) + Daru::DataFrame.new([size] * width, order: @non_group_vectors) end # Calculate sample standard deviation of numeric vector groups, excluding @@ -262,7 +263,7 @@ def min # ## # # a b c d # # 5 bar two 6 66 - def get_group group + def get_group(group) indexes = groups_by_idx[group] elements = @context.each_vector.map(&:to_a) transpose = elements.transpose @@ -289,7 +290,7 @@ def get_group group # # nil # # a ACE # # b BDF - def reduce(init=nil) + def reduce(init = nil) result_hash = groups_by_idx.each_with_object({}) do |(group, indices), h| group_indices = indices.map { |v| @context.index.to_a[v] } @@ -346,7 +347,7 @@ def inspect # Krishna Delhi,Raipur,Banglore # Ram Hyderabad,Mumbai # - def aggregate(options={}) + def aggregate(options = {}) new_index = get_grouped_index @context.aggregate(options) { [@groups_by_pos.values, new_index] } @@ -354,9 +355,10 @@ def aggregate(options={}) private - def select_groups_from method, quantity - selection = @context - rows, indexes = [], [] + def select_groups_from(method, quantity) + selection = @context + rows = [] + indexes = [] groups_by_idx.each_value do |index| index.send(method, quantity).each do |idx| @@ -373,14 +375,15 @@ def select_numeric_non_group_vectors @non_group_vectors.select { |ngvec| @context[ngvec].type == :numeric } end - def apply_method method_type, method + def apply_method(method_type, method) raise 'To implement' if method_type != :numeric + aggregation_options = select_numeric_non_group_vectors.map { |k| [k, method] }.to_h aggregate(aggregation_options) end - def get_grouped_index(index_tuples=nil) + def get_grouped_index(index_tuples = nil) index_tuples = @groups_by_pos.keys if index_tuples.nil? if multi_indexed_grouping? @@ -392,6 +395,7 @@ def get_grouped_index(index_tuples=nil) def multi_indexed_grouping? 
return false unless @groups_by_pos.keys[0] + @groups_by_pos.keys[0].size > 1 end end diff --git a/lib/daru/core/merge.rb b/lib/daru/core/merge.rb index e5686d991..5fd1d6f94 100644 --- a/lib/daru/core/merge.rb +++ b/lib/daru/core/merge.rb @@ -17,7 +17,8 @@ def <=>(other) end end - def initialize left_df, right_df, opts={} # quick-fix for issue #171 + # quick-fix for issue #171 + def initialize(left_df, right_df, opts = {}) init_opts(opts) validate_on!(left_df, right_df) key_sanitizer = ->(h) { sanitize_merge_keys(h.values_at(*on)) } @@ -49,15 +50,15 @@ def join private attr_reader :on, :indicator, - :left, :left_key_values, :keep_left, :left_keys, - :right, :right_key_values, :keep_right, :right_keys + :left, :left_key_values, :keep_left, :left_keys, + :right, :right_key_values, :keep_right, :right_keys attr_accessor :merge_key LEFT_RIGHT_COMBINATIONS = { # left right inner: [false, false], - left: [true, false], + left: [true, false], right: [false, true], outer: [true, true] }.freeze @@ -81,7 +82,7 @@ def sanitize_merge_keys(merge_keys) merge_keys.map { |v| v.nil? ? NilSorter.new : v } end - def df_to_a df + def df_to_a(df) # FIXME: much faster than "native" DataFrame#to_a. Should not be h = df.to_h keys = h.keys @@ -100,25 +101,24 @@ def merge_keys(df1, df2, on) ] end - def guard_keys keys, duplicates, num - keys.map { |v| [v, guard_duplicate(v, duplicates, num)] }.to_h + def guard_keys(keys, duplicates, num) + keys.to_h { |v| [v, guard_duplicate(v, duplicates, num)] } end - def guard_duplicate val, duplicates, num + def guard_duplicate(val, duplicates, num) duplicates.include?(val) ? :"#{val}_#{num}" : val end def row(lkey, rkey) - case - when !lkey && !rkey + if !lkey && !rkey # :nocov: # It's just an impossibility handler, can't be covered :) raise 'Unexpected condition met during merge' # :nocov: - when lkey == rkey + elsif lkey == rkey self.merge_key = lkey add_indicator(merge_matching_rows, :both) - when !rkey || lt(lkey, rkey) + elsif !rkey || lt(lkey, rkey) add_indicator(left_row_missing_right, :left_only) else # !lkey || lt(rkey, lkey) add_indicator(right_row_missing_left, :right_only) @@ -127,6 +127,7 @@ def row(lkey, rkey) def add_indicator(row, indicator_value) return row unless indicator + row[indicator] = indicator_value row end @@ -184,19 +185,19 @@ def lt(k1, k2) (k1 <=> k2) == -1 end - def merge_rows lrow, rrow + def merge_rows(lrow, rrow) left_keys - .map { |from, to| [to, lrow[from]] }.to_h - .merge(on.map { |col| [col, lrow[col]] }.to_h) - .merge(indicator ? {indicator => nil} : {}) - .merge(right_keys.map { |from, to| [to, rrow[from]] }.to_h) + .to_h { |from, to| [to, lrow[from]] } + .merge(on.to_h { |col| [col, lrow[col]] }) + .merge(indicator ? { indicator => nil } : {}) + .merge(right_keys.to_h { |from, to| [to, rrow[from]] }) end - def expand_row row, renamings + def expand_row(row, renamings) renamings - .map { |from, to| [to, row[from]] }.to_h - .merge(on.map { |col| [col, row[col]] }.to_h) - .merge(indicator ? {indicator => nil} : {}) + .to_h { |from, to| [to, row[from]] } + .merge(on.to_h { |col| [col, row[col]] }) + .merge(indicator ? 
{ indicator => nil } : {}) end def first_right_key @@ -242,24 +243,25 @@ def end_cartesian_product def validate_on!(left_df, right_df) @on.each do |on| - left_df.has_vector?(on) && right_df.has_vector?(on) or + (left_df.has_vector?(on) && right_df.has_vector?(on)) or raise ArgumentError, "Both dataframes expected to have #{on.inspect} field" end end def safe_compare(left_array, right_array) - left_array.zip(right_array).map { |l, r| + left_array.zip(right_array).map do |l, r| next 0 if l.nil? && r.nil? next 1 if r.nil? next -1 if l.nil? + l <=> r - }.reject(&:zero?).first || 0 + end.reject(&:zero?).first || 0 end end module Merge class << self - def join df1, df2, opts={} + def join(df1, df2, opts = {}) MergeFrame.new(df1, df2, opts).join end end diff --git a/lib/daru/core/query.rb b/lib/daru/core/query.rb index 1be6f5abf..a0403d467 100644 --- a/lib/daru/core/query.rb +++ b/lib/daru/core/query.rb @@ -4,27 +4,27 @@ module Query class BoolArray attr_reader :barry - def initialize barry + def initialize(barry) @barry = barry end - def & other + def &(other) BoolArray.new(@barry.zip(other.barry).map { |b, o| b && o }) end - alias :and :& + alias and & - def | other + def |(other) BoolArray.new(@barry.zip(other.barry).map { |b, o| b || o }) end - alias :or :| + alias or | def ! BoolArray.new(@barry.map(&:!)) end - def == other + def ==(other) @barry == other.barry end @@ -38,15 +38,15 @@ def inspect end class << self - def apply_scalar_operator operator, data, other + def apply_scalar_operator(operator, data, other) BoolArray.new(data.map { |d| !!d.send(operator, other) if d.respond_to?(operator) }) end - def apply_vector_operator operator, vector, other + def apply_vector_operator(operator, vector, other) BoolArray.new(vector.zip(other).map { |d, o| !!d.send(operator, o) }) end - def df_where data_frame, bool_array + def df_where(data_frame, bool_array) vecs = data_frame.map do |vector| vector.where(bool_array) end @@ -56,30 +56,30 @@ def df_where data_frame, bool_array ) end - def vector_where dv, bool_array + def vector_where(dv, bool_array) new_data, new_index = fetch_new_data_and_index dv, bool_array resultant_dv = Daru::Vector.new new_data, - index: dv.index.class.new(new_index), - dtype: dv.dtype, - type: dv.type, - name: dv.name + index: dv.index.class.new(new_index), + dtype: dv.dtype, + type: dv.type, + name: dv.name # Preserve categories order for category vector resultant_dv.categories = dv.categories if dv.category? resultant_dv end - def vector_apply_where dv, bool_array + def vector_apply_where(dv, bool_array) _data, new_index = fetch_new_data_and_index dv, bool_array all_index = dv.index all_data = all_index.map { |idx| new_index.include?(idx) ? yield(dv[idx]) : dv[idx] } resultant_dv = Daru::Vector.new all_data, - index: dv.index.class.new(all_index), - dtype: dv.dtype, - type: dv.type, - name: dv.name + index: dv.index.class.new(all_index), + dtype: dv.dtype, + type: dv.type, + name: dv.name # Preserve categories order for category vector resultant_dv.categories = dv.categories if dv.category? 
@@ -88,7 +88,7 @@ def vector_apply_where dv, bool_array private - def fetch_new_data_and_index dv, bool_array + def fetch_new_data_and_index(dv, bool_array) barry = bool_array.to_a positions = dv.size.times.select { |i| barry[i] } # FIXME: The below 4 lines should be replaced with values_at when the diff --git a/lib/daru/dataframe.rb b/lib/daru/dataframe.rb index d240f4f56..06206f17d 100644 --- a/lib/daru/dataframe.rb +++ b/lib/daru/dataframe.rb @@ -1,8 +1,8 @@ -require 'daru/accessors/dataframe_by_row.rb' -require 'daru/maths/arithmetic/dataframe.rb' -require 'daru/maths/statistics/dataframe.rb' -require 'daru/plotting/gruff.rb' -require 'daru/io/io.rb' +require 'daru/accessors/dataframe_by_row' +require 'daru/maths/arithmetic/dataframe' +require 'daru/maths/statistics/dataframe' +require 'daru/plotting/gruff' +require 'daru/io/io' module Daru class DataFrame # rubocop:disable Metrics/ClassLength @@ -44,7 +44,7 @@ class << self # :col_sep => ',', # :converters => :numeric # } - def from_csv path, opts={}, &block + def from_csv(path, opts = {}, &block) Daru::IO.from_csv path, opts, &block end @@ -57,7 +57,7 @@ def from_csv path, opts={}, &block # == Options # # *:worksheet_id - ID of the worksheet that is to be read. - def from_excel path, opts={}, &block + def from_excel(path, opts = {}, &block) Daru::IO.from_excel path, opts, &block end @@ -77,7 +77,7 @@ def from_excel path, opts={}, &block # # require 'dbi' # Daru::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test") - def from_sql dbh, query + def from_sql(dbh, query) Daru::IO.from_sql dbh, query end @@ -97,7 +97,7 @@ def from_sql dbh, query # # # You can load active posts into a dataframe by: # Daru::DataFrame.from_activerecord(Post.active, :title, :published_at) - def from_activerecord relation, *fields + def from_activerecord(relation, *fields) Daru::IO.from_activerecord relation, *fields end @@ -113,13 +113,13 @@ def from_activerecord relation, *fields # == Usage # # df = Daru::DataFrame.from_plaintext 'spec/fixtures/bank2.dat', [:v1,:v2,:v3,:v4,:v5,:v6] - def from_plaintext path, fields + def from_plaintext(path, fields) Daru::IO.from_plaintext path, fields end # Create DataFrame by specifying rows as an Array of Arrays or Array of # Daru::Vector objects. - def rows source, opts={} + def rows(source, opts = {}) raise SizeError, 'All vectors must have same length' \ unless source.all? { |v| v.size == source.first.size } @@ -153,18 +153,18 @@ def rows source, opts={} # b 1 0 # # Useful to process outputs from databases - def crosstab_by_assignation rows, columns, values + def crosstab_by_assignation(rows, columns, values) raise 'Three vectors should be equal size' if - rows.size != columns.size || rows.size!=values.size + rows.size != columns.size || rows.size != values.size - data = Hash.new { |h, col| + data = Hash.new do |h, col| h[col] = rows.factors.map { |r| [r, nil] }.to_h - } + end columns.zip(rows, values).each { |c, r, v| data[c][r] = v } # FIXME: in fact, WITHOUT this line you'll obtain more "right" # data: with vectors having "rows" as an index... 
- data = data.map { |c, r| [c, r.values] }.to_h + data = data.transform_values(&:values) data[:_id] = rows.factors DataFrame.new(data) @@ -172,7 +172,7 @@ def crosstab_by_assignation rows, columns, values private - def guess_order source + def guess_order(source) case source.first when Vector # assume that all are Vectors source.first.index.to_a @@ -181,7 +181,7 @@ def guess_order source end end - def from_vector_rows source, opts + def from_vector_rows(source, opts) index = source.map(&:name) .each_with_index.map { |n, i| n || i } index = ArrayHelper.recode_repeated(index) @@ -297,8 +297,9 @@ def from_vector_rows source, opts # # 1 4 14 44 # # 2 5 15 55 - def initialize source={}, opts={} # rubocop:disable Metrics/MethodLength - vectors, index = opts[:order], opts[:index] # FIXME: just keyword arges after Ruby 2.1 + def initialize(source = {}, opts = {}) # rubocop:disable Metrics/MethodLength + vectors = opts[:order] + index = opts[:index] # FIXME: just keyword arges after Ruby 2.1 @data = [] @name = opts[:name] @@ -318,7 +319,7 @@ def initialize source={}, opts={} # rubocop:disable Metrics/MethodLength update end - def plotting_library= lib + def plotting_library=(lib) case lib when :gruff @plotting_library = lib @@ -328,16 +329,16 @@ def plotting_library= lib ) end else - raise ArgumentError, "Plotting library #{lib} not supported. "\ - 'Supported library is :gruff' + raise ArgumentError, "Plotting library #{lib} not supported. " \ + 'Supported library is :gruff' end end # this method is overwritten: see Daru::DataFrame#plotting_library= - def plot(*args, **options, &b) + def plot(...) init_plotting_library - plot(*args, **options, &b) + plot(...) end # Access row or vector. Specify name of row/vector followed by axis(:row, :vector). @@ -361,7 +362,7 @@ def [](*names) # # a b # # 1 2 b # # 2 3 c - def row_at *positions + def row_at(*positions) original_positions = positions positions = coerce_positions(*positions, nrows) validate_positions(*positions, nrows) @@ -390,7 +391,7 @@ def row_at *positions # # 0 x x # # 1 x x # # 2 3 c - def set_row_at positions, vector + def set_row_at(positions, vector) validate_positions(*positions, nrows) vector = if vector.is_a? Daru::Vector @@ -423,7 +424,7 @@ def set_row_at positions, vector # # 0 1 # # 1 2 # # 2 3 - def at *positions + def at(*positions) if AXES.include? positions.last axis = positions.pop return row_at(*positions) if axis == :row @@ -437,9 +438,9 @@ def at *positions @data[positions].dup else Daru::DataFrame.new positions.map { |pos| @data[pos].dup }, - index: @index, - order: @vectors.at(*original_positions), - name: @name + index: @index, + order: @vectors.at(*original_positions), + name: @name end end @@ -458,7 +459,7 @@ def at *positions # # 0 x a # # 1 y b # # 2 z c - def set_at positions, vector + def set_at(positions, vector) if positions.last == :row positions.pop return set_row_at(positions, vector) @@ -493,23 +494,24 @@ def []=(*args) dispatch_to_axis axis, :insert_or_modify, names, vector end - def add_row row, index=nil + def add_row(row, index = nil) self.row[*(index || @size)] = row end - def add_vector n, vector + def add_vector(n, vector) self[n] = vector end - def insert_vector n, name, source + def insert_vector(n, name, source) raise ArgumentError unless source.is_a? 
Array + vector = Daru::Vector.new(source, index: @index, name: @name) @data << vector @vectors = @vectors.add name ordr = @vectors.dup.to_a elmnt = ordr.pop - ordr = ordr.insert n, elmnt - self.order=ordr + ordr.insert n, elmnt + self.order = ordr end # Access a row or set/create a row. Refer #[] and #[]= docs for details. @@ -541,8 +543,8 @@ def get_sub_dataframe(keys, by_position: true) # # * +vectors_to_dup+ - An Array specifying the names of Vectors to # be duplicated. Will duplicate the entire DataFrame if not specified. - def dup vectors_to_dup=nil - vectors_to_dup = @vectors.to_a unless vectors_to_dup + def dup(vectors_to_dup = nil) + vectors_to_dup ||= @vectors.to_a src = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup } new_order = Daru::Index.new(vectors_to_dup) @@ -562,7 +564,7 @@ def clone_structure # # +vectors_to_clone+ - Names of vectors to clone. Optional. Will return # a view of the whole data frame otherwise. - def clone *vectors_to_clone + def clone(*vectors_to_clone) vectors_to_clone.flatten! if ArrayHelper.array_of?(vectors_to_clone, Array) vectors_to_clone = @vectors.to_a if vectors_to_clone.empty? @@ -582,7 +584,7 @@ def clone_only_valid # Creates a new duplicate dataframe containing only rows # without a single missing value. - def dup_only_valid vecs=nil + def dup_only_valid(vecs = nil) rows_with_nil = @data.map { |vec| vec.indexes(*Daru::MISSING_VALUES) } .inject(&:concat) .uniq @@ -642,7 +644,7 @@ def reject_values(*values) # # 16 NaN 3 5 # # 17 1 5 NaN # # 18 7 8 7 - def replace_values old_values, new_value + def replace_values(old_values, new_value) @data.each { |vec| vec.replace_values old_values, new_value } self end @@ -682,12 +684,12 @@ def replace_values old_values, new_value # 6 1 5 5 # 7 7 5 7 # - def rolling_fillna!(direction=:forward) + def rolling_fillna!(direction = :forward) @data.each { |vec| vec.rolling_fillna!(direction) } self end - def rolling_fillna(direction=:forward) + def rolling_fillna(direction = :forward) dup.rolling_fillna!(direction) end @@ -731,8 +733,8 @@ def uniq(*vtrs) end # Iterate over each index of the DataFrame. - def each_index &block - return to_enum(:each_index) unless block_given? + def each_index(&block) + return to_enum(:each_index) unless block @index.each(&block) @@ -741,14 +743,14 @@ def each_index &block # Iterate over each vector def each_vector(&block) - return to_enum(:each_vector) unless block_given? + return to_enum(:each_vector) unless block @data.each(&block) self end - alias_method :each_column, :each_vector + alias each_column each_vector # Iterate over each vector alongwith the name of the vector def each_vector_with_index @@ -761,7 +763,7 @@ def each_vector_with_index self end - alias_method :each_column_with_index, :each_vector_with_index + alias each_column_with_index each_vector_with_index # Iterate over each row def each_row @@ -797,7 +799,7 @@ def each_row_with_index # # * +axis+ - The axis to iterate over. Can be :vector (or :column) # or :row. Default to :vector. - def each axis=:vector, &block + def each(axis = :vector, &block) dispatch_to_axis axis, :each, &block end @@ -816,7 +818,7 @@ def each axis=:vector, &block # # * +axis+ - The axis to iterate over. Can be :vector (or :column) # or :row. Default to :vector. - def collect axis=:vector, &block + def collect(axis = :vector, &block) dispatch_to_axis_pl axis, :collect, &block end @@ -836,7 +838,7 @@ def collect axis=:vector, &block # # * +axis+ - The axis to map over. Can be :vector (or :column) or :row. # Default to :vector. 
- def map axis=:vector, &block + def map(axis = :vector, &block) dispatch_to_axis_pl axis, :map, &block end @@ -848,7 +850,7 @@ def map axis=:vector, &block # # * +axis+ - The axis to map over. Can be :vector (or :column) or :row. # Default to :vector. - def map! axis=:vector, &block + def map!(axis = :vector, &block) if %i[vector column].include?(axis) map_vectors!(&block) elsif axis == :row @@ -873,7 +875,7 @@ def map! axis=:vector, &block # # * +axis+ - The axis to map over. Can be :vector (or :column) or :row. # Default to :vector. - def recode axis=:vector, &block + def recode(axis = :vector, &block) dispatch_to_axis_pl axis, :recode, &block end @@ -905,7 +907,7 @@ def recode axis=:vector, &block # df.filter(:row) do |row| # row[:a] + row[:d] < 100 # end - def filter axis=:vector, &block + def filter(axis = :vector, &block) dispatch_to_axis_pl axis, :filter, &block end @@ -930,8 +932,8 @@ def recode_rows end # Map each vector and return an Array. - def map_vectors &block - return to_enum(:map_vectors) unless block_given? + def map_vectors(&block) + return to_enum(:map_vectors) unless block @data.map(&block) end @@ -948,21 +950,21 @@ def map_vectors! end # Map vectors alongwith the index. - def map_vectors_with_index &block - return to_enum(:map_vectors_with_index) unless block_given? + def map_vectors_with_index(&block) + return to_enum(:map_vectors_with_index) unless block each_vector_with_index.map(&block) end # Map each row - def map_rows &block - return to_enum(:map_rows) unless block_given? + def map_rows(&block) + return to_enum(:map_rows) unless block each_row.map(&block) end - def map_rows_with_index &block - return to_enum(:map_rows_with_index) unless block_given? + def map_rows_with_index(&block) + return to_enum(:map_rows_with_index) unless block each_row_with_index.map(&block) end @@ -987,32 +989,32 @@ def apply_method(method, keys: nil, by_position: true) else raise end end - alias :apply_method_on_sub_df :apply_method + alias apply_method_on_sub_df apply_method # Retrieves a Daru::Vector, based on the result of calculation # performed on each row. - def collect_rows &block - return to_enum(:collect_rows) unless block_given? + def collect_rows(&block) + return to_enum(:collect_rows) unless block Daru::Vector.new(each_row.map(&block), index: @index) end - def collect_row_with_index &block - return to_enum(:collect_row_with_index) unless block_given? + def collect_row_with_index(&block) + return to_enum(:collect_row_with_index) unless block Daru::Vector.new(each_row_with_index.map(&block), index: @index) end # Retrives a Daru::Vector, based on the result of calculation # performed on each vector. - def collect_vectors &block - return to_enum(:collect_vectors) unless block_given? + def collect_vectors(&block) + return to_enum(:collect_vectors) unless block Daru::Vector.new(each_vector.map(&block), index: @vectors) end - def collect_vector_with_index &block - return to_enum(:collect_vector_with_index) unless block_given? + def collect_vector_with_index(&block) + return to_enum(:collect_vector_with_index) unless block Daru::Vector.new(each_vector_with_index.map(&block), index: @vectors) end @@ -1027,18 +1029,18 @@ def collect_matrix return to_enum(:collect_matrix) unless block_given? 
vecs = vectors.to_a - rows = vecs.collect { |row| - vecs.collect { |col| - yield row,col - } - } + rows = vecs.collect do |row| + vecs.collect do |col| + yield row, col + end + end Matrix.rows(rows) end # :nocov: # Delete a vector - def delete_vector vector + def delete_vector(vector) raise IndexError, "Vector #{vector} does not exist." unless @vectors.include?(vector) @data.delete_at @vectors[vector] @@ -1048,17 +1050,18 @@ def delete_vector vector end # Deletes a list of vectors - def delete_vectors *vectors + def delete_vectors(*vectors) Array(vectors).each { |vec| delete_vector vec } self end # Delete a row - def delete_row index + def delete_row(index) idx = named_index_for index raise IndexError, "Index #{index} does not exist." unless @index.include? idx + @index = Daru::Index.new(@index.to_a - [idx]) each_vector do |vector| vector.delete_at idx @@ -1071,7 +1074,7 @@ def delete_row index # If n not given, uses original number of rows. # # @return {Daru::DataFrame} - def bootstrap(n=nil) + def bootstrap(n = nil) n ||= nrows Daru::DataFrame.new({}, order: @vectors).tap do |df_boot| n.times do @@ -1094,7 +1097,7 @@ def keep_vector_if end # creates a new vector with the data of a given field which the block returns true - def filter_vector vec, &block + def filter_vector(vec, &block) Daru::Vector.new(each_row.select(&block).map { |row| row[vec] }) end @@ -1110,8 +1113,8 @@ def filter_rows # Iterates over each vector and retains it in a new DataFrame if the block returns # true for that vector. - def filter_vectors &block - return to_enum(:filter_vectors) unless block_given? + def filter_vectors(&block) + return to_enum(:filter_vectors) unless block dup.tap { |df| df.keep_vector_if(&block) } end @@ -1152,7 +1155,7 @@ def verify(*tests) # # 4 555 # # 5 666 # # 6 777 - def vector_by_calculation &block + def vector_by_calculation(&block) a = each_row.map { |r| r.instance_eval(&block) } Daru::Vector.new a, index: @index @@ -1175,6 +1178,7 @@ def vector_by_calculation &block def order=(order_array) raise ArgumentError, 'Invalid order' unless order_array.sort == vectors.to_a.sort + initialize(to_h, order: order_array) end @@ -1190,8 +1194,9 @@ def order=(order_array) # ds = Daru::DataFrame.new({:a => a,:b => b}) # ds.compute("a+b") # => Vector [4,6] - def compute text, &block - return instance_eval(&block) if block_given? + def compute(text, &block) + return instance_eval(&block) if block + instance_eval(text) end @@ -1201,7 +1206,7 @@ def compute text, &block # # * +missing_values+ - An Array of the values that should be # treated as 'missing'. The default missing value is *nil*. - def missing_values_rows missing_values=[nil] + def missing_values_rows(missing_values = [nil]) number_of_missing = each_row.map do |row| row.indexes(*missing_values).size end @@ -1210,12 +1215,12 @@ def missing_values_rows missing_values=[nil] end # TODO: remove next version - alias :vector_missing_values :missing_values_rows + alias vector_missing_values missing_values_rows def has_missing_data? @data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) } end - alias :flawed? :has_missing_data? + alias flawed? has_missing_data? deprecate :has_missing_data?, :include_values?, 2016, 10 deprecate :flawed?, :include_values?, 2016, 10 @@ -1239,7 +1244,7 @@ def include_values?(*values) # hashes with other values. 
If block provided, is used to provide the # values, with parameters +row+ of dataset, +current+ last hash on # hierarchy and +name+ of the key to include - def nest *tree_keys, &_block + def nest(*tree_keys, &_block) tree_keys = tree_keys[0] if tree_keys[0].is_a? Array each_row.each_with_object({}) do |row, current| @@ -1248,27 +1253,27 @@ def nest *tree_keys, &_block current = keys.inject(current) { |c, f| c[row[f]] ||= {} } name = row[last] - if block_given? + if _block current[name] = yield(row, current, name) else current[name] ||= [] - current[name].push(row.to_h.delete_if { |key,_value| tree_keys.include? key }) + current[name].push(row.to_h.delete_if { |key, _value| tree_keys.include? key }) end end end - def vector_count_characters vecs=nil + def vector_count_characters(vecs = nil) vecs ||= @vectors.to_a collect_rows do |row| - vecs.map { |v| row[v].to_s.size }.inject(:+) + vecs.sum { |v| row[v].to_s.size } end end - def add_vectors_by_split(name,join='-',sep=Daru::SPLIT_TOKEN) + def add_vectors_by_split(name, join = '-', sep = Daru::SPLIT_TOKEN) self[name] .split_by_separator(sep) - .each { |k,v| self["#{name}#{join}#{k}".to_sym] = v } + .each { |k, v| self["#{name}#{join}#{k}".to_sym] = v } end # Return the number of rows and columns of the DataFrame in an Array. @@ -1287,7 +1292,7 @@ def ncols end # Check if a vector is present - def has_vector? vector + def has_vector?(vector) @vectors.include? vector end @@ -1300,7 +1305,7 @@ def has_vector? vector # df.any?(:row) do |row| # row[:a] < 3 and row[:b] == 'b' # end #=> true - def any? axis=:vector, &block + def any?(axis = :vector, &block) if %i[vector column].include?(axis) @data.any?(&block) elsif axis == :row @@ -1322,7 +1327,7 @@ def any? axis=:vector, &block # df.all?(:row) do |row| # row[:a] < 10 # end #=> true - def all? axis=:vector, &block + def all?(axis = :vector, &block) if %i[vector column].include?(axis) @data.all?(&block) elsif axis == :row @@ -1335,21 +1340,21 @@ def all? axis=:vector, &block # The first ten elements of the DataFrame # # @param [Fixnum] quantity (10) The number of elements to display from the top. - def head quantity=10 - row.at 0..(quantity-1) + def head(quantity = 10) + row.at 0..(quantity - 1) end - alias :first :head + alias first head # The last ten elements of the DataFrame # # @param [Fixnum] quantity (10) The number of elements to display from the bottom. - def tail quantity=10 + def tail(quantity = 10) start = [-quantity, -size].max row.at start..-1 end - alias :last :tail + alias last tail # Sum all numeric/specified vectors in the DataFrame. # @@ -1396,14 +1401,14 @@ def tail quantity=10 # 2 4 # def vector_sum(*args) - defaults = {vecs: nil, skipnil: false} + defaults = { vecs: nil, skipnil: false } options = args.last.is_a?(::Hash) ? args.pop : {} options = defaults.merge(options) vecs = args[0] || options[:vecs] skipnil = args[1] || options[:skipnil] vecs ||= numeric_vectors - sum = Daru::Vector.new [0]*@size, index: @index, name: @name, dtype: @dtype + sum = Daru::Vector.new [0] * @size, index: @index, name: @name, dtype: @dtype vecs.inject(sum) { |memo, n| self[n].add(memo, skipnil: skipnil) } end @@ -1413,10 +1418,10 @@ def vector_sum(*args) # # * +max_missing+ - The maximum number of elements in the row that can be # zero for the mean calculation to happen. Default to 0. - def vector_mean max_missing=0 + def vector_mean(max_missing = 0) # FIXME: in vector_sum we preserve created vector dtype, but # here we are not. Is this by design or ...? 
- zverok, 2016-05-18 - mean_vec = Daru::Vector.new [0]*@size, index: @index, name: "mean_#{@name}" + mean_vec = Daru::Vector.new [0] * @size, index: @index, name: "mean_#{@name}" each_row_with_index.each_with_object(mean_vec) do |(row, i), memo| memo[i] = row.indexes(*Daru::MISSING_VALUES).size > max_missing ? nil : row.mean @@ -1447,27 +1452,25 @@ def vector_mean max_missing=0 # # ["foo", "one", 3]=>[6], # # ["foo", "three", 8]=>[7], # # ["foo", "two", 3]=>[2, 4]} - def group_by *vectors + def group_by(*vectors) vectors.flatten! missing = vectors - @vectors.to_a - unless missing.empty? - raise(ArgumentError, "Vector(s) missing: #{missing.join(', ')}") - end + raise(ArgumentError, "Vector(s) missing: #{missing.join(', ')}") unless missing.empty? vectors = [@vectors.first] if vectors.empty? Daru::Core::GroupBy.new(self, vectors) end - def reindex_vectors new_vectors + def reindex_vectors(new_vectors) unless new_vectors.is_a?(Daru::Index) - raise ArgumentError, 'Must pass the new index of type Index or its '\ - "subclasses, not #{new_vectors.class}" + raise ArgumentError, 'Must pass the new index of type Index or its ' \ + "subclasses, not #{new_vectors.class}" end cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name) new_vectors.each_with_object(cl) do |vec, memo| - memo[vec] = @vectors.include?(vec) ? self[vec] : [nil]*nrows + memo[vec] = @vectors.include?(vec) ? self[vec] : [nil] * nrows end end @@ -1477,7 +1480,7 @@ def get_vector_anyways(v) # Concatenate another DataFrame along corresponding columns. # If columns do not exist in both dataframes, they are filled with nils - def concat other_df + def concat(other_df) vectors = (@vectors.to_a + other_df.vectors.to_a).uniq data = vectors.map do |v| @@ -1491,7 +1494,7 @@ def concat other_df # Additionally it tries to preserve the index. If the indices contain # common elements, #union will overwrite the according rows in the # first dataframe. - def union other_df + def union(other_df) index = (@index.to_a + other_df.index.to_a).uniq df = row[*(@index.to_a - other_df.index.to_a)] @@ -1532,7 +1535,7 @@ def self.delete_vector(df, cols) end # Set a particular column as the new DF - def set_index new_index_col, opts={} + def set_index(new_index_col, opts = {}) if new_index_col.respond_to?(:to_a) strategy = SetMultiIndexStrategy new_index_col = new_index_col.to_a @@ -1571,15 +1574,15 @@ def set_index new_index_col, opts={} # # 0 nil nil # # a 1 11 # # g nil nil - def reindex new_index + def reindex(new_index) unless new_index.is_a?(Daru::Index) - raise ArgumentError, 'Must pass the new index of type Index or its '\ - "subclasses, not #{new_index.class}" + raise ArgumentError, 'Must pass the new index of type Index or its ' \ + "subclasses, not #{new_index.class}" end cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name) new_index.each_with_object(cl) do |idx, memo| - memo.row[idx] = @index.include?(idx) ? row[idx] : [nil]*ncols + memo.row[idx] = @index.include?(idx) ? 
row[idx] : [nil] * ncols end end @@ -1607,7 +1610,7 @@ def reset_index # df.index = Daru::Index.new(['a','b','c','d']) # df.index.to_a #=> ['a','b','c','d'] # df.row['a'].to_a #=> [1,11] - def index= idx + def index=(idx) @index = Index.coerce idx @data.each { |vec| vec.index = @index } @@ -1624,14 +1627,12 @@ def index= idx # # df.vectors = Daru::Index.new([:foo, :bar, :baz]) # df.vectors.to_a #=> [:foo, :bar, :baz] - def vectors= new_index - unless new_index.is_a?(Daru::Index) - raise ArgumentError, 'Can only reindex with Index and its subclasses' - end + def vectors=(new_index) + raise ArgumentError, 'Can only reindex with Index and its subclasses' unless new_index.is_a?(Daru::Index) if new_index.size != ncols - raise ArgumentError, "Specified index length #{new_index.size} not equal to"\ - "dataframe size #{ncols}" + raise ArgumentError, "Specified index length #{new_index.size} not equal to" \ + "dataframe size #{ncols}" end @vectors = new_index @@ -1655,11 +1656,11 @@ def vectors= new_index # df = Daru::DataFrame.new({ a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44] }) # df.rename_vectors :a => :alpha, :c => :gamma # df.vectors.to_a #=> [:alpha, :b, :gamma] - def rename_vectors name_map - existing_targets = name_map.reject { |k,v| k == v }.values & vectors.to_a + def rename_vectors(name_map) + existing_targets = name_map.reject { |k, v| k == v }.values & vectors.to_a delete_vectors(*existing_targets) - new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v } + new_names = vectors.to_a.map { |v| name_map[v] || v } self.vectors = Daru::Index.new new_names end @@ -1676,7 +1677,7 @@ def rename_vectors name_map # # df = Daru::DataFrame.new({ a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44] }) # df.rename_vectors! :a => :alpha, :c => :gamma # df - def rename_vectors! name_map + def rename_vectors!(name_map) rename_vectors(name_map) self end @@ -1697,8 +1698,8 @@ def numeric_vector_names # Return a DataFrame of only the numerical Vectors. If clone: false # is specified as option, only a *view* of the Vectors will be # returned. Defaults to clone: true. - def only_numerics opts={} - cln = opts[:clone] == false ? false : true + def only_numerics(opts = {}) + cln = opts[:clone] != false arry = numeric_vectors.map { |v| self[v] } order = Index.new(numeric_vectors) @@ -1800,7 +1801,7 @@ def summary # # 2 1 nil # # 5 1 nil - def sort! vector_order, opts={} + def sort!(vector_order, opts = {}) raise ArgumentError, 'Required atleast one vector name' if vector_order.empty? # To enable sorting with categorical data, @@ -1824,7 +1825,7 @@ def sort! vector_order, opts={} end # Non-destructive version of #sort! - def sort vector_order, opts={} + def sort(vector_order, opts = {}) dup.sort! vector_order, opts end @@ -1862,7 +1863,7 @@ def sort vector_order, opts={} # # [:e, :one] [:e, :two] # # [:bar] 18 26 # # [:foo] 10 12 - def pivot_table opts={} + def pivot_table(opts = {}) raise ArgumentError, 'Specify grouping index' if Array(opts[:index]).empty? index = opts[:index] @@ -1883,10 +1884,10 @@ def pivot_table opts={} # the vectors names are changed to x_1, x_2 .... 
# # @return {Daru::DataFrame} - def merge other_df + def merge(other_df) unless nrows == other_df.nrows raise ArgumentError, - "Number of rows must be equal in this: #{nrows} and other: #{other_df.nrows}" + "Number of rows must be equal in this: #{nrows} and other: #{other_df.nrows}" end new_fields = (@vectors.to_a + other_df.vectors.to_a) @@ -1928,7 +1929,7 @@ def merge other_df # # id_1 name id_2 # # 0 1 Pirate 2 # # 1 3 Ninja 4 - def join(other_df,opts={}) + def join(other_df, opts = {}) Daru::Core::Merge.join(self, other_df, opts) end @@ -1980,7 +1981,7 @@ def one_to_many(parent_fields, pattern) end end - def add_vectors_by_split_recode(nm, join='-', sep=Daru::SPLIT_TOKEN) + def add_vectors_by_split_recode(nm, join = '-', sep = Daru::SPLIT_TOKEN) self[nm] .split_by_separator(sep) .each_with_index do |(k, v), i| @@ -2005,14 +2006,14 @@ def add_vectors_by_split_recode(nm, join='-', sep=Daru::SPLIT_TOKEN) # ds.create_sql('names') # #=>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;" # - def create_sql(table,charset='UTF8') + def create_sql(table, charset = 'UTF8') sql = "CREATE TABLE #{table} (" fields = vectors.to_a.collect do |f| v = self[f] - f.to_s + ' ' + v.db_type + "#{f} #{v.db_type}" end - sql + fields.join(",\n ")+") CHARACTER SET=#{charset};" + sql + fields.join(",\n ") + ") CHARACTER SET=#{charset};" end # Returns the dataframe. This can be convenient when the user does not @@ -2038,7 +2039,7 @@ def to_a # Convert to json. If no_index is false then the index will NOT be included # in the JSON thus created. - def to_json no_index=true + def to_json(no_index = true) if no_index to_a[0].to_json else @@ -2055,13 +2056,13 @@ def to_h end # Convert to html for IRuby. - def to_html(threshold=Daru.max_rows) + def to_html(threshold = Daru.max_rows) table_thead = to_html_thead table_tbody = to_html_tbody(threshold) path = if index.is_a?(MultiIndex) - File.expand_path('../iruby/templates/dataframe_mi.html.erb', __FILE__) + File.expand_path('iruby/templates/dataframe_mi.html.erb', __dir__) else - File.expand_path('../iruby/templates/dataframe.html.erb', __FILE__) + File.expand_path('iruby/templates/dataframe.html.erb', __dir__) end ERB.new(File.read(path).strip).result(binding) end @@ -2069,26 +2070,26 @@ def to_html(threshold=Daru.max_rows) def to_html_thead table_thead_path = if index.is_a?(MultiIndex) - File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __FILE__) + File.expand_path('iruby/templates/dataframe_mi_thead.html.erb', __dir__) else - File.expand_path('../iruby/templates/dataframe_thead.html.erb', __FILE__) + File.expand_path('iruby/templates/dataframe_thead.html.erb', __dir__) end ERB.new(File.read(table_thead_path).strip).result(binding) end - def to_html_tbody(threshold=Daru.max_rows) + def to_html_tbody(threshold = Daru.max_rows) threshold ||= @size table_tbody_path = if index.is_a?(MultiIndex) - File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __FILE__) + File.expand_path('iruby/templates/dataframe_mi_tbody.html.erb', __dir__) else - File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __FILE__) + File.expand_path('iruby/templates/dataframe_tbody.html.erb', __dir__) end ERB.new(File.read(table_tbody_path).strip).result(binding) end def to_s - "#<#{self.class}#{': ' + @name.to_s if @name}(#{nrows}x#{ncols})>" + "#<#{self.class}#{": #{@name}" if @name}(#{nrows}x#{ncols})>" end # Method for updating the metadata (i.e. 
missing value positions) of the @@ -2101,12 +2102,12 @@ def update end # Rename the DataFrame. - def rename new_name + def rename(new_name) @name = new_name self end - alias_method :name=, :rename + alias name= rename # Write this DataFrame to a CSV file. # @@ -2120,7 +2121,7 @@ def rename new_name # of the data to full stops ('.'). # All the options accepted by CSV.read() can also be passed into this # function. - def write_csv filename, opts={} + def write_csv(filename, opts = {}) Daru::IO.dataframe_write_csv self, filename, opts end @@ -2129,7 +2130,7 @@ def write_csv filename, opts={} # == Arguments # # * filename - The path of the file where the DataFrame should be written. - def write_excel filename, opts={} + def write_excel(filename, opts = {}) Daru::IO.dataframe_write_excel self, filename, opts end @@ -2145,30 +2146,30 @@ def write_excel filename, opts={} # ds = Daru::DataFrame.new({:id=>Daru::Vector.new([1,2,3]), :name=>Daru::Vector.new(["a","b","c"])}) # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password") # ds.write_sql(dbh,"test") - def write_sql dbh, table + def write_sql(dbh, table) Daru::IO.dataframe_write_sql self, dbh, table end # Use marshalling to save dataframe to a file. - def save filename + def save(filename) Daru::IO.save self, filename end def _dump(_depth) Marshal.dump( - data: @data, + data: @data, index: @index.to_a, order: @vectors.to_a, - name: @name + name: @name ) end - def self._load data + def self._load(data) h = Marshal.load data Daru::DataFrame.new(h[:data], - index: h[:index], - order: h[:order], - name: h[:name]) + index: h[:index], + order: h[:order], + name: h[:name]) end # Transpose a DataFrame, tranposing elements and row, column indexing. @@ -2183,7 +2184,7 @@ def transpose end # Pretty print in a nice table format for the command line (irb/pry/iruby) - def inspect spacing=Daru.spacing, threshold=Daru.max_rows + def inspect(spacing = Daru.spacing, threshold = Daru.max_rows) name_part = @name ? ": #{@name} " : '' spacing = [headers.to_a.map(&:length).max, spacing].max @@ -2198,11 +2199,11 @@ def inspect spacing=Daru.spacing, threshold=Daru.max_rows end # Query a DataFrame by passing a Daru::Core::Query::BoolArray object. 
- def where bool_array + def where(bool_array) Daru::Core::Query.df_where self, bool_array end - def == other + def ==(other) self.class == other.class && @size == other.size && @index == other.index && @@ -2222,38 +2223,37 @@ def == other # df.to_category :b # df[:b].type # # => :category - def to_category *names + def to_category(*names) names.each { |n| self[n] = self[n].to_category } self end def method_missing(name, *args, &block) - case - when name =~ /(.+)\=/ - name = name[/(.+)\=/].delete('=') + if /(.+)=/.match?(name) + name = name[/(.+)=/].delete('=') name = name.to_sym unless has_vector?(name) insert_or_modify_vector [name], args[0] - when has_vector?(name) + elsif has_vector?(name) self[name] - when has_vector?(name.to_s) + elsif has_vector?(name.to_s) self[name.to_s] else super end end - def respond_to_missing?(name, include_private=false) + def respond_to_missing?(name, include_private = false) name.to_s.end_with?('=') || has_vector?(name) || super end - def interact_code vector_names, full + def interact_code(vector_names, full) dfs = vector_names.zip(full).map do |vec_name, f| self[vec_name].contrast_code(full: f).each.to_a end all_vectors = recursive_product(dfs) Daru::DataFrame.new all_vectors, - order: all_vectors.map(&:name) + order: all_vectors.map(&:name) end # Split the dataframe into many dataframes based on category vector @@ -2274,7 +2274,7 @@ def interact_code vector_names, full # # # # # a # # 2 3] - def split_by_category cat_name + def split_by_category(cat_name) cat_dv = self[cat_name] raise ArgumentError, "#{cat_name} is not a category vector" unless cat_dv.category? @@ -2317,9 +2317,10 @@ def split_by_category cat_name # # => [[3, "c"]] # df_mi.access_row_tuples_by_indexs(:a) # # => [[1, "a"], [2, "b"], [4, "d"]] - def access_row_tuples_by_indexs *indexes + def access_row_tuples_by_indexs(*indexes) return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a) if @index.is_a?(Daru::MultiIndex) + positions = @index.pos(*indexes) if positions.is_a? Numeric row = get_rows_for([positions]) @@ -2377,9 +2378,9 @@ def access_row_tuples_by_indexs *indexes # # Note: `GroupBy` class `aggregate` method uses this `aggregate` method # internally. - def aggregate(options={}, multi_index_level=-1) + def aggregate(options = {}, multi_index_level = -1) if block_given? - positions_tuples, new_index = yield(@index) # note: use of yield is private for now + positions_tuples, new_index = yield(@index) # NOTE: use of yield is private for now else positions_tuples, new_index = group_index_for_aggregation(@index, multi_index_level) end @@ -2408,32 +2409,34 @@ def row_headers index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a end - def convert_categorical_vectors names - names.map do |n| + def convert_categorical_vectors(names) + names.filter_map do |n| next unless self[n].category? + old = [n, self[n]] self[n] = Daru::Vector.new(self[n].to_ints) old - end.compact + end end - def restore_categorical_vectors old + def restore_categorical_vectors(old) old.each { |name, vector| self[name] = vector } end - def recursive_product dfs + def recursive_product(dfs) return dfs.first if dfs.size == 1 left = dfs.first dfs.shift right = recursive_product dfs left.product(right).map do |dv1, dv2| - (dv1*dv2).rename "#{dv1.name}:#{dv2.name}" + (dv1 * dv2).rename "#{dv1.name}:#{dv2.name}" end end - def should_be_vector! 
val + def should_be_vector!(val) return val if val.is_a?(Daru::Vector) + raise TypeError, "Every iteration must return Daru::Vector not #{val.class}" end @@ -2459,7 +2462,7 @@ def dispatch_to_axis_pl(axis, method, *args, &block) AXES = %i[row vector].freeze - def extract_axis names, default=:vector + def extract_axis(names, default = :vector) if AXES.include?(names.last) names.pop else @@ -2467,7 +2470,7 @@ def extract_axis names, default=:vector end end - def access_vector *names + def access_vector(*names) if names.first.is_a?(Range) dup(@vectors.subset(names.first)) elsif @vectors.is_a?(MultiIndex) @@ -2477,7 +2480,7 @@ def access_vector *names end end - def access_vector_multi_index *names + def access_vector_multi_index(*names) pos = @vectors[names] return @data[pos] if pos.is_a?(Integer) @@ -2489,7 +2492,7 @@ def access_vector_multi_index *names Daru::DataFrame.new(new_vectors, index: @index, order: pos) end - def access_vector_single_index *names + def access_vector_single_index(*names) if names.count < 2 begin pos = @vectors.is_a?(Daru::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first) @@ -2497,6 +2500,7 @@ def access_vector_single_index *names raise IndexError, "Specified vector #{names.first} does not exist" end return @data[pos] if pos.is_a?(Numeric) + names = pos end @@ -2507,7 +2511,7 @@ def access_vector_single_index *names index: @index, name: @name) end - def access_row *indexes + def access_row(*indexes) positions = @index.pos(*indexes) if positions.is_a? Numeric @@ -2537,7 +2541,7 @@ def get_rows_for(keys, by_position: true) end end - def insert_or_modify_vector name, vector + def insert_or_modify_vector(name, vector) name = name[0] unless @vectors.is_a?(MultiIndex) if @index.empty? @@ -2549,7 +2553,7 @@ def insert_or_modify_vector name, vector end end - def assign_or_add_vector name, v + def assign_or_add_vector(name, v) # FIXME: fix this jugaad. need to make changes in Indexing itself. begin pos = @vectors[name] @@ -2557,11 +2561,10 @@ def assign_or_add_vector name, v pos = name end - case - when pos.is_a?(Daru::Index) + if pos.is_a?(Daru::Index) assign_multiple_vectors pos, v - when pos == name && - (@vectors.include?(name) || (pos.is_a?(Integer) && pos < @data.size)) + elsif pos == name && + (@vectors.include?(name) || (pos.is_a?(Integer) && pos < @data.size)) @data[pos] = v else @@ -2569,18 +2572,18 @@ def assign_or_add_vector name, v end end - def assign_multiple_vectors pos, v + def assign_multiple_vectors(pos, v) pos.each do |p| @data[@vectors[p]] = v end end - def assign_or_add_vector_rough name, v + def assign_or_add_vector_rough(name, v) @vectors |= [name] unless @vectors.include?(name) @data[@vectors[name]] = v end - def insert_vector_in_empty name, vector + def insert_vector_in_empty(name, vector) vec = Vector.coerce(vector.to_a, name: coerce_name(name)) @index = vec.index @@ -2590,7 +2593,7 @@ def insert_vector_in_empty name, vector @data.map! { |v| v.empty? ? v.reindex(@index) : v } end - def prepare_for_insert name, arg + def prepare_for_insert(name, arg) if arg.is_a? Daru::Vector prepare_vector_for_insert name, arg elsif arg.respond_to?(:to_a) @@ -2600,28 +2603,30 @@ def prepare_for_insert name, arg end end - def prepare_vector_for_insert name, vector + def prepare_vector_for_insert(name, vector) # so that index-by-index assignment is avoided when possible. 
return vector.dup if vector.index == @index - Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v| + + Daru::Vector.new([], name: coerce_name(name), index: @index).tap do |v| @index.each do |idx| v[idx] = vector.index.include?(idx) ? vector[idx] : nil end - } + end end - def prepare_enum_for_insert name, enum + def prepare_enum_for_insert(name, enum) if @size != enum.size raise "Specified vector of length #{enum.size} cannot be inserted in DataFrame of size #{@size}" end + Daru::Vector.new(enum, name: coerce_name(name), index: @index) end - def prepare_value_for_insert name, value + def prepare_value_for_insert(name, value) Daru::Vector.new(Array(value) * @size, name: coerce_name(name), index: @index) end - def insert_or_modify_row indexes, vector + def insert_or_modify_row(indexes, vector) vector = coerce_vector vector raise SizeError, 'Vector length should match row length' if @@ -2647,10 +2652,11 @@ def create_empty_vectors(vectors, index) def validate_labels if @vectors && @vectors.size != @data.size raise IndexError, "Expected equal number of vector names (#{@vectors.size}) " \ - "for number of vectors (#{@data.size})." + "for number of vectors (#{@data.size})." end return unless @index && @data[0] && @index.size != @data[0].size + raise IndexError, 'Expected number of indexes same as number of rows' end @@ -2669,7 +2675,7 @@ def set_size @size = @index.size end - def named_index_for index + def named_index_for(index) if @index.include? index index elsif @index.key index @@ -2679,7 +2685,7 @@ def named_index_for index end end - def create_vectors_index_with vectors, source + def create_vectors_index_with(vectors, source) vectors = source.keys if vectors.nil? @vectors = @@ -2690,26 +2696,26 @@ def create_vectors_index_with vectors, source end end - def all_vectors_have_equal_indexes? source + def all_vectors_have_equal_indexes?(source) idx = source.values[0].index source.values.all? { |vector| idx == vector.index } end - def coerce_name potential_name + def coerce_name(potential_name) potential_name.is_a?(Array) ? 
potential_name.join : potential_name end - def initialize_from_array source, vectors, index, opts + def initialize_from_array(source, vectors, index, opts) raise ArgumentError, 'All objects in data source should be same class' \ unless source.map(&:class).uniq.size == 1 case source.first when Array - vectors ||= (0..source.size-1).to_a + vectors ||= (0..source.size - 1).to_a initialize_from_array_of_arrays source, vectors, index, opts when Vector - vectors ||= (0..source.size-1).to_a + vectors ||= (0..source.size - 1).to_a initialize_from_array_of_vectors source, vectors, index, opts when Hash initialize_from_array_of_hashes source, vectors, index, opts @@ -2718,10 +2724,10 @@ def initialize_from_array source, vectors, index, opts end end - def initialize_from_array_of_arrays source, vectors, index, _opts + def initialize_from_array_of_arrays(source, vectors, index, _opts) if source.size != vectors.size raise ArgumentError, "Number of vectors (#{vectors.size}) should " \ - "equal order size (#{source.size})" + "equal order size (#{source.size})" end @index = Index.coerce(index || source[0].size) @@ -2730,15 +2736,15 @@ def initialize_from_array_of_arrays source, vectors, index, _opts update_data source, vectors end - def initialize_from_array_of_vectors source, vectors, index, opts + def initialize_from_array_of_vectors(source, vectors, index, opts) clone = opts[:clone] != false - hsh = vectors.each_with_index.map do |name, idx| + hsh = vectors.each_with_index.to_h do |name, idx| [name, source[idx]] - end.to_h + end initialize(hsh, index: index, order: vectors, name: @name, clone: clone) end - def initialize_from_array_of_hashes source, vectors, index, _opts + def initialize_from_array_of_hashes(source, vectors, index, _opts) names = if vectors.nil? source[0].keys @@ -2754,7 +2760,7 @@ def initialize_from_array_of_hashes source, vectors, index, _opts end end - def initialize_from_hash source, vectors, index, opts + def initialize_from_hash(source, vectors, index, opts) create_vectors_index_with vectors, source if ArrayHelper.array_of?(source.values, Vector) @@ -2764,7 +2770,7 @@ def initialize_from_hash source, vectors, index, opts end end - def initialize_from_hash_with_vectors source, index, opts + def initialize_from_hash_with_vectors(source, index, opts) vectors_have_same_index = all_vectors_have_equal_indexes?(source) clone = opts[:clone] != false @@ -2779,7 +2785,7 @@ def initialize_from_hash_with_vectors source, index, opts end end - def deduce_index index, source, vectors_have_same_index + def deduce_index(index, source, vectors_have_same_index) if !index.nil? Index.coerce index elsif vectors_have_same_index @@ -2793,7 +2799,7 @@ def deduce_index index, source, vectors_have_same_index end end - def clone_vectors source, vectors_have_same_index + def clone_vectors(source, vectors_have_same_index) @vectors.map do |vector| # avoids matching indexes of vectors if all the supplied vectors # have the same index. 
@@ -2809,7 +2815,7 @@ def clone_vectors source, vectors_have_same_index end end - def initialize_from_hash_with_arrays source, index, _opts + def initialize_from_hash_with_arrays(source, index, _opts) @index = Index.coerce(index || source.values[0].size) @vectors.each do |name| @@ -2817,31 +2823,36 @@ def initialize_from_hash_with_arrays source, index, _opts end end - def sort_build_row vector_locs, by_blocks, ascending, handle_nils, r1, r2 # rubocop:disable Metrics/ParameterLists + def sort_build_row(vector_locs, by_blocks, ascending, handle_nils, r1, r2) # rubocop:disable Metrics/ParameterLists # Create an array to be used for comparison of two rows in sorting vector_locs .zip(by_blocks, ascending, handle_nils) .map do |vector_loc, by, asc, handle_nil| value = @data[vector_loc].data[asc ? r1 : r2] - value = by.call(value) rescue nil if by + if by + value = begin + by.call(value) + rescue StandardError + nil + end + end sort_handle_nils value, asc, handle_nil || !by end end - def sort_handle_nils value, asc, handle_nil - case - when !handle_nil + def sort_handle_nils(value, asc, handle_nil) + if !handle_nil value - when asc + elsif asc [value.nil? ? 0 : 1, value] else [value.nil? ? 1 : 0, value] end end - def sort_coerce_boolean opts, symbol, default, size + def sort_coerce_boolean(opts, symbol, default, size) val = opts[symbol] case val when true, false @@ -2851,13 +2862,14 @@ def sort_coerce_boolean opts, symbol, default, size when Array raise ArgumentError, "Specify same number of vector names and #{symbol}" if size != val.size + val else raise ArgumentError, "Can't coerce #{symbol} from #{val.class} to boolean option" end end - def sort_prepare_block vector_order, opts + def sort_prepare_block(vector_order, opts) ascending = sort_coerce_boolean opts, :ascending, true, vector_order.size handle_nils = sort_coerce_boolean opts, :handle_nils, false, vector_order.size @@ -2876,7 +2888,7 @@ def sort_prepare_block vector_order, opts end end - def verify_error_message row, test, id, i + def verify_error_message(row, test, id, i) description, fields, = test values = if fields.empty? @@ -2884,10 +2896,10 @@ def verify_error_message row, test, id, i else ' (' + fields.collect { |k| "#{k}=#{row[k]}" }.join(', ') + ')' end - "#{i+1} [#{row[id]}]: #{description}#{values}" + "#{i + 1} [#{row[id]}]: #{description}#{values}" end - def prepare_pivot_values index, vectors, opts + def prepare_pivot_values(index, vectors, opts) case opts[:values] when nil # values not specified at all. 
(@vectors.to_a - (index | vectors)) & numeric_vector_names @@ -2898,8 +2910,8 @@ def prepare_pivot_values index, vectors, opts end end - def make_pivot_hash grouped, vectors, values, aggregate_function - grouped.groups.map { |n, _| [n, {}] }.to_h.tap do |super_hash| + def make_pivot_hash(grouped, vectors, values, aggregate_function) + grouped.groups.transform_values { |_| {} }.tap do |super_hash| values.each do |value| grouped.groups.each do |group_name, row_numbers| row_numbers.each do |num| @@ -2916,7 +2928,7 @@ def make_pivot_hash grouped, vectors, values, aggregate_function end end - def setup_pivot_aggregates super_hash, aggregate_function + def setup_pivot_aggregates(super_hash, aggregate_function) super_hash.each_value do |sub_hash| sub_hash.each do |group_name, aggregates| sub_hash[group_name] = Daru::Vector.new(aggregates).send(aggregate_function) @@ -2924,7 +2936,7 @@ def setup_pivot_aggregates super_hash, aggregate_function end end - def pivot_dataframe super_hash + def pivot_dataframe(super_hash) df_index = Daru::MultiIndex.from_tuples super_hash.keys df_vectors = Daru::MultiIndex.from_tuples super_hash.values.flat_map(&:keys).uniq @@ -2937,8 +2949,8 @@ def pivot_dataframe super_hash end end - def one_to_many_components pattern - re = Regexp.new pattern.gsub('%v','(.+?)').gsub('%n','(\\d+?)') + def one_to_many_components(pattern) + re = Regexp.new pattern.gsub('%v', '(.+?)').gsub('%n', '(\\d+?)') vars, numbers = @vectors @@ -2948,23 +2960,23 @@ def one_to_many_components pattern [vars.uniq, numbers.map(&:to_i).sort.uniq] end - def one_to_many_row row, number, vars, pattern + def one_to_many_row(row, number, vars, pattern) vars - .map { |v| + .to_h do |v| name = pattern.sub('%v', v).sub('%n', number.to_s) [v, row[name]] - }.to_h + end end # Raises IndexError when one of the positions is not a valid position - def validate_positions *positions, size + def validate_positions(*positions, size) positions.each do |pos| raise IndexError, "#{pos} is not a valid position." 
if pos >= size end end # Accepts hash, enumerable and vector and align it properly so it can be added - def coerce_vector vector + def coerce_vector(vector) case vector when Daru::Vector vector.reindex @vectors @@ -2975,7 +2987,7 @@ def coerce_vector vector end end - def update_data source, vectors + def update_data(source, vectors) @data = @vectors.each_with_index.map do |_vec, idx| Daru::Vector.new(source[idx], index: @index, name: vectors[idx]) end @@ -2995,7 +3007,7 @@ def aggregate_by_positions_tuples(options, positions_tuples) else methods = options.values - # note: because we aggregate over rows, we don't have to re-get sub-dfs for each method (which is expensive) + # NOTE: because we aggregate over rows, we don't have to re-get sub-dfs for each method (which is expensive) rows = positions_tuples.map do |positions| apply_method_on_sub_df(methods, keys: positions) end @@ -3026,7 +3038,7 @@ def cast_aggregation_options(options) [over_vectors, options] end - def group_index_for_aggregation(index, multi_index_level=-1) + def group_index_for_aggregation(index, multi_index_level = -1) case index when Daru::MultiIndex groups_by_pos = Daru::Core::GroupBy.get_positions_group_for_aggregation(index, multi_index_level) @@ -3043,7 +3055,7 @@ def group_index_for_aggregation(index, multi_index_level=-1) end # coerce ranges, integers and array in appropriate ways - def coerce_positions *positions, size + def coerce_positions(*positions, size) if positions.size == 1 case positions.first when Integer diff --git a/lib/daru/date_time/index.rb b/lib/daru/date_time/index.rb index b6cd77242..3497b7502 100644 --- a/lib/daru/date_time/index.rb +++ b/lib/daru/date_time/index.rb @@ -4,11 +4,11 @@ module Daru module DateTimeIndexHelper class << self OFFSETS_HASH = { - 'S' => Daru::Offsets::Second, - 'M' => Daru::Offsets::Minute, - 'H' => Daru::Offsets::Hour, - 'D' => Daru::Offsets::Day, - 'W' => Daru::Offsets::Week, + 'S' => Daru::Offsets::Second, + 'M' => Daru::Offsets::Minute, + 'H' => Daru::Offsets::Hour, + 'D' => Daru::Offsets::Day, + 'W' => Daru::Offsets::Week, 'MONTH' => Daru::Offsets::Month, 'MB' => Daru::Offsets::MonthBegin, 'ME' => Daru::Offsets::MonthEnd, @@ -18,10 +18,10 @@ class << self }.freeze TIME_INTERVALS = { - Rational(1,1) => Daru::Offsets::Day, - Rational(1,24) => Daru::Offsets::Hour, - Rational(1,1440) => Daru::Offsets::Minute, - Rational(1,86_400) => Daru::Offsets::Second + Rational(1, 1) => Daru::Offsets::Day, + Rational(1, 24) => Daru::Offsets::Hour, + Rational(1, 1440) => Daru::Offsets::Minute, + Rational(1, 86_400) => Daru::Offsets::Second }.freeze DOW_REGEXP = Regexp.new(Daru::DAYS_OF_WEEK.keys.join('|')) @@ -30,13 +30,14 @@ class << self ( (?MONTH|YEAR|S|H|MB|ME|M|D|YB|YE) | (?W)(-(?#{DOW_REGEXP}))? - )$/x + )$/x.freeze # Generates a Daru::DateOffset object for generic offsets or one of the # specialized classed within Daru::Offsets depending on the 'frequency' # string. - def offset_from_frequency frequency + def offset_from_frequency(frequency) return frequency if frequency.is_a?(Daru::DateOffset) + frequency ||= 'D' matched = FREQUENCY_PATTERN.match(frequency) or @@ -54,23 +55,25 @@ def offset_from_frequency frequency end end - def coerce_date date + def coerce_date(date) return date unless date.is_a?(String) + date_time_from(date, determine_date_precision_of(date)) end - def begin_from_offset? offset, start + def begin_from_offset?(offset, start) offset.is_a?(Daru::Offsets::Tick) || - offset.respond_to?(:on_offset?) && offset.on_offset?(start) + (offset.respond_to?(:on_offset?) 
&& offset.on_offset?(start)) end - def generate_data start, en, offset, periods + def generate_data(start, en, offset, periods) data = [] new_date = begin_from_offset?(offset, start) ? start : offset + start if periods.nil? # use end loop do break if new_date > en + data << new_date new_date = offset + new_date end @@ -84,13 +87,13 @@ def generate_data start, en, offset, periods data end - def verify_start_and_end start, en + def verify_start_and_end(start, en) raise ArgumentError, 'Start and end cannot be the same' if start == en raise ArgumentError, 'Start must be lesser than end' if start > en raise ArgumentError, 'Only same time zones are allowed' if start.zone != en.zone end - def infer_offset data + def infer_offset(data) diffs = data.each_cons(2).map { |d1, d2| d2 - d1 } return nil unless diffs.uniq.count == 1 @@ -101,87 +104,87 @@ def infer_offset data Daru::Offsets::Second.new(number_of_seconds.numerator) if number_of_seconds.denominator == 1 end - def find_index_of_date data, date_time + def find_index_of_date(data, date_time) searched = data.bsearch { |d| d[0] >= date_time } raise(ArgumentError, "Cannot find #{date_time}") if searched.nil? || searched[0] != date_time searched[1] end - def find_date_string_bounds date_string + def find_date_string_bounds(date_string) date_precision = determine_date_precision_of date_string date_time = date_time_from date_string, date_precision generate_bounds date_time, date_precision end - def date_time_from date_string, date_precision + def date_time_from(date_string, date_precision) case date_precision when :year DateTime.new(date_string.gsub(/[^0-9]/, '').to_i) when :month DateTime.new( date_string.match(/\d\d\d\d/).to_s.to_i, - date_string.match(/\-\d?\d/).to_s.delete('-').to_i + date_string.match(/-\d?\d/).to_s.delete('-').to_i ) else DateTime.parse date_string end end - DATE_PRECISION_REGEXP = /^(\d\d\d\d)(-\d{1,2}(-\d{1,2}( \d{1,2}(:\d{1,2}(:\d{1,2})?)?)?)?)?$/ + DATE_PRECISION_REGEXP = /^(\d\d\d\d)(-\d{1,2}(-\d{1,2}( \d{1,2}(:\d{1,2}(:\d{1,2})?)?)?)?)?$/.freeze DATE_PRECISIONS = [nil, :year, :month, :day, :hour, :min, :sec].freeze - def determine_date_precision_of date_string + def determine_date_precision_of(date_string) components = date_string.scan(DATE_PRECISION_REGEXP).flatten.compact DATE_PRECISIONS[components.count] or raise ArgumentError, "Unacceptable date string #{date_string}" end - def generate_bounds date_time, date_precision # rubocop:disable Metrics/MethodLength + def generate_bounds(date_time, date_precision) # rubocop:disable Metrics/MethodLength # FIXME: about that ^ disable: I'd like to use my zverok/time_boots here, which will simplify things case date_precision when :year [ date_time, - DateTime.new(date_time.year,12,31,23,59,59) + DateTime.new(date_time.year, 12, 31, 23, 59, 59) ] when :month [ date_time, DateTime.new(date_time.year, date_time.month, ((date_time >> 1) - 1).day, - 23,59,59) + 23, 59, 59) ] when :day [ date_time, - DateTime.new(date_time.year, date_time.month, date_time.day,23,59,59) + DateTime.new(date_time.year, date_time.month, date_time.day, 23, 59, 59) ] when :hour [ date_time, DateTime.new(date_time.year, date_time.month, date_time.day, - date_time.hour,59,59) + date_time.hour, 59, 59) ] when :min [ date_time, DateTime.new(date_time.year, date_time.month, date_time.day, - date_time.hour, date_time.min, 59) + date_time.hour, date_time.min, 59) ] else # second or when precision is same as offset [date_time, date_time] end end - def possibly_convert_to_date_time data + def 
possibly_convert_to_date_time(data) data[0].is_a?(String) ? data.map! { |e| DateTime.parse(e) } : data end - def last_date data - data.sort_by { |d| d[1] }.last + def last_date(data) + data.max_by { |d| d[1] } end - def key_out_of_bounds? key, data + def key_out_of_bounds?(key, data) dates = data.transpose.first precision = determine_date_precision_of key @@ -205,8 +208,8 @@ def year_out_of_bounds?(date_time, dates) end def year_month_out_of_bounds?(date_time, dates) - date_time.year < dates.first.year && date_time.month < dates.first.month || - date_time.year > dates.last.year && date_time.month > dates.last.month + (date_time.year < dates.first.year && date_time.month < dates.first.month) || + (date_time.year > dates.last.year && date_time.month > dates.last.month) end end end @@ -216,11 +219,7 @@ class DateTimeIndex < Index Helper = DateTimeIndexHelper def self.try_create(source) - if source && ArrayHelper.array_of?(source, ::DateTime) - new(source, freq: :infer) - else - nil - end + new(source, freq: :infer) if source && ArrayHelper.array_of?(source, ::DateTime) end def each(&block) @@ -253,7 +252,7 @@ def each(&block) # DateTime.new(2012,4,8), DateTime.new(2012,4,9), DateTime.new(2012,4,10), # DateTime.new(2012,4,11), DateTime.new(2012,4,12)], freq: :infer) # #=># - def initialize data, opts={freq: nil} + def initialize(data, opts = { freq: nil }) super data Helper.possibly_convert_to_date_time data @@ -333,7 +332,7 @@ def dup # Daru::DateTimeIndex.date_range( # :start => '2012-5-2', :periods => 50, :freq => 'ME') # #=> # - def self.date_range opts={} + def self.date_range(opts = {}) start = Helper.coerce_date opts[:start] en = Helper.coerce_date opts[:end] Helper.verify_start_and_end(start, en) unless en.nil? @@ -347,8 +346,9 @@ def self.date_range opts={} # # @param key [String, DateTime] Specify a date partially (as a String) or # completely to retrieve. - def [] *key + def [](*key) return slice(*key) if key.size != 1 + key = key[0] case key when Numeric @@ -367,18 +367,19 @@ def [] *key end end - def pos *args + def pos(*args) # to filled out = self[*args] return out if out.is_a? Numeric + out.map { |date| self[date] } end - def subset *args + def subset(*args) self[*args] end - def valid? *args + def valid?(*args) self[*args] true rescue IndexError @@ -389,7 +390,7 @@ def valid? *args # # @param [String, DateTime] first Start of the slice as a string or DateTime. # @param [String, DateTime] last End of the slice as a string or DateTime. - def slice first, last + def slice(first, last) if first.is_a?(Integer) && last.is_a?(Integer) DateTimeIndex.new(to_a[first..last], freq: @offset) else @@ -415,15 +416,16 @@ def size @periods end - def == other + def ==(other) to_a == other.to_a end def inspect meta = [@periods, @frequency ? "frequency=#{@frequency}" : nil].compact.join(', ') return "#<#{self.class}(#{meta})>" if @data.empty? + "#<#{self.class}(#{meta}) " \ - "#{@data.first[0]}...#{@data.last[0]}>" + "#{@data.first[0]}...#{@data.last[0]}>" end # Shift all dates in the index by a positive number in the future. The dates @@ -446,8 +448,8 @@ def inspect # # Pass an integer to shift # index.shift(4) # #=># - def shift distance - distance.is_a?(Integer) && distance < 0 and + def shift(distance) + distance.is_a?(Integer) && distance.negative? and raise IndexError, "Distance #{distance} cannot be negative" _shift(distance) @@ -462,8 +464,8 @@ def shift distance # Passing a positive integer will offset each data point by the same offset # that it was created with. 
# @return [DateTimeIndex] A new lagged DateTimeIndex object. - def lag distance - distance.is_a?(Integer) && distance < 0 and + def lag(distance) + distance.is_a?(Integer) && distance.negative? and raise IndexError, "Distance #{distance} cannot be negative" _shift(-distance) @@ -474,7 +476,7 @@ def _dump(_depth) Marshal.dump(data: to_a, freq: @offset) end - def self._load data + def self._load(data) h = Marshal.load data Daru::DateTimeIndex.new(h[:data], freq: h[:freq]) @@ -503,7 +505,7 @@ def self._load data # Check if a date exists in the index. Will be inferred from string in case # you pass a string. Recommened specifying the full date as a DateTime object. - def include? date_time + def include?(date_time) return false unless date_time.is_a?(String) || date_time.is_a?(DateTime) if date_time.is_a?(String) @@ -522,7 +524,7 @@ def empty? private - def get_by_range first, last + def get_by_range(first, last) return slice(first, last) if first.is_a?(Integer) && last.is_a?(Integer) raise ArgumentError, "Keys #{first} and #{last} are out of bounds" if @@ -531,7 +533,7 @@ def get_by_range first, last slice first, last end - def slice_between_dates first, last + def slice_between_dates(first, last) # about that ^ disable: I'm waiting for cleaner understanding # of offsets logic. Reference: https://github.com/v0dro/daru/commit/7e1c34aec9516a9ba33037b4a1daaaaf1de0726a#diff-a95ef410a8e1f4ea3cc48d231bb880faR250 start = @data.bsearch { |d| d[0] >= first } @@ -540,21 +542,23 @@ def slice_between_dates first, last if @offset en = after_en ? @data[after_en[1] - 1] : @data.last return start[1] if start == en + DateTimeIndex.date_range start: start[0], end: en[0], freq: @offset else st = @data.index(start) en = after_en ? @data.index(after_en) - 1 : Helper.last_date(@data)[1] return start[1] if st == en + DateTimeIndex.new(@data[st..en].transpose[0] || []) # empty slice guard end end - def _shift distance + def _shift(distance) if distance.is_a?(Integer) raise IndexError, 'To lag non-freq date time index pass an offset.' unless @offset start = @data[0][0] - off = distance > 0 ? @offset : -@offset + off = distance.positive? ? @offset : -@offset distance.abs.times { start = off + start } DateTimeIndex.date_range(start: start, periods: @periods, freq: @offset) else diff --git a/lib/daru/date_time/offsets.rb b/lib/daru/date_time/offsets.rb index 8f55a7141..ba022e2cf 100644 --- a/lib/daru/date_time/offsets.rb +++ b/lib/daru/date_time/offsets.rb @@ -1,6 +1,4 @@ module Daru - # rubocop:disable Style/OpMethod - # Generic class for generating date offsets. class DateOffset # A Daru::DateOffset object is created by a passing certain options @@ -35,7 +33,7 @@ class DateOffset # offset = Daru::DateOffset.new(mins: 2, n: 5) # offset + DateTime.new(2011,5,3,3,5) # #=> # - def initialize opts={} + def initialize(opts = {}) n = opts[:n] || 1 Offsets::LIST.each do |key, klass| if opts.key?(key) @@ -44,21 +42,21 @@ def initialize opts={} end end - @offset = Offsets::Day.new(7*n*opts[:weeks]) if opts[:weeks] + @offset = Offsets::Day.new(7 * n * opts[:weeks]) if opts[:weeks] end # Offset a DateTime forward. # - # @param date_time [DateTime] A DateTime object which is to offset. - def + date_time - @offset + date_time + # @param other [DateTime] A DateTime object which is to offset. + def +(other) + @offset + other end # Offset a DateTime backward. # - # @param date_time [DateTime] A DateTime object which is to offset. - def - date_time - @offset - date_time + # @param other [DateTime] A DateTime object which is to offset. 
+ def -(other) + @offset - other end def -@ @@ -71,12 +69,12 @@ def initialize(offset) @offset = offset end - def + date_time - @offset - date_time + def +(other) + @offset - other end - def - date_time - @offset + date_time + def -(other) + @offset + other end def -@ @@ -91,7 +89,7 @@ class DateOffsetType < DateOffset # the offset should be applied, which is the supplied as the argument. # # @param n [Integer] The number of times an offset should be applied. - def initialize n=1 + def initialize(n = 1) @n = n end @@ -104,16 +102,16 @@ def freq_string # @abstract # @private class Tick < DateOffsetType - def + date_time - date_time + @n*multiplier + def +(other) + other + (@n * multiplier) end - def - date_time - date_time - @n*multiplier + def -(other) + other - (@n * multiplier) end - def ==(other_obj) - other_obj.is_a?(Tick) && period == other_obj.period + def ==(other) + other.is_a?(Tick) && period == other.period end def period @@ -191,12 +189,12 @@ def multiplier class Month < Tick FREQ = 'MONTH'.freeze - def + date_time - date_time >> @n + def +(other) + other >> @n end - def - date_time - date_time << @n + def -(other) + other << @n end end @@ -210,48 +208,48 @@ def - date_time class Year < Tick FREQ = 'YEAR'.freeze - def + date_time - date_time >> @n*12 + def +(other) + other >> (@n * 12) end - def - date_time - date_time << @n*12 + def -(other) + other << (@n * 12) end end class Week < DateOffset - def initialize *args + def initialize(*args) @n = args[0].is_a?(Hash) ? 1 : args[0] opts = args[-1] @weekday = opts[:weekday] || 0 end - def + date_time - wday = date_time.wday + def +(other) + wday = other.wday distance = (@weekday - wday).abs if @weekday > wday - date_time + distance + 7*(@n-1) + other + distance + (7 * (@n - 1)) else - date_time + (7-distance) + 7*(@n -1) + other + (7 - distance) + (7 * (@n - 1)) end end - def - date_time - wday = date_time.wday + def -(other) + wday = other.wday distance = (@weekday - wday).abs if @weekday >= wday - date_time - ((7 - distance) + 7*(@n -1)) + other - ((7 - distance) + (7 * (@n - 1))) else - date_time - (distance + 7*(@n-1)) + other - (distance + (7 * (@n - 1))) end end - def on_offset? date_time + def on_offset?(date_time) date_time.wday == @weekday end def freq_string - (@n == 1 ? '' : @n.to_s) + 'W' + '-' + Daru::DAYS_OF_WEEK.key(@weekday) + "#{@n == 1 ? '' : @n.to_s}W-#{Daru::DAYS_OF_WEEK.key(@weekday)}" end end @@ -265,27 +263,27 @@ def freq_string class MonthBegin < DateOffsetType FREQ = 'MB'.freeze - def + date_time + def +(other) @n.times do - days_in_month = Daru::MONTH_DAYS[date_time.month] - days_in_month += 1 if date_time.leap? && date_time.month == 2 - date_time += (days_in_month - date_time.day + 1) + days_in_month = Daru::MONTH_DAYS[other.month] + days_in_month += 1 if other.leap? && other.month == 2 + other += (days_in_month - other.day + 1) end - date_time + other end - def - date_time + def -(other) @n.times do - date_time = date_time << 1 if on_offset?(date_time) - date_time = DateTime.new(date_time.year, date_time.month, 1, - date_time.hour, date_time.min, date_time.sec) + other = other << 1 if on_offset?(other) + other = DateTime.new(other.year, other.month, 1, + other.hour, other.min, other.sec) end - date_time + other end - def on_offset? date_time + def on_offset?(date_time) date_time.day == 1 end end @@ -300,31 +298,31 @@ def on_offset? 
date_time class MonthEnd < DateOffsetType FREQ = 'ME'.freeze - def + date_time + def +(other) @n.times do - date_time = date_time >> 1 if on_offset?(date_time) - days_in_month = Daru::MONTH_DAYS[date_time.month] - days_in_month += 1 if date_time.leap? && date_time.month == 2 + other = other >> 1 if on_offset?(other) + days_in_month = Daru::MONTH_DAYS[other.month] + days_in_month += 1 if other.leap? && other.month == 2 - date_time += (days_in_month - date_time.day) + other += (days_in_month - other.day) end - date_time + other end - def - date_time + def -(other) @n.times do - date_time = date_time << 1 - days_in_month = Daru::MONTH_DAYS[date_time.month] - days_in_month += 1 if date_time.leap? && date_time.month == 2 + other = other << 1 + days_in_month = Daru::MONTH_DAYS[other.month] + days_in_month += 1 if other.leap? && other.month == 2 - date_time += (days_in_month - date_time.day) + other += (days_in_month - other.day) end - date_time + other end - def on_offset? date_time + def on_offset?(date_time) (date_time + 1).day == 1 end end @@ -339,21 +337,21 @@ def on_offset? date_time class YearBegin < DateOffsetType FREQ = 'YB'.freeze - def + date_time - DateTime.new(date_time.year + @n, 1, 1, - date_time.hour,date_time.min, date_time.sec) + def +(other) + DateTime.new(other.year + @n, 1, 1, + other.hour, other.min, other.sec) end - def - date_time - if on_offset?(date_time) - DateTime.new(date_time.year - @n, 1, 1, - date_time.hour,date_time.min, date_time.sec) + def -(other) + if on_offset?(other) + DateTime.new(other.year - @n, 1, 1, + other.hour, other.min, other.sec) else - DateTime.new(date_time.year - (@n-1), 1, 1) + DateTime.new(other.year - (@n - 1), 1, 1) end end - def on_offset? date_time + def on_offset?(date_time) date_time.month == 1 && date_time.day == 1 end end @@ -368,21 +366,21 @@ def on_offset? date_time class YearEnd < DateOffsetType FREQ = 'YE'.freeze - def + date_time - if on_offset?(date_time) - DateTime.new(date_time.year + @n, 12, 31, - date_time.hour, date_time.min, date_time.sec) + def +(other) + if on_offset?(other) + DateTime.new(other.year + @n, 12, 31, + other.hour, other.min, other.sec) else - DateTime.new(date_time.year + (@n-1), 12, 31, - date_time.hour, date_time.min, date_time.sec) + DateTime.new(other.year + (@n - 1), 12, 31, + other.hour, other.min, other.sec) end end - def - date_time - DateTime.new(date_time.year - 1, 12, 31) + def -(other) + DateTime.new(other.year - 1, 12, 31) end - def on_offset? date_time + def on_offset?(date_time) date_time.month == 12 && date_time.day == 31 end end @@ -396,6 +394,4 @@ def on_offset? 
date_time years: Year }.freeze end - - # rubocop:enable Style/OpMethod end diff --git a/lib/daru/extensions/which_dsl.rb b/lib/daru/extensions/which_dsl.rb index 3c9abab1f..dc855d6bd 100644 --- a/lib/daru/extensions/which_dsl.rb +++ b/lib/daru/extensions/which_dsl.rb @@ -30,9 +30,7 @@ def exec end def `(vector_name) - if !@df.has_vector?(vector_name) && @df.has_vector?(vector_name.to_sym) - vector_name = vector_name.to_sym - end + vector_name = vector_name.to_sym if !@df.has_vector?(vector_name) && @df.has_vector?(vector_name.to_sym) VectorWrapper.new(@df[vector_name]) end @@ -40,9 +38,9 @@ class VectorWrapper < SimpleDelegator { :== => :eq, :!= => :not_eq, - :< => :lt, + :< => :lt, :<= => :lteq, - :> => :mt, + :> => :mt, :>= => :mteq, :=~ => :in }.each do |opt, method| diff --git a/lib/daru/formatters/table.rb b/lib/daru/formatters/table.rb index 6a1bbae6d..83378b065 100644 --- a/lib/daru/formatters/table.rb +++ b/lib/daru/formatters/table.rb @@ -1,7 +1,7 @@ module Daru module Formatters class Table - def self.format data, options={} + def self.format(data, options = {}) new(data, options[:headers], options[:row_headers]) .format(options[:threshold], options[:spacing]) end @@ -13,7 +13,7 @@ def initialize(data, headers, row_headers) @row_headers = [''] * @data.to_a.size if @row_headers.empty? end - def format threshold=nil, spacing=nil + def format(threshold = nil, spacing = nil) rows = build_rows(threshold || Daru.max_rows) formatter = construct_formatter rows, spacing || Daru.spacing @@ -23,20 +23,20 @@ def format threshold=nil, spacing=nil private - def build_rows threshold + def build_rows(threshold) @row_headers.first(threshold).zip(@data).map do |(r, datarow)| [*[r].flatten.map(&:to_s), *(datarow || []).map(&method(:pretty_to_s))] end.tap do |rows| unless @headers.empty? spaces_to_add = rows.empty? ? 0 : rows.first.size - @headers.size - rows.unshift [''] * spaces_to_add + @headers.map(&:to_s) + rows.unshift ([''] * spaces_to_add) + @headers.map(&:to_s) end - rows << ['...'] * rows.first.count if @row_headers.count > threshold + rows << (['...'] * rows.first.count) if @row_headers.count > threshold end end - def construct_formatter rows, spacing + def construct_formatter(rows, spacing) width = rows.flatten.map(&:size).max || 0 width = [3, width].max # not less than 'nil' width = [width, spacing].min # not more than max width diff --git a/lib/daru/helpers/array.rb b/lib/daru/helpers/array.rb index f9cbefe1f..14829a85d 100644 --- a/lib/daru/helpers/array.rb +++ b/lib/daru/helpers/array.rb @@ -17,13 +17,13 @@ def recode_repeated(array) .group_by(&:itself) .select { |_, g| g.size > 1 } .map(&:first) - .collect { |n| [n, 0] }.to_h + .to_h { |n| [n, 0] } # ...and use this hash for actual recode array.collect do |n| if counter.key?(n) counter[n] += 1 - new_n = '%s_%d' % [n, counter[n]] + new_n = format('%s_%d', n, counter[n]) n.is_a?(Symbol) ? new_n.to_sym : new_n else n @@ -34,7 +34,7 @@ def recode_repeated(array) def array_of?(array, match) array.is_a?(Array) && !array.empty? && - array.all? { |el| match === el } # rubocop:disable Style/CaseEquality + array.all? 
{ |el| match === el } # rubocop:disable Style/CaseEquality,Performance/RedundantEqualityComparisonBlock end end end diff --git a/lib/daru/index/categorical_index.rb b/lib/daru/index/categorical_index.rb index d8229da24..97951255d 100644 --- a/lib/daru/index/categorical_index.rb +++ b/lib/daru/index/categorical_index.rb @@ -6,10 +6,10 @@ class CategoricalIndex < Index # @example # Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] # # => # - def initialize indexes + def initialize(indexes) # Create a hash to map each category to positional indexes categories = indexes.each_with_index.group_by(&:first) - @cat_hash = categories.map { |cat, group| [cat, group.map(&:last)] }.to_h + @cat_hash = categories.transform_values { |group| group.map(&:last) } # Map each category to a unique integer for effective storage in @array map_cat_int = categories.keys.each_with_index.to_h @@ -29,7 +29,7 @@ def dup # Returns true index or category is valid # @param index [object] the index value to look for # @return [true, false] true if index is included, false otherwise - def include? index + def include?(index) @cat_hash.include? index end @@ -50,15 +50,15 @@ def categories # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] # x.pos :a, 1 # # => [0, 1, 2, 3] - def pos *indexes + def pos(*indexes) positions = indexes.map do |index| if include? index @cat_hash[index] elsif index.is_a?(Numeric) && index < @array.size index else - raise IndexError, "#{index.inspect} is neither a valid category"\ - ' nor a valid position' + raise IndexError, "#{index.inspect} is neither a valid category" \ + ' nor a valid position' end end @@ -73,7 +73,7 @@ def pos *indexes # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c] # idx.index_from_pos 1 # # => :b - def index_from_pos pos + def index_from_pos(pos) cat_from_int @array[pos] end @@ -85,6 +85,7 @@ def index_from_pos pos # # => [:a, :a, :b] def each return enum_for(:each) unless block_given? + @array.each { |pos| yield cat_from_int pos } self end @@ -98,7 +99,7 @@ def each # b = Daru::CategoricalIndex.new [:b, :a, :a] # a == b # # => false - def == other + def ==(other) self.class == other.class && size == other.size && to_h == other.to_h @@ -152,7 +153,7 @@ def empty? # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c] # idx.subset :a, :b # # => # - def subset *indexes + def subset(*indexes) positions = pos(*indexes) new_index = positions.map { |pos| index_from_pos pos } @@ -167,7 +168,7 @@ def subset *indexes # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c] # idx.at 0, 1 # # => # - def at *positions + def at(*positions) positions = preprocess_positions(*positions) validate_positions(*positions) if positions.is_a? Integer @@ -184,17 +185,17 @@ def at *positions # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c] # idx.add :d # # => # - def add *indexes + def add(*indexes) Daru::CategoricalIndex.new(to_a + indexes) end private - def int_from_cat cat + def int_from_cat(cat) @cat_hash.keys.index cat end - def cat_from_int cat + def cat_from_int(cat) @cat_hash.keys[cat] end end diff --git a/lib/daru/index/index.rb b/lib/daru/index/index.rb index d991cb974..48db57496 100644 --- a/lib/daru/index/index.rb +++ b/lib/daru/index/index.rb @@ -9,18 +9,18 @@ class Index # rubocop:disable Metrics/ClassLength # Object) is once again the default .new for the subclass. 
# Refer http://blog.sidu.in/2007/12/rubys-new-as-factory.html class << self - alias :__new__ :new + alias __new__ new - def inherited subclass + def inherited(subclass) class << subclass - alias :new :__new__ + alias_method :new, :__new__ end end end # We over-ride the .new method so that any sort of Index can be generated # from Daru::Index based on the types of arguments supplied. - def self.new *args, &block + def self.new(*args, &block) # FIXME: I'm not sure this clever trick really deserves our attention. # Most of common ruby libraries just avoid it in favor of usual # factor method, like `Index.create`. When `Index.new(...).class != Index` @@ -32,12 +32,12 @@ def self.new *args, &block allocate.tap { |i| i.send :initialize, *args, &block } end - def self.coerce maybe_index + def self.coerce(maybe_index) maybe_index.is_a?(Index) ? maybe_index : Daru::Index.new(maybe_index) end def each(&block) - return to_enum(:each) unless block_given? + return to_enum(:each) unless block @relation_hash.each_key(&block) self @@ -63,7 +63,7 @@ def each(&block) # # idx = Daru::Index.new [:one, 'one', 1, 2, :two], name: "index_name" # => # - def initialize index, opts={} + def initialize(index, opts = {}) index = guess_index index @relation_hash = index.each_with_index.to_h.freeze @keys = @relation_hash.keys @@ -79,10 +79,9 @@ def ==(other) end def [](key, *rest) - case - when key.is_a?(Range) + if key.is_a?(Range) by_range key - when !rest.empty? + elsif !rest.empty? by_multi_key key, *rest else by_single_key key @@ -97,7 +96,7 @@ def [](key, *rest) # # => true # idx.valid? 3 # # => false - def valid? *indexes + def valid?(*indexes) indexes.all? { |i| to_a.include?(i) || (i.is_a?(Numeric) && i < size) } end @@ -109,7 +108,7 @@ def valid? *indexes # x = Daru::Index.new [:a, :b, :c] # x.pos :a, 1 # # => [0, 1] - def pos *indexes + def pos(*indexes) indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range if indexes.size == 1 @@ -119,7 +118,7 @@ def pos *indexes end end - def subset *indexes + def subset(*indexes) if indexes.first.is_a? Range start = indexes.first.begin en = indexes.first.end @@ -142,7 +141,7 @@ def subset *indexes # idx = Daru::Index.new [:a, :b, :c] # idx.at 0, 1 # # => # - def at *positions + def at(*positions) positions = preprocess_positions(*positions) validate_positions(*positions) if positions.is_a? Integer @@ -152,7 +151,7 @@ def at *positions end end - def inspect threshold=20 + def inspect(threshold = 20) name_part = @name ? "#{@name} " : '' if size <= threshold "#<#{self.class}(#{size}): #{name_part}{#{to_a.join(', ')}}>" @@ -161,7 +160,7 @@ def inspect threshold=20 end end - def slice *args + def slice(*args) start = args[0] en = args[1] @@ -171,13 +170,13 @@ def slice *args if start_idx.nil? nil elsif en_idx.nil? - Array(start_idx..size-1) + Array(start_idx..size - 1) else Array(start_idx..en_idx) end end - def subset_slice *args + def subset_slice(*args) start = args[0] en = args[1] @@ -196,7 +195,7 @@ def |(other) end # Produce a new index from the set intersection of two indexes - def & other + def &(other) Index.new(to_a & other.to_a) end @@ -206,10 +205,11 @@ def to_a def key(value) return nil unless value.is_a?(Numeric) + @keys[value] end - def include? index + def include?(index) @relation_hash.key? index end @@ -229,7 +229,7 @@ def include? 
index # # 2 false # # 3 false # # 4 true - def is_values(*indexes) # rubocop:disable Style/PredicateName + def is_values(*indexes) # rubocop:disable Naming/PredicateName bool_array = @keys.map { |r| indexes.include?(r) } Daru::Vector.new(bool_array) end @@ -242,7 +242,7 @@ def dup Daru::Index.new @keys, name: @name end - def add *indexes + def add(*indexes) Daru::Index.new(to_a + indexes) end @@ -250,7 +250,7 @@ def _dump(*) Marshal.dump(relation_hash: @relation_hash) end - def self._load data + def self._load(data) h = Marshal.load data Daru::Index.new(h[:relation_hash].keys) @@ -283,8 +283,8 @@ def reorder(new_order) # di.sort(ascending: false) #=> Daru::Index.new [101, 100, 99, 2, 1] # # Say you want to sort in ascending order # di.sort #=> Daru::Index.new [1, 2, 99, 100, 101] - def sort opts={} - opts = {ascending: true}.merge(opts) + def sort(opts = {}) + opts = { ascending: true }.merge(opts) new_index = @keys.sort new_index = new_index.reverse unless opts[:ascending] @@ -298,7 +298,7 @@ def to_df private - def guess_index index + def guess_index(index) case index when nil [] @@ -308,11 +308,11 @@ def guess_index index index.to_a else raise ArgumentError, - "Cannot create index from #{index.class} #{index.inspect}" + "Cannot create index from #{index.class} #{index.inspect}" end end - def preprocess_range rng + def preprocess_range(rng) start = rng.begin en = rng.end @@ -326,31 +326,27 @@ def preprocess_range rng end end - def by_range rng + def by_range(rng) slice rng.begin, rng.end end - def by_multi_key *key + def by_multi_key(*key) key.map { |k| by_single_key k } end - def by_single_key key - if @relation_hash.key?(key) - @relation_hash[key] - else - nil - end + def by_single_key(key) + @relation_hash[key] if @relation_hash.key?(key) end # Raises IndexError when one of the positions is an invalid position - def validate_positions *positions + def validate_positions(*positions) positions.each do |pos| raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size end end # Preprocess ranges, integers and array in appropriate ways - def preprocess_positions *positions + def preprocess_positions(*positions) if positions.size == 1 case positions.first when Integer @@ -365,7 +361,7 @@ def preprocess_positions *positions end end - def numeric_pos key + def numeric_pos(key) if @relation_hash.key?(key) @relation_hash[key] elsif key.is_a?(Numeric) && (key < size && key >= -size) diff --git a/lib/daru/index/multi_index.rb b/lib/daru/index/multi_index.rb index a37462d16..803a8aee5 100644 --- a/lib/daru/index/multi_index.rb +++ b/lib/daru/index/multi_index.rb @@ -8,8 +8,7 @@ def map(&block) to_a.map(&block) end - attr_reader :labels - attr_reader :name + attr_reader :labels, :name def levels @levels.map(&:keys) @@ -77,7 +76,7 @@ def levels # # c one # # two # - def initialize opts={} + def initialize(opts = {}) labels = opts[:labels] levels = opts[:levels] @@ -103,7 +102,7 @@ def incorrect_fields?(_labels, levels) private :incorrect_fields? 
- def self.from_arrays arrays + def self.from_arrays(arrays) levels = arrays.map { |e| e.uniq.sort_by(&:to_s) } labels = arrays.each_with_index.map do |arry, level_index| @@ -114,24 +113,19 @@ def self.from_arrays arrays MultiIndex.new labels: labels, levels: levels end - def self.from_tuples tuples + def self.from_tuples(tuples) from_arrays tuples.transpose end - def self.try_from_tuples tuples - if tuples.respond_to?(:first) && tuples.first.is_a?(Array) - from_tuples(tuples) - else - nil - end + def self.try_from_tuples(tuples) + from_tuples(tuples) if tuples.respond_to?(:first) && tuples.first.is_a?(Array) end - def [] *key + def [](*key) key.flatten! - case - when key[0].is_a?(Range) + if key[0].is_a?(Range) retrieve_from_range(key[0]) - when key[0].is_a?(Integer) && key.size == 1 + elsif key[0].is_a?(Integer) && key.size == 1 try_retrieve_from_integer(key[0]) else begin @@ -142,12 +136,12 @@ def [] *key end end - def valid? *indexes + def valid?(*indexes) # FIXME: This is perhaps not a good method pos(*indexes) - return true + true rescue IndexError - return false + false end # Returns positions given indexes or positions @@ -158,17 +152,19 @@ def valid? *indexes # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]] # idx.pos :a # # => [0, 1] - def pos *indexes + def pos(*indexes) if indexes.first.is_a? Integer return indexes.first if indexes.size == 1 + return indexes end res = self[indexes] return res if res.is_a? Integer + res.map { |i| self[i] } end - def subset *indexes + def subset(*indexes) if indexes.first.is_a? Integer MultiIndex.from_tuples(indexes.map { |index| key(index) }) else @@ -186,7 +182,7 @@ def subset *indexes # # => # # # a one # # two - def at *positions + def at(*positions) positions = preprocess_positions(*positions) validate_positions(*positions) if positions.is_a? Integer @@ -196,7 +192,7 @@ def at *positions end end - def add *indexes + def add(*indexes) Daru::MultiIndex.from_tuples(to_a + [indexes]) end @@ -205,33 +201,35 @@ def reorder(new_order) MultiIndex.from_tuples(new_order.map { |i| from[i] }) end - def try_retrieve_from_integer int + def try_retrieve_from_integer(int) @levels[0].key?(int) ? retrieve_from_tuples([int]) : int end - def retrieve_from_range range + def retrieve_from_range(range) MultiIndex.from_tuples(range.map { |index| key(index) }) end - def retrieve_from_tuples key + def retrieve_from_tuples(key) chosen = [] key.each_with_index do |k, depth| level_index = @levels[depth][k] raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil? + label = @labels[depth] chosen = find_all_indexes label, level_index, chosen end return chosen[0] if chosen.size == 1 && key.size == @levels.size + multi_index_from_multiple_selections(chosen) end - def multi_index_from_multiple_selections chosen + def multi_index_from_multiple_selections(chosen) MultiIndex.from_tuples(chosen.map { |e| key(e) }) end - def find_all_indexes label, level_index, chosen + def find_all_indexes(label, level_index, chosen) if chosen.empty? label.each_with_index .select { |lbl, _| lbl == level_index }.map(&:last) @@ -240,10 +238,10 @@ def find_all_indexes label, level_index, chosen end end - def remove_layer layer_index + def remove_layer(layer_index) @levels.delete_at(layer_index) @labels.delete_at(layer_index) - @name.delete_at(layer_index) unless @name.nil? + @name&.delete_at(layer_index) coerce_index end @@ -263,21 +261,21 @@ def coerce_index end # Array `name` must have same length as levels and labels. 
- def validate_name names, levels - error_msg = "'names' and 'levels' should be of same size. Size of the "\ - "'name' array is #{names.size} and size of the MultiIndex 'levels' and "\ - "'labels' is #{labels.size}." + def validate_name(names, levels) + error_msg = "'names' and 'levels' should be of same size. Size of the " \ + "'name' array is #{names.size} and size of the MultiIndex 'levels' and " \ + "'labels' is #{labels.size}." suggestion_msg = "If you don\'t want to set name for particular level " \ - "(say level 'i') then put empty string on index 'i' of the 'name' Array." + "(say level 'i') then put empty string on index 'i' of the 'name' Array." raise SizeError, error_msg if names.size > levels.size raise SizeError, [error_msg, suggestion_msg].join("\n") if names.size < levels.size end private :find_all_indexes, :multi_index_from_multiple_selections, - :retrieve_from_range, :retrieve_from_tuples, :validate_name + :retrieve_from_range, :retrieve_from_tuples, :validate_name - def key index + def key(index) raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size @labels @@ -289,15 +287,15 @@ def dup MultiIndex.new levels: levels.dup, labels: labels.dup, name: (@name.nil? ? nil : @name.dup) end - def drop_left_level by=1 - MultiIndex.from_arrays to_a.transpose[by..-1] + def drop_left_level(by = 1) + MultiIndex.from_arrays to_a.transpose[by..] end - def | other + def |(other) MultiIndex.from_tuples(to_a | other.to_a) end - def & other + def &(other) MultiIndex.from_tuples(to_a & other.to_a) end @@ -305,8 +303,9 @@ def empty? @labels.flatten.empty? && @levels.all?(&:empty?) end - def include? tuple + def include?(tuple) return false unless tuple.is_a? Enumerable + @labels[0...tuple.flatten.size] .transpose .include?(tuple.flatten.each_with_index.map { |e, i| @levels[i][e] }) @@ -320,7 +319,7 @@ def width @levels.size end - def == other + def ==(other) self.class == other.class && labels == other.labels && levels == other.levels @@ -334,13 +333,13 @@ def values Array.new(size) { |i| i } end - def inspect threshold=20 + def inspect(threshold = 20) "#\n" + Formatters::Table.format([], headers: @name, row_headers: sparse_tuples, threshold: threshold) end def to_html - path = File.expand_path('../../iruby/templates/multi_index.html.erb', __FILE__) + path = File.expand_path('../iruby/templates/multi_index.html.erb', __dir__) ERB.new(File.read(path).strip).result(binding) end @@ -348,8 +347,9 @@ def to_html # # @param input_indexes [Array] the input by user to index the vector # @return [Object] the MultiIndex object for sub vector produced - def conform input_indexes + def conform(input_indexes) return self if input_indexes[0].is_a? Range + drop_left_level input_indexes.size end @@ -361,10 +361,10 @@ def conform input_indexes # def sparse_tuples tuples = to_a - [tuples.first] + each_cons(2).map { |prev, cur| + [tuples.first] + each_cons(2).map do |prev, cur| left = cur.zip(prev).drop_while { |c, p| c == p } - [nil] * (cur.size - left.size) + left.map(&:first) - } + ([nil] * (cur.size - left.size)) + left.map(&:first) + end end def to_df diff --git a/lib/daru/io/io.rb b/lib/daru/io/io.rb index c19c0abfc..447ec5052 100644 --- a/lib/daru/io/io.rb +++ b/lib/daru/io/io.rb @@ -1,8 +1,8 @@ module Daru - require_relative 'csv/converters.rb' + require_relative 'csv/converters' module IOHelpers class << self - def process_row(row,empty) + def process_row(row, empty) row.to_a.map do |c| if empty.include?(c) # FIXME: As far as I can guess, it will never work. 
@@ -18,8 +18,8 @@ def process_row(row,empty) private - INT_PATTERN = /^[-+]?\d+$/ - FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/ + INT_PATTERN = /^[-+]?\d+$/.freeze + FLOAT_PATTERN = /^[-+]?\d+[,.]?\d*(e-?\d+)?$/.freeze def try_string_to_number(s) case s @@ -38,7 +38,7 @@ module IO class << self # Functions for loading/writing Excel files. - def from_excel path, opts={} + def from_excel(path, opts = {}) opts = { worksheet_id: 0, row_id: 0 @@ -46,7 +46,7 @@ def from_excel path, opts={} worksheet, headers = read_from_excel(path, opts) df = Daru::DataFrame.new({}) - headers.each_with_index do |h,i| + headers.each_with_index do |h, i| col = worksheet.column(i).to_a col.delete_at 0 df[h] = col @@ -55,7 +55,7 @@ def from_excel path, opts={} df end - def read_from_excel path, opts + def read_from_excel(path, opts) optional_gem 'spreadsheet', '~>1.3.0' worksheet_id = opts[:worksheet_id] @@ -67,7 +67,7 @@ def read_from_excel path, opts [worksheet, headers] end - def dataframe_write_excel dataframe, path, _opts={} + def dataframe_write_excel(dataframe, path, _opts = {}) book = Spreadsheet::Workbook.new sheet = book.create_worksheet format = Spreadsheet::Format.new color: :blue, weight: :bold @@ -84,7 +84,7 @@ def dataframe_write_excel dataframe, path, _opts={} end # Functions for loading/writing CSV files - def from_csv path, opts={} + def from_csv(path, opts = {}) daru_options, opts = from_csv_prepare_opts opts # Preprocess headers for detecting and correcting repetition in # case the :headers option is not specified. @@ -95,10 +95,10 @@ def from_csv path, opts={} from_csv_hash(path, opts) .tap { |hash| daru_options[:order] = hash.keys } end - Daru::DataFrame.new(hsh,daru_options) + Daru::DataFrame.new(hsh, daru_options) end - def dataframe_write_csv dataframe, path, opts={} + def dataframe_write_csv(dataframe, path, opts = {}) options = { converters: :numeric }.merge(opts) @@ -128,9 +128,9 @@ def from_sql(db, query) SqlDataSource.make_dataframe(db, query) end - def dataframe_write_sql ds, dbh, table + def dataframe_write_sql(ds, dbh, table) require 'dbi' - query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(',')+') VALUES ('+(['?']*ds.vectors.size).join(',')+')' + query = "INSERT INTO #{table} (#{ds.vectors.to_a.join(',')}) VALUES (#{(['?'] * ds.vectors.size).join(',')})" sth = dbh.prepare(query) ds.each_row { |c| sth.execute(*c.to_a) } true @@ -151,12 +151,13 @@ def from_activerecord(relation, *fields) # Loading data from plain text files - def from_plaintext filename, fields + def from_plaintext(filename, fields) ds = Daru::DataFrame.new({}, order: fields) - fp = File.open(filename,'r') + fp = File.open(filename, 'r') fp.each_line do |line| - row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),['']) + row = Daru::IOHelpers.process_row(line.strip.split(/\s+/), ['']) next if row == ["\x1A"] + ds.add_row(row) end ds.update @@ -165,13 +166,13 @@ def from_plaintext filename, fields end # Loading and writing Marshalled DataFrame/Vector - def save klass, filename + def save(klass, filename) fp = File.open(filename, 'w') Marshal.dump(klass, fp) fp.close end - def load filename + def load(filename) if File.exist? filename o = false File.open(filename, 'r') { |fp| o = Marshal.load(fp) } @@ -187,13 +188,13 @@ def optional_gem(name, version) gem name, version require name rescue LoadError - Daru.error "\nInstall the #{name} gem version #{version} for using"\ - " #{name} functions." + Daru.error "\nInstall the #{name} gem version #{version} for using" \ + " #{name} functions." 
end DARU_OPT_KEYS = %i[clone order index name].freeze - def from_csv_prepare_opts opts + def from_csv_prepare_opts(opts) opts[:col_sep] ||= ',' opts[:skip_blanks] ||= true opts[:converters] ||= [:numeric] @@ -223,7 +224,7 @@ def from_csv_hash_with_headers(path, opts) ::CSV .parse(open(path), opts) .tap { |c| yield c if block_given? } - .by_col.map { |col_name, values| [col_name, values] }.to_h + .by_col.to_h { |col_name, values| [col_name, values] } end def from_csv_hash(path, opts) @@ -234,16 +235,17 @@ def from_csv_hash(path, opts) .to_a headers = ArrayHelper.recode_repeated(csv_as_arrays.shift) csv_as_arrays = csv_as_arrays.transpose - headers.each_with_index.map { |h, i| [h, csv_as_arrays[i]] }.to_h + headers.each_with_index.to_h { |h, i| [h, csv_as_arrays[i]] } end def html_parse_table(table) - headers, headers_size = html_scrape_tag(table,'th') + headers, headers_size = html_scrape_tag(table, 'th') data, size = html_scrape_tag(table, 'td') data = data.keep_if { |x| x.count == size } order, indice = html_parse_hash(headers, size, headers_size) if headers_size >= size - return unless (indice.nil? || indice.count == data.count) && !order.nil? && order.count>0 - {data: data.compact, index: indice, order: order} + return unless (indice.nil? || indice.count == data.count) && !order.nil? && order.count.positive? + + { data: data.compact, index: indice, order: order } end def html_scrape_tag(table, tag) @@ -259,18 +261,18 @@ def html_parse_hash(headers, size, headers_size) headers_index = headers.find_index { |x| x.count == headers_size } order = headers[headers_index] order_index = order.count - size - order = order[order_index..-1] - indice = headers[headers_index+1..-1].flatten + order = order[order_index..] + indice = headers[headers_index + 1..].flatten indice = nil if indice.to_a.empty? [order, indice] end - def html_search(table, match=nil) + def html_search(table, match = nil) match.nil? ? true : (table.to_s.include? 
match) end # Allows user to override the scraped order / index / data - def html_decide_values(scraped_val={}, user_val={}) + def html_decide_values(scraped_val = {}, user_val = {}) %I[data index name order].each do |key| user_val[key] ||= scraped_val[key] end @@ -279,9 +281,9 @@ def html_decide_values(scraped_val={}, user_val={}) def html_table_to_dataframe(table) Daru::DataFrame.rows table[:data], - index: table[:index], - order: table[:order], - name: table[:name] + index: table[:index], + order: table[:order], + name: table[:name] end end end diff --git a/lib/daru/iruby/helpers.rb b/lib/daru/iruby/helpers.rb index e839c1684..9ee54777a 100644 --- a/lib/daru/iruby/helpers.rb +++ b/lib/daru/iruby/helpers.rb @@ -20,18 +20,18 @@ def tuples_with_colspans(index) # It does [:a, nil, nil, :b, nil, :c] # => # [[:a,3], nil, nil, [:b,2], nil, :c] # Needed by tuples_with_colspans/rowspans, which we need for pretty HTML - def nils_counted array + def nils_counted(array) grouped = [[array.first]] - array[1..-1].each do |val| + array[1..].each do |val| if val grouped << [val] else grouped.last << val end end - grouped.flat_map { |items| + grouped.flat_map do |items| [[items.first, items.count], *[nil] * (items.count - 1)] - } + end end end end diff --git a/lib/daru/maths/arithmetic/dataframe.rb b/lib/daru/maths/arithmetic/dataframe.rb index 2eb905951..15d484d76 100644 --- a/lib/daru/maths/arithmetic/dataframe.rb +++ b/lib/daru/maths/arithmetic/dataframe.rb @@ -4,32 +4,32 @@ module Maths module Arithmetic module DataFrame # Add a scalar or another DataFrame - def + other + def +(other) binary_operation :+, other end # Subtract a scalar or another DataFrame. - def - other + def -(other) binary_operation :-, other end # Multiply a scalar or another DataFrame. - def * other + def *(other) binary_operation :*, other end # Divide a scalar or another DataFrame. - def / other + def /(other) binary_operation :/, other end # Modulus with a scalar or another DataFrame. - def % other + def %(other) binary_operation :%, other end # Exponent with a scalar or another DataFrame. - def ** other + def **(other) binary_operation :**, other end @@ -43,13 +43,13 @@ def sqrt only_numerics(clone: false).recode(&:sqrt) end - def round precision=0 + def round(precision = 0) only_numerics(clone: false).recode { |v| v.round(precision) } end private - def binary_operation operation, other + def binary_operation(operation, other) case other when Daru::DataFrame dataframe_binary_operation operation, other @@ -58,21 +58,21 @@ def binary_operation operation, other end end - def dataframe_binary_operation operation, other + def dataframe_binary_operation(operation, other) all_vectors = (vectors.to_a | other.vectors.to_a).sort all_indexes = (index.to_a | other.index.to_a).sort hsh = - all_vectors.map do |vector_name| + all_vectors.to_h do |vector_name| vector = dataframe_binary_operation_on_vectors other, vector_name, operation, all_indexes [vector_name, vector] - end.to_h + end Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype) end - def dataframe_binary_operation_on_vectors other, name, operation, indexes + def dataframe_binary_operation_on_vectors(other, name, operation, indexes) if has_vector?(name) && other.has_vector?(name) self[name].send(operation, other[name]) else @@ -80,7 +80,7 @@ def dataframe_binary_operation_on_vectors other, name, operation, indexes end end - def scalar_binary_operation operation, other + def scalar_binary_operation(operation, other) dup.map_vectors! do |vector| vector.numeric? ? 
vector.send(operation, other) : vector end diff --git a/lib/daru/maths/arithmetic/vector.rb b/lib/daru/maths/arithmetic/vector.rb index e914903e1..39b293155 100644 --- a/lib/daru/maths/arithmetic/vector.rb +++ b/lib/daru/maths/arithmetic/vector.rb @@ -2,27 +2,27 @@ module Daru module Maths module Arithmetic module Vector - def + other + def +(other) binary_op :+, other end - def - other + def -(other) binary_op :-, other end - def * other + def *(other) binary_op :*, other end - def / other + def /(other) binary_op :/, other end - def % other + def %(other) binary_op :%, other end - def ** other + def **(other) binary_op :**, other end @@ -35,11 +35,11 @@ def sqrt end def abs - recode { |e| e.abs unless e.nil? } + recode { |e| e&.abs } end - def round precision=0 - recode { |e| e.round(precision) unless e.nil? } + def round(precision = 0) + recode { |e| e&.round(precision) } end # Add specified vector. @@ -66,17 +66,17 @@ def round precision=0 # 2 3 # 3 0 # - def add other, opts={} + def add(other, opts = {}) v2v_binary :+, other, skipnil: opts.fetch(:skipnil, false) end private - def math_unary_op operation + def math_unary_op(operation) recode { |e| Math.send(operation, e) unless e.nil? } end - def binary_op operation, other + def binary_op(operation, other) case other when Daru::Vector v2v_binary operation, other @@ -85,12 +85,12 @@ def binary_op operation, other end end - def v2o_binary operation, other + def v2o_binary(operation, other) Daru::Vector.new map { |e| e.nil? ? nil : e.send(operation, other) }, - name: @name, index: @index + name: @name, index: @index end - def v2v_binary operation, other, opts={} + def v2v_binary(operation, other, opts = {}) # FIXME: why the sorting?.. - zverok, 2016-05-18 index = (@index.to_a | other.index.to_a).sort diff --git a/lib/daru/maths/statistics/dataframe.rb b/lib/daru/maths/statistics/dataframe.rb index fc7d678ae..c9e51d655 100644 --- a/lib/daru/maths/statistics/dataframe.rb +++ b/lib/daru/maths/statistics/dataframe.rb @@ -29,7 +29,7 @@ module DataFrame end # Calculate the maximum value of each numeric vector. - def max opts={} + def max(opts = {}) if opts[:vector] row[*self[opts[:vector]].max_index.index.to_a] else @@ -87,7 +87,7 @@ def max opts={} # +methods+ - An array with aggregation methods specified as symbols to # be applied to numeric vectors. Default is [:count, :mean, :std, :max, # :min]. Methods will be applied in the specified order. - def describe methods=nil + def describe(methods = nil) methods ||= %i[count mean std min max] description_hash = {} @@ -121,7 +121,7 @@ def describe methods=nil # # four 0.3333333333333333 0.3333333333333333 # # five 0.25 0.25 # # six 0.2 0.2 - def percent_change periods=1 + def percent_change(periods = 1) df_numeric = only_numerics.vectors.to_a df = Daru::DataFrame.new({}, order: @order, index: @index, name: @name) df_numeric.each do |vec| @@ -133,7 +133,7 @@ def percent_change periods=1 # Calculate sample variance-covariance between the numeric vectors. def covariance cache = Hash.new do |h, (col, row)| - value = vector_cov(self[row],self[col]) + value = vector_cov(self[row], self[col]) h[[col, row]] = value h[[row, col]] = value end @@ -141,14 +141,14 @@ def covariance mat_rows = vectors.collect do |row| vectors.collect do |col| - row == col ? self[row].variance : cache[[col,row]] + row == col ? 
self[row].variance : cache[[col, row]] end end Daru::DataFrame.rows(mat_rows, index: numeric_vectors, order: numeric_vectors) end - alias :cov :covariance + alias cov covariance # Calculate the correlation between the numeric vectors. def correlation @@ -161,11 +161,11 @@ def correlation Daru::DataFrame.rows(corr_arry, index: numeric_vectors, order: numeric_vectors) end - alias :corr :correlation + alias corr correlation private - def apply_method_to_numerics method, *args + def apply_method_to_numerics(method, *args) numerics = @vectors.to_a.map { |n| [n, @data[@vectors[n]]] } .select { |_n, v| v.numeric? } computed = numerics.map { |_n, v| v.send(method, *args) } @@ -173,28 +173,29 @@ def apply_method_to_numerics method, *args Daru::DataFrame.new(computed, index: @index, order: numerics.map(&:first), clone: false) end - def vector_cov v1a, v2a - sum_of_squares(v1a,v2a) / (v1a.size - 1) + def vector_cov(v1a, v2a) + sum_of_squares(v1a, v2a) / (v1a.size - 1) end - def sum_of_squares v1, v2 - v1a,v2a = v1.reject_values(*Daru::MISSING_VALUES),v2.reject_values(*Daru::MISSING_VALUES) + def sum_of_squares(v1, v2) + v1a = v1.reject_values(*Daru::MISSING_VALUES) + v2a = v2.reject_values(*Daru::MISSING_VALUES) v1a.reset_index! v2a.reset_index! m1 = v1a.mean m2 = v2a.mean - v1a.size.times.inject(0) { |ac,i| ac+(v1a[i]-m1)*(v2a[i]-m2) } + v1a.size.times.inject(0) { |ac, i| ac + ((v1a[i] - m1) * (v2a[i] - m2)) } end - def compute_stats method + def compute_stats(method) Daru::Vector.new( numeric_vectors.each_with_object({}) do |vec, hash| hash[vec] = self[vec].send(method) end, name: method ) end - alias :sds :std - alias :variance :variance_sample + alias sds std + alias variance variance_sample end end end diff --git a/lib/daru/maths/statistics/vector.rb b/lib/daru/maths/statistics/vector.rb index 12ebc391a..4a47674f1 100644 --- a/lib/daru/maths/statistics/vector.rb +++ b/lib/daru/maths/statistics/vector.rb @@ -28,7 +28,7 @@ def median end def mode - mode = frequencies.to_h.select { |_,v| v == frequencies.max }.keys + mode = frequencies.to_h.select { |_, v| v == frequencies.max }.keys mode.size > 1 ? Daru::Vector.new(mode) : mode.first end @@ -40,7 +40,7 @@ def mode # +methods+ - An array with aggregation methods specified as symbols to # be applied to vectors. Default is [:count, :mean, :std, :max, # :min]. Methods will be applied in the specified order. 
- def describe methods=nil + def describe(methods = nil) methods ||= %i[count mean std min max] description = methods.map { |m| send(m) } Daru::Vector.new(description, index: methods, name: :statistics) @@ -51,14 +51,14 @@ def median_absolute_deviation recode { |val| (val - m).abs }.median end - alias :mad :median_absolute_deviation + alias mad median_absolute_deviation def standard_error - standard_deviation_sample/Math.sqrt(size - count_values(*Daru::MISSING_VALUES)) + standard_deviation_sample / Math.sqrt(size - count_values(*Daru::MISSING_VALUES)) end def sum_of_squared_deviation - (@data.inject(0) { |a,x| x**2 + a } - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f + (@data.inject(0) { |a, x| (x**2) + a } - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f end # Retrieve unique values of non-nil data @@ -85,7 +85,7 @@ def factors # # dv.max(2) { |a,b| a.size <=> b.size } # #=> ["Jon Starkgaryen","Daenerys"] - def max(size=nil, &block) + def max(size = nil, &block) reject_values(*Daru::MISSING_VALUES).to_a.max(size, &block) end @@ -104,8 +104,9 @@ def max(size=nil, &block) # # dv.max_by(2) { |i| i.size } # #=> ["Jon Starkgaryen","Daenerys"] - def max_by(size=nil, &block) - raise ArgumentError, 'Expected compulsory object block in max_by method' unless block_given? + def max_by(size = nil, &block) + raise ArgumentError, 'Expected compulsory object block in max_by method' unless block + reject_values(*Daru::MISSING_VALUES).to_a.max_by(size, &block) end @@ -127,7 +128,7 @@ def max_by(size=nil, &block) # # dv.min(2) { |a,b| a.size <=> b.size } # #=> ["Tyrion","Daenerys"] - def min(size=nil, &block) + def min(size = nil, &block) reject_values(*Daru::MISSING_VALUES).to_a.min(size, &block) end @@ -146,8 +147,9 @@ def min(size=nil, &block) # # dv.min_by(2) { |i| i.size } # #=> ["Tyrion","Daenerys"] - def min_by(size=nil, &block) - raise ArgumentError, 'Expected compulsory object block in min_by method' unless block_given? + def min_by(size = nil, &block) + raise ArgumentError, 'Expected compulsory object block in min_by method' unless block + reject_values(*Daru::MISSING_VALUES).to_a.min_by(size, &block) end else @@ -169,8 +171,8 @@ def min_by(size=nil, &block) # # dv.max(2) { |a,b| a.size <=> b.size } # #=> ["Jon Starkgaryen","Daenerys"] - def max(size=nil, &block) - range = size.nil? ? 0 : (0..size-1) + def max(size = nil, &block) + range = size.nil? ? 0 : (0..size - 1) reject_values(*Daru::MISSING_VALUES).to_a.sort(&block).reverse[range] end @@ -189,9 +191,10 @@ def max(size=nil, &block) # # dv.max_by(2) { |i| i.size } # #=> ["Jon Starkgaryen","Daenerys"] - def max_by(size=nil, &block) - raise ArgumentError, 'Expected compulsory object block in max_by method' unless block_given? - reject_values(*Daru::MISSING_VALUES).to_a.sort_by(&block).reverse[size.nil? ? 0 : (0..size-1)] + def max_by(size = nil, &block) + raise ArgumentError, 'Expected compulsory object block in max_by method' unless block + + reject_values(*Daru::MISSING_VALUES).to_a.sort_by(&block).reverse[size.nil? ? 0 : (0..size - 1)] end # Returns the minimum value(s) present in the vector, with an optional comparator block. @@ -212,8 +215,8 @@ def max_by(size=nil, &block) # # dv.min(2) { |a,b| a.size <=> b.size } # #=> ["Tyrion","Daenerys"] - def min(size=nil, &block) - range = size.nil? ? 0 : (0..size-1) + def min(size = nil, &block) + range = size.nil? ? 
0 : (0..size - 1) reject_values(*Daru::MISSING_VALUES).to_a.sort(&block)[range] end @@ -235,9 +238,10 @@ def min(size=nil, &block) # # dv.min_by(2) { |i| i.size } # #=> ["Tyrion","Daenerys"] - def min_by(size=nil, &block) - raise ArgumentError, 'Expected compulsory object block in min_by method' unless block_given? - reject_values(*Daru::MISSING_VALUES).to_a.sort_by(&block)[size.nil? ? 0 : (0..size-1)] + def min_by(size = nil, &block) + raise ArgumentError, 'Expected compulsory object block in min_by method' unless block + + reject_values(*Daru::MISSING_VALUES).to_a.sort_by(&block)[size.nil? ? 0 : (0..size - 1)] end end @@ -260,7 +264,7 @@ def min_by(size=nil, &block) # # dv.index_of_max(2) { |a,b| a.size <=> b.size } # #=> [:j, :d] - def index_of_max(size=nil,&block) + def index_of_max(size = nil, &block) vals = max(size, &block) dv = reject_values(*Daru::MISSING_VALUES) vals.is_a?(Array) ? (vals.map { |x| dv.index_of(x) }) : dv.index_of(vals) @@ -282,7 +286,7 @@ def index_of_max(size=nil,&block) # # dv.index_of_max_by(2) { |i| i.size } # #=> [:j, :d] - def index_of_max_by(size=nil,&block) + def index_of_max_by(size = nil, &block) vals = max_by(size, &block) dv = reject_values(*Daru::MISSING_VALUES) vals.is_a?(Array) ? (vals.map { |x| dv.index_of(x) }) : dv.index_of(vals) @@ -307,7 +311,7 @@ def index_of_max_by(size=nil,&block) # # dv.index_of_min(2) { |a,b| a.size <=> b.size } # #=> [:t, :d] - def index_of_min(size=nil,&block) + def index_of_min(size = nil, &block) vals = min(size, &block) dv = reject_values(*Daru::MISSING_VALUES) vals.is_a?(Array) ? (vals.map { |x| dv.index_of(x) }) : dv.index_of(vals) @@ -329,7 +333,7 @@ def index_of_min(size=nil,&block) # # dv.index_of_min(2) { |i| i.size } # #=> [:t, :d] - def index_of_min_by(size=nil,&block) + def index_of_min_by(size = nil, &block) vals = min_by(size, &block) dv = reject_values(*Daru::MISSING_VALUES) vals.is_a?(Array) ? (vals.map { |x| dv.index_of(x) }) : dv.index_of(vals) @@ -339,7 +343,7 @@ def index_of_min_by(size=nil,&block) # @return [Daru::Vector] def max_index max_value = @data.max - Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype) + Daru::Vector.new({ index_of(max_value) => max_value }, name: @name, dtype: @dtype) end def frequencies @@ -350,13 +354,13 @@ def frequencies ) end - alias_method :freqs, :frequencies + alias freqs frequencies deprecate :freqs, :frequencies, 2016, 10 def proportions len = size - count_values(*Daru::MISSING_VALUES) - frequencies.to_h.each_with_object({}) do |(el, count), hash| - hash[el] = count / len.to_f + frequencies.to_h.transform_values do |count| + count / len.to_f end end @@ -378,9 +382,9 @@ def coefficient_of_variation # retrieves number of instances where block returns true. If other # values given, retrieves the frequency for this value. If no value # given, counts the number of non-nil elements in the Vector. - def count value=false, &block - if block_given? - @data.select(&block).count + def count(value = false, &block) + if block + @data.count(&block) elsif value count { |val| val == value } else @@ -397,12 +401,12 @@ def value_counts Daru::Vector.new(values) end - def proportion value=1 + def proportion(value = 1) frequencies[value].quo(size - count_values(*Daru::MISSING_VALUES)).to_f end # Sample variance with denominator (N-1) - def variance_sample m=nil + def variance_sample(m = nil) m ||= mean if @data.respond_to? 
:variance_sample @data.variance_sample m @@ -412,7 +416,7 @@ def variance_sample m=nil end # Population variance with denominator (N) - def variance_population m=nil + def variance_population(m = nil) m ||= mean if @data.respond_to? :variance_population @data.variance_population m @@ -422,25 +426,25 @@ def variance_population m=nil end # Sample covariance with denominator (N-1) - def covariance_sample other + def covariance_sample(other) size == other.size or raise ArgumentError, 'size of both the vectors must be equal' covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES) - 1) end # Population covariance with denominator (N) - def covariance_population other + def covariance_population(other) size == other.size or raise ArgumentError, 'size of both the vectors must be equal' covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES)) end - def sum_of_squares(m=nil) + def sum_of_squares(m = nil) m ||= mean - reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val| - memo + (val - m)**2 - } + reject_values(*Daru::MISSING_VALUES).data.inject(0) do |memo, val| + memo + ((val - m)**2) + end end - def standard_deviation_population m=nil + def standard_deviation_population(m = nil) m ||= mean if @data.respond_to? :standard_deviation_population @data.standard_deviation_population(m) @@ -449,7 +453,7 @@ def standard_deviation_population m=nil end end - def standard_deviation_sample m=nil + def standard_deviation_sample(m = nil) m ||= mean if @data.respond_to? :standard_deviation_sample @data.standard_deviation_sample m @@ -459,7 +463,7 @@ def standard_deviation_sample m=nil end # Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3) - def skew m=nil + def skew(m = nil) if @data.respond_to? :skew @data.skew else @@ -469,22 +473,22 @@ def skew m=nil end end - def kurtosis m=nil + def kurtosis(m = nil) if @data.respond_to? :kurtosis @data.kurtosis else m ||= mean - fo = @data.inject(0) { |a, x| a + ((x - m) ** 4) } - fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m) ** 4) - 3 + fo = @data.inject(0) { |a, x| a + ((x - m)**4) } + fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**4)) - 3 end end - def average_deviation_population m=nil + def average_deviation_population(m = nil) must_be_numeric! m ||= mean - reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val| + reject_values(*Daru::MISSING_VALUES).data.inject(0) do |memo, val| (val - m).abs + memo - }.quo(size - count_values(*Daru::MISSING_VALUES)) + end.quo(size - count_values(*Daru::MISSING_VALUES)) end # Returns the value of the percentile q @@ -497,7 +501,7 @@ def average_deviation_population m=nil # == References # # This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method) - def percentile(q, strategy=:midpoint) + def percentile(q, strategy = :midpoint) case strategy when :midpoint midpoint_percentile(q) @@ -511,7 +515,7 @@ def percentile(q, strategy=:midpoint) # Dichotomize the vector with 0 and 1, based on lowest value. # If parameter is defined, this value and lower will be 0 # and higher, 1. - def dichotomize(low=nil) + def dichotomize(low = nil) low ||= factors.min recode do |x| @@ -536,27 +540,25 @@ def center # # * use_population - Pass as *true* if you want to use population # standard deviation instead of sample standard deviation. - def standardize use_population=false + def standardize(use_population = false) m ||= mean sd = use_population ? 
sdp : sds - return Daru::Vector.new([nil]*size) if m.nil? || sd == 0.0 + return Daru::Vector.new([nil] * size) if m.nil? || sd == 0.0 vector_standardized_compute m, sd end # :nocov: - def box_cox_transformation lambda # :nodoc: + def box_cox_transformation(lambda) # :nodoc: must_be_numeric! recode do |x| - if !x.nil? + unless x.nil? if lambda.zero? Math.log(x) else - (x ** lambda - 1).quo(lambda) + ((x**lambda) - 1).quo(lambda) end - else - nil end end end @@ -565,15 +567,15 @@ def box_cox_transformation lambda # :nodoc: # Replace each non-nil value in the vector with its percentile. def vector_percentile c = size - indexes(*Daru::MISSING_VALUES).size - ranked.recode! { |i| i.nil? ? nil : (i.quo(c)*100).to_f } + ranked.recode! { |i| i.nil? ? nil : (i.quo(c) * 100).to_f } end - def vector_standardized_compute(m,sd) + def vector_standardized_compute(m, sd) if @data.respond_to? :vector_standardized_compute - @data.vector_standardized_compute(m,sd) + @data.vector_standardized_compute(m, sd) else Daru::Vector.new @data.collect { |x| x.nil? ? nil : (x.to_f - m).quo(sd) }, - index: index, name: name, dtype: dtype + index: index, name: name, dtype: dtype end end @@ -581,8 +583,8 @@ def vector_centered_compute(m) if @data.respond_to? :vector_centered_compute @data.vector_centered_compute(m) else - Daru::Vector.new @data.collect { |x| x.nil? ? nil : x.to_f-m }, - index: index, name: name, dtype: dtype + Daru::Vector.new @data.collect { |x| x.nil? ? nil : x.to_f - m }, + index: index, name: name, dtype: dtype end end @@ -591,7 +593,7 @@ def vector_centered_compute(m) # # In all the trails, every item have the same probability # of been selected. - def sample_with_replacement(sample=1) + def sample_with_replacement(sample = 1) if @data.respond_to? :sample_with_replacement @data.sample_with_replacement sample else @@ -607,7 +609,7 @@ def sample_with_replacement(sample=1) # Every element could only be selected once. # # A sample of the same size of the vector is the vector itself. - def sample_without_replacement(sample=1) + def sample_without_replacement(sample = 1) if @data.respond_to? :sample_without_replacement @data.sample_without_replacement sample else @@ -632,7 +634,7 @@ def sample_without_replacement(sample=1) # # t 0.0 # # i 0.3333333333333333 # # k 0.25 - def percent_change periods=1 + def percent_change(periods = 1) must_be_numeric! prev = nil @@ -667,7 +669,7 @@ def percent_change periods=1 # ts.diff # => [nil, -0.46, 0.21, 0.27, ...] # # @return [Daru::Vector] - def diff(max_lags=1) + def diff(max_lags = 1) ts = self difference = [] max_lags.times do @@ -689,9 +691,9 @@ def diff(max_lags=1) # # => [0.69, 0.23, 0.44, 0.71, ...] # # first 9 observations are nil # ts.rolling(:mean) # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ] - def rolling function, n=10 + def rolling(function, n = 10) Daru::Vector.new( - [nil] * (n - 1) + + ([nil] * (n - 1)) + (0..(size - n)).map do |i| Daru::Vector.new(@data[i...(i + n)]).send(function) end, index: @index @@ -723,7 +725,7 @@ def rolling function, n=10 # Calculate rolling variance # @yieldparam [Integer] n (10) Loopback length %i[count mean median max min sum std variance].each do |meth| - define_method("rolling_#{meth}".to_sym) do |n=10| + define_method("rolling_#{meth}".to_sym) do |n = 10| rolling(meth, n) end end @@ -751,16 +753,16 @@ def rolling function, n=10 # ts.ema # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ] # # @return [Daru::Vector] Contains EMA - def ema(n=10, wilder=false) + def ema(n = 10, wilder = false) smoother = wilder ? 
1.0 / n : 2.0 / (n + 1) # need to start everything from the first non-nil observation start = @data.index { |i| !i.nil? } # first n - 1 observations are nil base = [nil] * (start + n - 1) # nth observation is just a moving average - base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n + base << (@data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n) (start + n).upto size - 1 do |i| - base << self[i] * smoother + (1 - smoother) * base.last + base << ((self[i] * smoother) + ((1 - smoother) * base.last)) end Daru::Vector.new(base, index: @index, name: @name) @@ -785,20 +787,20 @@ def ema(n=10, wilder=false) # ts.emv # => [ ... nil, 0.073... , 0.082..., 0.080..., ...] # # @return [Daru::Vector] contains EMV - def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize + def emv(n = 10, wilder = false) # rubocop:disable Metrics/AbcSize smoother = wilder ? 1.0 / n : 2.0 / (n + 1) # need to start everything from the first non-nil observation start = @data.index { |i| !i.nil? } # first n - 1 observations are nil var_base = [nil] * (start + n - 1) mean_base = [nil] * (start + n - 1) - mean_base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n + mean_base << (@data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n) # nth observation is just a moving variance_population - var_base << @data[start...(start + n)].inject(0.0) { |s,x| x.nil? ? s : s + (x - mean_base.last)**2 } / n + var_base << (@data[start...(start + n)].inject(0.0) { |s, x| x.nil? ? s : s + ((x - mean_base.last)**2) } / n) (start + n).upto size - 1 do |i| last = mean_base.last - mean_base << self[i] * smoother + (1 - smoother) * last - var_base << (1 - smoother) * var_base.last + smoother * (self[i] - last) * (self[i] - mean_base.last) + mean_base << ((self[i] * smoother) + ((1 - smoother) * last)) + var_base << (((1 - smoother) * var_base.last) + (smoother * (self[i] - last) * (self[i] - mean_base.last))) end Daru::Vector.new(var_base, index: @index, name: @name) end @@ -822,7 +824,7 @@ def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize # ts.emsd # => [ ... nil, 0.285... , 0.258..., 0.243..., ...] 
# # @return [Daru::Vector] contains EMSD - def emsd(n=10, wilder=false) + def emsd(n = 10, wilder = false) result = [] emv_return = emv(n, wilder) emv_return.each do |d| @@ -850,7 +852,7 @@ def emsd(n=10, wilder=false) # @return [Array] macdseries, macdsignal and macdhist are # returned as an array of three Daru::Vectors # - def macd(fast=12, slow=26, signal=9) + def macd(fast = 12, slow = 26, signal = 9) macdseries = ema(fast) - ema(slow) macdsignal = macdseries.ema(signal) macdhist = macdseries - macdsignal @@ -867,7 +869,7 @@ def macd(fast=12, slow=26, signal=9) # # ts.acf # => array with first 21 autocorrelations # ts.acf 3 # => array with first 3 autocorrelations - def acf(max_lags=nil) + def acf(max_lags = nil) max_lags ||= (10 * Math.log10(size)).to_i (0..max_lags).map do |i| @@ -892,7 +894,7 @@ def acf(max_lags=nil) # == Returns # # Autocovariance value - def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/MethodLength + def acvf(demean = true, unbiased = true) # rubocop:disable Metrics/MethodLength opts = { demean: true, unbaised: true @@ -931,17 +933,17 @@ def cumsum Daru::Vector.new(result, index: @index) end - alias :sdp :standard_deviation_population - alias :sds :standard_deviation_sample - alias :std :sds - alias :adp :average_deviation_population - alias :cov :coefficient_of_variation - alias :variance :variance_sample - alias :covariance :covariance_sample - alias :sd :standard_deviation_sample - alias :ss :sum_of_squares - alias :percentil :percentile - alias :se :standard_error + alias sdp standard_deviation_population + alias sds standard_deviation_sample + alias std sds + alias adp average_deviation_population + alias cov coefficient_of_variation + alias variance variance_sample + alias covariance covariance_sample + alias sd standard_deviation_sample + alias ss sum_of_squares + alias percentil percentile + alias se standard_error private @@ -949,7 +951,7 @@ def must_be_numeric! numeric? or raise TypeError, 'Vector must be numeric' end - def covariance_sum other + def covariance_sum(other) self_mean = mean other_mean = other.mean @data @@ -966,10 +968,10 @@ def midpoint_percentile(q) sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort v = ((size - count_values(*Daru::MISSING_VALUES)) * q).quo(100) - if v.to_i!=v - sorted[v.to_i] + if v.to_i == v + (sorted[(v - 0.5).to_i].to_f + sorted[(v + 0.5).to_i]).quo(2) else - (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2) + sorted[v.to_i] end end @@ -985,14 +987,15 @@ def linear_percentile(q) elsif k >= sorted.size sorted[-1] else - sorted[k - 1] + d * (sorted[k] - sorted[k - 1]) + sorted[k - 1] + (d * (sorted[k] - sorted[k - 1])) end end - def raw_sample_without_replacement sample + def raw_sample_without_replacement(sample) valid = indexes(*Daru::MISSING_VALUES).empty? ? 
self : reject_values(*Daru::MISSING_VALUES) raise ArgumentError, "Sample size couldn't be greater than n" if sample > valid.size + out = [] size = valid.size while out.size < sample diff --git a/lib/daru/monkeys.rb b/lib/daru/monkeys.rb index 74fe4c873..7bdae43d6 100644 --- a/lib/daru/monkeys.rb +++ b/lib/daru/monkeys.rb @@ -1,9 +1,9 @@ class Array - def daru_vector name=nil, index=nil, dtype=:array + def daru_vector(name = nil, index = nil, dtype = :array) Daru::Vector.new self, name: name, index: index, dtype: dtype end - alias_method :dv, :daru_vector + alias dv daru_vector def to_index Daru::Index.new self @@ -11,11 +11,11 @@ def to_index end class Range - def daru_vector name=nil, index=nil, dtype=:array + def daru_vector(name = nil, index = nil, dtype = :array) Daru::Vector.new self, name: name, index: index, dtype: dtype end - alias_method :dv, :daru_vector + alias dv daru_vector def to_index Daru::Index.new to_a @@ -23,23 +23,23 @@ def to_index end class Hash - def daru_vector index=nil, dtype=:array + def daru_vector(index = nil, dtype = :array) Daru::Vector.new values[0], name: keys[0], index: index, dtype: dtype end - alias_method :dv, :daru_vector + alias dv daru_vector end class MDArray - def daru_vector(name=nil, index=nil, *) + def daru_vector(name = nil, index = nil, *) Daru::Vector.new self, name: name, index: index, dtype: :mdarray end - alias_method :dv, :daru_vector + alias dv daru_vector end class Matrix - def elementwise_division other + def elementwise_division(other) map.with_index do |e, index| e / other.to_a.flatten[index] end diff --git a/lib/daru/plotting/gruff.rb b/lib/daru/plotting/gruff.rb index 2dbcf4729..049783d38 100644 --- a/lib/daru/plotting/gruff.rb +++ b/lib/daru/plotting/gruff.rb @@ -1,3 +1,3 @@ -require_relative 'gruff/category.rb' -require_relative 'gruff/vector.rb' -require_relative 'gruff/dataframe.rb' +require_relative 'gruff/category' +require_relative 'gruff/vector' +require_relative 'gruff/dataframe' diff --git a/lib/daru/plotting/gruff/category.rb b/lib/daru/plotting/gruff/category.rb index d99aed48f..f1a807de7 100644 --- a/lib/daru/plotting/gruff/category.rb +++ b/lib/daru/plotting/gruff/category.rb @@ -2,7 +2,7 @@ module Daru module Plotting module Category module GruffLibrary - def plot opts={} + def plot(opts = {}) type = opts[:type] || :bar size = opts[:size] || 500 case type @@ -17,7 +17,7 @@ def plot opts={} private - def category_bar_plot size, method + def category_bar_plot(size, method) plot = Gruff::Bar.new size method ||= :count dv = frequencies(method) @@ -26,7 +26,7 @@ def category_bar_plot size, method plot end - def category_pie_plot size, method + def category_pie_plot(size, method) plot = Gruff::Pie.new size method ||= :count frequencies(method).each_with_index do |data, index| @@ -35,9 +35,9 @@ def category_pie_plot size, method plot end - def category_sidebar_plot size, method + def category_sidebar_plot(size, method) plot = Gruff::SideBar.new size - plot.labels = {0 => (name.to_s || 'vector')} + plot.labels = { 0 => (name.to_s || 'vector') } method ||= :count frequencies(method).each_with_index do |data, index| plot.data index, data diff --git a/lib/daru/plotting/gruff/dataframe.rb b/lib/daru/plotting/gruff/dataframe.rb index 37b1a5bac..7f92cb529 100644 --- a/lib/daru/plotting/gruff/dataframe.rb +++ b/lib/daru/plotting/gruff/dataframe.rb @@ -2,7 +2,7 @@ module Daru module Plotting module DataFrame module GruffLibrary - def plot opts={} + def plot(opts = {}) type = opts[:type] || :bar size = opts[:size] || 500 x = 
extract_x_vector opts[:x] @@ -24,11 +24,11 @@ def plot opts={} private - def process_type type, categorized + def process_type(type, categorized) type == :scatter && categorized ? :scatter_categorized : type end - def line_plot size, x, y + def line_plot(size, x, y) plot = Gruff::Line.new size plot.labels = size.times.to_a.zip(x).to_h y.each do |vec| @@ -37,7 +37,7 @@ def line_plot size, x, y plot end - def bar_plot size, x, y + def bar_plot(size, x, y) plot = Gruff::Bar.new size plot.labels = size.times.to_a.zip(x).to_h y.each do |vec| @@ -46,7 +46,7 @@ def bar_plot size, x, y plot end - def scatter_plot size, x, y + def scatter_plot(size, x, y) plot = Gruff::Scatter.new size y.each do |vec| plot.data vec.name || :vector, x, vec.to_a @@ -54,7 +54,7 @@ def scatter_plot size, x, y plot end - def scatter_with_category size, x, y, opts + def scatter_with_category(size, x, y, opts) x = Daru::Vector.new x y = y.first plot = Gruff::Scatter.new size @@ -66,11 +66,11 @@ def scatter_with_category size, x, y, opts plot end - def extract_x_vector x_name - x_name && self[x_name].to_a || index.to_a + def extract_x_vector(x_name) + (x_name && self[x_name].to_a) || index.to_a end - def extract_y_vectors y_names + def extract_y_vectors(y_names) y_names = case y_names when nil diff --git a/lib/daru/plotting/gruff/vector.rb b/lib/daru/plotting/gruff/vector.rb index afd55c2bd..d016c1e33 100644 --- a/lib/daru/plotting/gruff/vector.rb +++ b/lib/daru/plotting/gruff/vector.rb @@ -2,7 +2,7 @@ module Daru module Plotting module Vector module GruffLibrary - def plot opts={} + def plot(opts = {}) type = opts[:type] || :bar size = opts[:size] || 500 case type @@ -19,35 +19,35 @@ def plot opts={} private - def line_plot size + def line_plot(size) plot = Gruff::Line.new size plot.labels = size.times.to_a.zip(index.to_a).to_h plot.data name || :vector, to_a plot end - def bar_plot size + def bar_plot(size) plot = Gruff::Bar.new size plot.labels = size.times.to_a.zip(index.to_a).to_h plot.data name || :vector, to_a plot end - def pie_plot size + def pie_plot(size) plot = Gruff::Pie.new size each_with_index { |data, index| plot.data index, data } plot end - def scatter_plot size + def scatter_plot(size) plot = Gruff::Scatter.new size plot.data name || :vector, index.to_a, to_a plot end - def sidebar_plot size + def sidebar_plot(size) plot = Gruff::SideBar.new size - plot.labels = {0 => (name.to_s || 'vector')} + plot.labels = { 0 => (name.to_s || 'vector') } each_with_index { |data, index| plot.data index, data } plot end diff --git a/lib/daru/vector.rb b/lib/daru/vector.rb index 7132c391d..8aa86b062 100644 --- a/lib/daru/vector.rb +++ b/lib/daru/vector.rb @@ -1,8 +1,8 @@ -require 'daru/maths/arithmetic/vector.rb' -require 'daru/maths/statistics/vector.rb' -require 'daru/plotting/gruff.rb' -require 'daru/accessors/array_wrapper.rb' -require 'daru/category.rb' +require 'daru/maths/arithmetic/vector' +require 'daru/maths/statistics/vector' +require 'daru/plotting/gruff' +require 'daru/accessors/array_wrapper' +require 'daru/category' module Daru class Vector # rubocop:disable Metrics/ClassLength @@ -27,7 +27,7 @@ class << self # == Options # :value # All the rest like .new - def new_with_size n, opts={}, &block + def new_with_size(n, opts = {}, &block) value = opts.delete :value block ||= ->(_) { value } Daru::Vector.new Array.new(n, &block), opts @@ -70,12 +70,12 @@ def [](*indexes) def _load(data) # :nodoc: h = Marshal.load(data) Daru::Vector.new(h[:data], - index: h[:index], - name: h[:name], - dtype: h[:dtype], 
missing_values: h[:missing_values]) + index: h[:index], + name: h[:name], + dtype: h[:dtype], missing_values: h[:missing_values]) end - def coerce(data, options={}) + def coerce(data, options = {}) case data when Daru::Vector data @@ -92,21 +92,21 @@ def size end def each(&block) - return to_enum(:each) unless block_given? + return to_enum(:each) unless block @data.each(&block) self end def each_index(&block) - return to_enum(:each_index) unless block_given? + return to_enum(:each_index) unless block @index.each(&block) self end - def each_with_index &block - return to_enum(:each_with_index) unless block_given? + def each_with_index(&block) + return to_enum(:each_with_index) unless block @data.to_a.zip(@index.to_a).each(&block) @@ -114,7 +114,8 @@ def each_with_index &block end def map!(&block) - return to_enum(:map!) unless block_given? + return to_enum(:map!) unless block + @data.map!(&block) self end @@ -128,7 +129,7 @@ def apply_method(method, keys: nil, by_position: true) else raise end end - alias :apply_method_on_sub_vector :apply_method + alias apply_method_on_sub_vector apply_method # The name of the Daru::Vector. String. attr_reader :name @@ -139,6 +140,7 @@ def apply_method(method, keys: nil, by_position: true) attr_reader :nm_dtype # An Array or the positions in the vector that are being treated as 'missing'. attr_reader :missing_positions + deprecate :missing_positions, :indexes, 2016, 10 # Store a hash of labels for values. Supplementary only. Recommend using index # for proper usage. @@ -175,7 +177,7 @@ def apply_method(method, keys: nil, by_position: true) # # vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o] # vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4}) - def initialize source, opts={} + def initialize(source, opts = {}) if opts[:type] == :category # Initialize category type vector extend Daru::Category @@ -193,7 +195,7 @@ def plotting_library @plotting_library end - def plotting_library= lib + def plotting_library=(lib) case lib when :gruff @plotting_library = lib @@ -203,16 +205,16 @@ def plotting_library= lib ) end else - raise ArgumentError, "Plotting library #{lib} not supported. "\ - 'Supported library is :gruff' + raise ArgumentError, "Plotting library #{lib} not supported. " \ + 'Supported library is :gruff' end end # this method is overwritten: see Daru::Vector#plotting_library= - def plot(*args, **options, &b) + def plot(...) init_plotting_library - plot(*args, **options, &b) + plot(...) end # Get one or more elements with specified index or a range. @@ -251,7 +253,7 @@ def [](*input_indexes) # # 0 a # # 1 b # # 2 c - def at *positions + def at(*positions) # to be used to form index original_positions = positions positions = coerce_positions(*positions) @@ -278,7 +280,7 @@ def at *positions # # 2 c # # 3 d # # 4 e - def set_at positions, val + def set_at(positions, val) validate_positions(*positions) positions.map { |pos| @data[pos] = val } update_position_cache @@ -309,7 +311,7 @@ def []=(*indexes, val) # Two vectors are equal if they have the exact same index values corresponding # with the exact same elements. Name is ignored. - def == other + def ==(other) case other when Daru::Vector @index == other.index && size == other.size && @@ -371,8 +373,8 @@ def == other end alias_method operator, method if operator != :== && operator != :!= end - alias :gt :mt - alias :gteq :mteq + alias gt mt + alias gteq mteq # Comparator for checking if any of the elements in *other* exist in self. 
# @@ -386,11 +388,11 @@ def == other # # nil # # 2 3 # # 4 5 - def in other - other = Hash[other.zip(Array.new(other.size, 0))] + def in(other) + other = other.zip(Array.new(other.size, 0)).to_h Daru::Core::Query::BoolArray.new( @data.each_with_object([]) do |d, memo| - memo << (other.key?(d) ? true : false) + memo << (other.key?(d)) end ) end @@ -431,7 +433,7 @@ def in other # # 11 5 # # 13 5 # # 15 1 - def where bool_array + def where(bool_array) Daru::Core::Query.vector_where self, bool_array end @@ -452,20 +454,20 @@ def where bool_array # # 0 3 days # # 1 35 days # # 2 14 days - def apply_where bool_array, &block + def apply_where(bool_array, &block) Daru::Core::Query.vector_apply_where self, bool_array, &block end - def head q=10 - self[0..(q-1)] + def head(q = 10) + self[0..(q - 1)] end - def tail q=10 + def tail(q = 10) start = [size - q, 0].max - self[start..(size-1)] + self[start..(size - 1)] end - def last q=1 + def last(q = 1) # The Enumerable mixin dose not provide the last method. tail(q) end @@ -486,7 +488,7 @@ def object? def has_missing_data? !indexes(*Daru::MISSING_VALUES).empty? end - alias :flawed? :has_missing_data? + alias flawed? has_missing_data? deprecate :has_missing_data?, :include_values?, 2016, 10 deprecate :flawed?, :include_values?, 2016, 10 @@ -523,7 +525,7 @@ def is_values(*values) end # Append an element to the vector by specifying the element and index - def concat element, index + def concat(element, index) raise IndexError, 'Expected new unique index' if @index.include? index @index |= [index] @@ -531,15 +533,15 @@ def concat element, index update_position_cache end - alias :push :concat - alias :<< :concat + alias push concat + alias << concat # Cast a vector to a new data type. # # == Options # # * +:dtype+ - :array for Ruby Array.. - def cast opts={} + def cast(opts = {}) dt = opts[:dtype] raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless dt == :array @@ -547,12 +549,12 @@ def cast opts={} end # Delete an element by value - def delete element + def delete(element) delete_at index_of(element) end # Delete element by index - def delete_at index + def delete_at(index) @data.delete_at @index[index] @index = Daru::Index.new(@index.to_a - [index]) @@ -568,6 +570,7 @@ def type @type = :numeric each do |e| next if e.nil? || e.is_a?(Numeric) + @type = :object break end @@ -588,7 +591,7 @@ def category? end # Get index of element - def index_of element + def index_of(element) case dtype when :array then @index.key(@data.index { |x| x.eql? element }) else @index.key @data.index(element) @@ -603,11 +606,11 @@ def uniq Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype end - def any? &block + def any?(&block) @data.data.any?(&block) end - def all? &block + def all?(&block) @data.data.all?(&block) end @@ -627,8 +630,8 @@ def all? &block # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"] # # Say you want to sort these strings by length. 
# v.sort(ascending: false) { |a,b| a.length <=> b.length } - def sort opts={}, &block - opts = {ascending: true}.merge(opts) + def sort(opts = {}, &block) + opts = { ascending: true }.merge(opts) vector_index = resort_index(@data.each_with_index, opts, &block) vector, index = vector_index.transpose @@ -656,20 +659,19 @@ def sort opts={}, &block # # Say you want to sort index in descending order # dv.sort_by_index(ascending: false) # #=> Daru::Vector.new [11, 12, 13], index: [23, 22, 21] - def sort_by_index opts={} - opts = {ascending: true}.merge(opts) + def sort_by_index(opts = {}) + opts = { ascending: true }.merge(opts) _, new_order = resort_index(@index.each_with_index, opts).transpose reorder new_order end DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)| - case - when lv.nil? && rv.nil? + if lv.nil? && rv.nil? li <=> ri - when lv.nil? + elsif lv.nil? -1 - when rv.nil? + elsif rv.nil? 1 else lv <=> rv @@ -679,21 +681,21 @@ def sort_by_index opts={} # Just sort the data and get an Array in return using Enumerable#sort. # Non-destructive. # :nocov: - def sorted_data &block + def sorted_data(&block) @data.to_a.sort(&block) end # :nocov: # Like map, but returns a Daru::Vector with the returned values. - def recode dt=nil, &block - return to_enum(:recode) unless block_given? + def recode(dt = nil, &block) + return to_enum(:recode) unless block dup.recode! dt, &block end # Destructive version of recode! - def recode! dt=nil, &block - return to_enum(:recode!) unless block_given? + def recode!(dt = nil, &block) + return to_enum(:recode!) unless block @data.map!(&block).data @data = cast_vector_to(dt || @dtype) @@ -735,7 +737,7 @@ def verify # a.splitted # => # [["a","b"],["c","d"],["a","b"],["d"]] - def splitted sep=',' + def splitted(sep = ',') @data.map do |s| if s.nil? nil @@ -760,21 +762,21 @@ def splitted sep=',' # "c"=>#} # - def split_by_separator sep=',' + def split_by_separator(sep = ',') split_data = splitted sep split_data - .flatten.uniq.compact.map do |key| + .flatten.uniq.compact.to_h do |key| [ key, Daru::Vector.new(split_data.map { |v| split_value(key, v) }) ] - end.to_h + end end - def split_by_separator_freq(sep=',') - split_by_separator(sep).map { |k, v| - [k, v.map(&:to_i).inject(:+)] - }.to_h + def split_by_separator_freq(sep = ',') + split_by_separator(sep).transform_values do |v| + v.sum(&:to_i) + end end def reset_index! @@ -788,7 +790,7 @@ def reset_index! # == Arguments # # * +replacement+ - The value which should replace all nils - def replace_nils! replacement + def replace_nils!(replacement) indexes(*Daru::MISSING_VALUES).each do |idx| self[idx] = replacement end @@ -816,7 +818,7 @@ def replace_nils! replacement # 7 3 # 8 3 # - def rolling_fillna!(direction=:forward) + def rolling_fillna!(direction = :forward) enum = direction == :forward ? index : index.reverse_each last_valid_value = 0 enum.each do |idx| @@ -830,7 +832,7 @@ def rolling_fillna!(direction=:forward) end # Non-destructive version of rolling_fillna! - def rolling_fillna(direction=:forward) + def rolling_fillna(direction = :forward) dup.rolling_fillna!(direction) end @@ -861,11 +863,11 @@ def rolling_fillna(direction=:forward) # ts.lag(2) # => [nil, nil, 1, 2, 3] # ts.lag(-1) # => [2, 3, 4, 5, nil] # - def lag k=1 + def lag(k = 1) case k when 0 then dup when 1...size - copy([nil] * k + data.to_a) + copy(([nil] * k) + data.to_a) when -size..-1 copy(data.to_a[k.abs...size]) else @@ -881,7 +883,7 @@ def detach_index end # Non-destructive version of #replace_nils! 
- def replace_nils replacement + def replace_nils(replacement) dup.replace_nils!(replacement) end @@ -903,7 +905,7 @@ def count_values(*values) end # Returns *true* if an index exists - def has_index? index + def has_index?(index) @index.include? index end @@ -922,7 +924,7 @@ def get_sub_vector(keys, by_position: true) # @return [Daru::DataFrame] the vector as a single-vector dataframe def to_df - Daru::DataFrame.new({@name => @data}, name: @name, index: @index) + Daru::DataFrame.new({ @name => @data }, name: @name, index: @index) end # Convert Vector to a horizontal or vertical Ruby Matrix. @@ -930,10 +932,11 @@ def to_df # == Arguments # # * +axis+ - Specify whether you want a *:horizontal* or a *:vertical* matrix. - def to_matrix axis=:horizontal - if axis == :horizontal + def to_matrix(axis = :horizontal) + case axis + when :horizontal Matrix[to_a] - elsif axis == :vertical + when :vertical Matrix.columns([to_a]) else raise ArgumentError, "axis should be either :horizontal or :vertical, not #{axis}" @@ -942,7 +945,7 @@ def to_matrix axis=:horizontal # Convert to hash (explicit). Hash keys are indexes and values are the correspoding elements def to_h - @index.map { |index| [index, self[index]] }.to_h + @index.to_h { |index| [index, self[index]] } end # Return an array @@ -956,13 +959,13 @@ def to_json(*) end # Convert to html for iruby - def to_html(threshold=30) + def to_html(threshold = 30) table_thead = to_html_thead table_tbody = to_html_tbody(threshold) path = if index.is_a?(MultiIndex) - File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__) + File.expand_path('iruby/templates/vector_mi.html.erb', __dir__) else - File.expand_path('../iruby/templates/vector.html.erb', __FILE__) + File.expand_path('iruby/templates/vector.html.erb', __dir__) end ERB.new(File.read(path).strip).result(binding) end @@ -970,25 +973,25 @@ def to_html(threshold=30) def to_html_thead table_thead_path = if index.is_a?(MultiIndex) - File.expand_path('../iruby/templates/vector_mi_thead.html.erb', __FILE__) + File.expand_path('iruby/templates/vector_mi_thead.html.erb', __dir__) else - File.expand_path('../iruby/templates/vector_thead.html.erb', __FILE__) + File.expand_path('iruby/templates/vector_thead.html.erb', __dir__) end ERB.new(File.read(table_thead_path).strip).result(binding) end - def to_html_tbody(threshold=30) + def to_html_tbody(threshold = 30) table_tbody_path = if index.is_a?(MultiIndex) - File.expand_path('../iruby/templates/vector_mi_tbody.html.erb', __FILE__) + File.expand_path('iruby/templates/vector_mi_tbody.html.erb', __dir__) else - File.expand_path('../iruby/templates/vector_tbody.html.erb', __FILE__) + File.expand_path('iruby/templates/vector_tbody.html.erb', __dir__) end ERB.new(File.read(table_tbody_path).strip).result(binding) end def to_s - "#<#{self.class}#{': ' + @name.to_s if @name}(#{size})#{':category' if category?}>" + "#<#{self.class}#{": #{@name}" if @name}(#{size})#{':category' if category?}>" end # Create a summary of the Vector @@ -1007,18 +1010,18 @@ def to_s # # std.err.: 0.5774 # # skew: 0.0000 # # kurtosis: -2.3333 - def summary(indent_level=0) + def summary(indent_level = 0) non_missing = size - count_values(*Daru::MISSING_VALUES) - summary = ' =' * indent_level + "= #{name}" \ - "\n n :#{size}" \ - "\n non-missing:#{non_missing}" + summary = (' =' * indent_level) + "= #{name}" \ + "\n n :#{size}" \ + "\n non-missing:#{non_missing}" case type when :object summary << object_summary when :numeric summary << numeric_summary end - summary.split("\n").join("\n" 
+ ' ' * indent_level) + summary.split("\n").join("\n#{' ' * indent_level}") end # Displays summary for an object type Vector @@ -1030,7 +1033,7 @@ def object_summary "\n Distribution\n" data = frequencies.sort.each_with_index.map do |v, k| - [k, v, '%0.2f%%' % ((nval.zero? ? 1 : v.quo(nval))*100)] + [k, v, format('%0.2f%%', ((nval.zero? ? 1 : v.quo(nval)) * 100))] end summary + Formatters::Table.format(data) @@ -1040,21 +1043,21 @@ def object_summary # @return [String] String containing numeric vector summary def numeric_summary summary = "\n median: #{median}" + - "\n mean: %0.4f" % mean + ("\n mean: %0.4f" % mean) if sd - summary << "\n std.dev.: %0.4f" % sd + - "\n std.err.: %0.4f" % se + summary << (("\n std.dev.: %0.4f" % sd) + + ("\n std.err.: %0.4f" % se)) end if count_values(*Daru::MISSING_VALUES).zero? - summary << "\n skew: %0.4f" % skew + - "\n kurtosis: %0.4f" % kurtosis + summary << (("\n skew: %0.4f" % skew) + + ("\n kurtosis: %0.4f" % kurtosis)) end summary end # Over rides original inspect for pretty printing in irb - def inspect spacing=20, threshold=15 + def inspect(spacing = 20, threshold = 15) row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a "#<#{self.class}(#{size})#{':category' if category?}>\n" + @@ -1073,7 +1076,7 @@ def inspect spacing=20, threshold=15 # index as an input to reorder the vector # @param [Daru::Index, Daru::MultiIndex] new_index new index to order with # @return [Daru::Vector] vector reindexed with new index - def reindex! new_index + def reindex!(new_index) values = [] each_with_index do |val, i| values[new_index[i]] = val if new_index.include?(i) @@ -1100,7 +1103,7 @@ def reindex! new_index # # a 1 # # b 2 # # c 3 - def reorder! order + def reorder!(order) @index = @index.reorder order data_array = order.map { |i| @data[i] } @data = cast_vector_to @dtype, data_array, @nm_dtype @@ -1109,27 +1112,25 @@ def reorder! order end # Non-destructive version of #reorder! - def reorder order + def reorder(order) dup.reorder! order end # Create a new vector with a different index, and preserve the indexing of # current elements. - def reindex new_index + def reindex(new_index) dup.reindex!(new_index) end - def index= idx + def index=(idx) idx = Index.coerce idx if idx.size != size raise ArgumentError, - "Size of supplied index #{idx.size} does not match size of Vector" + "Size of supplied index #{idx.size} does not match size of Vector" end - unless idx.is_a?(Daru::Index) - raise ArgumentError, 'Can only assign type Index and its subclasses.' - end + raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(Daru::Index) @index = idx self @@ -1138,12 +1139,12 @@ def index= idx # Give the vector a new name # # @param new_name [Symbol] The new name. - def rename new_name + def rename(new_name) @name = new_name self end - alias_method :name=, :rename + alias name= rename # Duplicated a vector # @return [Daru::Vector] duplicated vector @@ -1166,7 +1167,7 @@ def dup # # Returns a DataFrame where each vector is a vector # of length +nr+ containing the computed resample estimates. - def bootstrap(estimators, nr, s=nil) + def bootstrap(estimators, nr, s = nil) s ||= size h_est, es, bss = prepare_bootstrap(estimators) @@ -1201,23 +1202,23 @@ def bootstrap(estimators, nr, s=nil) # # == Reference: # * Sawyer, S. (2005). Resampling Data: Using a Statistical Jacknife. 
- def jackknife(estimators, k=1) # rubocop:disable Metrics/MethodLength + def jackknife(estimators, k = 1) # rubocop:disable Metrics/MethodLength raise "n should be divisible by k:#{k}" unless (size % k).zero? nb = (size / k).to_i h_est, es, ps = prepare_bootstrap(estimators) - est_n = es.map { |v| [v, h_est[v].call(self)] }.to_h + est_n = es.to_h { |v| [v, h_est[v].call(self)] } nb.times do |i| other = @data.dup - other.slice!(i*k, k) + other.slice!(i * k, k) other = Daru::Vector.new other es.each do |estimator| # Add pseudovalue ps[estimator].push( - nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other) + (nb * est_n[estimator]) - ((nb - 1) * h_est[estimator].call(other)) ) end end @@ -1256,7 +1257,7 @@ def match(regexp) # Otherwise, a duplicate will be returned irrespective of # presence of missing data. - def only_valid as_a=:vector, _duplicate=true + def only_valid(as_a = :vector, _duplicate = true) # FIXME: Now duplicate is just ignored. # There are no spec that fail on this case, so I'll leave it # this way for now - zverok, 2016-05-07 @@ -1331,10 +1332,11 @@ def replace_values(old_values, new_value) end # Returns a Vector containing only missing data (preserves indexes). - def only_missing as_a=:vector - if as_a == :vector + def only_missing(as_a = :vector) + case as_a + when :vector self[*indexes(*Daru::MISSING_VALUES)] - elsif as_a == :array + when :array self[*indexes(*Daru::MISSING_VALUES)].to_a end end @@ -1351,17 +1353,16 @@ def only_numerics self[*numeric_indexes] end - DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/ + DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/.freeze # Returns the database type for the vector, according to its content def db_type # first, detect any character not number - case - when @data.any? { |v| v.to_s =~ DATE_REGEXP } + if @data.any? { |v| v.to_s =~ DATE_REGEXP } 'DATE' - when @data.any? { |v| v.to_s =~ /[^0-9e.-]/ } + elsif @data.any? { |v| v.to_s =~ /[^0-9e.-]/ } 'VARCHAR (255)' - when @data.any? { |v| v.to_s =~ /\./ } + elsif @data.any? { |v| v.to_s.include?('.') } 'DOUBLE' else 'INTEGER' @@ -1371,7 +1372,7 @@ def db_type # Copies the structure of the vector (i.e the index, size, etc.) and fills all # all values with nils. def clone_structure - Daru::Vector.new(([nil]*size), name: @name, index: @index.dup) + Daru::Vector.new(([nil] * size), name: @name, index: @index.dup) end # Save the vector to a file @@ -1379,16 +1380,16 @@ def clone_structure # == Arguments # # * filename - Path of file where the vector is to be saved - def save filename + def save(filename) Daru::IO.save self, filename end def _dump(*) # :nodoc: Marshal.dump( - data: @data.to_a, - dtype: @dtype, - name: @name, - index: @index + data: @data.to_a, + dtype: @dtype, + name: @name, + index: @index ) end @@ -1398,7 +1399,7 @@ def daru_vector(*) end # :nocov: - alias :dv :daru_vector + alias dv daru_vector # Converts a non category type vector to category type vector. 
# @param [Hash] opts options to convert to category @@ -1406,7 +1407,7 @@ def daru_vector(*) # If it is ordered, it can be sorted and min, max like functions would work # @option opts [Array] :categories set categories in the specified order # @return [Daru::Vector] vector with type category - def to_category opts={} + def to_category(opts = {}) dv = Daru::Vector.new to_a, type: :category, name: @name, index: @index dv.ordered = opts[:ordered] || false dv.categories = opts[:categories] if opts[:categories] @@ -1416,8 +1417,8 @@ def to_category opts={} def method_missing(name, *args, &block) # FIXME: it is shamefully fragile. Should be either made stronger # (string/symbol dychotomy, informative errors) or removed totally. - zverok - if name =~ /(.+)\=/ - self[$1.to_sym] = args[0] + if name =~ /(.+)=/ + self[Regexp.last_match(1).to_sym] = args[0] elsif has_index?(name) self[name] else @@ -1425,7 +1426,7 @@ def method_missing(name, *args, &block) end end - def respond_to_missing?(name, include_private=false) + def respond_to_missing?(name, include_private = false) name.to_s.end_with?('=') || has_index?(name) || super end @@ -1447,20 +1448,21 @@ def respond_to_missing?(name, include_private=false) # # 3 high # # 4 medium # # 5 high - def cut partitions, opts={} - close_at, labels = opts[:close_at] || :right, opts[:labels] + def cut(partitions, opts = {}) + close_at = opts[:close_at] || :right + labels = opts[:labels] partitions = partitions.to_a values = to_a.map { |val| cut_find_category partitions, val, close_at } cats = cut_categories(partitions, close_at) dv = Daru::Vector.new values, - index: @index, - type: :category, - categories: cats + index: @index, + type: :category, + categories: cats # Rename categories if new labels provided if labels - dv.rename_categories Hash[cats.zip(labels)] + dv.rename_categories cats.zip(labels).to_h else dv end @@ -1492,7 +1494,7 @@ def init_plotting_library def copy(values) # Make sure values is right-justified to the size of the vector - values.concat([nil] * (size-values.size)) if values.size < size + values.concat([nil] * (size - values.size)) if values.size < size Daru::Vector.new(values[0...size], index: @index, name: @name) end @@ -1510,10 +1512,10 @@ def nan_positions # Helper method returning validity of arbitrary value def valid_value?(v) - v.respond_to?(:nan?) && v.nan? || v.nil? ? false : true + !((v.respond_to?(:nan?) && v.nan?) || v.nil?) end - def initialize_vector source, opts + def initialize_vector(source, opts) index, source = parse_source(source, opts) set_name opts[:name] @@ -1525,7 +1527,7 @@ def initialize_vector source, opts @possibly_changed_type = true end - def parse_source source, opts + def parse_source(source, opts) if source.is_a?(Hash) [source.keys, source.values] else @@ -1542,17 +1544,19 @@ def guard_sizes! end end - def guard_type_check value + def guard_type_check(value) @possibly_changed_type = true \ - if object? && (value.nil? || value.is_a?(Numeric)) || - numeric? && !value.is_a?(Numeric) && !value.nil? + if (object? && (value.nil? || value.is_a?(Numeric))) || + (numeric? && !value.is_a?(Numeric) && !value.nil?) end - def split_value key, v - case - when v.nil? then nil - when v.include?(key) then 1 - else 0 + def split_value(key, v) + if v.nil? + nil + elsif v.include?(key) + 1 + else + 0 end end @@ -1566,18 +1570,18 @@ def prepare_bootstrap(estimators) h_est = [h_est] unless h_est.is_a?(Array) || h_est.is_a?(Hash) if h_est.is_a? 
Array - h_est = h_est.map do |est| + h_est = h_est.to_h do |est| [est, ->(v) { Daru::Vector.new(v).send(est) }] - end.to_h + end end - bss = h_est.keys.map { |v| [v, []] }.to_h + bss = h_est.keys.to_h { |v| [v, []] } [h_est, h_est.keys, bss] end - # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the + # NOTE: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the # @param dtype [db_type] variable is set and the underlying data type of vector changed. - def cast_vector_to dtype, source=nil, _nm_dtype=nil + def cast_vector_to(dtype, source = nil, _nm_dtype = nil) source = @data.to_a if source.nil? new_vector = @@ -1591,25 +1595,23 @@ def cast_vector_to dtype, source=nil, _nm_dtype=nil new_vector end - def set_name name # rubocop:disable Style/AccessorMethodName + def set_name(name) # rubocop:disable Naming/AccessorMethodName @name = if name.is_a?(Numeric) then name elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple elsif name then name # anything but Numeric or nil - else - nil end end # Raises IndexError when one of the positions is an invalid position - def validate_positions *positions + def validate_positions(*positions) positions.each do |pos| raise IndexError, "#{pos} is not a valid position." if pos >= size end end # coerce ranges, integers and array in appropriate ways - def coerce_positions *positions + def coerce_positions(*positions) if positions.size == 1 case positions.first when Integer @@ -1647,7 +1649,7 @@ def insert_vector(indexes, val) # Works similar to #[]= but also insert the vector in case index is not valid # It is there only to be accessed by Daru::DataFrame and not meant for user. - def set indexes, val + def set(indexes, val) cast(dtype: :array) if val.nil? && dtype != :array guard_type_check(val) @@ -1660,37 +1662,39 @@ def set indexes, val update_position_cache end - def cut_find_category partitions, val, close_at + def cut_find_category(partitions, val, close_at) case close_at when :right right_index = partitions.index { |i| i > val } raise ArgumentError, 'Invalid partition' if right_index.nil? + left_index = right_index - 1 - "#{partitions[left_index]}-#{partitions[right_index]-1}" + "#{partitions[left_index]}-#{partitions[right_index] - 1}" when :left right_index = partitions.index { |i| i >= val } raise ArgumentError, 'Invalid partition' if right_index.nil? + left_index = right_index - 1 - "#{partitions[left_index]+1}-#{partitions[right_index]}" + "#{partitions[left_index] + 1}-#{partitions[right_index]}" else raise ArgumentError, "Invalid parameter #{close_at} to close_at." end end - def cut_categories partitions, close_at + def cut_categories(partitions, close_at) case close_at when :right - Array.new(partitions.size-1) do |left_index| - "#{partitions[left_index]}-#{partitions[left_index+1]-1}" + Array.new(partitions.size - 1) do |left_index| + "#{partitions[left_index]}-#{partitions[left_index + 1] - 1}" end when :left - Array.new(partitions.size-1) do |left_index| - "#{partitions[left_index]+1}-#{partitions[left_index+1]}" + Array.new(partitions.size - 1) do |left_index| + "#{partitions[left_index] + 1}-#{partitions[left_index + 1]}" end end end - def include_with_nan? array, value + def include_with_nan?(array, value) # Returns true if value is included in array. # Similar to include? but also works if value is Float::NAN if value.respond_to?(:nan?) && value.nan? 
@@ -1705,7 +1709,7 @@ def update_position_cache @nan_positions = nil end - def resort_index vector_index, opts + def resort_index(vector_index, opts) if block_given? vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) } else From a9853158461d977a0768455cbc79fe03e45ea941 Mon Sep 17 00:00:00 2001 From: Thomas Naude Date: Wed, 17 Aug 2022 14:39:01 +0200 Subject: [PATCH 2/2] fix style offences --- .byebug_history | 16 ------- .gitignore | 1 + .rubocop_todo.yml | 72 +---------------------------- lib/daru/category.rb | 4 +- lib/daru/core/merge.rb | 12 ++--- lib/daru/dataframe.rb | 13 ++---- lib/daru/formatters/table.rb | 2 +- lib/daru/helpers/array.rb | 2 +- lib/daru/index/categorical_index.rb | 5 +- lib/daru/index/index.rb | 2 +- lib/daru/index/multi_index.rb | 4 +- lib/daru/io/io.rb | 3 +- lib/daru/iruby/helpers.rb | 2 +- lib/daru/maths/statistics/vector.rb | 11 +---- lib/daru/vector.rb | 22 ++++----- 15 files changed, 35 insertions(+), 136 deletions(-) delete mode 100644 .byebug_history diff --git a/.byebug_history b/.byebug_history deleted file mode 100644 index eef1105df..000000000 --- a/.byebug_history +++ /dev/null @@ -1,16 +0,0 @@ -exit -rows.factors.to_h { |header| [header, nil] } -rows.factors.map { |header| [header, nil] } -rows.factors.each { |header| [header, nil] } -rows.factors.each { |header| p header.first } -rows.factors.each { |header| p header } -rows.factors.to_h { |(header, value)| [value, nil] } -rows.factors.map { |r| [r, nil] }.to_h -rows.factors.to_h { |header, value| [value, nil] } -rows.factors -rows.factors.to_s -rows.factors.map { |r| [r, nil] } -rows.factors.map { |r| [r, nil] }.to_h -rows.factors.to_h { |r| [r, nil] } -rows.factors.first -rows.factors diff --git a/.gitignore b/.gitignore index 6809a4f30..a950cbb65 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ vendor/ profile/out/ coverage/ .ruby-version +.byebug_history diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 7ef36258e..43c1a330e 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,20 +1,11 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2022-08-16 13:18:56 UTC using RuboCop version 1.35.0. +# on 2022-08-16 13:20:50 UTC using RuboCop version 1.35.0. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new # versions of RuboCop, may require this file to be generated again. -# Offense count: 3 -# Configuration parameters: AutoCorrect, EnforcedStyle. -# SupportedStyles: leading, trailing -Layout/LineContinuationLeadingSpace: - Exclude: - - 'lib/daru/category.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/io/io.rb' - # Offense count: 2 # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches. Lint/DuplicateBranch: @@ -33,11 +24,6 @@ Lint/EmptyClass: Exclude: - 'lib/daru/accessors/mdarray_wrapper.rb' -# Offense count: 1 -Lint/FloatComparison: - Exclude: - - 'lib/daru/maths/statistics/vector.rb' - # Offense count: 5 Lint/MissingSuper: Exclude: @@ -46,17 +32,6 @@ Lint/MissingSuper: - 'lib/daru/index/index.rb' - 'lib/daru/index/multi_index.rb' -# Offense count: 2 -# Configuration parameters: IgnoreImplicitReferences. -Lint/ShadowedArgument: - Exclude: - - 'lib/daru/maths/statistics/vector.rb' - -# Offense count: 2 -Lint/ToEnumArguments: - Exclude: - - 'lib/daru/vector.rb' - # Offense count: 1 # Configuration parameters: AllowKeywordBlockArguments. 
Lint/UnderscorePrefixedVariableName: @@ -94,7 +69,7 @@ Metrics/MethodLength: # Offense count: 2 # Configuration parameters: CountComments, CountAsOne. Metrics/ModuleLength: - Max: 191 + Max: 190 # Offense count: 4 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods. @@ -132,22 +107,6 @@ Naming/PredicateName: - 'lib/daru/dataframe.rb' - 'lib/daru/vector.rb' -# Offense count: 4 -# Configuration parameters: MinSize. -Performance/CollectionLiteralInLoop: - Exclude: - - 'lib/daru/dataframe.rb' - - 'lib/daru/index/multi_index.rb' - - 'lib/daru/iruby/helpers.rb' - -# Offense count: 4 -Performance/MethodObjectAsBlock: - Exclude: - - 'lib/daru/formatters/table.rb' - - 'lib/daru/index/categorical_index.rb' - - 'lib/daru/index/index.rb' - - 'lib/daru/index/multi_index.rb' - # Offense count: 5 Security/MarshalLoad: Exclude: @@ -172,27 +131,6 @@ Style/ClassVars: Style/Documentation: Enabled: false -# Offense count: 5 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle. -# SupportedStyles: format, sprintf, percent -Style/FormatString: - Exclude: - - 'lib/daru/vector.rb' - -# Offense count: 2 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: MaxUnannotatedPlaceholdersAllowed, AllowedMethods, AllowedPatterns, IgnoredMethods. -# SupportedStyles: annotated, template, unannotated -Style/FormatStringToken: - EnforcedStyle: unannotated - -# Offense count: 1 -# Configuration parameters: MinBodyLength, AllowConsecutiveConditionals. -Style/GuardClause: - Exclude: - - 'lib/daru/core/merge.rb' - # Offense count: 7 # This cop supports unsafe autocorrection (--autocorrect-all). Style/MapToHash: @@ -221,9 +159,3 @@ Style/RedundantSelfAssignment: Exclude: - 'lib/daru/dataframe.rb' -# Offense count: 1 -# This cop supports unsafe autocorrection (--autocorrect-all). -# Configuration parameters: Mode. -Style/StringConcatenation: - Exclude: - - 'lib/daru/dataframe.rb' diff --git a/lib/daru/category.rb b/lib/daru/category.rb index dfd03df1e..3ce953038 100644 --- a/lib/daru/category.rb +++ b/lib/daru/category.rb @@ -173,8 +173,8 @@ def frequencies(type = :count) when :percentage counts.map { |c| c / size.to_f * 100 } else - raise ArgumentError, 'Type should be either :count, :fraction or' \ - " :percentage. #{type} not supported." + raise ArgumentError, 'Type should be either :count, :fraction or ' \ + ":percentage. #{type} not supported." 
       end
       Daru::Vector.new values, index: categories, name: name
     end
diff --git a/lib/daru/core/merge.rb b/lib/daru/core/merge.rb
index 5fd1d6f94..075e1279a 100644
--- a/lib/daru/core/merge.rb
+++ b/lib/daru/core/merge.rb
@@ -110,12 +110,12 @@ def guard_duplicate(val, duplicates, num)
     end

     def row(lkey, rkey)
-      if !lkey && !rkey
-        # :nocov:
-        # It's just an impossibility handler, can't be covered :)
-        raise 'Unexpected condition met during merge'
-        # :nocov:
-      elsif lkey == rkey
+      # :nocov:
+      # It's just an impossibility handler, can't be covered :)
+      raise 'Unexpected condition met during merge' if !lkey && !rkey
+
+      # :nocov:
+      if lkey == rkey
         self.merge_key = lkey
         add_indicator(merge_matching_rows, :both)
       elsif !rkey || lt(lkey, rkey)
diff --git a/lib/daru/dataframe.rb b/lib/daru/dataframe.rb
index 06206f17d..d2a524f4b 100644
--- a/lib/daru/dataframe.rb
+++ b/lib/daru/dataframe.rb
@@ -1470,12 +1470,12 @@ def reindex_vectors(new_vectors)

     cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
     new_vectors.each_with_object(cl) do |vec, memo|
-      memo[vec] = @vectors.include?(vec) ? self[vec] : [nil] * nrows
+      memo[vec] = @vectors.include?(vec) ? self[vec] : Array.new(nrows)
     end
   end

   def get_vector_anyways(v)
-    @vectors.include?(v) ? self[v].to_a : [nil] * size
+    @vectors.include?(v) ? self[v].to_a : Array.new(size)
   end

   # Concatenate another DataFrame along corresponding columns.
@@ -1582,7 +1582,7 @@ def reindex(new_index)

     cl = Daru::DataFrame.new({}, order: @vectors, index: new_index, name: @name)
     new_index.each_with_object(cl) do |idx, memo|
-      memo.row[idx] = @index.include?(idx) ? row[idx] : [nil] * ncols
+      memo.row[idx] = @index.include?(idx) ? row[idx] : Array.new(ncols)
     end
   end

@@ -2890,12 +2890,7 @@ def sort_prepare_block(vector_order, opts)

   def verify_error_message(row, test, id, i)
     description, fields, = test
-    values =
-      if fields.empty?
-        ''
-      else
-        ' (' + fields.collect { |k| "#{k}=#{row[k]}" }.join(', ') + ')'
-      end
+    values = fields.empty? ? '' : " (#{fields.collect { |k| "#{k}=#{row[k]}" }.join(', ')})"
     "#{i + 1} [#{row[id]}]: #{description}#{values}"
   end

diff --git a/lib/daru/formatters/table.rb b/lib/daru/formatters/table.rb
index 83378b065..e8a791a52 100644
--- a/lib/daru/formatters/table.rb
+++ b/lib/daru/formatters/table.rb
@@ -25,7 +25,7 @@ def format(threshold = nil, spacing = nil)

   def build_rows(threshold)
     @row_headers.first(threshold).zip(@data).map do |(r, datarow)|
-      [*[r].flatten.map(&:to_s), *(datarow || []).map(&method(:pretty_to_s))]
+      [*[r].flatten.map(&:to_s), *(datarow || []).map { |v| pretty_to_s(v) }]
     end.tap do |rows|
       unless @headers.empty?
         spaces_to_add = rows.empty? ? 0 : rows.first.size - @headers.size
diff --git a/lib/daru/helpers/array.rb b/lib/daru/helpers/array.rb
index 14829a85d..b76f58696 100644
--- a/lib/daru/helpers/array.rb
+++ b/lib/daru/helpers/array.rb
@@ -23,7 +23,7 @@ def recode_repeated(array)
     array.collect do |n|
       if counter.key?(n)
         counter[n] += 1
-        new_n = format('%s_%d', n, counter[n])
+        new_n = format('%<index>s_%<counter>d', index: n, counter: counter[n])
         n.is_a?(Symbol) ?
new_n.to_sym : new_n else n diff --git a/lib/daru/index/categorical_index.rb b/lib/daru/index/categorical_index.rb index 97951255d..f6306a1a3 100644 --- a/lib/daru/index/categorical_index.rb +++ b/lib/daru/index/categorical_index.rb @@ -57,8 +57,7 @@ def pos(*indexes) elsif index.is_a?(Numeric) && index < @array.size index else - raise IndexError, "#{index.inspect} is neither a valid category" \ - ' nor a valid position' + raise IndexError, "#{index.inspect} is neither a valid category nor a valid position" end end @@ -174,7 +173,7 @@ def at(*positions) if positions.is_a? Integer index_from_pos(positions) else - Daru::CategoricalIndex.new positions.map(&method(:index_from_pos)) + Daru::CategoricalIndex.new(positions.map { |p| index_from_pos(p) }) end end diff --git a/lib/daru/index/index.rb b/lib/daru/index/index.rb index 48db57496..bfdc52e61 100644 --- a/lib/daru/index/index.rb +++ b/lib/daru/index/index.rb @@ -147,7 +147,7 @@ def at(*positions) if positions.is_a? Integer key(positions) else - self.class.new positions.map(&method(:key)) + self.class.new(positions.map { |v| key(v) }) end end diff --git a/lib/daru/index/multi_index.rb b/lib/daru/index/multi_index.rb index 803a8aee5..dc9f112cc 100644 --- a/lib/daru/index/multi_index.rb +++ b/lib/daru/index/multi_index.rb @@ -188,7 +188,7 @@ def at(*positions) if positions.is_a? Integer key(positions) else - Daru::MultiIndex.from_tuples positions.map(&method(:key)) + Daru::MultiIndex.from_tuples(positions.map { |v| key(v) }) end end @@ -363,7 +363,7 @@ def sparse_tuples tuples = to_a [tuples.first] + each_cons(2).map do |prev, cur| left = cur.zip(prev).drop_while { |c, p| c == p } - ([nil] * (cur.size - left.size)) + left.map(&:first) + Array.new(cur.size - left.size) + left.map(&:first) end end diff --git a/lib/daru/io/io.rb b/lib/daru/io/io.rb index 447ec5052..8a7d4f64a 100644 --- a/lib/daru/io/io.rb +++ b/lib/daru/io/io.rb @@ -188,8 +188,7 @@ def optional_gem(name, version) gem name, version require name rescue LoadError - Daru.error "\nInstall the #{name} gem version #{version} for using" \ - " #{name} functions." + Daru.error "\nInstall the #{name} gem version #{version} for using #{name} functions." end DARU_OPT_KEYS = %i[clone order index name].freeze diff --git a/lib/daru/iruby/helpers.rb b/lib/daru/iruby/helpers.rb index 9ee54777a..4878c39d4 100644 --- a/lib/daru/iruby/helpers.rb +++ b/lib/daru/iruby/helpers.rb @@ -30,7 +30,7 @@ def nils_counted(array) end end grouped.flat_map do |items| - [[items.first, items.count], *[nil] * (items.count - 1)] + [[items.first, items.count], *Array.new(items.count - 1)] end end end diff --git a/lib/daru/maths/statistics/vector.rb b/lib/daru/maths/statistics/vector.rb index 4a47674f1..ab650f083 100644 --- a/lib/daru/maths/statistics/vector.rb +++ b/lib/daru/maths/statistics/vector.rb @@ -543,7 +543,7 @@ def center def standardize(use_population = false) m ||= mean sd = use_population ? sdp : sds - return Daru::Vector.new([nil] * size) if m.nil? || sd == 0.0 + return Daru::Vector.new([nil] * size) if m.nil? || sd.to_d == BigDecimal('0.0') vector_standardized_compute m, sd end @@ -894,14 +894,7 @@ def acf(max_lags = nil) # == Returns # # Autocovariance value - def acvf(demean = true, unbiased = true) # rubocop:disable Metrics/MethodLength - opts = { - demean: true, - unbaised: true - }.merge(opts) - - demean = opts[:demean] - unbiased = opts[:unbiased] + def acvf(demean = true, unbiased = true) demeaned_series = demean ? 
self - mean : self n = (10 * Math.log10(size)).to_i + 1 diff --git a/lib/daru/vector.rb b/lib/daru/vector.rb index 8aa86b062..86135b594 100644 --- a/lib/daru/vector.rb +++ b/lib/daru/vector.rb @@ -688,14 +688,14 @@ def sorted_data(&block) # Like map, but returns a Daru::Vector with the returned values. def recode(dt = nil, &block) - return to_enum(:recode) unless block + return to_enum(:recode, dt) unless block dup.recode! dt, &block end # Destructive version of recode! def recode!(dt = nil, &block) - return to_enum(:recode!) unless block + return to_enum(:recode!, dt) unless block @data.map!(&block).data @data = cast_vector_to(dt || @dtype) @@ -1043,15 +1043,15 @@ def object_summary # @return [String] String containing numeric vector summary def numeric_summary summary = "\n median: #{median}" + - ("\n mean: %0.4f" % mean) + format("\n mean: %0.4f", mean) if sd - summary << (("\n std.dev.: %0.4f" % sd) + - ("\n std.err.: %0.4f" % se)) + summary << (format("\n std.dev.: %0.4f", sd) + + format("\n std.err.: %0.4f", se)) end if count_values(*Daru::MISSING_VALUES).zero? - summary << (("\n skew: %0.4f" % skew) + - ("\n kurtosis: %0.4f" % kurtosis)) + summary << (format("\n skew: %0.4f", skew) + + format("\n kurtosis: %0.4f", kurtosis)) end summary end @@ -1123,13 +1123,9 @@ def reindex(new_index) end def index=(idx) - idx = Index.coerce idx - - if idx.size != size - raise ArgumentError, - "Size of supplied index #{idx.size} does not match size of Vector" - end + idx = Index.coerce(idx) + raise ArgumentError, "Size of supplied index #{idx.size} does not match size of Vector" if idx.size != size raise ArgumentError, 'Can only assign type Index and its subclasses.' unless idx.is_a?(Daru::Index) @index = idx