Skip to content

Commit 1041059

Browse files
committed
hxlquickmeta (#9), hxl2tab (#2): hxlquickmeta now has the HXLMeta code that was drafted in other files
1 parent eb8de4c commit 1041059

File tree

5 files changed

+410
-552
lines changed

5 files changed

+410
-552
lines changed

bin/hxl2example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ def output_csv(data, response):
373373

374374

375375
@hug.get('/hxl2example.csv', output=output_csv)
376-
def api_hxl2tab(source_url):
376+
def api_hxl2example(source_url):
377377
"""hxl2example (@see https://github.com/EticaAI/HXL-Data-Science-file-formats)
378378
379379
Example:

bin/hxl2tab

Lines changed: 5 additions & 341 deletions
Original file line numberDiff line numberDiff line change
@@ -140,11 +140,11 @@ class HXL2Tab:
140140

141141
if args.hxlmeta:
142142
print('TODO: hxlmeta')
143-
print('output.output', output.output)
144-
print('source', source)
145-
# print('source.columns', source.headers())
146-
hxlmeta = HXLMeta(local_hxl_file=output.output.name)
147-
hxlmeta.debuginfo()
143+
# print('output.output', output.output)
144+
# print('source', source)
145+
# # print('source.columns', source.headers())
146+
# hxlmeta = HXLMeta(local_hxl_file=output.output.name)
147+
# hxlmeta.debuginfo()
148148
else:
149149
self.hxl2tab(args.outfile, self.original_outfile,
150150
self.original_outfile_is_stdout)
@@ -447,328 +447,6 @@ class HXLTabConverter:
447447
print("TODO")
448448

449449

450-
class HXLMeta:
451-
"""
452-
HXLMeta is (...)
453-
454-
Author: Multiple Authors
455-
License: Public Domain
456-
Version: v0.6.4
457-
"""
458-
459-
GLOSSARY = {
460-
# @see EticaAI-Data_HXL-Data-Science-file-formats_HXLMeta_Glossary
461-
# https://docs.google.com/spreadsheets/d/
462-
# 1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=1066910203
463-
'DataType': {
464-
'name': "HXL Data Type (Official HXL Standard Data Type)",
465-
'description': "Since the HXL standard is meant to be easy to " +
466-
"write also by information managers in the field," +
467-
"most Data Types are implicit and some base " +
468-
"hashtags enforce beyond the generic ‘text’. " +
469-
"(#date assumes +date, affected assumes " +
470-
"+num/number).",
471-
'table': "https://docs.google.com/spreadsheets/d/" +
472-
"1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=717813523"
473-
},
474-
'StorageType': {
475-
'name': "Variable Storage Type",
476-
'description': "Storage Type is one way to document low level" +
477-
"storage type more specific to official HXL Data" +
478-
"Types",
479-
'table': "https://docs.google.com/spreadsheets/d/" +
480-
"1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=211012023"
481-
},
482-
'StatisticalType': {
483-
'name': "Variable Statistical Data Type",
484-
'description': "In statistics, groups of individual data points " +
485-
"may be classified as belonging to any of various "
486-
"statistical data types, e.g. categorical (red, "
487-
"blue, green), real number (1.68, -5, 1.7e+6), "
488-
"odd number(1,3,5) etc. -- Wikipedia",
489-
'table': "https://docs.google.com/spreadsheets/d/" +
490-
"1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=1566300457"
491-
},
492-
'LevelType': {
493-
'name': "Variable Level of measurement Type",
494-
'description': "Level of measurement or scale of measure is a " +
495-
"classification that describes " +
496-
"the nature of information within the values " +
497-
" assigned to variables. Psychologist Stanley " +
498-
" Smith Stevens developed the best-known " +
499-
"classification with four levels, or scales, of " +
500-
"measurement: nominal, ordinal, interval, "
501-
"and ratio. -- Wikipedia. "
502-
"Note: while the current tables document only " +
503-
"Stanley Smith Stevens classification, the " +
504-
"LevelType actually could be used to represent " +
505-
" other typologies if enough users could test " +
506-
" them.",
507-
'table': "https://docs.google.com/spreadsheets/d/" +
508-
"1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=1053765950"
509-
},
510-
'UsageType': {
511-
'name': "Variable Usage Type",
512-
'description': "HXLMeta Usage Types can be used to define how " +
513-
"external tools should use the variable (data on " +
514-
"the column). The most common type that cannot " +
515-
"be automatically safely detected is canonically " +
516-
"called 'focus'/'focusN' (some softwares like " +
517-
"PSPP/SPSS/SAS/WPA Analytics uses 'target', and " +
518-
" Weka/Orange use 'class'; that's why these are " +
519-
" considered direct aliases). ",
520-
'table': "https://docs.google.com/spreadsheets/d/" +
521-
"1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=617579056"
522-
},
523-
'WeightLevel': {
524-
'name': "Variable Weight",
525-
'description': "WeightLevel defines for both HXL-Aware tools " +
526-
"or for exported files based on HXLated datasets" +
527-
"the weight of one observation (a data row). " +
528-
"This is intended mostly for statistical" +
529-
"analysis, but most tools would treat this as " +
530-
"meta information. The default weight is 1. " +
531-
"Weight 0 means ignore. Negative weight (while " +
532-
"allowed on HXL) are likely to raise errors on " +
533-
"external tools.",
534-
'table': "https://docs.google.com/spreadsheets/d/" +
535-
"1vFkBSharAEg5g5K2u_iDLCBvpWWPqpzC1hcL6QpFNZY/edit#gid=1507056660"
536-
},
537-
'VariableLabel': {
538-
'description': "TODO: write here"
539-
},
540-
'VariableDescription': {
541-
'description': "TODO: write here"
542-
},
543-
'OriginaHashtag': {
544-
'description': "TODO: write here"
545-
}
546-
}
547-
548-
# Uses HXL v1.1-final
549-
# @see https://hxlstandard.org/standard/1-1final/dictionary/
550-
# @see https://docs.google.com/spreadsheets/d/
551-
# 1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/edit#gid=319251406
552-
# @see https://en.wikipedia.org/wiki/Statistical_data_type
553-
HXL_REFERENCE = {
554-
# 'hxl_core_datatypes': [
555-
# 'text', 'number', 'url', 'email', 'phone', 'date'
556-
# ]
557-
'hashtag': {
558-
# access
559-
# activity
560-
# adm1
561-
# adm2
562-
# adm3
563-
# adm4
564-
# adm5
565-
# affected | Number of people or households affected by an
566-
# emergency
567-
'affected': {
568-
'datatype_hxl': "number",
569-
'datatype_statistical': "count",
570-
},
571-
# beneficiary
572-
# capacity
573-
# cause
574-
# channel
575-
# contact | Contact information for the subject of a data record
576-
# (e.g. an activity).
577-
# Since HXL 1.0.
578-
# country
579-
# crisis
580-
# currency
581-
# date | Date related to the data in the record applies.
582-
# Preferred format is ISO 8601 (e.g. "2015-06-01",
583-
# "2015-Q1", etc.)
584-
# Since HXL 1.0.
585-
# Note to self: date can be viewed both as discrete and as
586-
# continuous
587-
# @see https://stats.stackexchange.com/questions/220812/
588-
# time-is-a-continuous-or-discrete-variables
589-
'date': {
590-
'datatype_hxl': "date"
591-
},
592-
# delivery
593-
# description
594-
# event
595-
# frequency | The frequency with which something occurs.
596-
# Since HXL 1.1.
597-
# Note to self: need to check better if 95% of the time
598-
# this is right
599-
# @see https://en.wikipedia.org/wiki/Ordinal_data
600-
'frequency': {
601-
'datatype_statistical': "ordinal"
602-
},
603-
# geo
604-
# group
605-
# impact
606-
# indicator
607-
# inneed | Number of people or households in need of humanitarian
608-
# assistance.
609-
'inneed': {
610-
'datatype_hxl': "number",
611-
'datatype_statistical': "count",
612-
},
613-
# item
614-
# loc
615-
# meta
616-
'meta': {
617-
'datafeature_hxl': "meta",
618-
'datafeature_orange': "meta",
619-
},
620-
# modality
621-
# need
622-
# operations
623-
# org
624-
# output
625-
# population | General population number for an area or location,
626-
# regardless of their specific humanitarian needs.
627-
# Since HXL 1.0.
628-
'population': {
629-
'datatype_hxl': "number",
630-
'datatype_statistical': "count",
631-
},
632-
# reached | Number of people or households reached with
633-
# humanitarian assistance. Subset of #targeted.
634-
# Since HXL 1.0.
635-
'reached': {
636-
'datatype_hxl': "number",
637-
'datatype_statistical': "count",
638-
},
639-
# region
640-
# respondee
641-
# sector
642-
# service
643-
# severity
644-
# status
645-
# subsector
646-
# targeted | Number of people or households targeted for
647-
# humanitarian assistance. Subset of #inneed; superset
648-
# of #reached.
649-
# Since HXL 1.0.
650-
'targeted': {
651-
'datatype_hxl': "number",
652-
'datatype_statistical': "count",
653-
},
654-
# value | A monetary value, such as the price of goods in a market,
655-
# a project budget, or the amount of cash transferred to
656-
# beneficiaries. May be used together with #currency in
657-
# financial or cash data.
658-
# Since HXL 1.1.
659-
'value': {
660-
'datatype_hxl': "number"
661-
},
662-
},
663-
'attributes': {
664-
# ### HXL official Core attributes ________________________________
665-
# @see https://docs.google.com/spreadsheets/d/
666-
# 1En9FlmM8PrbTWgl3UHPF_MXnJ6ziVZFhBbojSJzBdLI/edit#gid=1810309357
667-
# +code
668-
'code': {
669-
'datatype_hxl': "text"
670-
},
671-
# +coord
672-
# +dest
673-
# +displaced
674-
# +elevation
675-
# +email (also an HXL Core Data Type)
676-
# +id
677-
# +label
678-
'label': {
679-
'datatype_hxl': "text"
680-
},
681-
# +lat
682-
# +lon
683-
# +name
684-
# +num
685-
'num': {
686-
'datatype_hxl': "number"
687-
},
688-
# +pct
689-
# +phone (also an HXL Core Data Type)
690-
# +start
691-
# +text (also an HXL Core Data Type)
692-
# +type
693-
# +url (also an HXL Core Data Type)
694-
# ### HXL Data Types ______________________________________________
695-
# text
696-
'text': {
697-
'datatype_hxl': "text"
698-
},
699-
# number
700-
'number': {
701-
'datatype_hxl': "number"
702-
},
703-
# url
704-
'url': {
705-
'datatype_hxl': "url",
706-
'dataflag_orange': "meta",
707-
},
708-
# email
709-
'email': {
710-
'datatype_hxl': "email",
711-
'dataflag_orange': "meta",
712-
},
713-
# phone
714-
'phone': {
715-
'datatype_hxl': "phone",
716-
'dataflag_orange': "meta",
717-
},
718-
# date
719-
'date': {
720-
'datatype_hxl': "date"
721-
},
722-
}
723-
}
724-
725-
def __init__(self, local_hxl_file=None):
726-
"""
727-
Constructs all the necessary attributes for the HXLMeta
728-
object.
729-
"""
730-
731-
# Posix exit codes
732-
self.EXIT_OK = 0
733-
self.EXIT_ERROR = 1
734-
self.EXIT_SYNTAX = 2
735-
736-
# TODO: Use some abstraction instead of access directly the file
737-
self.local_hxl_file = local_hxl_file
738-
self.text_headers = None
739-
self.hxl_headers = None
740-
self.data_columns = None
741-
742-
def debuginfo(self):
743-
print('debuginfo')
744-
with open(self.local_hxl_file, 'r') as csv_file:
745-
csv_reader = csv.reader(csv_file)
746-
line_1st = next(csv_reader)
747-
line_2nd = next(csv_reader)
748-
# line_start = -1
749-
750-
if line_1st[0].find('#') == -1 and line_2nd[0].find('#') == 0:
751-
self.text_headers = line_1st
752-
self.hxl_headers = line_2nd
753-
# line_start = 1
754-
elif line_1st[0].find('#') == 0:
755-
self.text_headers = None
756-
self.hxl_headers = line_1st
757-
# line_start = 0
758-
else:
759-
raise Exception("HXLMetaUnknownSourceException")
760-
761-
# for line in csv_reader:
762-
# txt_writer.writerow(line)
763-
764-
# Hotfix: skip first non-HXL header. Ideally I think the already
765-
# exported HXLated file should already be saved without headers.
766-
print('self.text_headers', self.text_headers)
767-
print('self.hxl_headers', self.hxl_headers)
768-
# header_original = next(csv_reader)
769-
# print('header_original', header_original)
770-
771-
772450
class HXLUtils:
773451
"""
774452
HXLUtils contains functions from the Console scripts of libhxl-python
@@ -1004,17 +682,3 @@ def api_hxl2tab(source_url):
1004682
hxl2tab = HXL2Tab()
1005683

1006684
return hxl2tab.execute_web(source_url)
1007-
1008-
1009-
@hug.get('/hxlmeta')
1010-
def api_hxlmeta(source_url):
1011-
"""hxl2tab (@see https://github.com/EticaAI/HXL-Data-Science-file-formats)
1012-
1013-
Example:
1014-
http://localhost:8000/api_hxlmeta?source_url=https://docs.google.com/spreadsheets/u/1/d/1l7POf1WPfzgJb-ks4JM86akFSvaZOhAUWqafSJsm3Y4/edit#gid=634938833
1015-
1016-
"""
1017-
1018-
hxl2tab = HXL2Tab()
1019-
1020-
return hxl2tab.execute_web(source_url=source_url, hxlmeta=True)

0 commit comments

Comments
 (0)