Skip to content

Commit 3f87045

Browse files
committed
Add custom format string overrides, fix output ordering, improve element rendering
1 parent e8a2446 commit 3f87045

File tree

2 files changed

+142
-56
lines changed

2 files changed

+142
-56
lines changed

main.cpp

Lines changed: 140 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <myhtml/serialization.h>
1313
#include <mycss/selectors/serialization.h>
1414
#include <modest/finder/finder.h>
15+
#include <fmt/core.h>
1516

1617
using namespace std;
1718

@@ -27,6 +28,9 @@ Usage: %s [options] <selector> <mode> [mode argument]
2728
delimiter character to use between results (defaults to newline)
2829
-0, --null
2930
uses \0 as delimiter
31+
-F, --format <selector> <format string>
32+
specify custom format string for element stringification (can be specified multiple times)
33+
example: `-F a '->{}<-'` - renders <a> text wrapped in '->' and '<-'
3034
3135
<selector>
3236
CSS selector to match against
@@ -53,11 +57,18 @@ static const string afmt_e = "m";
5357
static const vector<char> collapsible = {' ', '\t', '\n', '\r'};
5458
static const vector<myhtml_tag_id_t> breaking = {
5559
MyHTML_TAG_BR,
56-
MyHTML_TAG_P
60+
MyHTML_TAG_P,
61+
MyHTML_TAG_H1,
62+
MyHTML_TAG_H2,
63+
MyHTML_TAG_H3,
64+
MyHTML_TAG_H4,
65+
MyHTML_TAG_H5,
66+
MyHTML_TAG_H6,
67+
MyHTML_TAG_HR,
5768
};
5869

59-
static map<const string, bool> flags = {
60-
{"dirtyargs", false}
70+
static map<const string, int> flags = {
71+
{"dirtyargs", 0}
6172
};
6273

6374
static map<const string, string> state = { // global state
@@ -67,15 +78,16 @@ static map<const string, string> state = { // global state
6778
{"selector", ""}, // matching selector
6879
{"mode", ""}, // output mode
6980
{"data", ""}, // read input data
70-
{"modearg", ""} // mode argument (optional)
81+
{"modearg", ""}, // mode argument (optional)
82+
{"scratch", ""}, // scratchpad value (internal use)
7183
};
7284

7385
bool readarg(int &argc, const char** &argv, string argname, const bool die_on_err = true){
7486
if(argc > 1){
75-
state[argname] = argv[1];
7687
argv++;
7788
argc--;
78-
flags["dirtyargs"] = true;
89+
state[argname] = *argv;
90+
flags["dirtyargs"]++;
7991
return true;
8092
}else{
8193
if(die_on_err){
@@ -110,6 +122,10 @@ template <typename ...T> inline bool node_in(myhtml_tree_node_t* node, T... tags
110122
return false;
111123
}
112124

125+
// Strict-weak-ordering comparator for sorting nodes: lhs goes before rhs when
// the `begin` value reported by myhtml_node_element_position() for lhs is the
// smaller of the two.
bool node_sort(myhtml_tree_node_t* lhs, myhtml_tree_node_t* rhs){
    const auto lhs_pos = myhtml_node_element_position(lhs);
    const auto rhs_pos = myhtml_node_element_position(rhs);
    return lhs_pos.begin < rhs_pos.begin;
}
128+
113129
template <typename ...T> inline bool node_before(myhtml_tree_node_t* node, T... tags){
114130
while((node = node->next) && node->tag_id <= 0x003);
115131

@@ -134,9 +150,21 @@ static map<const char, const string> option_longopts = { // maps shortopts to lo
134150
{'h', "help"},
135151
{'f', "file"},
136152
{'d', "delimiter"},
137-
{'0', "zero"}
153+
{'0', "zero"},
154+
{'F', "format"},
138155
};
139156

157+
// Per-selector format overrides collected from --format: each entry holds
// (selector text, format string, nodes matched by that selector). The
// collection pointer is stored as nullptr when the option is parsed and is
// filled in later, once the selector has been run against the parsed tree.
vector<tuple<string, string, myhtml_collection_t*>> selector_format = {};
158+
159+
// Returns the format string that applies to `node`.
//
// The entries of selector_format are checked in insertion order; the first
// entry whose matched-node collection contains `node` wins and its format
// string is returned. Entries whose collection is still nullptr are ignored.
// When nothing matches, the pass-through format "{}" is returned.
//
// The returned pointer refers either to a string literal or to a string owned
// by selector_format, so it is only valid while that entry is left untouched.
const char* format_node(myhtml_tree_node_t* node){
    for(const auto& [fselect, fstr, fcollect] : selector_format){
        if(fcollect == nullptr) continue;

        // search the collection's array in place; this runs once per rendered
        // node, so copying the list into a temporary vector here is wasteful
        myhtml_tree_node_t* const* first = fcollect->list;
        myhtml_tree_node_t* const* last = first + fcollect->length;
        if(std::find(first, last, node) != last) return fstr.c_str();
    }

    return "{}";
}
167+
140168
static map<const string, const function<void(int&, const char**&)>> option_handlers = { // maps longopts to functions
141169
{"help", [](int &argc, const char** &argv) {
142170
fprintf(stderr, helptext, state["progname"].c_str(), state["progname"].c_str(), state["progname"].c_str());
@@ -152,7 +180,29 @@ static map<const string, const function<void(int&, const char**&)>> option_handl
152180
}},
153181
{"zero", [](int &argc, const char** &argv) {
154182
state["delim"] = "\0";
155-
}}
183+
}},
184+
{"format", [](int &argc, const char** &argv) {
185+
argv++, argc--;
186+
if(!readarg(argc, argv, "scratch", false)){
187+
cerr << "missing selector in --format" << endl;
188+
exit(EXIT_FAILURE);
189+
}
190+
string fselect = state["scratch"];
191+
if(!readarg(argc, argv, "scratch", false)){
192+
cerr << "missing format string in --format" << endl;
193+
exit(EXIT_FAILURE);
194+
}
195+
string form = state["scratch"];
196+
197+
if(fselect.length() == 0){
198+
cerr << "invalid --format " << fselect << " " << form << endl;
199+
exit(EXIT_FAILURE);
200+
}
201+
202+
selector_format.push_back(tuple<string, string, myhtml_collection_t*>(fselect, form, nullptr));
203+
204+
argv--, argc++;
205+
}},
156206
};
157207

158208
static pair<const function<void(myhtml_tree_node_t*, string&)>, const function<void(myhtml_tree_node_t*, string&)>> format_handlers = { // {format, unformat}
@@ -169,6 +219,12 @@ static pair<const function<void(myhtml_tree_node_t*, string&)>, const function<v
169219
case MyHTML_TAG_I: // italics on
170220
case MyHTML_TAG_U:
171221
case MyHTML_TAG_EM:
222+
case MyHTML_TAG_H1:
223+
case MyHTML_TAG_H2:
224+
case MyHTML_TAG_H3:
225+
case MyHTML_TAG_H4:
226+
case MyHTML_TAG_H5:
227+
case MyHTML_TAG_H6:
172228
if(ansi) rendered += afmt_s + "4" + afmt_e;
173229
if(md) rendered += "_";
174230
break;
@@ -201,6 +257,12 @@ static pair<const function<void(myhtml_tree_node_t*, string&)>, const function<v
201257
case MyHTML_TAG_I: // italics off
202258
case MyHTML_TAG_U:
203259
case MyHTML_TAG_EM:
260+
case MyHTML_TAG_H1:
261+
case MyHTML_TAG_H2:
262+
case MyHTML_TAG_H3:
263+
case MyHTML_TAG_H4:
264+
case MyHTML_TAG_H5:
265+
case MyHTML_TAG_H6:
204266
if(ansi) rendered += afmt_s + "24" + afmt_e; // no italics here :(
205267
if(md) rendered += "_";
206268
break;
@@ -225,6 +287,11 @@ static pair<const function<void(myhtml_tree_node_t*, string&)>, const function<v
225287
rendered += "\t";
226288
}
227289
break;
290+
case MyHTML_TAG_TR:
291+
if(rendered.back() != '\n'){
292+
rendered += "\n";
293+
}
294+
break;
228295
}
229296

230297
if(vec_has(breaking, node_iter->tag_id)){ // <br/>
@@ -233,56 +300,57 @@ static pair<const function<void(myhtml_tree_node_t*, string&)>, const function<v
233300
}
234301
};
235302

303+
// Renders `node_iter` and all of its following siblings (each together with
// its descendants) to text and returns the concatenation.
//
// For every node in the sibling chain:
//   1. format_handlers.first  emits the opening decoration for the tag,
//   2. text nodes contribute their text (whitespace collapsed to single
//      spaces unless the node is inside <pre>),
//   3. children are rendered recursively and appended,
//   4. the result is passed through the node's format string (format_node),
//   5. format_handlers.second emits the closing decoration.
//
// Each node is built in its own string so the handlers only ever see that
// node's own output, never the output of earlier siblings.
//
// A nullptr argument yields an empty string, so callers may pass
// `node->child` without checking it first.
//
// NOTE(review): the format string may come from the command line; a malformed
// one presumably makes fmt::format throw, which is not caught here - confirm
// against the fmt version in use.
string render_node(myhtml_tree_node_t* node_iter){
    string output;

    // siblings are walked with a loop rather than recursion so that stack
    // depth depends on tree depth only, not on the number of siblings
    for(; node_iter != nullptr; node_iter = node_iter->next){
        // skip <style> and its contents, but keep going: siblings that follow
        // a <style> element must still be rendered
        if(node_iter->tag_id == MyHTML_TAG_STYLE) continue;

        string rendered;

        format_handlers.first(node_iter, rendered);

        if(node_iter->tag_id == MyHTML_TAG__TEXT){
            // guard against a null text pointer before building a std::string
            const char* raw = myhtml_node_text(node_iter, nullptr);
            string text = raw ? raw : "";

            if(!node_in(node_iter, MyHTML_TAG_PRE)){
                // collapse runs of whitespace to a single character
                string::iterator nend = unique(text.begin(), text.end(), [](char c1, char c2) -> bool {
                    return vec_has(collapsible, c1) && vec_has(collapsible, c2);
                });
                text.erase(nend, text.end());

                // replace the remaining whitespace characters with a space
                replace_if(text.begin(), text.end(), [](char c) -> bool {
                    return vec_has(collapsible, c);
                }, ' ');
            }

            rendered += text;
        }

        if(node_iter->child){
            rendered += render_node(node_iter->child);
        }

        rendered = fmt::format(format_node(node_iter), rendered);

        format_handlers.second(node_iter, rendered);

        output += rendered;
    }

    return output;
}
342+
236343
static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handlers = { // maps modes to functions
237344
{"data", [](myhtml_tree_node_t* node) {
238345
myhtml_serialization_tree_callback(node, [](const char* data, size_t len, void* ctx) -> unsigned int {
239-
printf("%.*s", static_cast<int>(len), data);
346+
printf("%s", data);
240347
return 0;
241-
}, nullptr);
348+
}, node);
242349
printf("%c", state["delim"][0]);
243350
}},
244351

245352
{"text", [](myhtml_tree_node_t* node) {
246-
string rendered = "";
247-
248-
myhtml_tree_node_t* node_iter = node->child;
249-
while(node_iter){
250-
const char* text_c = myhtml_node_text(node_iter, nullptr);
251-
string text = "";
252-
if(text_c != nullptr) text += text_c;
253-
254-
if(node_iter->tag_id == MyHTML_TAG__TEXT){
255-
if(!node_in(node_iter, MyHTML_TAG_PRE)){
256-
// collapse whitespace to single character
257-
string::iterator nend = unique(text.begin(), text.end(), [](char c1, char c2) -> bool {
258-
return vec_has(collapsible, c1) && vec_has(collapsible, c2);
259-
});
260-
text.resize(static_cast<unsigned long>(nend-text.begin()));
261-
262-
// replace whitespace with space
263-
replace_if(text.begin(), text.end(), [](char c) -> bool {
264-
return vec_has(collapsible, c);
265-
}, ' ');
266-
}
267-
268-
rendered += text;
269-
}else{
270-
format_handlers.first(node_iter, rendered);
271-
}
272-
273-
if(node_iter->child) node_iter = node_iter->child;
274-
else{
275-
while(node_iter != node && node_iter->next == nullptr){
276-
format_handlers.second(node_iter, rendered);
277-
278-
node_iter = node_iter->parent;
279-
}
280-
if(node_iter == node) break;
281-
282-
format_handlers.second(node_iter, rendered);
283-
node_iter = node_iter->next;
284-
}
285-
}
353+
string rendered = render_node(node->child);
286354

287355
size_t index = 0;
288356
while((index = rendered.find("\n ", index)) != string::npos){ // clear whitespace before multiline content
@@ -296,7 +364,8 @@ static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handler
296364
while(vec_has(collapsible, rendered[0])) rendered.erase(0, 1); // clear whitespace before single-line content
297365
while(vec_has(collapsible, *(rendered.end()-1))) rendered.erase(rendered.length()-1, 1); // clear whitespace after single-line content
298366

299-
cout << rendered;
367+
fmt::print(format_node(node), rendered);
368+
//printf(fmt, rendered);
300369
printf("%c", state["delim"][0]);
301370
}},
302371

@@ -314,7 +383,7 @@ static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handler
314383

315384
do{
316385
if(state["modearg"] == mycore_string_data(&attr->key)){
317-
cout << mycore_string_data(&attr->value);
386+
fmt::print(format_node(node), mycore_string_data(&attr->value));
318387
printf("%c", state["delim"][0]);
319388
}
320389
}while(attr != token->attr_last && (attr = attr->next)); // move attr pointer further & loop if attr_last not hit
@@ -343,8 +412,8 @@ void parseopts(int &argc, const char** &argv){
343412
cerr << "invalid short option '-" << argv[1][0] << "'" << endl;
344413
exit(EXIT_FAILURE);
345414
}
346-
if(flags["dirtyargs"]){ // option handler touched argv (args?); skip
347-
flags["dirtyargs"] = false;
415+
if(flags["dirtyargs"] > 0){ // option handler touched argv (args?); skip
416+
flags["dirtyargs"]--;
348417
break;
349418
}
350419
}
@@ -406,9 +475,25 @@ int main(int argc, const char* argv[]){
406475
myhtml_collection_t* collection = nullptr;
407476
modest_finder_by_selectors_list(finder, html_tree->node_html, selectors_list, &collection);
408477

478+
for(auto& [fselect, fstr, fcollect] : selector_format){
479+
mycss_selectors_list_t* fselect_parsed = mycss_selectors_parse(
480+
mycss_entry_selectors(css_entry),
481+
MyENCODING_UTF_8,
482+
fselect.c_str(), fselect.length(),
483+
&mystatus
484+
);
485+
if(fselect_parsed == nullptr || (fselect_parsed->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)){
486+
cerr << "bad format selector '" << fselect << "'" << endl;
487+
exit(EXIT_FAILURE);
488+
}
489+
modest_finder_by_selectors_list(finder, html_tree->node_html, fselect_parsed, &fcollect);
490+
}
491+
409492
if(collection){
493+
vector<myhtml_tree_node_t*> nodes(collection->list, collection->list+collection->length);
494+
sort(nodes.begin(), nodes.end(), node_sort);
410495
try{
411-
for(myhtml_tree_node_t* node : vector<myhtml_tree_node_t*>(collection->list, collection->list+collection->length)){
496+
for(myhtml_tree_node_t* node : nodes){
412497
mode_handlers[state["mode"]](node);
413498
}
414499
}catch(bad_function_call&){

meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
project('hq', 'cpp', default_options: ['cpp_std=c++17'])
22
modest = dependency('modest')
3+
fmt = dependency('fmt')
34
executable('hq', 'main.cpp',
4-
dependencies: [modest]
5+
dependencies: [modest, fmt]
56
)

0 commit comments

Comments
 (0)