diff --git a/exif.cc b/exif.cc index cece53d16..8cf300fda 100644 --- a/exif.cc +++ b/exif.cc @@ -63,7 +63,7 @@ #include // for memcmp, strlen #include // for as_const -#include "defs.h" // for Waypoint, fatal, warning, global_options, global_opts, unknown_alt, xfree, route_disp_all, track_disp_all, waypt_disp_all, wp_flags, KNOTS_TO_MPS, KPH_TO_MPS, MPH_TO_MPS, MPS_TO_KPH, WAYPT_HAS, case_ignore_strcmp, waypt_add, xstrdup, fix_2d +#include "defs.h" // for Waypoint, fatal, warning, global_options, global_opts, unknown_alt, xfree, route_disp_all, track_disp_all, waypt_disp_all, wp_flags, KNOTS_TO_MPS, KPH_TO_MPS, MPH_TO_MPS, MPS_TO_KPH, WAYPT_HAS, case_ignore_strcmp, waypt_add, fix_2d #include "garmin_tables.h" // for gt_lookup_datum_index #include "gbfile.h" // for gbfputuint32, gbfputuint16, gbfgetuint16, gbfgetuint32, gbfseek, gbftell, gbfile, gbfclose, gbfcopyfrom, gbfwrite, gbfopen_be, gbfread, gbfrewind, gbfgetflt, gbfgetint16, gbfopen, gbfputc, gbfputflt, gbsize_t, gbfeof, gbfgetdbl, gbfputdbl, gbfile::(anonymous) #include "jeeps/gpsmath.h" // for GPS_Math_WGS84_To_Known_Datum_M diff --git a/reference/gc/GCGCA8_nasty.gpx b/reference/gc/GCGCA8_nasty.gpx new file mode 100644 index 000000000..ebcac6636 --- /dev/null +++ b/reference/gc/GCGCA8_nasty.gpx @@ -0,0 +1,80 @@ + + + Cache Listing Generated from Geocaching.com + This is an individual cache generated from Geocaching.com + Account "robertlipe" From Geocaching.com + contact@geocaching.com + https://www.geocaching.com + Geocaching - High Tech Treasure Hunting + + cache, geocache + + + + GCGCA8 + Oozy rat in a sanitary zoo by robertlipe, Unknown Cache (3/2) + https://www.geocaching.com/geocache/GCGCA8 + Oozy rat in a sanitary zoo + Geocache + Geocache|Unknown Cache + + Oozy rat in a sanitary zoo + robertlipe + robertlipe + Unknown Cache + Not chosen + + Wheelchair accessible + Ticks + Dangerous animals + Poisonous plants + Thorns + Picnic tables nearby + Public restrooms nearby + Dogs + + 3 + 2 + United States 
+ Tennessee + <body>The cache is <style> +not</style> at the coordinates above. These coords will get +you to the correct park and within 1/2 mile of the cache. The cache +is within 35 feet of the trail. It is not handicapped accessible. +It is a nice walk in the woods that is practical for all ages. +There is no space in the container for trading items. You should +bring a writing stick and bug spray is recommended.</body> + + <html><body text="color">So if the cache isn't at the above coordinates, where is it? +<ul> +<li>Too bad I hid a boot</li> +<li>Too hot to hoot</li> +<li>Never odd or even</li> +<li>Do geese see God?</li> +<li>"Do nine men interpret?" "Nine men," I nod</li> +<li>Rats live on no evil star</li> +<li>Go hang a salami, I'm a lasagna hog</li></ul> +Now that it's intuitively obvious to even the most casual observer +where the cache is, turn on your geo-mojo and go find it. <br> +<image src="http://www.mtgc.org/mtgc_member-banner.gif" width="500" +height="40" alt= +"Member of Middle Tennessee GeoCachers Club [www.mtgc.org]" + border="0"><br> +<br></body></html> + + + + + + 2017-11-11T01:44:14Z + Archive + robertlipe + Removed the container from the final location. Enough construction has occurred since this was placed to make it much less of an adventure than is used to be, so I'm archiving. + +Thanx to all that hunted it. + + + + + + diff --git a/reference/gc/GCGCA8_nasty.html b/reference/gc/GCGCA8_nasty.html new file mode 100644 index 000000000..d6c592ce1 --- /dev/null +++ b/reference/gc/GCGCA8_nasty.html @@ -0,0 +1,55 @@ + + + + + GPSBabel HTML Output + + + +

+ GCGCA8 - Oozy rat in a sanitary zoo
+

+

+ + + + + + + + +
+

GCGCA8 - N35°55.300 W86°51.700 (16S 512480 3975269)
+Oozy rat in a sanitary zoo by robertlipe

+
+

3 / 2
+Unknown Cache / Unknown

+
+

The cache is at the coordinates above. These coords will get +you to the correct park and within 1/2 mile of the cache. The cache +is within 35 feet of the trail. It is not handicapped accessible. +It is a nice walk in the woods that is practical for all ages. +There is no space in the container for trading items. You should +bring a writing stick and bug spray is recommended.

+

So if the cache isn't at the above coordinates, where is it? +

    +
  • Too bad I hid a boot
  • +
  • Too hot to hoot
  • +
  • Never odd or even
  • +
  • Do geese see God?
  • +
  • "Do nine men interpret?" "Nine men," I nod
  • +
  • Rats live on no evil star
  • +
  • Go hang a salami, I'm a lasagna hog
+Now that it's intuitively obvious to even the most casual observer +where the cache is, turn on your geo-mojo and go find it.
++"Member
+
+
+
+ + diff --git a/testo b/testo index fc5550162..5083eead1 100755 --- a/testo +++ b/testo @@ -189,4 +189,5 @@ if [ -z "${VALGRIND}" ]; then fi fi +echo "Total Errors: $errorcount" exit $errorcount diff --git a/testo.d/text.test b/testo.d/text.test index e89b52226..4307260bb 100644 --- a/testo.d/text.test +++ b/testo.d/text.test @@ -16,3 +16,7 @@ gpsbabel -i gpx -f ${REFERENCE}/gc/GC7FA4.gpx \ -o text,logs -F ${TMPDIR}/GC7FA4.text compare ${REFERENCE}/gc/GC7FA4.html ${TMPDIR}/GC7FA4.html compare ${REFERENCE}/gc/GC7FA4.text ${TMPDIR}/GC7FA4.text + +# GCGCA8_nasty.gpx is hand modified to test strip_nastyhtml +gpsbabel -i gpx -f ${REFERENCE}/gc/GCGCA8_nasty.gpx -o html -F ${TMPDIR}/GCGCA8_nasty.html +compare ${REFERENCE}/gc/GCGCA8_nasty.html ${TMPDIR}/GCGCA8_nasty.html diff --git a/util.cc b/util.cc index 7840d5268..c3a6db059 100644 --- a/util.cc +++ b/util.cc @@ -20,7 +20,7 @@ */ #include // for sort -#include // for isspace, tolower +#include // for assert #include // for errno #include // for INT_MAX, INT_MIN #include // for fabs, floor @@ -35,6 +35,9 @@ #include // for QDateTime #include // for QFileInfo #include // for QList +#include // for QRegularExpression +#include // for QRegularExpressionMatch +#include // for QRegularExpressionMatchIterator #include // for QString #include // for QTextBoundaryFinder, QTextBoundaryFinder::Grapheme #include // for QTextCodec @@ -734,92 +737,29 @@ pretty_deg_format(double lat, double lon, char fmt, const char* sep, bool html) /* * Get rid of potentially nasty HTML that would influence another record * that includes; - * - to stop backgrounds/background colours from being loaded + * - to stop backgrounds/background colors from being loaded * and - stop processing altogether * - stop overriding styles for everything */ QString strip_nastyhtml(const QString& in) { - char* returnstr = xstrdup(in); - char* lcstr = xstrdup(in.toLower()); - - while (char* lcp = strstr(lcstr, "")) { - char* sp = returnstr + (lcp - lcstr) ; /* becomes 
*/ - sp++; - *sp++ = '!'; - *sp++ = ' '; - *sp++ = ' '; - *sp++ = ' '; - *lcp = '*'; /* so we wont find it again */ - } - while (char* lcp = strstr(lcstr, " */ - char* sp = returnstr + (lcp - lcstr) ; - sp++; - *sp++ = '!'; - *sp++ = '-'; - *sp++ = '-'; - while ((*sp) && (*sp != '>')) { - sp++; - } - *--sp = '-'; - *--sp = '-'; - *lcp = '*'; /* so we wont find it again */ - } - while (char* lcp = strstr(lcstr, "")) { - char* sp = returnstr + (lcp - lcstr) ; /* becomes */ - *sp++ = ' '; - *sp++ = ' '; - *sp++ = ' '; - *sp++ = ' '; - *sp++ = ' '; - *sp++ = '-'; - *sp++ = '-'; - *lcp = '*'; /* so we wont find it again */ - } - while (char* lcp = strstr(lcstr, "", QRegularExpression::CaseInsensitiveOption | QRegularExpression::DotMatchesEverythingOption); + assert(htmlre.isValid()); + static const QRegularExpression bodyre("", QRegularExpression::CaseInsensitiveOption | QRegularExpression::DotMatchesEverythingOption); + assert(bodyre.isValid()); + static const QRegularExpression stylere(".*?", QRegularExpression::CaseInsensitiveOption | QRegularExpression::DotMatchesEverythingOption); + assert(stylere.isValid()); + QString out(in); + + out.replace(bodyre, ""); + out.replace("", "", Qt::CaseInsensitive); + out.replace(htmlre, ""); + out.replace("", "", Qt::CaseInsensitive); + out.replace(stylere, ""); + out.replace("[^ >]*).*?>)|(?:&(?.*?);)|(?[^<&]+)|(?.+)", + QRegularExpression::DotMatchesEverythingOption); + assert(re.isValid()); + static const QRegularExpression newlinespace_re("\\n\\s*"); + assert(newlinespace_re.isValid()); + QString out; + + QRegularExpressionMatchIterator it = re.globalMatch(utfstring); + while (it.hasNext()) { + auto match = it.next(); + //qDebug() << match.capturedTexts(); + // TODO: Qt >= 6.3 use match.hasCaptured(...) 
instead of !match.captured(...).isNull() + if (!match.captured(u"tag").isNull()) { + QString tag = match.captured(u"tag"); + //qDebug() << "tag match:" << tag; + if (tag.compare("p", Qt::CaseInsensitive) == 0) { + out.append('\n'); + } else if (tag.compare("br", Qt::CaseInsensitive) == 0) { + out.append('\n'); + } else if (tag.compare("/tr", Qt::CaseInsensitive) == 0) { + out.append('\n'); + } else if (tag.compare("/td", Qt::CaseInsensitive) == 0) { + out.append(' '); + } else if (tag.startsWith("img", Qt::CaseInsensitive)) { + out.append("[IMG]"); + } // else eat the tag + } else if (!match.captured(u"entity").isNull()) { + QString entity = match.captured(u"entity"); + //qDebug() << "entity match:" << entity; + if (entity == "amp") { + out.append('&'); + } else if (entity == "lt") { + out.append('<'); + } else if (entity == "gt") { + out.append('>'); + } else if (entity == "quot") { + out.append('"'); + } else if (entity == "nbsp") { + out.append(' '); + } else if (entity == "deg") { + out.append("deg"); + } // else eat the entity + } else if (!match.captured(u"other").isNull()) { + //qDebug() << "other match:" << match.capturedTexts(); + out.append(match.captured(u"other").replace(newlinespace_re, " ")); + //} else { + // qDebug() << "unexpected fragment:" << match.capturedTexts(); } - - if (! tag[0]) { - if (*instr == '\n') { - *out++ = ' '; - do { - instr++; - } while (isspace(*instr)); - continue; - } else { - *out++ = *instr; - } - } else { - if (taglen < (sizeof(tag)-1)) { - tag[taglen++] = tolower(*instr); - tag[taglen] = 0; - } - } - - if (((tag[0] == '<') && (*instr == '>')) || - ((tag[0] == '&') && (*instr == ';'))) { - if (! strcmp(tag, "&")) { - *out++ = '&'; - } else if (! strcmp(tag, "<")) { - *out++ = '<'; - } else if (! strcmp(tag, ">")) { - *out++ = '>'; - } else if (! strcmp(tag, """)) { - *out++ = '"'; - } else if (! strcmp(tag, " ")) { - *out++ = ' '; - } else if (! 
strcmp(tag, "°")) { - *out++ = 'd'; - *out++ = 'e'; - *out++ = 'g'; - } else if ((tag[0]=='<') && (tag[1]=='p')) { - *out++ = '\n'; - } else if ((tag[0]=='<') && (tag[1]=='b') && (tag[2]=='r')) { - *out++ = '\n'; - } else if ((tag[0]=='<') && (tag[1]=='/') && (tag[2]=='t') && (tag[3]=='r')) { - *out++ = '\n'; - } else if ((tag[0]=='<') && (tag[1]=='/') && (tag[2]=='t') && (tag[3]=='d')) { - *out++ = ' '; - } else if ((tag[0]=='<') && (tag[1]=='i') && (tag[2]=='m') && (tag[3]=='g')) { - *out++ = '['; - *out++ = 'I'; - *out++ = 'M'; - *out++ = 'G'; - *out++ = ']'; - } - - tag[0] = 0; - } - instr++; } - *out++ = 0; - xfree(incopy); - QString rv(outstring); - xfree(outstring); - return rv; + + return out; #endif }