Skip to content

Commit 83b6d13

Browse files
committed
Rewrote logic for decoding strings.
Fixed bugs related to parsing strings with certain escape character combos.
1 parent 89aa160 commit 83b6d13

File tree

1 file changed

+128
-125
lines changed

1 file changed

+128
-125
lines changed

src/json_module.F90

Lines changed: 128 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -4797,156 +4797,162 @@ subroutine json_get_string(me, value)
47974797
type(json_value),pointer,intent(in) :: me
47984798
character(kind=CK,len=:),allocatable,intent(out) :: value
47994799

4800-
character(kind=CK ,len=:),allocatable :: s,pre,post
4801-
integer(IK) :: j,jprev,n
4802-
character(kind=CK,len=1) :: c
4803-
48044800
value = ''
4805-
if ( exception_thrown) return
4801+
if (.not. exception_thrown) then
48064802

4807-
select case (me%var_type)
4803+
select case (me%var_type)
48084804

4809-
case (json_string)
4805+
case (json_string)
48104806

4811-
if (allocated(me%str_value)) then
4807+
if (allocated(me%str_value)) then
4808+
call unescape_string(me%str_value, value)
4809+
else
4810+
call throw_exception('Error in json_get_string:'//&
4811+
' me%str_value not allocated')
4812+
end if
48124813

4813-
!get the value as is:
4814-
s = me%str_value
4814+
case default
48154815

4816-
! Now, have to remove the escape characters:
4817-
!
4818-
! '\"' quotation mark
4819-
! '\\' reverse solidus
4820-
! '\/' solidus
4821-
! '\b' backspace
4822-
! '\f' formfeed
4823-
! '\n' newline (LF)
4824-
! '\r' carriage return (CR)
4825-
! '\t' horizontal tab
4826-
! '\uXXXX' 4 hexadecimal digits
4827-
!
4816+
call throw_exception('Error in json_get_string:'//&
4817+
' Unable to resolve value to characters: '//me%name)
48284818

4829-
!initialize:
4830-
n = len(s)
4831-
j = 1
4819+
! Note: for the other cases, we could do val to string conversions.
48324820

4833-
do
4821+
end select
48344822

4835-
jprev = j !initialize
4836-
j = index(s(j:n),backslash) !look for an escape character
4823+
end if
48374824

4838-
if (j>0) then !an escape character was found
4825+
end subroutine json_get_string
4826+
!*****************************************************************************************
48394827

4840-
!index in full string of the escape character:
4841-
j = j + (jprev-1)
4828+
!*****************************************************************************************
4829+
!>
4830+
! Remove the escape characters from a JSON string and return it.
4831+
!
4832+
! The escaped characters are denoted by the '\' character:
4833+
!````
4834+
! '\"' quotation mark
4835+
! '\\' reverse solidus
4836+
! '\/' solidus
4837+
! '\b' backspace
4838+
! '\f' formfeed
4839+
! '\n' newline (LF)
4840+
! '\r' carriage return (CR)
4841+
! '\t' horizontal tab
4842+
! '\uXXXX' 4 hexadecimal digits
4843+
!````
48424844

4843-
if (j<n) then
4845+
subroutine unescape_string(str_in, str_out)
48444846

4845-
!save the bit before the escape character:
4846-
if (j>1) then
4847-
pre = s( 1 : j-1 )
4848-
else
4849-
pre = ''
4850-
end if
4847+
implicit none
4848+
4849+
character(kind=CK,len=*),intent(in) :: str_in !! string as stored in a [[json_value]]
4850+
character(kind=CK,len=:),allocatable,intent(out) :: str_out !! decoded string
4851+
4852+
integer :: i !! counter
4853+
integer :: n !! length of str_in
4854+
integer :: m !! length of str_out
4855+
character(kind=CK,len=1) :: c !! for scanning each character in string
48514856

4852-
!character after the escape character:
4853-
c = s( j+1 : j+1 )
4854-
4855-
if (any(c == [quotation_mark,backslash,slash, &
4856-
to_unicode(['b','f','n','r','t'])])) then
4857-
4858-
!save the bit after the escape characters:
4859-
if (j+2<n) then
4860-
post = s(j+2:n)
4861-
else
4862-
post = ''
4863-
end if
4864-
4865-
select case(c)
4866-
case (quotation_mark,backslash,slash)
4867-
!use c as is
4868-
case (CK_'b')
4869-
c = bspace
4870-
case (CK_'f')
4871-
c = formfeed
4872-
case (CK_'n')
4873-
c = newline
4874-
case (CK_'r')
4875-
c = carriage_return
4876-
case (CK_'t')
4877-
c = horizontal_tab
4878-
end select
4879-
4880-
s = pre//c//post
4881-
4882-
n = n-1 !backslash character has been
4883-
! removed from the string
4884-
4885-
else if (c == 'u') then !expecting 4 hexadecimal digits after
4886-
!the escape character [\uXXXX]
4887-
4888-
!for now, we are just printing them as is
4889-
![not checking to see if it is a valid hex value]
4890-
4891-
if (j+5<=n) then
4892-
j=j+4
4893-
else
4894-
call throw_exception('Error in json_get_string:'//&
4895-
' Invalid hexadecimal sequence'//&
4896-
' in string: '//trim(c))
4897-
exit
4898-
end if
4857+
if (scan(str_in,backslash)>0) then
48994858

4859+
!there is at least one escape character, so process this string:
4860+
4861+
n = len(str_in)
4862+
str_out = repeat(space,n) !size the output string (will be trimmed later)
4863+
m = 0 !counter in str_out
4864+
i = 0 !counter in str_in
4865+
4866+
do
4867+
4868+
i = i + 1
4869+
if (i>n) exit ! finished
4870+
c = str_in(i:i) ! get next character in the string
4871+
4872+
if (c == backslash) then
4873+
4874+
if (i<n) then
4875+
4876+
i = i + 1
4877+
c = str_in(i:i) !character after the escape
4878+
4879+
if (any(c == [quotation_mark,backslash,slash, &
4880+
to_unicode(['b','f','n','r','t'])])) then
4881+
4882+
select case(c)
4883+
case (quotation_mark,backslash,slash)
4884+
!use d as is
4885+
case (CK_'b')
4886+
c = bspace
4887+
case (CK_'f')
4888+
c = formfeed
4889+
case (CK_'n')
4890+
c = newline
4891+
case (CK_'r')
4892+
c = carriage_return
4893+
case (CK_'t')
4894+
c = horizontal_tab
4895+
end select
4896+
4897+
m = m + 1
4898+
str_out(m:m) = c
4899+
4900+
else if (c == 'u') then !expecting 4 hexadecimal digits after
4901+
!the escape character [\uXXXX]
4902+
4903+
!for now, we are just returning them as is
4904+
![not checking to see if it is a valid hex value]
4905+
!
4906+
! Example:
4907+
! 123456
4908+
! \uXXXX
4909+
4910+
if (i+4<=n) then
4911+
m = m + 1
4912+
str_out(m:m+5) = str_in(i-1:i+4)
4913+
i = i + 4
4914+
m = m + 5
49004915
else
4901-
!unknown escape character
49024916
call throw_exception('Error in json_get_string:'//&
4903-
' unknown escape sequence in string "'//&
4904-
trim(s)//'" ['//backslash//c//']')
4905-
exit
4917+
' Invalid hexadecimal sequence'//&
4918+
' in string: '//str_in(i-1:))
4919+
str_out = ''
4920+
return
49064921
end if
49074922

4908-
j=j+1 !go to the next character
4909-
4910-
if (j>=n) exit !finished
4911-
49124923
else
4913-
!an escape character is the last character in
4914-
! the string [this may not be valid syntax,
4915-
! but just keep it]
4916-
exit
4924+
!unknown escape character
4925+
call throw_exception('Error in json_get_string:'//&
4926+
' unknown escape sequence in string "'//&
4927+
trim(str_in)//'" ['//backslash//c//']')
4928+
str_out = ''
4929+
return
49174930
end if
49184931

49194932
else
4920-
exit !no more escape characters in the string
4933+
!an escape character is the last character in
4934+
! the string [this may not be valid syntax,
4935+
! but just keep it]
4936+
m = m + 1
4937+
str_out(m:m) = c
49214938
end if
49224939

4923-
end do
4924-
4925-
if (exception_thrown) then
4926-
if (allocated(value)) deallocate(value)
49274940
else
4928-
value = s
4941+
m = m + 1
4942+
str_out(m:m) = c
49294943
end if
49304944

4931-
else
4932-
call throw_exception('Error in json_get_string:'//&
4933-
' me%value not allocated')
4934-
end if
4935-
4936-
case default
4937-
call throw_exception('Error in json_get_string:'//&
4938-
' Unable to resolve value to characters: '//me%name)
4939-
4940-
! Note: for the other cases, we could do val to string conversions.
4945+
end do
49414946

4942-
end select
4947+
!trim trailing space:
4948+
str_out = str_out(1:m)
49434949

4944-
!cleanup:
4945-
if (allocated(s)) deallocate(s)
4946-
if (allocated(pre)) deallocate(pre)
4947-
if (allocated(post)) deallocate(post)
4950+
else
4951+
!there are no escape characters, so return as is:
4952+
str_out = str_in
4953+
end if
49484954

4949-
end subroutine json_get_string
4955+
end subroutine unescape_string
49504956
!*****************************************************************************************
49514957

49524958
!*****************************************************************************************
@@ -6383,6 +6389,7 @@ end subroutine parse_array
63836389
!
63846390
!# History
63856391
! * Jacob Williams : 6/16/2014 : Added hex validation.
6392+
! * Jacob Williams : 12/3/2015 : Fixed some bugs.
63866393

63876394
subroutine parse_string(unit, str, string)
63886395

@@ -6393,7 +6400,7 @@ subroutine parse_string(unit, str, string)
63936400
character(kind=CK,len=:),allocatable,intent(out) :: string
63946401

63956402
logical(LK) :: eof, is_hex, escape
6396-
character(kind=CK,len=1) :: c, last
6403+
character(kind=CK,len=1) :: c
63976404
character(kind=CK,len=4) :: hex
63986405
integer(IK) :: i
63996406
integer(IK) :: ip !! index to put next character,
@@ -6406,7 +6413,6 @@ subroutine parse_string(unit, str, string)
64066413

64076414
!initialize:
64086415
ip = 1
6409-
last = space
64106416
is_hex = .false.
64116417
escape = .false.
64126418
i = 0
@@ -6421,7 +6427,7 @@ subroutine parse_string(unit, str, string)
64216427
call throw_exception('Error in parse_string: Expecting end of string')
64226428
return
64236429

6424-
else if (c==quotation_mark .and. last /= backslash) then
6430+
else if (c==quotation_mark .and. .not. escape) then !end of string
64256431

64266432
if (is_hex) call throw_exception('Error in parse_string:'//&
64276433
' incomplete hex string: \u'//trim(hex))
@@ -6466,9 +6472,6 @@ subroutine parse_string(unit, str, string)
64666472

64676473
end if
64686474

6469-
!update for next char:
6470-
last = c
6471-
64726475
end if
64736476

64746477
end do

0 commit comments

Comments
 (0)