import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

import { BlogTitle } from "@/components/BlogTitle/BlogTitle.tsx";
import { Page } from "@/components/Page.tsx";
import "@/styles/Blog.scss";
/**
 * Static metadata for this post: display title, site path, ISO-8601
 * publication date and a one-line description.
 */
export const _frontmatter = {
  title: 'Parsing JSON with FSharp',
  path: '/blog/2021/Nov/fsharp-json-parser',
  date: '2021-11-05T00:00:00.000Z',
  description:
    'A simple JSON parser written in FSharp and deployed as a JavaScript app.',
};
// Props spread onto the layout on every render; exposes this post's frontmatter.
const layoutProps = { _frontmatter: _frontmatter };

// Element type used as the outermost JSX tag in MDXContent. It is a plain
// string; how "wrapper" is resolved is up to the `mdx` JSX pragma
// (presumably via the `components` mapping — confirm against @mdx-js/react).
const MDXLayout = 'wrapper';
/**
 * MDX content component for the "Parsing JSON with FSharp" blog post.
 *
 * Renders the article inside `MDXLayout`, spreading `layoutProps` and any
 * extra props onto it and forwarding the `components` mapping.
 *
 * @param components - Component mapping passed straight through to the layout.
 * @param props - Remaining props. `props.pageContext.frontmatter` supplies the
 *   title and date shown by `BlogTitle`; when `pageContext` (or its
 *   `frontmatter`) is missing, the module-level `_frontmatter` is used instead
 *   of throwing a TypeError.
 * @returns The rendered post element.
 */
export default function MDXContent({
  components,
  ...props
}) {
  // Prefer the frontmatter injected by the page pipeline, but stay renderable
  // when the component is mounted without a page context.
  const frontmatter = props.pageContext?.frontmatter ?? _frontmatter;

  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">

    <Page mdxType="Page">
      <BlogTitle title={frontmatter.title} datePublished={frontmatter.date} mdxType="BlogTitle" />
      <p>{`This is an overview of parsing JSON with FSharp - it is not a step-by-step tutorial. For that I
recommend reading Scott Wlaschin's four-part tutorial on
`}<a parentName="p" {...{
          "href": "https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/"
        }}>{`how to use Parser Combinators to parse JSON`}</a>{`.`}</p>
      <p>{`JSON is just a format to represent arbitrary data. But JSON is just text at the end of the day. How
do we take a simple string and parse it into a complex in-memory representation of arbitrary data?`}</p>
      <h2>{`Final Results`}</h2>
      <p>{`Final Result: ✨ `}<a parentName="p" {...{
          "href": "https://fsharp-json-parser.netlify.app"
        }}>{`https://fsharp-json-parser.netlify.app`}</a>{` ✨`}</p>
      <p>{`Source: `}<a parentName="p" {...{
          "href": "https://github.com/VivekRajagopal/fsharp-json-parser"
        }}>{`https://github.com/VivekRajagopal/fsharp-json-parser`}</a></p>
      <h2>{`Parser Combinators`}</h2>
      <p>{`Parser Combinators are the logical building blocks for combining smaller, atomic units of parsing
logic into more complex data structures. This JSON parser assumes that the input is a whole piece of
text representing the raw JSON. This makes the atomic unit to parse a single character.`}</p>
      <p>{`Take such an atomic parser;`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`let parse_n input =
  let firstChar = input.[0]

  if firstChar = 'n' then
    Ok ('n')
  else
    Error "Expected input to be 'n'"
`}</code></pre>
      <p>{`The signature being `}<inlineCode parentName="p">{`fun input:string -> Result<char, string>`}</inlineCode></p>
      <p>{`Let's make the parser a bit more flexible.`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`let parseF predicate input =
  let firstChar = input.[0]

  if predicate(firstChar) then
    Ok (firstChar)
  else
    Error $"Expected input to be '{firstChar}'"
`}</code></pre>
      <p>{`Here `}<inlineCode parentName="p">{`predicate`}</inlineCode>{` has a signature of `}<inlineCode parentName="p">{`fun char -> bool`}</inlineCode>{`, which allows us to easily create more atomic
parsers.`}</p>
      <h2>{`Parse this AND that`}</h2>
      <p>{`Okay, something a bit more challenging. Let's try and parse an entire JSON `}<inlineCode parentName="p">{`null`}</inlineCode>{` value. We need to
parse each letter sequentially, and we can do that by combining atomic parsers. Let's also make
things easier by returning the remaining unparsed input in the `}<inlineCode parentName="p">{`Ok`}</inlineCode>{` path in `}<inlineCode parentName="p">{`parseF`}</inlineCode>{`. And why not
check to see if `}<inlineCode parentName="p">{`input`}</inlineCode>{` is empty while we're at it.`}</p>
      <p>{`We need to introduce our first `}<inlineCode parentName="p">{`andThen`}</inlineCode>{` combinator. This combines two parsers into one, and requires
the first one to succeed followed by the second.`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`open System

let parseF predicate input =
  if String.IsNullOrEmpty(input) then
    Error "input is null or empty"
  else
    let firstChar = input.[0]

    if predicate(firstChar) then
      let remainingChars = input.[1..]
      Ok (firstChar, remainingChars)
    else
      Error $"Expected input to be '{firstChar}'"

let parse_n = parseF (fun c -> c = 'n')
let parse_u = parseF (fun c -> c = 'u')
let parse_l = parseF (fun c -> c = 'l')

let andThen p1 p2 =
  let parser input =
    input
    |> p1
    |> Result.bind (
      fun (v1, r1) ->
        p2 r1
        |> Result.map (fun (v2,r2) -> (v1, v2, r2))
    )
    |> Result.bind (fun (v1, v2, r2) -> Ok ((v1, v2), r2))

  parser

let (>>>) = andThen

let parse_null =
  parse_n >>> parse_u >>> parse_l >>> parse_l
`}</code></pre>
      <p>{`Try it out on the Fable REPL
`}<a parentName="p" {...{
          "href": "https://fable.io/repl/#?code=DYUwLgBADghgTgZxAMWnEATAlgYxmECACiwDsoBXMALggTDjIHMBKCAXgFgAoCPiUJABmWRGADCAC3gcIZSmAB0AbQAMAXR49+coWky58IIiLFT4bMJJCltO-oIjoAtjDLNziWfKoqAjIqKmrz2fADyANbEpvSeADROIK7upEyeCCx2fCDASFn2AKJwcAD2cBAAJABEBQAeUCA4BBhy5FQQYCUQAEaEAOQA3jES0nAAvn1VWtyOsIggAPqksnNIqCYUyzgQALQAfBDb7BB9pH2ZM+DQ8EgLFCs3KNGbh7sHRycU5zyzjwvAD3m6yEL22+1exz6wG+3B+VxwJWc3TIIAACo8vFA-NAAEwcOy-eblHyQLghPgk-IAHwOWOpBwASiAEBRgEpkaQWkR8nwQcsiAA3PwJOB+Nj7Hn8KB40WSvg0iBMllsxSuKDPfkCnFxOA48UHQXCiBakV6i46c38BVK1nsshcvnEIUJE1OPVvCCRYiGl1600sC52VYgODTRxEPaRtjHBFIlHookIMNXYNLVkAsl8VPLSO0v73XPXeb-CCF1PAabZ9MQKqkdNTbgKqCMUhgPRVACkAEEqkA&html=Q&css=Q"
        }}>{`here`}</a>{`.`}</p>
      <h3>{`Expanded Parser`}</h3>
      <p>{`The `}<inlineCode parentName="p">{`null`}</inlineCode>{` parser we created above is fairly simple. If we write out the expanded code in English it
would be something like;`}</p>
      <blockquote>
        <p parentName="blockquote">{`Parse "n",`}</p>
        <blockquote parentName="blockquote">
          <p parentName="blockquote">{`then in the remaining input, parse "u"`}</p>
          <blockquote parentName="blockquote">
            <p parentName="blockquote">{`then in the remaining input, parse "l"`}</p>
            <blockquote parentName="blockquote">
              <p parentName="blockquote">{`then in the remaining input, parse "l"`}</p>
            </blockquote>
          </blockquote>
        </blockquote>
      </blockquote>
      <h2>{`Parse this OR that`}</h2>
      <p>{`Our next useful parser is the `}<inlineCode parentName="p">{`orElse`}</inlineCode>{` parser. This takes two parsers and requires one to succeed.`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`let orElse p1 p2 =
  let parser input =
    let result1 = input |> p1
    let result2 = input |> p2

    match result1, result2 with
    | Ok (v1, r1), Ok _ -> Ok (v1, r1)
    | _, Ok (v2, r2) -> Ok (v2, r2)
    | Ok (v1, r1), _ -> Ok (v1, r1)
    | Error err1, Error _ -> Error err1

  parser

let (<|>) = orElse
`}</code></pre>
      <p>{`Armed with the `}<inlineCode parentName="p">{`andThen`}</inlineCode>{` and `}<inlineCode parentName="p">{`orElse`}</inlineCode>{` parser we can build fairly complex parsers.`}</p>
      <p>{`A single JSON value can be many different types; `}<inlineCode parentName="p">{`null`}</inlineCode>{`, `}<inlineCode parentName="p">{`true`}</inlineCode>{`, `}<inlineCode parentName="p">{`false`}</inlineCode>{`.. etc. We can use the
`}<inlineCode parentName="p">{`orElse`}</inlineCode>{` parser to choose between valid fully parsed JSON values.`}</p>
      <p>{`In FSharp we can represent our JSON model like;`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`type JValue =
  | JString of string
  | JNumber of float
  | JBool of bool
  | JNull
  | JObject of Map<string, JValue>
  | JArray of JValue list
`}</code></pre>
      <h2>{`Mutually Recursive Parser`}</h2>
      <p>{`One slightly tricky part of parsing JSON is the fact that the model is recursive. A `}<inlineCode parentName="p">{`JArray`}</inlineCode>{` is an array
of `}<inlineCode parentName="p">{`JValue`}</inlineCode>{`, which itself can be a `}<inlineCode parentName="p">{`JArray`}</inlineCode>{`. So how can we parse a `}<inlineCode parentName="p">{`JArray`}</inlineCode>{` if we can't first parse
`}<inlineCode parentName="p">{`JValue`}</inlineCode>{`... which needs the `}<inlineCode parentName="p">{`JArray`}</inlineCode>{` parser?!`}</p>
      <p>{`The slightly "hacky" way to get around this is to declare a dummy parser for `}<inlineCode parentName="p">{`JValue`}</inlineCode>{`, that `}<inlineCode parentName="p">{`JArray`}</inlineCode>{`
and `}<inlineCode parentName="p">{`JObject`}</inlineCode>{` can still reference in their parsing logic. The `}<inlineCode parentName="p">{`JValue`}</inlineCode>{` parser is finally defined at the
end to close off the loop.`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`let createForwardRefParser<'T> () =
  let dummyParser = fun _ -> failwith "Null forward parser"

  let parserRef = ref dummyParser

  let actualParser input =
    input
    |> Parser.run !parserRef

  actualParser, parserRef

let jValue, jValueRef = createForwardRefParser<JValue>()

// ... use jValue in other parsers

// ... finally update value in jValueRef to actual jValue parser

jValueRef := jValue
`}</code></pre>
      <p>{`I wasn't a total fan of this method, and instead would prefer to use mutually recursive functions.
This is totally valid code in FSharp (copied from
`}<a parentName="p" {...{
          "href": "https://stackoverflow.com/a/3621208/10195374"
        }}>{`StackOverflow`}</a>{`);`}</p>
      <pre><code parentName="pre" {...{
          "className": "language-fsharp"
        }}>{`let rec f x =
  if x > 0 then
    g (x-1)
  else
    x

and g x =
  if x > 0 then
    f (x-1)
  else
    x
`}</code></pre>
      <h2>{`Deploying this on the Web 🚀`}</h2>
      <p>{`If you don't release something you develop, does it really exist?`}</p>
      <p>{`Faux rhetoric aside, I was keen to deploy this JSON parser in an easily consumable format. There's
no production use-case for this parser, but I wanted to deploy it for demo and showcasing purposes.`}</p>
      <p><a parentName="p" {...{
          "href": "https://fable.io/"
        }}>{`Fable`}</a>{` (not the video game) is a FSharp to JavaScript transpiler. This enables
us to code our app in FSharp and use Fable to spit out JavaScript that can run on a user's browser
(or purely as a NodeJS app). The idea of a functionally programmed, strongly typed "JavaScript" app
is not new. Check out `}<a parentName="p" {...{
          "href": "https://elm-lang.org/"
        }}>{`Elm`}</a>{` if you're interested in an alternative.`}</p>
      <p>{`From here there was a little trial and error to deploy the built JS web app to Netlify. Netlify does
not support using the Dotnet SDK during the build process. So instead, GitHub builds the FSharp
Dotnet solution into a JS app and pushes that into Netlify.`}</p>
      <p>{`After all that, we get `}<a parentName="p" {...{
          "href": "https://fsharp-json-parser.netlify.app"
        }}>{`https://fsharp-json-parser.netlify.app`}</a>{`.`}</p>
      <h2>{`Future Improvements`}</h2>
      <p>{`The final solution is the most basic implementation clocking in at a pretty light ~290 lines of code
for the core business logic. Performance was not really considered for this project. But I'm still
curious about how well (or rather poorly) it performs.`}</p>
      <p>{`If I ever get time in the future, I'd like to benchmark this parser and see if I can improve on it.
The first performance improvement opportunity to explore is to try and parse more than one character
at a time. This would allow an early fail, or short-circuiting into a more specific JValue type.`}</p>
    </Page>

    </MDXLayout>;
}
// Tag the component with a static marker flag. Presumably this is how the MDX
// runtime recognises compiled MDX content — confirm against @mdx-js/react.
MDXContent.isMDXComponent = true;
      