Web Dev Solutions

Catalin Mititiuc

defmodule Pandoc do
  # https://github.com/jgm/pandoc/releases
  @latest_version "3.6.1"

  @moduledoc """
  Pandoc is an installer, runner and watcher for [pandoc](https://pandoc.org).

  ## Profiles

  You can define multiple pandoc profiles. By default, there is a
  profile called `:default` whose args and current directory you can
  configure. You can make the args dynamic by defining a function.

      config :pandoc,
        version: "#{@latest_version}",
        default: [
          args: fn extra_args ->
            {_, [input_file], _} = OptionParser.parse(extra_args, switches: [])
            ~w(--output=../priv/static/posts/\#{Path.rootname(input_file)}.html)
          end,
          cd: Path.expand("../documents", __DIR__)
        ]

  ## Pandoc configuration

  There are four global configurations for the pandoc application:

    * `:version` - the expected pandoc version

    * `:version_check` - whether to perform the version check or not.
      Useful when you manage the pandoc executable with an external
      tool

    * `:cacerts_path` - the directory to find certificates for
      https connections

    * `:path` - the path to find the pandoc executable at. By
      default, it is automatically downloaded and placed inside
      the `_build` directory of your current app

  Overriding the `:path` is not recommended, as we will automatically
  download and manage `pandoc` for you. But in case you can't download
  it, you may want to set the `:path` to a configurable system location.
  """

  require Logger

  @doc false
  # Latest known version at the time of publishing.
  @spec latest_version() :: String.t()
  def latest_version, do: @latest_version

  @doc """
  Returns the configured pandoc version.

  Falls back to `latest_version/0` when `:version` is not set in the
  `:pandoc` application environment.
  """
  @spec configured_version() :: String.t()
  def configured_version do
    Application.get_env(:pandoc, :version, latest_version())
  end

  @doc """
  Returns the configuration for the given profile.

  Raises `ArgumentError` if the profile does not exist.
  """
  @spec config_for!(atom()) :: term()
  def config_for!(profile) when is_atom(profile) do
    Application.get_env(:pandoc, profile) ||
      raise ArgumentError, """
      unknown pandoc profile. Make sure the profile is defined in your config/config.exs file, such as:

          config :pandoc,
            version: "#{@latest_version}",
            #{profile}: [
              cd: Path.expand("../documents", __DIR__)
            ]
      """
  end

  @doc """
  Returns the path to the executable.

  The executable may not be available if it was not yet installed.
  A configured `:path` takes precedence; otherwise the path lives next to
  the project's build directory (or under `_build` when Mix is not loaded).
  """
  @spec bin_path() :: String.t()
  def bin_path do
    name = "pandoc-#{target()}"

    Application.get_env(:pandoc, :path) ||
      if Code.ensure_loaded?(Mix.Project) do
        relative_build_dir =
          Mix.Project.build_path()
          |> Path.dirname()
          |> Path.relative_to_cwd()

        project_dir = Path.dirname(Mix.Project.project_file())
        Path.join([project_dir, relative_build_dir, name])
      else
        Path.expand("_build/#{name}")
      end
  end

  @doc """
  Returns the version of the pandoc executable.

  Returns `{:ok, version_string}` on success or `:error` when the executable
  is not available.
  """
  @spec bin_version() :: {:ok, String.t()} | :error
  def bin_version do
    path = bin_path()

    # The version line is matched against the executable's own file name, so
    # the name is escaped to keep characters such as "." from acting as regex
    # metacharacters.
    version_regex = ~r/#{Regex.escape(Path.basename(path))} ([^\s]+)/

    with true <- File.exists?(path),
         {out, 0} <- System.cmd(path, ["--version"]),
         [vsn] <- Regex.run(version_regex, out, capture: :all_but_first) do
      {:ok, vsn}
    else
      _ -> :error
    end
  end

  @doc """
  Starts a file system watcher that runs the given command with `args` when a file event is
  received for a file that matches the given pattern.

  The given args will be appended to the configured args.
  The task output will be streamed directly to stdio.

  The watcher is started (or reused, if already started) under
  `Pandoc.Supervisor` and this function blocks until that process goes down.
  """
  def watch(profile, extra_args \\ [], pattern \\ ~r/\.md$/) when is_atom(profile) do
    config = config_for!(profile)
    opts = [dirs: [config[:cd] || File.cwd!()]]

    ref =
      __MODULE__.Supervisor
      |> Supervisor.start_child(
        Supervisor.child_spec({Pandoc.Watcher, [profile, opts, pattern, extra_args]},
          restart: :transient,
          id: __MODULE__.Watcher
        )
      )
      |> case do
        {:ok, pid} -> pid
        {:error, {:already_started, pid}} -> pid
      end
      |> Process.monitor()

    receive do
      {:DOWN, ^ref, _, _, _} -> :ok
    end
  end

  @doc """
  Runs the given command with `args`.

  The given args will be appended to the configured args.
  The task output will be streamed directly to stdio.

  It returns the status of the underlying call.

  If the configured args contain an `--output` option and at least one of the
  input files named in `extra_args` does not exist (relative to the profile's
  `:cd`), pandoc is not invoked: the output file is removed instead and `:ok`
  is returned.

  Raises if both the configured args and `extra_args` are empty.
  """
  def run(profile, extra_args) when is_atom(profile) and is_list(extra_args) do
    config = config_for!(profile)

    args =
      case config[:args] do
        args_fn when is_function(args_fn) -> args_fn.(extra_args)
        args -> args || []
      end

    if args == [] and extra_args == [] do
      raise "no arguments passed to pandoc"
    end

    opts = [
      cd: config[:cd] || File.cwd!(),
      into: IO.stream(:stdio, :line),
      stderr_to_stdout: true
    ]

    {parsed_args, _, _} = OptionParser.parse(args, switches: [output: :string])
    {_, input_files, _} = OptionParser.parse(extra_args, switches: [])

    # The existence check only runs when an output is configured (`&&`
    # short-circuits), so profiles without `--output` never touch the cwd.
    if parsed_args[:output] && not all_exist_in?(input_files, opts[:cd]) do
      # A source file is gone: drop the stale generated output.
      parsed_args[:output]
      |> Path.expand(opts[:cd])
      |> File.rm!()
    else
      bin_path()
      |> System.cmd(args ++ extra_args, opts)
      |> elem(1)
    end
  end

  # Returns true when every file in `files` exists, resolved from inside `dir`.
  defp all_exist_in?(files, dir) do
    File.cd!(dir, fn -> Enum.all?(files, &File.exists?/1) end)
  end

  # Runs `install/0` as a uniquely-identified child of the supervisor and waits
  # for it to finish; concurrent callers monitor the already-started worker.
  defp start_unique_install_worker() do
    ref =
      __MODULE__.Supervisor
      |> Supervisor.start_child(
        Supervisor.child_spec({Task, &install/0}, restart: :transient, id: __MODULE__.Installer)
      )
      |> case do
        {:ok, pid} -> pid
        {:error, {:already_started, pid}} -> pid
      end
      |> Process.monitor()

    receive do
      {:DOWN, ^ref, _, _, _} -> :ok
    end
  end

  @doc """
  Installs, if not available, and then runs `pandoc`.

  This task may be invoked concurrently and it will avoid concurrent installs.

  Returns the same as `run/2`.
  """
  def install_and_run(profile, args) do
    File.exists?(bin_path()) || start_unique_install_worker()

    run(profile, args)
  end

  @doc """
  The default URL to install Pandoc from.

  The `$version` and `$target` placeholders are substituted at install time.
  """
  @spec default_base_url() :: String.t()
  def default_base_url do
    "https://github.com/jgm/pandoc/releases/download/$version/pandoc-$version-$target.tar.gz"
  end

  @doc """
  Installs pandoc with `configured_version/0`.

  If invoked concurrently, this task will perform concurrent installs.

  Raises if no cache directory can be created, if the download fails, or if
  the downloaded archive cannot be unpacked.
  """
  def install(base_url \\ default_base_url()) do
    version = configured_version()

    tmp_opts = if System.get_env("MIX_XDG"), do: %{os: :linux}, else: %{}

    tmp_dir =
      freshdir_p(:filename.basedir(:user_cache, "phx-pandoc", tmp_opts)) ||
        freshdir_p(Path.join(System.tmp_dir!(), "phx-pandoc")) ||
        raise "could not install pandoc. Set MIX_XDG=1 and then set XDG_CACHE_HOME to the path you want to use as cache"

    url = get_url(base_url)
    tar = fetch_body!(url)

    case :erl_tar.extract({:binary, tar}, [:compressed, cwd: to_charlist(tmp_dir)]) do
      :ok -> :ok
      other -> raise "couldn't unpack archive: #{inspect(other)}"
    end

    bin_path = bin_path()
    File.mkdir_p!(Path.dirname(bin_path))

    # The executable is read from "pandoc-<version>/bin/pandoc" inside the archive.
    [tmp_dir, "pandoc-" <> version, "bin", "pandoc"]
    |> Path.join()
    |> File.cp!(bin_path)
  end

  # Empties and recreates `path`; returns the path, or nil when that fails.
  defp freshdir_p(path) do
    with {:ok, _} <- File.rm_rf(path),
         :ok <- File.mkdir_p(path) do
      path
    else
      _ -> nil
    end
  end

  # Downloads `url` (a binary) and returns the response body, raising on failure.
  # When `retry` is true, one retry is attempted with the other IP family after
  # a connection failure.
  defp fetch_body!(url, retry \\ true) when is_binary(url) do
    scheme = URI.parse(url).scheme
    # :httpc wants a charlist; the binary is kept so the retry below can pass
    # it back through URI.parse/1.
    charlist_url = String.to_charlist(url)
    Logger.debug("Downloading pandoc from #{url}")

    {:ok, _} = Application.ensure_all_started(:inets)
    {:ok, _} = Application.ensure_all_started(:ssl)

    if proxy = proxy_for_scheme(scheme) do
      %{host: host, port: port} = URI.parse(proxy)
      Logger.debug("Using #{String.upcase(scheme)}_PROXY: #{proxy}")
      set_option = if "https" == scheme, do: :https_proxy, else: :proxy
      :httpc.set_options([{set_option, {{String.to_charlist(host), port}, []}}])
    end

    # https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/inets
    cacertfile = cacertfile() |> String.to_charlist()

    http_options =
      [
        ssl: [
          verify: :verify_peer,
          cacertfile: cacertfile,
          depth: 2,
          customize_hostname_check: [
            match_fun: :public_key.pkix_verify_hostname_match_fun(:https)
          ],
          versions: protocol_versions()
        ]
      ]
      |> maybe_add_proxy_auth(scheme)

    options = [body_format: :binary]

    case {retry, :httpc.request(:get, {charlist_url, []}, http_options, options)} do
      {_, {:ok, {{_, 200, _}, _headers, body}}} ->
        body

      {true, {:error, {:failed_connect, [{:to_address, _}, {inet, _, reason}]}}}
      when inet in [:inet, :inet6] and
             reason in [:ehostunreach, :enetunreach, :eprotonosupport, :nxdomain] ->
        :httpc.set_options(ipfamily: fallback(inet))
        fetch_body!(url, false)

      other ->
        raise """
        Couldn't fetch #{url}: #{inspect(other)}

        This typically means we cannot reach the source or you are behind a proxy.
        You can try again later and, if that does not work, you might:

          1. If behind a proxy, ensure your proxy is configured and that
             your certificates are set via the cacerts_path configuration

          2. Manually download the executable from the URL above and
             place it inside "_build/pandoc-#{target()}"
        """
    end
  end

  defp fallback(:inet), do: :inet6
  defp fallback(:inet6), do: :inet

  # Looks up the proxy URL from the environment for the given URL scheme.
  defp proxy_for_scheme("http") do
    System.get_env("HTTP_PROXY") || System.get_env("http_proxy")
  end

  defp proxy_for_scheme("https") do
    System.get_env("HTTPS_PROXY") || System.get_env("https_proxy")
  end

  # Any other (or missing) scheme has no proxy variable to consult.
  defp proxy_for_scheme(_scheme), do: nil

  defp maybe_add_proxy_auth(http_options, scheme) do
    case proxy_auth(scheme) do
      nil -> http_options
      auth -> [{:proxy_auth, auth} | http_options]
    end
  end

  # Extracts `{username, password}` charlists from the proxy URL's userinfo,
  # or returns nil when there is no proxy or no "user:password" pair.
  defp proxy_auth(scheme) do
    with proxy when is_binary(proxy) <- proxy_for_scheme(scheme),
         %{userinfo: userinfo} when is_binary(userinfo) <- URI.parse(proxy),
         # Split on the first ":" only, so passwords may themselves contain ":".
         [username, password] <- String.split(userinfo, ":", parts: 2) do
      {String.to_charlist(username), String.to_charlist(password)}
    else
      _ -> nil
    end
  end

  defp cacertfile() do
    Application.get_env(:pandoc, :cacerts_path) || CAStore.file_path()
  end

  defp protocol_versions do
    if otp_version() < 25, do: [:"tlsv1.2"], else: [:"tlsv1.2", :"tlsv1.3"]
  end

  defp otp_version do
    :erlang.system_info(:otp_release) |> List.to_integer()
  end

  # Available targets: https://github.com/jgm/pandoc/releases
  # We support only linux-amd64, for now.
  defp target do
    case :os.type() do
      {:win32, _} ->
        raise "pandoc does not currently support OS family: Windows"

      {:unix, osname} ->
        # Assuming it's an x86 CPU
        arch_str = :erlang.system_info(:system_architecture)
        [arch | _] = arch_str |> List.to_string() |> String.split("-")

        case arch do
          "amd64" -> "#{osname}-amd64"
          "x86_64" -> "#{osname}-amd64"
          _ -> raise "pandoc does not currently support architecture: #{arch_str}"
        end
    end
  end

  # Fills the `$version` and `$target` placeholders of `base_url`.
  defp get_url(base_url) do
    base_url
    |> String.replace("$version", configured_version())
    |> String.replace("$target", target())
  end
end