Web Dev Solutions

Catalin Mititiuc

From b8d8b3dbd88ab42fc8f050af0d18fc4dd66d7ffe Mon Sep 17 00:00:00 2001 From: Catalin Mititiuc Date: Thu, 9 Jan 2025 11:25:52 -0800 Subject: Handle installing Pandoc --- lib/pandoc.ex | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 343 insertions(+), 17 deletions(-) (limited to 'lib/pandoc.ex') diff --git a/lib/pandoc.ex b/lib/pandoc.ex index f1f07f7..ca30847 100644 --- a/lib/pandoc.ex +++ b/lib/pandoc.ex @@ -1,17 +1,130 @@ defmodule Pandoc do + # https://github.com/jgm/pandoc/releases + @latest_version "3.6.1" + @moduledoc """ - Documentation for `Pandoc`. + Pandoc is an installer, runner and watcher for [pandoc](https://pandoc.org). + + ## Profiles + + You can define multiple pandoc profiles. By default, there is a profile + called `:default` whose args, current directory and environment you can + configure. You can make the args dynamic by defining a function. + + config :pandoc, + version: "#{@latest_version}", + default: [ + args: fn extra_args -> + {_, [input_file], _} = OptionParser.parse(extra_args, switches: []) + ~w(--output=../priv/static/posts/\#{Path.rootname(input_file)}.html) + end, + cd: Path.expand("../documents", __DIR__) + ] + + ## Pandoc configuration + + There are four global configurations for the pandoc application: + + * `:version` - the expected pandoc version + + * `:version_check` - whether to perform the version check or not. + Useful when you manage the pandoc executable with an external + tool + + * `:cacerts_path` - the directory to find certificates for + https connections + + * `:path` - the path to find the pandoc executable at. By + default, it is automatically downloaded and placed inside + the `_build` directory of your current app + + Overriding the `:path` is not recommended, as we will automatically download + and manage `pandoc` for you. But in case you can't download it, you may want + to set the `:path` to a configurable system location. 
+ """ + + require Logger + + @doc false + # Latest known version at the time of publishing. + def latest_version, do: @latest_version + + @doc """ + Returns the configured pandoc version. """ + def configured_version do + Application.get_env(:pandoc, :version, latest_version()) + end + + @doc """ + Returns the configuration for the given profile. + + Raises `ArgumentError` if the profile does not exist. + """ + def config_for!(profile) when is_atom(profile) do + Application.get_env(:pandoc, profile) || + raise ArgumentError, """ + unknown pandoc profile. Make sure the profile is defined in your config/config.exs file, such as: - def run(profile, ["--watch" | _]) do - config = Application.get_env(:pandoc, profile) - opts = [cd: config[:cd] || File.cwd!()] - dirs = [opts[:cd], Path.join(opts[:cd], "_drafts")] + config :pandoc, + version: "#{@latest_version}", + #{profile}: [ + cd: Path.expand("../documents", __DIR__) + ] + """ + end + + @doc """ + Returns the path to the executable. + + The executable may not be available if it was not yet installed. + """ + def bin_path do + name = "pandoc-#{target()}" + + Application.get_env(:pandoc, :path) || + if Code.ensure_loaded?(Mix.Project) do + relative_build_dir = Mix.Project.build_path() |> Path.dirname() |> Path.relative_to_cwd() + project_dir = Path.dirname(Mix.Project.project_file()) + Path.join([project_dir, relative_build_dir, name]) + else + Path.expand("_build/#{name}") + end + end + + @doc """ + Returns the version of the pandoc executable. + + Returns `{:ok, version_string}` on success or `:error` when the executable + is not available. 
+ """ + def bin_version do + path = bin_path() + + with true <- File.exists?(path), + {out, 0} <- System.cmd(path, ["--version"]), + [vsn] <- Regex.run(~r/#{Path.basename(path)} ([^\s]+)/, out, capture: :all_but_first) do + {:ok, vsn} + else + _ -> :error + end + end + + @doc """ + Starts a file system watcher that runs the given command with `args` when a + file event is received for a file that matches the given pattern. + + The given args will be appended to the configured args. The task output will + be streamed directly to stdio. + """ + def watch(profile, extra_args \\ [], pattern \\ ~r/\.md$/) when is_atom(profile) do + config = config_for!(profile) + opts = [dirs: [config[:cd] || File.cwd!()]] ref = __MODULE__.Supervisor |> Supervisor.start_child( - Supervisor.child_spec({Pandoc.Watcher, [profile, dirs: dirs]}, + Supervisor.child_spec({Pandoc.Watcher, [profile, opts, pattern, extra_args]}, restart: :transient, id: __MODULE__.Watcher ) @@ -27,9 +140,24 @@ defmodule Pandoc do end end - def run(profile, path) do - config = Application.get_env(:pandoc, profile) - args = config[:args] || [] + @doc """ + Runs the given command with `args`. + + The given args will be appended to the configured args. The task output will + be streamed directly to stdio. It returns the status of the underlying call. 
+ """ + def run(profile, extra_args) when is_atom(profile) and is_list(extra_args) do + config = config_for!(profile) + + args = + case config[:args] do + args_fn when is_function(args_fn) -> args_fn.(extra_args) + args -> args || [] + end + + if args == [] and extra_args == [] do + raise "no arguments passed to pandoc" + end opts = [ cd: config[:cd] || File.cwd!(), @@ -37,17 +165,215 @@ defmodule Pandoc do stderr_to_stdout: true ] - new_filename = - path |> Path.basename() |> String.replace_suffix(".md", ".html") |> String.slice(11..-1//1) + {parsed_args, _, _} = OptionParser.parse(args, switches: [output: :string]) + {_, input_files, _} = OptionParser.parse(extra_args, switches: []) + + if parsed_args[:output] && + not File.cd!(opts[:cd], fn -> + input_files |> Enum.map(&File.exists?(&1)) |> Enum.all?() + end) do + parsed_args[:output] |> Path.expand(opts[:cd]) |> File.rm!() + else + bin_path() |> System.cmd(args ++ extra_args, opts) |> elem(1) + end + end + + defp start_unique_install_worker() do + ref = + __MODULE__.Supervisor + |> Supervisor.start_child( + Supervisor.child_spec({Task, &install/0}, restart: :transient, id: __MODULE__.Installer) + ) + |> case do + {:ok, pid} -> pid + {:error, {:already_started, pid}} -> pid + end + |> Process.monitor() + + receive do + {:DOWN, ^ref, _, _, _} -> :ok + end + end + + @doc """ + Installs, if not available, and then runs `pandoc`. + + This task may be invoked concurrently and it will avoid concurrent installs. + + Returns the same as `run/2`. + """ + def install_and_run(profile, args) do + File.exists?(bin_path()) || start_unique_install_worker() + + run(profile, args) + end + + @doc """ + The default URL to install Pandoc from. + """ + def default_base_url do + "https://github.com/jgm/pandoc/releases/download/$version/pandoc-$version-$target.tar.gz" + end + + @doc """ + Installs pandoc with `configured_version/0`. + + If invoked concurrently, this task will perform concurrent installs. 
+ """ + def install(base_url \\ default_base_url()) do + version = configured_version() + tmp_opts = if System.get_env("MIX_XDG"), do: %{os: :linux}, else: %{} + + tmp_dir = + freshdir_p(:filename.basedir(:user_cache, "phx-pandoc", tmp_opts)) || + freshdir_p(Path.join(System.tmp_dir!(), "phx-pandoc")) || + raise "could not install pandoc. Set MIX_XGD=1 and then set XDG_CACHE_HOME to the path you want to use as cache" - new_path = args |> List.last() |> Path.join(new_filename) - out_path = Path.join(opts[:cd], new_path) |> Path.expand() + url = get_url(base_url) + tar = fetch_body!(url) - if File.exists?(path) do - args = List.replace_at(args, -1, out_path) - "pandoc" |> System.cmd(args ++ [path], opts) |> elem(1) + case :erl_tar.extract({:binary, tar}, [:compressed, cwd: to_charlist(tmp_dir)]) do + :ok -> :ok + other -> raise "couldn't unpack archive: #{inspect(other)}" + end + + bin_path = bin_path() + File.mkdir_p!(Path.dirname(bin_path)) + [tmp_dir, "pandoc-" <> version, "bin", "pandoc"] |> Path.join() |> File.cp!(bin_path) + end + + defp freshdir_p(path) do + with {:ok, _} <- File.rm_rf(path), + :ok <- File.mkdir_p(path) do + path else - File.rm(out_path) + _ -> nil end end + + defp fetch_body!(url, retry \\ true) do + scheme = URI.parse(url).scheme + url = String.to_charlist(url) + Logger.debug("Downloading pandoc from #{url}") + + {:ok, _} = Application.ensure_all_started(:inets) + {:ok, _} = Application.ensure_all_started(:ssl) + + if proxy = proxy_for_scheme(scheme) do + %{host: host, port: port} = URI.parse(proxy) + Logger.debug("Using #{String.upcase(scheme)}_PROXY: #{proxy}") + set_option = if "https" == scheme, do: :https_proxy, else: :proxy + :httpc.set_options([{set_option, {{String.to_charlist(host), port}, []}}]) + end + + # https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/inets + cacertfile = cacertfile() |> String.to_charlist() + + http_options = + [ + ssl: [ + verify: :verify_peer, + cacertfile: cacertfile, + depth: 2, + 
customize_hostname_check: [ + match_fun: :public_key.pkix_verify_hostname_match_fun(:https) + ], + versions: protocol_versions() + ] + ] + |> maybe_add_proxy_auth(scheme) + + options = [body_format: :binary] + + case {retry, :httpc.request(:get, {url, []}, http_options, options)} do + {_, {:ok, {{_, 200, _}, _headers, body}}} -> + body + + {true, {:error, {:failed_connect, [{:to_address, _}, {inet, _, reason}]}}} + when inet in [:inet, :inet6] and + reason in [:ehostunreach, :enetunreach, :eprotonosupport, :nxdomain] -> + :httpc.set_options(ipfamily: fallback(inet)) + fetch_body!(url, false) + + other -> + raise """ + Couldn't fetch #{url}: #{inspect(other)} + + This typically means we cannot reach the source or you are behind a proxy. + You can try again later and, if that does not work, you might: + + 1. If behind a proxy, ensure your proxy is configured and that + your certificates are set via the cacerts_path configuration + + 2. Manually download the executable from the URL above and + place it inside "_build/pandoc-#{target()}" + """ + end + end + + defp fallback(:inet), do: :inet6 + defp fallback(:inet6), do: :inet + + defp proxy_for_scheme("http") do + System.get_env("HTTP_PROXY") || System.get_env("http_proxy") + end + + defp proxy_for_scheme("https") do + System.get_env("HTTPS_PROXY") || System.get_env("https_proxy") + end + + defp maybe_add_proxy_auth(http_options, scheme) do + case proxy_auth(scheme) do + nil -> http_options + auth -> [{:proxy_auth, auth} | http_options] + end + end + + defp proxy_auth(scheme) do + with proxy when is_binary(proxy) <- proxy_for_scheme(scheme), + %{userinfo: userinfo} when is_binary(userinfo) <- URI.parse(proxy), + [username, password] <- String.split(userinfo, ":") do + {String.to_charlist(username), String.to_charlist(password)} + else + _ -> nil + end + end + + defp cacertfile() do + Application.get_env(:pandoc, :cacerts_path) || CAStore.file_path() + end + + defp protocol_versions do + if otp_version() < 25, do: 
[:"tlsv1.2"], else: [:"tlsv1.2", :"tlsv1.3"] + end + + defp otp_version do + :erlang.system_info(:otp_release) |> List.to_integer() + end + + # Available targets: https://github.com/jgm/pandoc/releases + # We support only linux-amd64, for now. + defp target do + case :os.type() do + # Assuming it's an x86 CPU + {:win32, _} -> + raise "pandoc does not currently support OS family: Windows" + + {:unix, osname} -> + arch_str = :erlang.system_info(:system_architecture) + [arch | _] = arch_str |> List.to_string() |> String.split("-") + + case arch do + "amd64" -> "#{osname}-amd64" + "x86_64" -> "#{osname}-amd64" + _ -> raise "pandoc does not currently support architecture: #{arch_str}" + end + end + end + + defp get_url(base_url) do + base_url + |> String.replace("$version", configured_version()) + |> String.replace("$target", target()) + end end -- cgit v1.2.3