Web Dev Solutions

Catalin Mititiuc

From b8d8b3dbd88ab42fc8f050af0d18fc4dd66d7ffe Mon Sep 17 00:00:00 2001 From: Catalin Mititiuc Date: Thu, 9 Jan 2025 11:25:52 -0800 Subject: Handle installing Pandoc --- lib/mix/tasks/pandoc.ex | 62 ++++--- lib/mix/tasks/pandoc.install.ex | 70 ++++++++ lib/pandoc.ex | 360 ++++++++++++++++++++++++++++++++++++++-- lib/pandoc/application.ex | 43 +++-- lib/pandoc/watcher.ex | 23 ++- 5 files changed, 498 insertions(+), 60 deletions(-) create mode 100644 lib/mix/tasks/pandoc.install.ex (limited to 'lib') diff --git a/lib/mix/tasks/pandoc.ex b/lib/mix/tasks/pandoc.ex index 2c1573d..a04ab26 100644 --- a/lib/mix/tasks/pandoc.ex +++ b/lib/mix/tasks/pandoc.ex @@ -1,13 +1,44 @@ defmodule Mix.Tasks.Pandoc do - use Mix.Task + @moduledoc """ + Invokes pandoc with the given args. + + Usage: + + $ mix pandoc TASK_OPTIONS PROFILE PANDOC_ARGS + + Example: + + $ mix pandoc default documents/hello.md -o priv/static/posts/hello.html + + If pandoc is not installed, it is automatically downloaded. Note the + arguments given to this task will be appended to any configured arguments. + + ## Options + + * `--runtime-config` - load the runtime configuration + before executing command - @ext ".md" + Note flags to control this Mix task must be given before the profile: + + $ mix pandoc --runtime-config default documents/hello.md + + """ + + @shortdoc "Invokes pandoc with the profile and args" + @compile {:no_warn_undefined, Mix} + + use Mix.Task @impl true def run(args) do switches = [runtime_config: :boolean] {opts, remaining_args} = OptionParser.parse_head!(args, switches: switches) + if function_exported?(Mix, :ensure_application!, 1) do + Mix.ensure_application!(:inets) + Mix.ensure_application!(:ssl) + end + if opts[:runtime_config] do Mix.Task.run("app.config") else @@ -19,28 +50,11 @@ defmodule Mix.Tasks.Pandoc do install_and_run(remaining_args) end - defp install_and_run([profile | _args] = all) do - IO.puts("Converting markdown...") - - profile = String.to_atom(profile) - config = Application.get_env(:pandoc, profile) - args = config[:args] || [] - opts = [cd: config[:cd] || File.cwd!()] - - out_path = List.last(args) - full_out_path = [opts[:cd], out_path] |> Path.join() |> Path.expand() - File.rm_rf!(full_out_path) - File.mkdir_p!(full_out_path) - - opts[:cd] - |> Path.join("*#{@ext}") - |> Path.wildcard() - |> Enum.each(fn path -> - case Pandoc.run(profile, path) do - 0 -> :ok - status -> Mix.raise("`mix pandoc #{Enum.join(all, " ")}` exited with #{status}") - end - end) + defp install_and_run([profile | args] = all) do + case Pandoc.install_and_run(String.to_atom(profile), args) do + 0 -> :ok + status -> Mix.raise("`mix pandoc #{Enum.join(all, " ")}` exited with #{status}") + end end defp install_and_run([]) do diff --git a/lib/mix/tasks/pandoc.install.ex b/lib/mix/tasks/pandoc.install.ex new file mode 100644 index 0000000..1e36696 --- /dev/null +++ b/lib/mix/tasks/pandoc.install.ex @@ -0,0 +1,70 @@ +defmodule Mix.Tasks.Pandoc.Install do + @moduledoc """ + Installs pandoc under `_build`. + + ```bash + $ mix pandoc.install + $ mix pandoc.install --if-missing + ``` + + By default, it installs #{Pandoc.latest_version()} but you can configure it + in your config files, such as: + + config :pandoc, :version, "#{Pandoc.latest_version()}" + + ## Options + + * `--runtime-config` - load the runtime configuration before executing + command + + * `--if-missing` - install only if the given version does not exist + """ + + @shortdoc "Installs pandoc under _build" + @compile {:no_warn_undefined, Mix} + + use Mix.Task + + @impl true + def run(args) do + valid_options = [runtime_config: :boolean, if_missing: :boolean] + + {opts, base_url} = + case OptionParser.parse_head!(args, strict: valid_options) do + {opts, []} -> + {opts, Pandoc.default_base_url()} + + {opts, [base_url]} -> + {opts, base_url} + + {_, _} -> + Mix.raise(""" + Invalid arguments to pandoc.install, expected one of: + + mix pandoc.install + mix pandoc.install 'https://github.com/jgm/pandoc/releases/download/$version/pandoc-$version-$target.tar.gz' + mix pandoc.install --runtime-config + mix pandoc.install --if-missing + """) + end + + if opts[:runtime_config], do: Mix.Task.run("app.config") + + if opts[:if_missing] && latest_version?() do + :ok + else + if function_exported?(Mix, :ensure_application!, 1) do + Mix.ensure_application!(:inets) + Mix.ensure_application!(:ssl) + end + + Mix.Task.run("loadpaths") + Pandoc.install(base_url) + end + end + + defp latest_version?() do + version = Pandoc.configured_version() + match?({:ok, ^version}, Pandoc.bin_version()) + end +end diff --git a/lib/pandoc.ex b/lib/pandoc.ex index f1f07f7..ca30847 100644 --- a/lib/pandoc.ex +++ b/lib/pandoc.ex @@ -1,17 +1,130 @@ defmodule Pandoc do + # https://github.com/jgm/pandoc/releases + @latest_version "3.6.1" + @moduledoc """ - Documentation for `Pandoc`. + Pandoc is an installer, runner and watcher for [pandoc](https://pandoc.org). + + ## Profiles + + You can define multiple pandoc profiles. By default, there is a profile + called `:default` which you can configure its args, current directory and + environment. You can make the args dynamic by defining a function. + + config :pandoc, + version: "#{@latest_version}", + default: [ + args: fn extra_args -> + {_, [input_file], _} = OptionParser.parse(extra_args, switches: []) + ~w(--output=../priv/static/posts/\#{Path.rootname(input_file)}.html) + end, + cd: Path.expand("../documents", __DIR__) + ] + + ## Pandoc configuration + + There are four global configurations for the pandoc application: + + * `:version` - the expected pandoc version + + * `:version_check` - whether to perform the version check or not. + Useful when you manage the pandoc executable with an external + tool + + * `:cacerts_path` - the directory to find certificates for + https connections + + * `:path` - the path to find the pandoc executable at. By + default, it is automatically downloaded and placed inside + the `_build` directory of your current app + + Overriding the `:path` is not recommended, as we will automatically download + and manage `pandoc` for you. But in case you can't download it, you may want + to set the `:path` to a configurable system location. + """ + + require Logger + + @doc false + # Latest known version at the time of publishing. + def latest_version, do: @latest_version + + @doc """ + Returns the configured pandoc version. """ + def configured_version do + Application.get_env(:pandoc, :version, latest_version()) + end + + @doc """ + Returns the configuration for the given profile. + + Returns nil if the profile does not exist. + """ + def config_for!(profile) when is_atom(profile) do + Application.get_env(:pandoc, profile) || + raise ArgumentError, """ + unknown pandoc profile. Make sure the profile is defined in your config/config.exs file, such as: - def run(profile, ["--watch" | _]) do - config = Application.get_env(:pandoc, profile) - opts = [cd: config[:cd] || File.cwd!()] - dirs = [opts[:cd], Path.join(opts[:cd], "_drafts")] + config :pandoc, + version: "#{@latest_version}", + #{profile}: [ + cd: Path.expand("../documents", __DIR__) + ] + """ + end + + @doc """ + Returns the path to the executable. + + The executable may not be available if it was not yet installed. + """ + def bin_path do + name = "pandoc-#{target()}" + + Application.get_env(:pandoc, :path) || + if Code.ensure_loaded?(Mix.Project) do + relative_build_dir = Mix.Project.build_path() |> Path.dirname() |> Path.relative_to_cwd() + project_dir = Path.dirname(Mix.Project.project_file()) + Path.join([project_dir, relative_build_dir, name]) + else + Path.expand("_build/#{name}") + end + end + + @doc """ + Returns the version of the pandoc executable. + + Returns `{:ok, version_string}` on success or `:error` when the executable + is not available. + """ + def bin_version do + path = bin_path() + + with true <- File.exists?(path), + {out, 0} <- System.cmd(path, ["--version"]), + [vsn] <- Regex.run(~r/#{Path.basename(path)} ([^\s]+)/, out, capture: :all_but_first) do + {:ok, vsn} + else + _ -> :error + end + end + + @doc """ + Starts a file system watcher that runs the given command with `args` when a + file event is received for a file that matches the given pattern. + + The given args will be appended to the configured args. The task output will + be streamed directly to stdio. + """ + def watch(profile, extra_args \\ [], pattern \\ ~r/\.md$/) when is_atom(profile) do + config = config_for!(profile) + opts = [dirs: [config[:cd] || File.cwd!()]] ref = __MODULE__.Supervisor |> Supervisor.start_child( - Supervisor.child_spec({Pandoc.Watcher, [profile, dirs: dirs]}, + Supervisor.child_spec({Pandoc.Watcher, [profile, opts, pattern, extra_args]}, restart: :transient, id: __MODULE__.Watcher ) @@ -27,9 +140,24 @@ defmodule Pandoc do end end - def run(profile, path) do - config = Application.get_env(:pandoc, profile) - args = config[:args] || [] + @doc """ + Runs the given command with `args`. + + The given args will be appended to the configured args. The task output will + be streamed directly to stdio. It returns the status of the underlying call. + """ + def run(profile, extra_args) when is_atom(profile) and is_list(extra_args) do + config = config_for!(profile) + + args = + case config[:args] do + args_fn when is_function(args_fn) -> args_fn.(extra_args) + args -> args || [] + end + + if args == [] and extra_args == [] do + raise "no arguments passed to pandoc" + end opts = [ cd: config[:cd] || File.cwd!(), @@ -37,17 +165,215 @@ defmodule Pandoc do stderr_to_stdout: true ] - new_filename = - path |> Path.basename() |> String.replace_suffix(".md", ".html") |> String.slice(11..-1//1) + {parsed_args, _, _} = OptionParser.parse(args, switches: [output: :string]) + {_, input_files, _} = OptionParser.parse(extra_args, switches: []) + + if parsed_args[:output] && + not File.cd!(opts[:cd], fn -> + input_files |> Enum.map(&File.exists?(&1)) |> Enum.all?() + end) do + parsed_args[:output] |> Path.expand(opts[:cd]) |> File.rm!() + else + bin_path() |> System.cmd(args ++ extra_args, opts) |> elem(1) + end + end + + defp start_unique_install_worker() do + ref = + __MODULE__.Supervisor + |> Supervisor.start_child( + Supervisor.child_spec({Task, &install/0}, restart: :transient, id: __MODULE__.Installer) + ) + |> case do + {:ok, pid} -> pid + {:error, {:already_started, pid}} -> pid + end + |> Process.monitor() + + receive do + {:DOWN, ^ref, _, _, _} -> :ok + end + end + + @doc """ + Installs, if not available, and then runs `pandoc`. + + This task may be invoked concurrently and it will avoid concurrent installs. + + Returns the same as `run/2`. + """ + def install_and_run(profile, args) do + File.exists?(bin_path()) || start_unique_install_worker() + + run(profile, args) + end + + @doc """ + The default URL to install Pandoc from. + """ + def default_base_url do + "https://github.com/jgm/pandoc/releases/download/$version/pandoc-$version-$target.tar.gz" + end + + @doc """ + Installs pandoc with `configured_version/0`. + + If invoked concurrently, this task will perform concurrent installs. + """ + def install(base_url \\ default_base_url()) do + version = configured_version() + tmp_opts = if System.get_env("MIX_XDG"), do: %{os: :linux}, else: %{} + + tmp_dir = + freshdir_p(:filename.basedir(:user_cache, "phx-pandoc", tmp_opts)) || + freshdir_p(Path.join(System.tmp_dir!(), "phx-pandoc")) || + raise "could not install pandoc. Set MIX_XGD=1 and then set XDG_CACHE_HOME to the path you want to use as cache" - new_path = args |> List.last() |> Path.join(new_filename) - out_path = Path.join(opts[:cd], new_path) |> Path.expand() + url = get_url(base_url) + tar = fetch_body!(url) - if File.exists?(path) do - args = List.replace_at(args, -1, out_path) - "pandoc" |> System.cmd(args ++ [path], opts) |> elem(1) + case :erl_tar.extract({:binary, tar}, [:compressed, cwd: to_charlist(tmp_dir)]) do + :ok -> :ok + other -> raise "couldn't unpack archive: #{inspect(other)}" + end + + bin_path = bin_path() + File.mkdir_p!(Path.dirname(bin_path)) + [tmp_dir, "pandoc-" <> version, "bin", "pandoc"] |> Path.join() |> File.cp!(bin_path) + end + + defp freshdir_p(path) do + with {:ok, _} <- File.rm_rf(path), + :ok <- File.mkdir_p(path) do + path else - File.rm(out_path) + _ -> nil end end + + defp fetch_body!(url, retry \\ true) do + scheme = URI.parse(url).scheme + url = String.to_charlist(url) + Logger.debug("Downloading pandoc from #{url}") + + {:ok, _} = Application.ensure_all_started(:inets) + {:ok, _} = Application.ensure_all_started(:ssl) + + if proxy = proxy_for_scheme(scheme) do + %{host: host, port: port} = URI.parse(proxy) + Logger.debug("Using #{String.upcase(scheme)}_PROXY: #{proxy}") + set_option = if "https" == scheme, do: :https_proxy, else: :proxy + :httpc.set_options([{set_option, {{String.to_charlist(host), port}, []}}]) + end + + # https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/inets + cacertfile = cacertfile() |> String.to_charlist() + + http_options = + [ + ssl: [ + verify: :verify_peer, + cacertfile: cacertfile, + depth: 2, + customize_hostname_check: [ + match_fun: :public_key.pkix_verify_hostname_match_fun(:https) + ], + versions: protocol_versions() + ] + ] + |> maybe_add_proxy_auth(scheme) + + options = [body_format: :binary] + + case {retry, :httpc.request(:get, {url, []}, http_options, options)} do + {_, {:ok, {{_, 200, _}, _headers, body}}} -> + body + + {true, {:error, {:failed_connect, [{:to_address, _}, {inet, _, reason}]}}} + when inet in [:inet, :inet6] and + reason in [:ehostunreach, :enetunreach, :eprotonosupport, :nxdomain] -> + :httpc.set_options(ipfamily: fallback(inet)) + fetch_body!(url, false) + + other -> + raise """ + Couldn't fetch #{url}: #{inspect(other)} + + This typically means we cannot reach the source or you are behind a proxy. + You can try again later and, if that does not work, you might: + + 1. If behind a proxy, ensure your proxy is configured and that + your certificates are set via the cacerts_path configuration + + 2. Manually download the executable from the URL above and + place it inside "_build/pandoc-#{target()}" + """ + end + end + + defp fallback(:inet), do: :inet6 + defp fallback(:inet6), do: :inet + + defp proxy_for_scheme("http") do + System.get_env("HTTP_PROXY") || System.get_env("http_proxy") + end + + defp proxy_for_scheme("https") do + System.get_env("HTTPS_PROXY") || System.get_env("https_proxy") + end + + defp maybe_add_proxy_auth(http_options, scheme) do + case proxy_auth(scheme) do + nil -> http_options + auth -> [{:proxy_auth, auth} | http_options] + end + end + + defp proxy_auth(scheme) do + with proxy when is_binary(proxy) <- proxy_for_scheme(scheme), + %{userinfo: userinfo} when is_binary(userinfo) <- URI.parse(proxy), + [username, password] <- String.split(userinfo, ":") do + {String.to_charlist(username), String.to_charlist(password)} + else + _ -> nil + end + end + + defp cacertfile() do + Application.get_env(:pandoc, :cacerts_path) || CAStore.file_path() + end + + defp protocol_versions do + if otp_version() < 25, do: [:"tlsv1.2"], else: [:"tlsv1.2", :"tlsv1.3"] + end + + defp otp_version do + :erlang.system_info(:otp_release) |> List.to_integer() + end + + # Available targets: https://github.com/jgm/pandoc/releases + # We support only linux-amd64, for now. + defp target do + case :os.type() do + # Assuming it's an x86 CPU + {:win32, _} -> + raise "pandoc does not currently support OS family: Windows" + + {:unix, osname} -> + arch_str = :erlang.system_info(:system_architecture) + [arch | _] = arch_str |> List.to_string() |> String.split("-") + + case arch do + "amd64" -> "#{osname}-amd64" + "x86_64" -> "#{osname}-amd64" + _ -> raise "pandoc does not currently support architecture: #{arch_str}" + end + end + end + + defp get_url(base_url) do + base_url + |> String.replace("$version", configured_version()) + |> String.replace("$target", target()) + end end diff --git a/lib/pandoc/application.ex b/lib/pandoc/application.ex index e30e2fa..04344da 100644 --- a/lib/pandoc/application.ex +++ b/lib/pandoc/application.ex @@ -5,16 +5,37 @@ defmodule Pandoc.Application do use Application - @impl true - def start(_type, _args) do - children = [ - # Starts a worker by calling: Pandoc.Worker.start_link(arg) - # {Pandoc.Worker, arg} - ] - - # See https://hexdocs.pm/elixir/Supervisor.html - # for other strategies and supported options - opts = [strategy: :one_for_one, name: Pandoc.Supervisor] - Supervisor.start_link(children, opts) + require Logger + import Pandoc, only: [latest_version: 0, configured_version: 0, bin_version: 0] + + @doc false + def start(_, _) do + if Application.get_env(:pandoc, :version_check, true) do + unless Application.get_env(:pandoc, :version) do + Logger.warning(""" + pandoc version is not configured. Please set it in your config files: + + config :pandoc, :version, "#{latest_version()}" + """) + end + + configured_version = configured_version() + + case bin_version() do + {:ok, ^configured_version} -> + :ok + + {:ok, version} -> + Logger.warning(""" + Outdated pandoc version. Expected #{configured_version}, got #{version}. \ + Please run `mix pandoc.install` or update the version in your config files.\ + """) + + :error -> + :ok + end + end + + Supervisor.start_link([], strategy: :one_for_one, name: Pandoc.Supervisor) end end diff --git a/lib/pandoc/watcher.ex b/lib/pandoc/watcher.ex index a174e46..9ff2620 100644 --- a/lib/pandoc/watcher.ex +++ b/lib/pandoc/watcher.ex @@ -1,22 +1,29 @@ defmodule Pandoc.Watcher do - use GenServer + @moduledoc false - @ext ".md" + use GenServer def start_link(args) do GenServer.start_link(__MODULE__, args) end - def init([profile | args]) do - {:ok, watcher_pid} = FileSystem.start_link(args) + # Callbacks + + @impl true + def init([profile, options, pattern, extra_args]) do + {:ok, watcher_pid} = FileSystem.start_link(options) FileSystem.subscribe(watcher_pid) - {:ok, %{watcher_pid: watcher_pid, profile: profile}} + {:ok, %{watcher_pid: watcher_pid, profile: profile, pattern: pattern, extra_args: extra_args}} end + @impl true def handle_info({:file_event, watcher_pid, {path, events}}, %{watcher_pid: watcher_pid} = state) do - case {Path.extname(path), :closed in events or :deleted in events} do - {@ext, true} -> Pandoc.run(state[:profile], path) - _ -> nil + case {String.match?(path, state[:pattern]), :closed in events or :deleted in events} do + {true, true} -> + Pandoc.install_and_run(state[:profile], [Path.basename(path) | state[:extra_args]]) + + _ -> + nil end {:noreply, state} -- cgit v1.2.3