tiagodalloca · October 6, 2022 01:32
diff --git a/refactoring-feedback.clj b/refactoring-feedback.clj
 ;; Hey Michi! Hope you're doing well

 ;; First of all, nice "homework" you sent! I like the idea :)

 ;; Now, I'm aware my comments might seem lengthy. Honestly, in a real world scenario I'd be more brief. As this
 ;; is part of the interview process, I intentionally chose to be more thorough, so you can also judge my thought
 ;; process.

 ;; The approach I'll be taking in this exercise is highlighting specific parts of the code
 ;; acknowledging the intention behind it and explaining if it falls short, trying to explain what are the
 ;; consequences of the choices made. After this first analysis, I'll suggest an equivalent function that could be
 ;; used as replacement, which presents significant improvements both in style and performance. These improvements
 ;; will be explained as well.


 ;; Let's dive in! Note that this gist is REPL friendly, so you can drop it right into an interactive buffer for
 ;; experimentation.

 (def test-state
  {:document {:paragraphs {:uuid-1 {,,,} ,,,}
              :paragraph-order [:uuid-1 :uuid-2 :uuid-3 :uuid-4]}
   :edit {:spellcheck {:mistakes {:uuid-1 [[4 12] [38 42]]
                                  :uuid-3 [[7 12]]
                                  :uuid-4 []
                                  :uuid-X [[0 4]]}}}})

 ;; It's ok to have these "get" functions when the "path" to the data you want is complicated. In this
 ;; specific case I find it to be unnecessary, but no harm leaving it be. I'd prefer to name it 
 ;; `get-paragraph-order` though.

 (defn paragraph-order [state]
  (get-in state [:document :paragraph-order]))

 ;; This next function is not named properly, as it's hard to guess what's supposed to achieve. Now, when you
 ;; read it, then it kinda makes sense.

 (defn mistakes [state]
  (let [paragraph-order (paragraph-order state)]
    (->> (get-in state [:edit :spellcheck :mistakes])
         ;; only paragraphs with mistakes and present on paragraph-order
         (filter (fn [[k v]] (and (seq v) (some #{k} paragraph-order)))))))

 ;; A improved version of this function would be:

 (defn filter-out-mistakes
  "Returns a vector of mistake, which is a tuple of paragraph uuid and vector of text ranges, leaving out paragraphs not present in the vector [:document :paragraph-order] or those with no mistakes."
  ;; Note how much cleaner the function is using destructuring to access the data
  [{{:keys [paragraph-order]} :document
   {{:keys [mistakes]} :spellcheck} :edit}]
  ;; We could do this:
  #_(filter (fn [[k v]] (and (seq v) (some #{k} paragraph-order))) mistakes)
  ;; But `(some #{k} paragraph-order)` is not efficient, as it will have to sequentially iterate for every
  ;; "mistakes" item.
  ;;
  ;; We could simply take the paragraph-order vector and build a map from it, making the code
  ;; more efficient. For filtering out empty mistakes, we combine filter and map using comp, which gives us a
  ;; transducer. Using a transducer lets us compose elegantly two different intentions (which makes it easier to
  ;; to read) while also keeping it efficient with one single interation. Note also that `k` becomes `p-uid`.
  ;; Worth mentioning that this approach preserves the order of `paragraph-order`, which will be handy.
  (into [] (comp (filter (fn [p-uid] (not-empty (get mistakes p-uid))))
                 (map (fn [p-uid] [p-uid (get mistakes p-uid)])))
        paragraph-order))

 (comment
  (mistakes test-state)
  (filter-out-mistakes test-state))

 ;; Many of the same issues as I mentioned in the previuos function and a lack of care for efficient collection
 ;; iteration:

 (defn ordered-mistakes [state]
  ;; Instead of let, we could have used destructuring
  (let [paragraph-order (paragraph-order state)]
    (->> (mistakes state)
         ;; You see, because the `mistakes` function doesn't guarantee order, we have to order ourselves. This
         ;; could be avoided as I demonstrated.
         (sort-by (fn [[k _]] (.indexOf paragraph-order k)))
         ;; It's ok to have this mapcat followed by vec, because mapcat will return a lazy sequence that'll
         ;; be realised only once, but we could've used a single iteration over `paragraphs-order` instead.
         ;; We end up with three different iterations (`mistakes`, `sort-by`, `mapcat`)
         (mapcat (fn [[k v]] (map (fn [i] [k i]) v)))
         (vec))))

 ;; Let's take a look at a better implementation by firstly adapting our filter-out-mistakes to make it
 ;; reusable, returning a transducer which will be used for composition later.
 (defn get-filter-out-mistakes-xform
  "Returns a transducer which iterates over a vector of paragraph-uuid (e.g. `(get-in test-state [:document
  :paragraph-order])`) and returns for every non-empty mistake a tuple of paragraph uuid and vector of text
  ranges."
  [{{{:keys [mistakes]} :spellcheck} :edit}]
  (comp (filter (fn [p-uid] (not-empty (get mistakes p-uid))))
        (map (fn [p-uid] [p-uid (get mistakes p-uid)]))))


 ;; Usage:
 (comment
  (into []
        (get-filter-out-mistakes-xform test-state)
        (get-in test-state [:document :paragraph-order])))

 ;; Finally:
 (defn get-ordered-mistakes
  "Returns a vector containing tuples of paragraph-uid and text-range ordered by paragraph-order"
  [{{:keys [paragraph-order]} :document
    :as document-state}]
  ;; Using `into` with transducers is efficient. The implementation uses transients (mutable data structures)
  ;; under the hood, so you don't have to worry. It'll make a single collection traverse.
  (into []
        ;; Transducer composition
        (comp (get-filter-out-mistakes-xform document-state)
              (mapcat (fn [[p-uid text-ranges]]
                        ;; It's ok to abbreviate text-range to tr here because we're interating over
                        ;; `text-ranges`, so no room for misinterpretation.
                        ;; 
                        ;; This is the "unpacking" part
                        (mapv (fn [tr] [p-uid tr]) text-ranges))))
        paragraph-order))

 (comment
  (get-ordered-mistakes test-state))

 ;; A minimalist single function implementation:

 (defn get-ordered-mistakes-v2
  ;; It's nice that the destructuring highlights the expected data format
  [{{:keys [paragraph-order]} :document
    {{:keys [mistakes]} :spellcheck} :edit}]
  (into []
        (mapcat (fn [p-uid]
                  ;; If the mistakes vector doesn't exist or is empty, it'll be the same as if you had filtered
                  ;; it out. Thus, no need for the filter xform.
                  (mapv (fn [text-range] [p-uid text-range])
                        (get mistakes p-uid))))
        paragraph-order))

 (comment
  (get-ordered-mistakes-v2 test-state))

 ;; There are other possible stylistic and implementation choices, but the main feedbacks I'd give would be:
 ;; 1. Effort spent giving a function an appropriate name is effort well spent!
 ;; 2. Use destructuring where it makes sense. It's nice and reads well.
 ;; 3. Beware of the different iteration methods applicable and how those work: when is the iteration eager, how
 ;; to compose iterating functions (map, filter, etc.) using lazy sequences and transducers, and using composition
 ;; to be clearer on your intent while preserving efficiency.

 ;; That's it! I hope you find my remarks to be relevant.

 ;; Eagerly waiting for your feedback!
	;; Hey Michi! Hope you're doing well

	;; First of all, nice "homework" you sent! I like the idea :)

	;; Now, I'm aware my comments might seem lengthy. Honestly, in a real world scenario I'd be more brief. As this
	;; is part of the interview process, I intentionally chose to be more thorough, so you can also judge my thought
	;; process.

	;; The approach I'll be taking in this exercise is highlighting specific parts of the code
	;; acknowledging the intention behind it and explaining if it falls short, trying to explain what are the
	;; consequences of the choices made. After this first analysis, I'll suggest an equivalent function that could be
	;; used as replacement, which presents significant improvements both in style and performance. These improvements
	;; will be explained as well.


	;; Let's dive in! Note that this gist is REPL friendly, so you can drop it right into an interactive buffer for
	;; experimentation.

	(def test-state
	{:document {:paragraphs {:uuid-1 {,,,} ,,,}
	:paragraph-order [:uuid-1 :uuid-2 :uuid-3 :uuid-4]}
	:edit {:spellcheck {:mistakes {:uuid-1 [[4 12] [38 42]]
	:uuid-3 [[7 12]]
	:uuid-4 []
	:uuid-X [[0 4]]}}}})

	;; It's ok to have these "get" functions when the "path" to the data you want is complicated. In this
	;; specific case I find it to be unnecessary, but no harm leaving it be. I'd prefer to name it
	;; `get-paragraph-order` though.

	(defn paragraph-order [state]
	(get-in state [:document :paragraph-order]))

	;; This next function is not named properly, as it's hard to guess what's supposed to achieve. Now, when you
	;; read it, then it kinda makes sense.

	(defn mistakes [state]
	(let [paragraph-order (paragraph-order state)]
	(->> (get-in state [:edit :spellcheck :mistakes])
	;; only paragraphs with mistakes and present on paragraph-order
	(filter (fn [[k v]] (and (seq v) (some #{k} paragraph-order)))))))

	;; A improved version of this function would be:

	(defn filter-out-mistakes
	"Returns a vector of mistake, which is a tuple of paragraph uuid and vector of text ranges, leaving out paragraphs not present in the vector [:document :paragraph-order] or those with no mistakes."
	;; Note how much cleaner the function is using destructuring to access the data
	[{{:keys [paragraph-order]} :document
	{{:keys [mistakes]} :spellcheck} :edit}]
	;; We could do this:
	#_(filter (fn [[k v]] (and (seq v) (some #{k} paragraph-order))) mistakes)
	;; But `(some #{k} paragraph-order)` is not efficient, as it will have to sequentially iterate for every
	;; "mistakes" item.
	;;
	;; We could simply take the paragraph-order vector and build a map from it, making the code
	;; more efficient. For filtering out empty mistakes, we combine filter and map using comp, which gives us a
	;; transducer. Using a transducer lets us compose elegantly two different intentions (which makes it easier to
	;; to read) while also keeping it efficient with one single interation. Note also that `k` becomes `p-uid`.
	;; Worth mentioning that this approach preserves the order of `paragraph-order`, which will be handy.
	(into [] (comp (filter (fn [p-uid] (not-empty (get mistakes p-uid))))
	(map (fn [p-uid] [p-uid (get mistakes p-uid)])))
	paragraph-order))

	(comment
	(mistakes test-state)
	(filter-out-mistakes test-state))

	;; Many of the same issues as I mentioned in the previuos function and a lack of care for efficient collection
	;; iteration:

	(defn ordered-mistakes [state]
	;; Instead of let, we could have used destructuring
	(let [paragraph-order (paragraph-order state)]
	(->> (mistakes state)
	;; You see, because the `mistakes` function doesn't guarantee order, we have to order ourselves. This
	;; could be avoided as I demonstrated.
	(sort-by (fn [[k _]] (.indexOf paragraph-order k)))
	;; It's ok to have this mapcat followed by vec, because mapcat will return a lazy sequence that'll
	;; be realised only once, but we could've used a single iteration over `paragraphs-order` instead.
	;; We end up with three different iterations (`mistakes`, `sort-by`, `mapcat`)
	(mapcat (fn [[k v]] (map (fn [i] [k i]) v)))
	(vec))))

	;; Let's take a look at a better implementation by firstly adapting our filter-out-mistakes to make it
	;; reusable, returning a transducer which will be used for composition later.
	(defn get-filter-out-mistakes-xform
	"Returns a transducer which iterates over a vector of paragraph-uuid (e.g. `(get-in test-state [:document
	:paragraph-order])`) and returns for every non-empty mistake a tuple of paragraph uuid and vector of text
	ranges."
	[{{{:keys [mistakes]} :spellcheck} :edit}]
	(comp (filter (fn [p-uid] (not-empty (get mistakes p-uid))))
	(map (fn [p-uid] [p-uid (get mistakes p-uid)]))))


	;; Usage:
	(comment
	(into []
	(get-filter-out-mistakes-xform test-state)
	(get-in test-state [:document :paragraph-order])))

	;; Finally:
	(defn get-ordered-mistakes
	"Returns a vector containing tuples of paragraph-uid and text-range ordered by paragraph-order"
	[{{:keys [paragraph-order]} :document
	:as document-state}]
	;; Using `into` with transducers is efficient. The implementation uses transients (mutable data structures)
	;; under the hood, so you don't have to worry. It'll make a single collection traverse.
	(into []
	;; Transducer composition
	(comp (get-filter-out-mistakes-xform document-state)
	(mapcat (fn [[p-uid text-ranges]]
	;; It's ok to abbreviate text-range to tr here because we're interating over
	;; `text-ranges`, so no room for misinterpretation.
	;;
	;; This is the "unpacking" part
	(mapv (fn [tr] [p-uid tr]) text-ranges))))
	paragraph-order))

	(comment
	(get-ordered-mistakes test-state))

	;; A minimalist single function implementation:

	(defn get-ordered-mistakes-v2
	;; It's nice that the destructuring highlights the expected data format
	[{{:keys [paragraph-order]} :document
	{{:keys [mistakes]} :spellcheck} :edit}]
	(into []
	(mapcat (fn [p-uid]
	;; If the mistakes vector doesn't exist or is empty, it'll be the same as if you had filtered
	;; it out. Thus, no need for the filter xform.
	(mapv (fn [text-range] [p-uid text-range])
	(get mistakes p-uid))))
	paragraph-order))

	(comment
	(get-ordered-mistakes-v2 test-state))

	;; There are other possible stylistic and implementation choices, but the main feedbacks I'd give would be:
	;; 1. Effort spent giving a function an appropriate name is effort well spent!
	;; 2. Use destructuring where it makes sense. It's nice and reads well.
	;; 3. Beware of the different iteration methods applicable and how those work: when is the iteration eager, how
	;; to compose iterating functions (map, filter, etc.) using lazy sequences and transducers, and using composition
	;; to be clearer on your intent while preserving efficiency.

	;; That's it! I hope you find my remarks to be relevant.

	;; Eagerly waiting for your feedback!