diff --git a/.browserslistrc b/.browserslistrc new file mode 100644 index 0000000..afe4650 --- /dev/null +++ b/.browserslistrc @@ -0,0 +1,5 @@ +# https://github.com/browserslist/browserslist#browserslistrc + +last 2 versions +> 0.2% +not dead diff --git a/.commitlintrc.json b/.commitlintrc.json new file mode 100644 index 0000000..5bed7cb --- /dev/null +++ b/.commitlintrc.json @@ -0,0 +1,8 @@ +{ + "rules": { + "body-max-line-length": [ + 0, + "always" + ] + } +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..2b740bf --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +root = true + +[*] +charset = utf-8 +indent_style = space +indent_size = 2 +trim_trailing_whitespace = true +# Unix-style newlines with a newline ending every file +end_of_line = lf +insert_final_newline = true + +[*.{js,css,scss}] +quote_type = single + +[*.{yml,yaml}] +quote_type = double + +[*.md] +trim_trailing_whitespace = false diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..262d6bd --- /dev/null +++ b/.gitattributes @@ -0,0 +1,16 @@ +# Set default behavior to automatically normalize line endings. +* text=auto + +# Force bash scripts to always use LF line endings so that if a repo is accessed +# in Unix via a file share from Windows, the scripts will work. +*.sh text eol=lf + +# Force batch scripts to always use CRLF line endings so that if a repo is accessed +# in Windows via a file share from Linux, the scripts will work. +*.{cmd,[cC][mM][dD]} text eol=crlf +*.{bat,[bB][aA][tT]} text eol=crlf + +# Denote all files that are truly binary and should not be modified. +*.png binary +*.jpg binary +*.ico binary diff --git a/.github/workflows/pages-deploy.yml b/.github/workflows/pages-deploy.yml new file mode 100644 index 0000000..1dc75e8 --- /dev/null +++ b/.github/workflows/pages-deploy.yml @@ -0,0 +1,71 @@ +name: "Build and Deploy" +on: + push: + branches: + - main + - master + paths-ignore: + - .gitignore + - README.md + - LICENSE + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + # submodules: true + # If using the 'assets' git submodule from Chirpy Starter, uncomment above + # (See: https://github.com/cotes2020/chirpy-starter/tree/main/assets) + + - name: Setup Pages + id: pages + uses: actions/configure-pages@v3 + + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: 3.2 # reads from a '.ruby-version' or '.tools-version' file if 'ruby-version' is omitted + bundler-cache: true + + - name: Build site + run: bundle exec jekyll b -d "_site${{ steps.pages.outputs.base_path }}" + env: + JEKYLL_ENV: "production" + + - name: Test site + run: | + bundle exec htmlproofer _site --disable-external --check-html --allow_hash_href + + - name: Upload site artifact + uses: actions/upload-pages-artifact@v1 + with: + path: "_site${{ steps.pages.outputs.base_path }}" + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..267d370 --- /dev/null +++ b/.gitignore @@ -0,0 +1,21 @@ +# Bundler cache 
+.bundle +vendor +Gemfile.lock + +# Jekyll cache +.jekyll-cache +_site + +# RubyGems +*.gem + +# NPM dependencies +node_modules +package-lock.json + +# IDE configurations +.idea +.vscode + +# Misc diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..58062c5 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "assets/lib"] + path = assets/lib + url = https://github.com/cotes2020/chirpy-static-assets.git diff --git a/.husky/commit-msg b/.husky/commit-msg new file mode 100755 index 0000000..4037788 --- /dev/null +++ b/.husky/commit-msg @@ -0,0 +1,4 @@ +#!/bin/sh +. "$(dirname "$0")/_/husky.sh" + +npx --no -- commitlint -x $(npm root -g)/@commitlint/config-conventional --edit diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 0000000..36b3563 --- /dev/null +++ b/.prettierrc @@ -0,0 +1,3 @@ +{ + "trailingComma": "none" +} diff --git a/.stylelintrc.json b/.stylelintrc.json new file mode 100644 index 0000000..f489fee --- /dev/null +++ b/.stylelintrc.json @@ -0,0 +1,23 @@ +{ + "extends": "stylelint-config-standard-scss", + "rules": { + "no-descending-specificity": null, + "shorthand-property-no-redundant-values": null, + "at-rule-no-vendor-prefix": null, + "property-no-vendor-prefix": null, + "selector-no-vendor-prefix": null, + "value-no-vendor-prefix": null, + "color-function-notation": "legacy", + "alpha-value-notation": "number", + "selector-not-notation": "simple", + "color-hex-length": "long", + "declaration-block-single-line-max-declarations": 3, + "scss/operator-no-newline-after": null, + "rule-empty-line-before": [ + "always", + { "ignore": ["after-comment", "first-nested", "inside-block"] } + ], + "value-keyword-case": ["lower", { "ignoreProperties": ["/^\\$/"] }], + "media-feature-range-notation": "prefix" + } +} diff --git a/.versionrc.json b/.versionrc.json new file mode 100644 index 0000000..4b880d3 --- /dev/null +++ b/.versionrc.json @@ -0,0 +1,20 @@ +{ + "skip": { + "commit": true, + "tag": true + }, + "types": [ + { + "type": "feat", + "section": "Features" + }, + { + "type": "fix", + "section": "Bug Fixes" + }, + { + "type": "perf", + "section": "Improvements" + } + ] +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e9c5bd6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,245 @@ +# Changelog + +All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 
+ +## [6.1.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v6.0.0...v6.1.0) (2023-07-02) + +### Features + +* **i18n:** add Thai locale file ([#1087](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1087)) ([a60e907](https://github.com/cotes2020/jekyll-theme-chirpy/commit/a60e90791d24811caff78e21c71dc85d6a729438)) + +### Bug Fixes + +* missing xml escape for `alt` of preview image ([#1113](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1113)) ([8b0fbf5](https://github.com/cotes2020/jekyll-theme-chirpy/commit/8b0fbf5a834276f273274e4d614edd71e339cbb0)) +* the cached image is covered by shimmer ([#1100](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1100)) ([df8ff54](https://github.com/cotes2020/jekyll-theme-chirpy/commit/df8ff546ec1c8d21a3d25e0124665001fcf756f3)) +* **ui:** min-height of `page` layout exceeds the mobile screen ([73af591](https://github.com/cotes2020/jekyll-theme-chirpy/commit/73af59194ab935d38b89d298fea0e96e13be7cb7)) +* **webfont:** resume semi-bold of font family `Source Sans Pro` ([c4da99c](https://github.com/cotes2020/jekyll-theme-chirpy/commit/c4da99c7ea5d6e32b1f1b815d7d8d6ae7b0f55de)) + +### Improvements + +* **build:** use `jekyll-include-cache` plugin to reduce build time ([#1098](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1098)) ([4fe145e](https://github.com/cotes2020/jekyll-theme-chirpy/commit/4fe145e9809ee1b370d9891135939534751462d0)), closes [#1094](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1094) +* CJK characters of the "Search Cancel" button will wrap ([#1105](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1105)) ([b6d1992](https://github.com/cotes2020/jekyll-theme-chirpy/commit/b6d1992f85ec543220e826087dcc89870e7e2c00)) +* **ui:** avoid blank space at the bottom of the homepage preview image ([ce2f6f5](https://github.com/cotes2020/jekyll-theme-chirpy/commit/ce2f6f5abef7a8b874e08d1f18c1fd002650dbf1)) +* **ui:** improve hover color of sidebar nav items in light mode ([728094d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/728094d1ba67a1e7c0a11e1c6c69bf87af9a767b)) + +## [6.0.1](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v6.0.0...v6.0.1) (2023-05-19) + +### Bug Fixes + +* **home:** preview image missing `[alt]` and `img_path` ([#1044](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1044)) ([aba9468](https://github.com/cotes2020/jekyll-theme-chirpy/commit/aba9468b5332802db961166889d4c4a84e404a2c)) +* **layout:** restore the margin bottom of the main area ([#1047](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1047)) ([eb40f51](https://github.com/cotes2020/jekyll-theme-chirpy/commit/eb40f51c84b011a7c301279527f544ad27efd5eb)) +* **post, page:** image link loses shimmer effect ([#1046](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1046)) ([3bd881d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/3bd881da70d685d10659f47bfe0e79cd02e7af92)) +* **typography:** long string for update-list is not truncated ([#1050](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1050)) ([a51d31c](https://github.com/cotes2020/jekyll-theme-chirpy/commit/a51d31c55a37fbe034f0b0f699f4df0b6a14ba8f)), closes [#1049](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1049) + +## [6.0.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.6.1...v6.0.0) (2023-05-16) + +### ⚠ BREAKING CHANGES + +* rename assets origin configuration files + +### Features + +* add a hook to insert custom metadata in `head` tag 
([#1015](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1015)) ([fe20341](https://github.com/cotes2020/jekyll-theme-chirpy/commit/fe203417d993508eedf5b9044fe53c4a566e44f9)) +* **i18n:** add sl-SI.yml with slovenian translations ([#989](https://github.com/cotes2020/jekyll-theme-chirpy/issues/989)) ([42a700a](https://github.com/cotes2020/jekyll-theme-chirpy/commit/42a700aa37889faa32d7ec1f6776ce4b9d845dc4)) +* **i18n:** add Traditional Chinese (Taiwan) localization file ([#961](https://github.com/cotes2020/jekyll-theme-chirpy/issues/961)) ([d97f95f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/d97f95fca0bcd450ea50709ffba0217f7e65d339)) +* **i18n:** added Swedish localization file ([#969](https://github.com/cotes2020/jekyll-theme-chirpy/issues/969)) ([fe70479](https://github.com/cotes2020/jekyll-theme-chirpy/commit/fe7047959e3694c6e603e764ded30dacd49e6aa9)) +* support hiding the modification date of a post ([#1020](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1020)) ([8da583d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/8da583d403456f6460ec1a6ebcbb0c2ca8127ff6)) +* **ui:** improve code snippet design ([6d99f5c](https://github.com/cotes2020/jekyll-theme-chirpy/commit/6d99f5cc36a69e5ccff51f81ba448c798d92e12e)) +* **ui:** improve the design for top bar ([83f1c34](https://github.com/cotes2020/jekyll-theme-chirpy/commit/83f1c34f92d85f3953ca9c9818be5399962bf1c9)) +* **ui:** new design footer content layout ([3210c59](https://github.com/cotes2020/jekyll-theme-chirpy/commit/3210c59466150dc04b4e4bdfc1ffd0e38adcff43)) +* **ui:** redesign the sidebar ([83bbe4a](https://github.com/cotes2020/jekyll-theme-chirpy/commit/83bbe4ac939edfd1706e68c080562e3462f83519)) +* **ui:** show preview image in home page ([97b8dfe](https://github.com/cotes2020/jekyll-theme-chirpy/commit/97b8dfeed6ce7677f6472e28dc3b03f3c2968b12)) + +### Bug Fixes + +* parameter parsing error in image URL ([#1022](https://github.com/cotes2020/jekyll-theme-chirpy/issues/1022)) ([ee88cec](https://github.com/cotes2020/jekyll-theme-chirpy/commit/ee88cec270ea5938f98913a3edf28a684cfbd6c0)) +* **rss:** double quotes in the post title will break the XML structure ([#965](https://github.com/cotes2020/jekyll-theme-chirpy/issues/965)) ([1719d81](https://github.com/cotes2020/jekyll-theme-chirpy/commit/1719d81d00b32b107c35b3903089be84a9b28a6c)) + +### refactor + +* rename assets origin configuration files ([c283e77](https://github.com/cotes2020/jekyll-theme-chirpy/commit/c283e7782fa9562d82d9855fd280a573fd58c75f)) + +### Improvements + +* **assets:** reduce HTTP requests to CDN ([9d97120](https://github.com/cotes2020/jekyll-theme-chirpy/commit/9d971201978e993a9af337d9cd5396a1ea225f00)) +* calculate heading font size dynamically ([#983](https://github.com/cotes2020/jekyll-theme-chirpy/issues/983)) ([52f5ee9](https://github.com/cotes2020/jekyll-theme-chirpy/commit/52f5ee9cd3f92a6e8f25eaa203831546cda85db6)) +* **i18n:** set the global default locales to "en" ([#979](https://github.com/cotes2020/jekyll-theme-chirpy/issues/979)) ([61fdbcb](https://github.com/cotes2020/jekyll-theme-chirpy/commit/61fdbcb83a3601ecae62ec230602b94a5eb832e1)) +* **tools:** avoid initialization interruption in single branch forks ([#992](https://github.com/cotes2020/jekyll-theme-chirpy/issues/992)) ([e90461a](https://github.com/cotes2020/jekyll-theme-chirpy/commit/e90461aa3c81633863db6a12c5924ddba33bd08e)) +* **ui:** improve categories color in dark mode 
([414dd13](https://github.com/cotes2020/jekyll-theme-chirpy/commit/414dd132aed70f4bd96cb712d00eacc82d2753e9)) +* **ui:** improve hover effect for post preview cards ([7626e4d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/7626e4d00544346a46b6e5ff2f3a99d234defe09)) +* **ui:** improve hover effect of trending tags ([34499f0](https://github.com/cotes2020/jekyll-theme-chirpy/commit/34499f0c927ce8fea3705dc2f0f0e6805cabda43)) +* **ui:** improve inline code in light mode ([e38309f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/e38309f3bd1302ffe60b682136b6efaf96f4d9ae)) +* **ui:** improve related posts design ([2918da9](https://github.com/cotes2020/jekyll-theme-chirpy/commit/2918da9f29465618d557c082ff3a2f23d7519049)) +* **ui:** improve the color of prompts in dark mode ([8cbbcfa](https://github.com/cotes2020/jekyll-theme-chirpy/commit/8cbbcfa26da0addd88affada23a65770250f2404)) +* **ui:** lighten the link color in light-mode ([7c23a4e](https://github.com/cotes2020/jekyll-theme-chirpy/commit/7c23a4ebc53b9e231c214e04f8ac0803cbcdb720)) +* **ui:** mute the marker in lists ([0c80552](https://github.com/cotes2020/jekyll-theme-chirpy/commit/0c80552d772b874e2a161f1270294faa3af18d4a)) +* **ui:** uniform the muted text color ([aadf939](https://github.com/cotes2020/jekyll-theme-chirpy/commit/aadf9393d5c7f7528d453c4e68eba4f5cbb85bd9)) +* **ux:** improve LQIP fade in effect ([003e7b6](https://github.com/cotes2020/jekyll-theme-chirpy/commit/003e7b60c93988a7bfae4c03a8346d4f8a5f0bb6)) + +## [5.6.1](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.6.0...v5.6.1) (2023-03-30) + +### Bug Fixes + +* **deps:** `tocbot` has no initialization detection ([#957](https://github.com/cotes2020/jekyll-theme-chirpy/issues/957)) ([8225174](https://github.com/cotes2020/jekyll-theme-chirpy/commit/8225174cb5e02fda7b3cc548ec821c876b0a5139)) +* mode-toggle leads to Disqus loading failure ([#945](https://github.com/cotes2020/jekyll-theme-chirpy/issues/945)) ([6fec411](https://github.com/cotes2020/jekyll-theme-chirpy/commit/6fec411c18ca5689c467c7b216ddeda02df23623)) +* pageviews not updated immediately ([8b4f99c](https://github.com/cotes2020/jekyll-theme-chirpy/commit/8b4f99c87f9a9227f47e84fb39d7b0f551d6f4dd)) + +## [5.6.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.5.2...v5.6.0) (2023-03-17) + +### Features + +* change TOC plugin to `tocbot` ([#774](https://github.com/cotes2020/jekyll-theme-chirpy/issues/774)) ([02b7bd5](https://github.com/cotes2020/jekyll-theme-chirpy/commit/02b7bd5095a2affe5b4c5ed7b5b182baaf642ff3)) +* **i18n:** add Greek Language Support. 
([#903](https://github.com/cotes2020/jekyll-theme-chirpy/issues/903)) ([712a9b2](https://github.com/cotes2020/jekyll-theme-chirpy/commit/712a9b22401ce591cf4c0bb03fbdd1693fee30bb)) +* **ux:** turn home page posts into clickable cards ([#895](https://github.com/cotes2020/jekyll-theme-chirpy/issues/895)) ([b85f633](https://github.com/cotes2020/jekyll-theme-chirpy/commit/b85f6330dea666350631c4461b742cdb54c5f052)) + +### Bug Fixes + +* css selector string escaping vulnerability ([#888](https://github.com/cotes2020/jekyll-theme-chirpy/issues/888)) ([5c6ec9d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/5c6ec9d06b6571e2c0efe6652078442dca8af477)) +* mathematics cannot scroll horizontally ([#760](https://github.com/cotes2020/jekyll-theme-chirpy/issues/760)) ([4681df7](https://github.com/cotes2020/jekyll-theme-chirpy/commit/4681df715118a37ae1e91b588de0adb67f4e331a)) +* notch status bar doesn't match theme color ([#918](https://github.com/cotes2020/jekyll-theme-chirpy/issues/918)) ([820ba62](https://github.com/cotes2020/jekyll-theme-chirpy/commit/820ba62e9e939090523a7077d01d01bd78ec84eb)) +* some console snippets will be incompletely copied ([e8e4901](https://github.com/cotes2020/jekyll-theme-chirpy/commit/e8e4901e340dd7e5fc5f656dd3c7bcd6c97b886a)) + +## [5.5.2](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.5.1...v5.5.2) (2023-01-30) + +### Bug Fixes + +* position of prompt icon is incorrect in paragraph on mobile ([5df953f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/5df953f6c877e2aa3f1f4981c97a0b8007abe6d4)) + +## [5.5.1](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.5.0...v5.5.1) (2023-01-29) + +### Bug Fixes + +* the icon position of the prompts in the list is incorrect ([0c9558d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/0c9558de8a01e9ab795778f351a8bbf4d6b21763)) + +## [5.5.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.4.0...v5.5.0) (2023-01-29) + +### Features + +* **i18n:** add Arabic translation ([#857](https://github.com/cotes2020/jekyll-theme-chirpy/issues/857)) ([765af53](https://github.com/cotes2020/jekyll-theme-chirpy/commit/765af53b77e5c63804784d5728f5970ae274c2c7)) +* **i18n:** add Czech language ([#833](https://github.com/cotes2020/jekyll-theme-chirpy/issues/833)) ([98d48f5](https://github.com/cotes2020/jekyll-theme-chirpy/commit/98d48f5da412276d4a0c99cd01a87b19349bc6bc)) +* **i18n:** add Finnish translations ([#843](https://github.com/cotes2020/jekyll-theme-chirpy/issues/843)) ([d6d0318](https://github.com/cotes2020/jekyll-theme-chirpy/commit/d6d03183eaf94b44e037cc48b6e1c47cee183f6e)) +* **i18n:** add Italian translation ([#850](https://github.com/cotes2020/jekyll-theme-chirpy/issues/850)) ([9a011e1](https://github.com/cotes2020/jekyll-theme-chirpy/commit/9a011e14d66195d8b2fb9ec62f3e60a3e56cd032)) + +### Bug Fixes + +* copy command line incomplete(`.gp` part) ([41ed331](https://github.com/cotes2020/jekyll-theme-chirpy/commit/41ed33145639415148aec8e85edc7a6fd0de0ca3)) +* correct encoding of spaces in share URLs ([#835](https://github.com/cotes2020/jekyll-theme-chirpy/issues/835)) ([f2d2858](https://github.com/cotes2020/jekyll-theme-chirpy/commit/f2d285844e6e2979f2b0eec1d20073d3c05b6c0c)) +* post's image would cover the PWA update alert ([bd374dd](https://github.com/cotes2020/jekyll-theme-chirpy/commit/bd374dd383c50f89c8f018ecb4e25772eeb8f6d8)) +* prompt with nested blockquotes renders incorrectly ([#846](https://github.com/cotes2020/jekyll-theme-chirpy/issues/846)) 
([babb4a0](https://github.com/cotes2020/jekyll-theme-chirpy/commit/babb4a0c5a58ceb2e4093bc465670accdd526c18)) + +## [5.4.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.3.2...v5.4.0) (2022-12-27) + +### Features + +* add `rel="me"` to Mastodon sidebar contact links for verification ([#807](https://github.com/cotes2020/jekyll-theme-chirpy/issues/807)) ([d2190c7](https://github.com/cotes2020/jekyll-theme-chirpy/commit/d2190c726f61c8c9732b88b4aecf699dc8bc7deb)) +* add embed video support ([ed6dc53](https://github.com/cotes2020/jekyll-theme-chirpy/commit/ed6dc539eff7003a3765bcd8c31ae5e91a863d65)) +* add shimmer background when image loads ([ab16fdc](https://github.com/cotes2020/jekyll-theme-chirpy/commit/ab16fdc7fc26811130b98a1773beb62bff6182e8)) +* set preview image ratio to 1.91 : 1 ([4b6ccbc](https://github.com/cotes2020/jekyll-theme-chirpy/commit/4b6ccbcbccce27b9fcb035812efefe4eb69301cf)) +* support dark and light mode images ([#481](https://github.com/cotes2020/jekyll-theme-chirpy/issues/481)) ([9306c7b](https://github.com/cotes2020/jekyll-theme-chirpy/commit/9306c7b39ecf9d9146bc1a25eebedc38eb2c3dd6)) +* support LQIP for images ([bffaf63](https://github.com/cotes2020/jekyll-theme-chirpy/commit/bffaf6374f265cec96ef743d42b46fbec3b59797)) + +### Bug Fixes + +* `hreflang` tag attribute of feed misses `site.alt_lang` ([7651d28](https://github.com/cotes2020/jekyll-theme-chirpy/commit/7651d2851b4bb7d8f0d068b62c036c89a1089bbc)) +* `og:image` will be incorrect if the image uses a cross-domain URL ([8de1abd](https://github.com/cotes2020/jekyll-theme-chirpy/commit/8de1abda6be3633982392178731431b0ddb1b52b)) +* refactoring error when the image URL contains parameters ([ec98f07](https://github.com/cotes2020/jekyll-theme-chirpy/commit/ec98f07aca0b80a9c07fbcdc8e0d7d66dba98ed2)) +* spaces in post title are encoded when sharing ([7efd2f8](https://github.com/cotes2020/jekyll-theme-chirpy/commit/7efd2f8aa2ea1c3aeb7d740bf9a018881c26fe65)) + +### Improvements + +* **cdn:** optimize cache policy for static assets ([7fb0ee0](https://github.com/cotes2020/jekyll-theme-chirpy/commit/7fb0ee0bedb63eee3f90a49c6d7fb8b5d78c9830)) + +## [5.3.2](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.3.1...v5.3.2) (2022-11-22) + +### Bug Fixes + +* `mermaid` occasionally fails to initialize ([#536](https://github.com/cotes2020/jekyll-theme-chirpy/issues/536)) ([48f14e3](https://github.com/cotes2020/jekyll-theme-chirpy/commit/48f14e39ac81bbfb3b9913ea3ee789d775b2d1ae)) +* **comment:** disqus doesn't follow theme mode switching ([b0d5956](https://github.com/cotes2020/jekyll-theme-chirpy/commit/b0d5956f5a0ed894984d6b1754efeba04d8bc966)) +* restore full-text search ([#741](https://github.com/cotes2020/jekyll-theme-chirpy/issues/741)) ([6774e0e](https://github.com/cotes2020/jekyll-theme-chirpy/commit/6774e0e1fb37cf467b14be481347412713763f05)) +* the image URL in the SEO-related tags is incomplete ([#754](https://github.com/cotes2020/jekyll-theme-chirpy/issues/754)) ([f6e9a3f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/f6e9a3fccf7ab34db71f8aefaf86fdcc05861076)) + +## [5.3.1](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.3.0...v5.3.1) (2022-10-25) + +### Bug Fixes + +* 404 page missing title in tablet/desktop view ([5511b28](https://github.com/cotes2020/jekyll-theme-chirpy/commit/5511b2883fd5a395fddfb642588d00c122f18da7)) +* prompt content overflows horizontally ([#705](https://github.com/cotes2020/jekyll-theme-chirpy/issues/705)) 
([fb13e32](https://github.com/cotes2020/jekyll-theme-chirpy/commit/fb13e3219b5eca0d2e4f86a1ecabfab75240369f)) +* **tools:** multiple configuration files will fail the test ([80cb0b3](https://github.com/cotes2020/jekyll-theme-chirpy/commit/80cb0b371754e96772a7907877a8ce196398ba3d)) + +### Improvements + +* **layout:** improve the min-height of main content ([#674](https://github.com/cotes2020/jekyll-theme-chirpy/issues/674)) ([49bb93c](https://github.com/cotes2020/jekyll-theme-chirpy/commit/49bb93cc0c89ad9cfaad5edcf9cb28c3d5134575)) +* modify checkbox icon with `Liquid` ([1fd665b](https://github.com/cotes2020/jekyll-theme-chirpy/commit/1fd665bf4990c26ae23635c511c5abc9640184d1)) +* optimize the extra padding in lists ([#703](https://github.com/cotes2020/jekyll-theme-chirpy/issues/703)) ([39da11e](https://github.com/cotes2020/jekyll-theme-chirpy/commit/39da11e3f3685f49321757576d2b87a48bf25db5)), closes [#702](https://github.com/cotes2020/jekyll-theme-chirpy/issues/702) +* **posts:** improve core block bottom padding ([d2fb98b](https://github.com/cotes2020/jekyll-theme-chirpy/commit/d2fb98b3e57f2f6c3fc3816551cd0721731adf40)) +* truncate post content for search results ([647eea8](https://github.com/cotes2020/jekyll-theme-chirpy/commit/647eea8dbd716f9d3cb8330c3139fa753903f51d)) +* **typography:** optimize the line height of post content ([eac3f9b](https://github.com/cotes2020/jekyll-theme-chirpy/commit/eac3f9b434ca77e3dc64eea9cedea7b93e7b306b)) + +### Others + +* **giscus:** add `reactions-enabled` option ([#712](https://github.com/cotes2020/jekyll-theme-chirpy/issues/712)) ([70662a0](https://github.com/cotes2020/jekyll-theme-chirpy/commit/70662a0365e6b9378602dc0a57462ddad5aebcf5)) +* **locale:** restore options for changing date format ([#716](https://github.com/cotes2020/jekyll-theme-chirpy/issues/716)) ([f904e8c](https://github.com/cotes2020/jekyll-theme-chirpy/commit/f904e8cd48c343cc31e25859d9d50bfe2c056f41)) +* remove site config option `prefer_datetime_locale` ([6852ceb](https://github.com/cotes2020/jekyll-theme-chirpy/commit/6852ceb280927ff4e753a3e1131f2b396d9807d0)) + +## [5.3.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.2.1...v5.3.0) (2022-09-23) + +### Features + +* add multiple authors to a post ([#677](https://github.com/cotes2020/jekyll-theme-chirpy/issues/677)) ([f1d9e99](https://github.com/cotes2020/jekyll-theme-chirpy/commit/f1d9e99bc02d3cd0a6b0cd1beac545f0cc7a24f8)), closes [#675](https://github.com/cotes2020/jekyll-theme-chirpy/issues/675) +* **i18n:** add Bulgarian support ([#612](https://github.com/cotes2020/jekyll-theme-chirpy/issues/612)) ([2fed338](https://github.com/cotes2020/jekyll-theme-chirpy/commit/2fed338ce6d078bf528c9717201fbc475f88cd22)) +* **i18n:** add German locale file ([#663](https://github.com/cotes2020/jekyll-theme-chirpy/issues/663)) ([940b281](https://github.com/cotes2020/jekyll-theme-chirpy/commit/940b2810e95065e30600ae8d5e4612e7183da60e)) +* **i18n:** add Hungarian locale file ([#597](https://github.com/cotes2020/jekyll-theme-chirpy/issues/597), [#598](https://github.com/cotes2020/jekyll-theme-chirpy/issues/598)) ([b032977](https://github.com/cotes2020/jekyll-theme-chirpy/commit/b0329775fc24d0323e5cc04cda46ece8b4531802)) +* **i18n:** add Turkish language ([#631](https://github.com/cotes2020/jekyll-theme-chirpy/issues/631)) ([ad137fa](https://github.com/cotes2020/jekyll-theme-chirpy/commit/ad137fa2945b1870b9c1dd5e9212a5f4af7c3580)) + +### Bug Fixes + +* add missing color to linkedin icon for share list 
([#683](https://github.com/cotes2020/jekyll-theme-chirpy/issues/683)) ([0dcd39d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/0dcd39d491c9c49e4acf7f75f83fe6e1d1839e37)) +* code contains spaces in headings ([#644](https://github.com/cotes2020/jekyll-theme-chirpy/issues/644)) ([3fa1bf3](https://github.com/cotes2020/jekyll-theme-chirpy/commit/3fa1bf305451f645a7f3aa93863b076463c8f165)) +* correct spelling of `panel` ([#686](https://github.com/cotes2020/jekyll-theme-chirpy/issues/686)) ([b288587](https://github.com/cotes2020/jekyll-theme-chirpy/commit/b288587c1c3d113a1c52c2d25fb46cddda348961)) +* correct the i18n for tab titles ([0c5b697](https://github.com/cotes2020/jekyll-theme-chirpy/commit/0c5b697fd3b283b6a5c926742b61ed49d8688c18)) +* the `code` doesn't wrap inside the prompt ([#626](https://github.com/cotes2020/jekyll-theme-chirpy/issues/626)) ([378b65a](https://github.com/cotes2020/jekyll-theme-chirpy/commit/378b65a0617787813519dde74d6f741f255eff3d)) + +## [5.2.1](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.2.0...v5.2.1) (2022-06-17) + +### Bug Fixes + +* exclude CHANGELOG from output ([971fe03](https://github.com/cotes2020/jekyll-theme-chirpy/commit/971fe03ec329ae49e7d60fe3af6101cfbd1acd6c)) +* **PWA:** sometimes update notification is not triggered ([96af729](https://github.com/cotes2020/jekyll-theme-chirpy/commit/96af7291ea5b2c5ed6372e7b6f7725e67c69f1ba)) + +## [5.2.0](https://github.com/cotes2020/jekyll-theme-chirpy/compare/v5.1.0...v5.2.0) (2022-06-09) + +### Features + +* add es-ES support to locales ([#533](https://github.com/cotes2020/jekyll-theme-chirpy/issues/533)) ([efe75ad](https://github.com/cotes2020/jekyll-theme-chirpy/commit/efe75adf2784956afb7a0b67f6634b146d9cb03b)) +* add fr-FR support to locales ([#582](https://github.com/cotes2020/jekyll-theme-chirpy/issues/582)) ([94e8144](https://github.com/cotes2020/jekyll-theme-chirpy/commit/94e81447afa457b1a6b7e8f487c47502803556d7)) +* add Vietnamese locale ([#517](https://github.com/cotes2020/jekyll-theme-chirpy/issues/517)) ([171463d](https://github.com/cotes2020/jekyll-theme-chirpy/commit/171463d76da9b7bc25dd327b8f0a868ea79e388b)) +* add pt-BR support to locales ([c2c503f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/c2c503f63336884282b6bda4ec0703d6ae76771b)) +* add option to turn off PWA ([#527](https://github.com/cotes2020/jekyll-theme-chirpy/issues/527)) ([106c981](https://github.com/cotes2020/jekyll-theme-chirpy/commit/106c981bac71e7434204a77e1f0c9c61d6eb1509)) +* **PWA:** add Service Worker update notification ([d127183](https://github.com/cotes2020/jekyll-theme-chirpy/commit/d127183b9774f6321e409acdb66bf8a85d8814be)) +* support showing description of preview image ([2bd6efa](https://github.com/cotes2020/jekyll-theme-chirpy/commit/2bd6efa95a174ac44e30a3af1e57e6f40d6e0e3a)) + +### Bug Fixes + +* alt is not a valid attribute for 'a' tag ([58928db](https://github.com/cotes2020/jekyll-theme-chirpy/commit/58928dbc9068db4e4cda4371eeae1865920dce6a)) +* assets URL is missing `baseurl` in self-hosted mode ([#591](https://github.com/cotes2020/jekyll-theme-chirpy/issues/591)) ([54124d5](https://github.com/cotes2020/jekyll-theme-chirpy/commit/54124d5134995fce52e4c2fc0a5d4d1743d6264d)) +* correct the `twitter:creator` of Twitter summary card ([96a16c8](https://github.com/cotes2020/jekyll-theme-chirpy/commit/96a16c868ede51e7dfa412de63ffa1e5a49add7f)) +* correctly URL encode share links ([4c1c8d8](https://github.com/cotes2020/jekyll-theme-chirpy/commit/4c1c8d8b0eacecbbaa2d522bbdd6430f350ff760)), 
closes [#496](https://github.com/cotes2020/jekyll-theme-chirpy/issues/496) +* follow paginate_path config for pagination ([6900d9f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/6900d9f2bc9380cbda4babf611c6eeff345291af)) +* force checkout of `gh-pages` branch ([#544](https://github.com/cotes2020/jekyll-theme-chirpy/issues/544)) ([5402523](https://github.com/cotes2020/jekyll-theme-chirpy/commit/5402523ae52a3740bcc15df0b226b2612644945d)) +* horizontal scroll for long equations ([#545](https://github.com/cotes2020/jekyll-theme-chirpy/issues/545)) ([30787fc](https://github.com/cotes2020/jekyll-theme-chirpy/commit/30787fc4cf151e955bb7afc26dfd859f1a06fce6)) +* p is not allowed in span ([4f590e2](https://github.com/cotes2020/jekyll-theme-chirpy/commit/4f590e2bba0639751771211bc0d357828ae70404)) +* remove whitespace from avatar URL ([#537](https://github.com/cotes2020/jekyll-theme-chirpy/issues/537)) ([0542b51](https://github.com/cotes2020/jekyll-theme-chirpy/commit/0542b5149c8287dca60e37f46ee36f31b43455e4)) +* resume the preview image SEO tag ([#529](https://github.com/cotes2020/jekyll-theme-chirpy/issues/529)) ([b8d1bcd](https://github.com/cotes2020/jekyll-theme-chirpy/commit/b8d1bcd3dea0abd1afef7ef154a4501fbb18938d)) +* script code should be in head or body, not in between ([2103191](https://github.com/cotes2020/jekyll-theme-chirpy/commit/2103191b2faf714a8e4418c7c347a1f942b51af8)) +* spurious header closing tags ([59e9557](https://github.com/cotes2020/jekyll-theme-chirpy/commit/59e955745f02f9b57c65af70b0979cd4a98bf53f)) +* table bypass refactoring when it contains IAL ([#519](https://github.com/cotes2020/jekyll-theme-chirpy/issues/519)) ([5d85ccb](https://github.com/cotes2020/jekyll-theme-chirpy/commit/5d85ccb9943aac88dbbefebe1c2234cdcbae5c53)) +* **theme mode:** `SCSS` syntax error ([#588](https://github.com/cotes2020/jekyll-theme-chirpy/issues/588)) ([76a1b6a](https://github.com/cotes2020/jekyll-theme-chirpy/commit/76a1b6a068c369138422dcd18ba08ec8cc3749a6)) +* use `jsonify` to generate valid json ([#521](https://github.com/cotes2020/jekyll-theme-chirpy/issues/521)) ([dd9d5a7](https://github.com/cotes2020/jekyll-theme-chirpy/commit/dd9d5a7207b746342d07176d8969dc4f2c380bf2)) +* when the `site.img_cdn` is set to the local path, the preview-image path loses the `baseurl` ([9cefe58](https://github.com/cotes2020/jekyll-theme-chirpy/commit/9cefe58993d9ea3a3a28424e7ffd8e0911567c5c)) + +### Improvements + +* avoid post pageviews from shifting while loading ([135a16f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/135a16f13ee783d9308669ff9a824847a73c951c)) +* avoid the layout shift for post datetime ([6d35f5f](https://github.com/cotes2020/jekyll-theme-chirpy/commit/6d35f5f8da044cfad071628bb53776de03efaae4)) +* **categories:** support singular and plural forms of locale ([#595](https://github.com/cotes2020/jekyll-theme-chirpy/issues/595)) ([35cadf9](https://github.com/cotes2020/jekyll-theme-chirpy/commit/35cadf969dd0161ee62503e242c545f006f7072b)) +* improve the responsive design for ultrawide screens ([#540](https://github.com/cotes2020/jekyll-theme-chirpy/issues/540)) ([5d6e8c5](https://github.com/cotes2020/jekyll-theme-chirpy/commit/5d6e8c5ef6aa71b4d2600c5305f6e8ba540557f7)) diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..7b5377f --- /dev/null +++ b/Gemfile @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +source "https://rubygems.org" + +gemspec + +group :test do + gem "html-proofer", "~> 3.18" +end + +# Windows and JRuby does not include zoneinfo files, so 
bundle the tzinfo-data gem
+# and associated library.
+platforms :mingw, :x64_mingw, :mswin, :jruby do
+  gem "tzinfo", ">= 1", "< 3"
+  gem "tzinfo-data"
+end
+
+# Performance-booster for watching directories on Windows
+gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin]
+
+# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem
+# do not have a Java counterpart.
+gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby]
+
+# Lock jekyll-sass-converter to 2.x on Linux-musl
+if RUBY_PLATFORM =~ /linux-musl/
+  gem "jekyll-sass-converter", "~> 2.0"
+end
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..299d89f
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2019 Cotes Chung
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..04ad93a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,7 @@
+![workflow status](https://github.com/shameekagarwal/shameekagarwal.github.io/actions/workflows/pages-deploy.yml/badge.svg)
+
+# Docker Command
+
+```shell
+docker run -it --rm --volume="$PWD:/srv/jekyll" -p 4000:4000 jekyll/jekyll jekyll serve
+```
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 0000000..a688bcd
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,212 @@
+# The Site Configuration
+
+# Import the theme
+theme: jekyll-theme-chirpy
+
+# Change the following value to '/PROJECT_NAME' ONLY IF your site is a GitHub Pages Project site
+# and doesn't have a custom domain.
+baseurl: ""
+
+# The language of the webpage › http://www.lingoes.net/en/translator/langcode.htm
+# If it has the same name as one of the files in folder `_data/locales`, the layout language will also be changed,
+# otherwise, the layout language will use the default value of 'en'.
+lang: en
+
+# Change to your timezone › http://www.timezoneconverter.com/cgi-bin/findzone/findzone
+timezone: Asia/Kolkata
+
+# jekyll-seo-tag settings › https://github.com/jekyll/jekyll-seo-tag/blob/master/docs/usage.md
+# ↓ --------------------------
+
+title: Shameek Agarwal # the main title
+
+tagline: A Software Engineer # it will be displayed as the sub-title
+
+description: >- # used by seo meta and the atom feed
+  A minimal, responsive and feature-rich Jekyll theme for technical writing.
+
+# fill in the protocol & hostname for your site, e.g., 'https://username.github.io'
+url: "https://shameekagarwal.github.io"
+
+github:
+  username: shameekagarwal # change to your github username
+
+twitter:
+  username: twitter_username # change to your twitter username
+
+social:
+  # Change to your full name.
+  # It will be displayed as the default author of the posts and the copyright owner in the Footer
+  name: Shameek Agarwal
+  email: shameek.agarwal@gmail.com # change to your email address
+  links:
+    - 'https://www.linkedin.com/in/shameek-agarwal' # Fill with your Linkedin homepage
+    # The first element serves as the copyright owner's link
+    # - https://twitter.com/username # change to your twitter homepage
+    # - https://github.com/username # change to your github homepage
+    # Uncomment below to add more social links
+    # - https://www.facebook.com/username
+    # - https://www.linkedin.com/in/username
+
+google_site_verification: # fill in your verification string
+
+# ↑ --------------------------
+# The end of `jekyll-seo-tag` settings
+
+google_analytics:
+  id: # fill in your Google Analytics ID
+
+# Prefer color scheme setting.
+#
+# Note: Keeping it empty will follow the system's preferred color scheme by default,
+# and there will be a toggle to switch the theme between dark and light
+# on the bottom left of the sidebar.
+#
+# Available options:
+#
+#   light - Use the light color scheme
+#   dark - Use the dark color scheme
+#
+theme_mode: dark
+
+# The CDN endpoint for images.
+# Notice that once it is assigned, the CDN URL
+# will be added to all image (site avatar & posts' images) paths starting with '/'
+#
+# e.g. 'https://cdn.com'
+img_cdn: ""
+
+# the avatar on the sidebar; supports local or CORS resources
+# avatar: "/assets/img/profile.jpg"
+
+# boolean type, the global switch for TOC in posts.
+toc: true
+
+comments:
+  active: giscus # The global switch for posts comments, e.g., 'disqus'. Leave it empty to disable.
+  # The active options are as follows:
+  disqus:
+    shortname: # fill with the Disqus shortname. › https://help.disqus.com/en/articles/1717111-what-s-a-shortname
+  # utterances settings › https://utteranc.es/
+  utterances:
+    repo: shameekagarwal/shameekagarwal.github.io
+    issue_term: title
+  # Giscus options › https://giscus.app
+  giscus:
+    repo: shameekagarwal/shameekagarwal.github.io
+    repo_id: MDEwOlJlcG9zaXRvcnkzOTA3NzM5NzE=
+    category: Comments
+    category_id: DIC_kwDOF0q8084CX8ku
+    mapping: url # optional, defaults to 'pathname'
+    input_position: # optional, defaults to 'bottom'
+    lang: # optional, defaults to the value of `site.lang`
+    reactions_enabled: # optional, defaults to `1`
+
+#
+
+# Self-hosted static assets, optional › https://github.com/cotes2020/chirpy-static-assets
+assets:
+  self_host:
+    enabled: # boolean, leave empty for false
+    # specify the Jekyll environment, empty means both
+    # only works if `assets.self_host.enabled` is 'true'
+    env: # [development|production]
+
+pwa:
+  enabled: true # the option for PWA feature
+
+paginate: 10
+
+# ------------ The following options are not recommended to be modified ------------------
+
+kramdown:
+  syntax_highlighter: rouge
+  syntax_highlighter_opts: # Rouge Options › https://github.com/jneen/rouge#full-options
+    css_class: highlight
+    # default_lang: console
+    span:
+      line_numbers: false
+    block:
+      line_numbers: true
+      start_line: 1
+
+collections:
+  tabs:
+    output: true
+    sort_by: order
+
+defaults:
+  - scope:
+      path: "_posts" # An empty string here means all files in the project
+      type: posts
+    values:
+      layout: post
+      comments: true # Enable comments in posts.
+      toc: true # Display TOC column in posts.
+      # DO NOT modify the following parameter unless you are confident enough
+      # to update the code of all other post links in this project.
+      permalink: /posts/:title/
+  - scope:
+      path: _drafts
+    values:
+      comments: false
+  - scope:
+      path: ""
+      type: tabs # see `site.collections`
+    values:
+      layout: page
+      permalink: /:title/
+  - scope:
+      path: assets/img/favicons
+    values:
+      swcache: true
+  - scope:
+      path: assets/js/dist
+    values:
+      swcache: true
+
+sass:
+  style: compressed
+
+compress_html:
+  clippings: all
+  comments: all
+  endings: all
+  profile: false
+  blanklines: false
+  ignore:
+    envs: [development]
+
+exclude:
+  - "*.gem"
+  - "*.gemspec"
+  - tools
+  - README.md
+  - CHANGELOG.md
+  - LICENSE
+  - rollup.config.js
+  - node_modules
+  - package*.json
+
+jekyll-archives:
+  enabled: [categories, tags]
+  layouts:
+    category: category
+    tag: tag
+  permalinks:
+    tag: /tags/:name/
+    category: /categories/:name/
diff --git a/_data/authors.yml b/_data/authors.yml
new file mode 100644
index 0000000..f012012
--- /dev/null
+++ b/_data/authors.yml
@@ -0,0 +1,17 @@
+## Template › https://github.com/jekyll/jekyll-seo-tag/blob/master/docs/advanced-usage.md#setting-author-url
+# -------------------------------------
+# {author_id}:
+#   name: {full name}
+#   twitter: {twitter_of_author}
+#   url: {homepage_of_author}
+# -------------------------------------
+
+cotes:
+  name: Cotes Chung
+  twitter: cotes2020
+  url: https://github.com/cotes2020/
+
+sille_bille:
+  name: Dinesh Prasanth Moluguwan Krishnamoorthy
+  twitter: dinesh_MKD
+  url: https://github.com/SilleBille/
diff --git a/_data/contact.yml b/_data/contact.yml
new file mode 100644
index 0000000..fbb9f13
--- /dev/null
+++ b/_data/contact.yml
@@ -0,0 +1,33 @@
+# The contact options.
+ +- type: github + icon: "fab fa-github" + +- type: gitlab + icon: "fab fa-gitlab" + url: 'https://gitlab.com/shameekagarwal' # Fill with your Linkedin homepage + +- type: email + icon: "fas fa-envelope" + noblank: true # open link in current tab + +#- type: rss +# icon: "fas fa-rss" +# noblank: true +# Uncomment and complete the url below to enable more contact options +# +# - type: mastodon +# icon: 'fab fa-mastodon' # icons powered by +# url: '' # Fill with your Mastodon account page, rel="me" will be applied for verification +# +- type: linkedin + icon: 'fab fa-linkedin' # icons powered by + url: 'https://www.linkedin.com/in/shameek-agarwal' # Fill with your Linkedin homepage +# +# - type: stack-overflow +# icon: 'fab fa-stack-overflow' +# url: '' # Fill with your stackoverflow homepage + +- type: phone + icon: 'fas fa-phone' + url: 'tel:+916290885679' diff --git a/_data/locales/ar.yml b/_data/locales/ar.yml new file mode 100644 index 0000000..c608298 --- /dev/null +++ b/_data/locales/ar.yml @@ -0,0 +1,91 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: منشور + category: فئة + tag: وسم + +# The tabs of sidebar +tabs: + # format: : + home: الرئيسية + categories: الفئات + tags: الوسوم + archives: الأرشيف + about: حول + +# the text displayed in the search bar & search results +search: + hint: بحث + cancel: إلغاء + no_results: نأسف! لا يوجد نتائج. + +panel: + lastmod: المحدثة مؤخرا + trending_tags: الوسوم الشائعة + toc: محتويات + +copyright: + # Shown at the bottom of the post + license: + template: هذا المنشور تحت ترخيص :LICENSE_NAME بواسطة المؤلف. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: بعض الحقوق محفوظة. + verbose: >- + ما لم يذكر خلاف ذلك ، يتم ترخيص منشورات المدونة على هذا الموقع + بموجب ترخيص Creative Commons Attribution 4.0 International (CC BY 4.0) من قبل المؤلف. + +meta: باستخدام :PLATFORM السمة :THEME + +not_found: + statment: عذرا, الرابط التالي غير صالح أو انه يشير إلى صفحة غير موجودة. + +notification: + update_found: يتوفر اصدار جديد للمحتوى. + update: تحديث + +# ----- Posts related labels ----- + +post: + written_by: بواسطة + posted: نشّر + updated: حدّث + words: كلمات + pageview_measure: مشاهدات + read_time: + unit: دقيقة + prompt: قراءة + relate_posts: إقرأ المزيد + share: شارك + button: + next: الأجدد + previous: الأقدم + copy_code: + succeed: تم النسخ! + share_link: + title: أنسخ الرابط + succeed: تم نسخ الرابط بنجاح! + +# Date time format. +# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: فئة + plural: فئات + post_measure: + singular: منشور + plural: منشورات diff --git a/_data/locales/bg-BG.yml b/_data/locales/bg-BG.yml new file mode 100644 index 0000000..3e04993 --- /dev/null +++ b/_data/locales/bg-BG.yml @@ -0,0 +1,81 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Публикация + category: Категория + tag: Таг + +# The tabs of sidebar +tabs: + # format: : + home: Начало + categories: Категории + tags: Тагове + archives: Архив + about: За мен + +# the text displayed in the search bar & search results +search: + hint: търси + cancel: Отмени + no_results: Упс! Не са намерени резултати. + +panel: + lastmod: Наскоро обновени + trending_tags: Популярни тагове + toc: Съдържание + +copyright: + # Shown at the bottom of the post + license: + template: Тази публикация е лицензирана под :LICENSE_NAME от автора. 
+ name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Някои права запазени. + verbose: >- + Освен ако не е посочено друго, публикациите в блога на този сайт са лицензирани + под лиценза Creative Commons Attribution 4.0 (CC BY 4.0) от автора. + +meta: Създадено чрез :PLATFORM и :THEME тема + +not_found: + statment: Съжалявам, но на този URL адрес няма налично съдържание. + +notification: + update_found: Налична е нова версия на съдържанието. + update: Обнови + +# ----- Posts related labels ----- + +post: + written_by: Автор + posted: Публикувана + updated: Обновена + words: думи + pageview_measure: преглеждания + read_time: + unit: мин + prompt: четиво + relate_posts: Още за четене + share: Споделете + button: + next: По-нови + previous: По-стари + copy_code: + succeed: Копирано! + share_link: + title: Копирай линк + succeed: Линкът е копиран успешно! + +# categories page +categories: + category_measure: + singular: категория + plural: категории + post_measure: + singular: публикация + plural: публикации diff --git a/_data/locales/cs-CZ.yml b/_data/locales/cs-CZ.yml new file mode 100644 index 0000000..e515c08 --- /dev/null +++ b/_data/locales/cs-CZ.yml @@ -0,0 +1,89 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Příspěvek + category: Kategorie + tag: Štítek + +# The tabs of sidebar +tabs: + # format: : + home: Domů + categories: Kategorie + tags: Štítky + archives: Archivy + about: O mně + +# the text displayed in the search bar & search results +search: + hint: hledat + cancel: Zrušit + no_results: Ups! Žádný výsledek nenalezen. + +panel: + lastmod: Nedávno aktualizováno + trending_tags: Trendy štítky + toc: Obsah + +copyright: + # Shown at the bottom of the post + license: + template: Tento příspěvek je licencován pod :LICENSE_NAME autorem. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Některá práva vyhrazena. + verbose: >- + Pokud není uvedeno jinak, jsou příspěvky na tomto webu licencovány + pod licencí Creative Commons Attribution 4.0 International (CC BY 4.0) Licence autora. + +meta: Použití :PLATFORM s motivem :THEME + +not_found: + statment: Omlouváme se, adresu URL jsme špatně umístili nebo odkazuje na něco, co neexistuje. + +notification: + update_found: Je k dispozici nová verze obsahu. + update: Aktualizace + +# ----- Posts related labels ----- + +post: + written_by: Od + posted: Zveřejněno + updated: Aktualizováno + words: slova + pageview_measure: zhlednutí + read_time: + unit: minut + prompt: čtení + relate_posts: Další čtení + share: Sdílet + button: + next: Novější + previous: Starší + copy_code: + succeed: Zkopírováno! + share_link: + title: Kopírovat odkaz + succeed: Zkopírováno! + +# Date time format. 
+# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: kategorie + post_measure: + singular: příspěvěk + plural: příspěvky diff --git a/_data/locales/de-DE.yml b/_data/locales/de-DE.yml new file mode 100644 index 0000000..7ea3956 --- /dev/null +++ b/_data/locales/de-DE.yml @@ -0,0 +1,80 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Eintrag + category: Kategorie + tag: Tag + +# The tabs of sidebar +tabs: + # format: : + home: Startseite + categories: Kategorien + tags: Tags + archives: Archiv + about: Über + +# the text displayed in the search bar & search results +search: + hint: Suche + cancel: Abbrechen + no_results: Ups! Keine Einträge gefunden. + +panel: + lastmod: Kürzlich aktualisiert + trending_tags: Beliebte Tags + toc: Inhalt + +copyright: + # Shown at the bottom of the post + license: + template: Dieser Eintrag ist vom Autor unter :LICENSE_NAME lizensiert. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Einige Rechte vorbehalten. + verbose: >- + Alle Einträge auf dieser Seite stehen, soweit nicht anders angegeben, unter der Lizenz Creative Commons Attribution 4.0 (CC BY 4.0). + +meta: Powered by :PLATFORM with :THEME theme + +not_found: + statment: Entschuldigung, dieser Link verweist auf keine vorhandene Ressource. + +notification: + update_found: Eine neue Version ist verfügbar. + update: Neue Version + +# ----- Posts related labels ----- + +post: + written_by: Von + posted: Veröffentlicht + updated: Aktualisiert + words: Wörter + pageview_measure: Aufrufe + read_time: + unit: Minuten + prompt: lesen + relate_posts: Weiterlesen + share: Teilen + button: + next: Nächster Eintrag + previous: Eintrag vorher + copy_code: + succeed: Kopiert! + share_link: + title: Link kopieren + succeed: Link erfolgreich kopiert! + +# categories page +categories: + category_measure: + singular: Kategorie + plural: Kategorien + post_measure: + singular: Eintrag + plural: Einträge diff --git a/_data/locales/el-GR.yml b/_data/locales/el-GR.yml new file mode 100644 index 0000000..ab5fb0e --- /dev/null +++ b/_data/locales/el-GR.yml @@ -0,0 +1,91 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Δημοσίευση + category: Κατηγορία + tag: Ετικέτα + +# The tabs of sidebar +tabs: + # format: : + home: Home + categories: Κατηγορίες + tags: Ετικέτες + archives: Αρχεία + about: Σχετικά + +# the text displayed in the search bar & search results +search: + hint: αναζήτηση + cancel: Ακύρωση + no_results: Oops! Κανένα αποτέλεσμα δεν βρέθηκε. + +panel: + lastmod: Σχετικά ενημερωμένα + trending_tags: Ετικέτες τάσης + toc: Περιεχόμενα + +copyright: + # Shown at the bottom of the post + license: + template: Η δημοσίευση αυτή βρίσκεται υπο την άδεια :LICENSE_NAME Greekforce1821. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Ορισμένα δικαιώματα reserved. + verbose: >- + Εκτός αλλού ή οπουδήποτε αλλού, τα blog posts σε αυτήν την σελίδα βρίσκονται υπο την άδεια + Creative Commons Attribution 4.0 International (CC BY 4.0) του δημιουργού. + +meta: Αξιοποιώντας την :PLATFORM theme :THEME + +not_found: + statment: Συγνώμη, έχουμε τοποθετήσει λάθος αυτήν την διεύθυνση URL ή υποδεικνύει κάτι που δεν υπάρχει. + +notification: + update_found: Υπάρχει διαθέσιμη μια νέα έκδοση του περιεχομένου. 
+ update: Ενημέρωση + +# ----- Posts related labels ----- + +post: + written_by: Από + posted: Δημοσιεύθηκε + updated: Ενημερώθηκε + words: λέξεις + pageview_measure: προβολές + read_time: + unit: Λεπτά + prompt: διαβάσματος + relate_posts: Περισσότερα + share: Κοινοποιήστε + button: + next: Νεότερα + previous: Παλαιότερα + copy_code: + succeed: Αντιγράφθηκε! + share_link: + title: Αντιγραφή συνδέσμου + succeed: Η διεύθυνση αντιγράφθηκε με επιτυχία! + +# Date time format. +# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: Κατηγορία + plural: Κατηγορίες + post_measure: + singular: Δημοσίευση + plural: Δημοσιεύσεις diff --git a/_data/locales/en.yml b/_data/locales/en.yml new file mode 100644 index 0000000..2f3f339 --- /dev/null +++ b/_data/locales/en.yml @@ -0,0 +1,91 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Post + category: Category + tag: Tag + +# The tabs of sidebar +tabs: + # format: : + home: Home + categories: Categories + tags: Tags + archives: Archives + about: About + +# the text displayed in the search bar & search results +search: + hint: search + cancel: Cancel + no_results: Oops! No results found. + +panel: + lastmod: Recently Updated + trending_tags: Trending Tags + toc: Contents + +copyright: + # Shown at the bottom of the post + license: + template: This post is licensed under :LICENSE_NAME by the author. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Some rights reserved. + verbose: >- + Except where otherwise noted, the blog posts on this site are licensed + under the Creative Commons Attribution 4.0 International (CC BY 4.0) License by the author. + +meta: Using the :PLATFORM theme :THEME + +not_found: + statment: Sorry, we've misplaced that URL or it's pointing to something that doesn't exist. + +notification: + update_found: A new version of content is available. + update: Update + +# ----- Posts related labels ----- + +post: + written_by: By + posted: Posted + updated: Updated + words: words + pageview_measure: views + read_time: + unit: min + prompt: read + relate_posts: Further Reading + share: Share + button: + next: Newer + previous: Older + copy_code: + succeed: Copied! + share_link: + title: Copy link + succeed: Link copied successfully! + +# Date time format. +# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: category + plural: categories + post_measure: + singular: post + plural: posts diff --git a/_data/locales/es-ES.yml b/_data/locales/es-ES.yml new file mode 100644 index 0000000..5529230 --- /dev/null +++ b/_data/locales/es-ES.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Entrada + category: Categoría + tag: Etiqueta + +# The tabs of sidebar +tabs: + # format: : + home: Inicio + categories: Categorías + tags: Etiquetas + archives: Archivo + about: Acerca de + +# the text displayed in the search bar & search results +search: + hint: Buscar + cancel: Cancelar + no_results: ¡Oops! No se encuentran resultados. + +panel: + lastmod: Actualizado recientemente + trending_tags: Etiquetas populares + toc: Contenido + +copyright: + # Shown at the bottom of the post + license: + template: Esta entrada está licenciada bajo :LICENSE_NAME por el autor. 
+ name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Algunos derechos reservados. + verbose: >- + Salvo que se indique explícitamente, las entradas de este blog están licenciadas + bajo la Creative Commons Attribution 4.0 International (CC BY 4.0) License por el autor. + +meta: Hecho con :PLATFORM usando el tema :THEME + +not_found: + statment: Lo sentimos, hemos perdido esa URL o apunta a algo que no existe. + +notification: + update_found: Hay una nueva versión de contenido disponible. + update: Actualizar + +# ----- Posts related labels ----- + +post: + written_by: Por + posted: Publicado + updated: Actualizado + words: palabras + pageview_measure: visitas + read_time: + unit: min + prompt: " de lectura" + relate_posts: Lecturas adicionales + share: Compartir + button: + next: Nuevo + previous: Anterior + copy_code: + succeed: ¡Copiado! + share_link: + title: Copiar enlace + succeed: ¡Enlace copiado! + +# categories page +categories: + category_measure: categorias + post_measure: entradas diff --git a/_data/locales/fi-FI.yml b/_data/locales/fi-FI.yml new file mode 100644 index 0000000..c817d2b --- /dev/null +++ b/_data/locales/fi-FI.yml @@ -0,0 +1,90 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Julkaisu + category: Kateogoria + tag: Tagi + +# The tabs of sidebar +tabs: + # format: : + home: Koti + categories: Kateogoriat + tags: Tagit + archives: Arkistot + about: Minusta + +# the text displayed in the search bar & search results +search: + hint: etsi + cancel: Peruuta + no_results: Hups! Ei tuloksia. + +panel: + lastmod: Viimeksi päivitetty + trending_tags: Trendaavat tagit + toc: Sisältö + +copyright: + # Shown at the bottom of the post + license: + template: Tämä julkaisu on lisenssoitu :LICENSE_NAME julkaisijan toimesta. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Jotkut oikeudet pidätetään. + verbose: >- + Paitsi jos erikseen mainitaan on kaikki sisältö Creative Commons Attribution 4.0 International (CC BY 4.0) Lisensoitu kirjoittajan toimesta. + +meta: Käytetään :PLATFORM iä Teema :THEME + +not_found: + statment: Valitettavasti tällä URL-osoitteella ei ole saatavilla sisältöä. + +notification: + update_found: Uusi versio sisällöstä on saatavilla. + update: Päivitä + +# ----- Posts related labels ----- + +post: + written_by: Kirjoittaja + posted: Julkaistu + updated: Päivitetty + words: sanaa + pageview_measure: katselukertoja + read_time: + unit: minuuttia + prompt: lukea + relate_posts: Jatka lukemista + share: Jaa + button: + next: Uudempi + previous: Vanhempi + copy_code: + succeed: Kopiotu! + share_link: + title: Kopioi linkki + succeed: Linkki kopioitu onnistuneesti! + +# Date time format. 
+# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: kategoria + plural: kategoriat + post_measure: + singular: julkaisu + plural: julkaisut diff --git a/_data/locales/fr-FR.yml b/_data/locales/fr-FR.yml new file mode 100644 index 0000000..72b034d --- /dev/null +++ b/_data/locales/fr-FR.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Post + category: Catégorie + tag: Tag + +# The tabs of sidebar +tabs: + # format: : + home: Accueil + categories: Catégories + tags: Tags + archives: Archives + about: A propos de + +# the text displayed in the search bar & search results +search: + hint: recherche + cancel: Annuler + no_results: Oups ! Aucun résultat trouvé. + +panel: + lastmod: Récemment mis à jour + trending_tags: Tags tendance + toc: Contenu + +copyright: + # Shown at the bottom of the post + license: + template: Cet article est sous licence :LICENSE_NAME par l'auteur. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Certains droits réservés. + verbose: >- + Sauf mention contraire, les articles de ce site sont publiés sous licence + sous la licence Creative Commons Attribution 4.0 International (CC BY 4.0) par l'auteur. + +meta: Propulsé par :PLATFORM avec le thème :THEME + +not_found: + statment: Désolé, nous avons égaré cette URL ou elle pointe vers quelque chose qui n'existe pas. + +notification: + update_found: Une nouvelle version du contenu est disponible. + update: Mise à jour + +# ----- Posts related labels ----- + +post: + written_by: Par + posted: Posté + updated: Mis à jour + words: mots + pageview_measure: vues + read_time: + unit: min + prompt: lire + relate_posts: Autres lectures + share: Partager + button: + next: Plus récent + previous: Plus ancien + copy_code: + succeed: Copié ! + share_link: + title: Copier le lien + succeed: Lien copié avec succès ! + +# categories page +categories: + category_measure: catégories + post_measure: posts diff --git a/_data/locales/hu-HU.yml b/_data/locales/hu-HU.yml new file mode 100644 index 0000000..b09f2cd --- /dev/null +++ b/_data/locales/hu-HU.yml @@ -0,0 +1,79 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Bejegyzés + category: Kategória + tag: Címke + +# The tabs of sidebar +tabs: + # format: : + home: Kezdőlap + categories: Kategóriák + tags: Címkék + archives: Archívum + about: Rólam + +# the text displayed in the search bar & search results +search: + hint: keresés + cancel: Mégse + no_results: Oops! Nincs találat a keresésre. + +panel: + lastmod: Legutóbb frissítve + trending_tags: Népszerű Címkék + toc: Tartalom + links: Blog linkek + +copyright: + # Shown at the bottom of the post + license: + template: A bejegyzés :LICENSE_NAME licenccel rendelkezik. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Néhány jog fenntartva. + verbose: >- + Az oldalon található tartalmak + Creative Commons Attribution 4.0 International (CC BY 4.0) licenccel rendelkeznek, + hacsak másképp nincs jelezve. + +meta: Készítve :PLATFORM motorral :THEME témával + +not_found: + statment: Sajnáljuk, az URL-t rosszul helyeztük el, vagy valami nem létezőre mutat. + +notification: + update_found: Elérhető a tartalom új verziója. 
+ update: Frissítés + +# ----- Posts related labels ----- + +post: + written_by: Szerző + posted: Létrehozva + updated: Frissítve + words: szó + pageview_measure: látogató + read_time: + unit: perc + prompt: elolvasni + relate_posts: További olvasnivaló + share: Megosztás + button: + next: Újabb + previous: Régebbi + copy_code: + succeed: Másolva! + share_link: + title: Link másolása + succeed: Link sikeresen másolva! + +# categories page +categories: + category_measure: kategória + post_measure: bejegyzés diff --git a/_data/locales/id-ID.yml b/_data/locales/id-ID.yml new file mode 100644 index 0000000..29ad156 --- /dev/null +++ b/_data/locales/id-ID.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Postingan + category: Kategori + tag: Tagar + +# The tabs of sidebar +tabs: + # format: : + home: Beranda + categories: Kategori + tags: Tagar + archives: Arsip + about: Tentang + +# the text displayed in the search bar & search results +search: + hint: Cari + cancel: Batal + no_results: Ups! Tidak ada hasil yang ditemukan. + +panel: + lastmod: Postingan Terbaru + trending_tags: Tagar Terpopuler + toc: Konten + +copyright: + # Shown at the bottom of the post + license: + template: Postingan ini dilisensikan di bawah :LICENSE_NAME oleh penulis. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Sebagian konten dilindungi. + verbose: >- + Kecuali jika dinyatakan, Postingan blog di situs ini dilisensikan + di bawah Lisensi Creative Commons Attribution 4.0 International (CC BY 4.0) oleh penulis. + +meta: Didukung oleh :PLATFORM dengan tema :THEME + +not_found: + statment: Maaf, kami gagal menemukan URL itu atau memang mengarah ke sesuatu yang tidak ada. + +notification: + update_found: Versi konten baru tersedia. + update: Perbarui + +# ----- Posts related labels ----- + +post: + written_by: Oleh + posted: Diterbitkan + updated: Diperbarui + words: kata + pageview_measure: dilihat + read_time: + unit: menit + prompt: baca + relate_posts: Postingan Lainya + share: Bagikan + button: + next: Terbaru + previous: Terlama + copy_code: + succeed: Disalin! + share_link: + title: Salin tautan + succeed: Tautan berhasil disalin! + +# categories page +categories: + category_measure: kategori + post_measure: Postingan diff --git a/_data/locales/it-IT.yml b/_data/locales/it-IT.yml new file mode 100644 index 0000000..cf7b691 --- /dev/null +++ b/_data/locales/it-IT.yml @@ -0,0 +1,90 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Post + category: Categoria + tag: Tag + +# The tabs of sidebar +tabs: + # format: : + home: Pagina principale + categories: Categorie + tags: Tags + archives: Archivio + about: Informazioni + +# the text displayed in the search bar & search results +search: + hint: ricerca + cancel: Cancella + no_results: Oops! La ricerca non ha fornito risultati. + +panel: + lastmod: Aggiornati recentemente + trending_tags: Tags più cliccati + toc: Contenuti + +copyright: + # Shown at the bottom of the post + license: + template: Questo post è sotto licenza :LICENSE_NAME a nome dell'autore. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Alcuni diritti riservati. + verbose: >- + Eccetto quando esplicitamente menzionato, i post di questo blog sono da ritenersi sotto + i termini di licenza Creative Commons Attribution 4.0 International (CC BY 4.0). 
+ +meta: Servizio offerto da :PLATFORM con tema :THEME +not_found: + statment: Ci scusiamo, non è stato possibile trovare l'URL in questione. Potrebbe puntare ad una pagina non esistente. + +notification: + update_found: Nuova versione del contenuto disponibile. + update: Aggiornamento + +# ----- Posts related labels ----- + +post: + written_by: Da + posted: Postato + updated: Aggiornato + words: parole + pageview_measure: visioni + read_time: + unit: min + prompt: lettura + relate_posts: Continua a leggere + share: Condividi + button: + next: Più recenti + previous: Meno recenti + copy_code: + succeed: Copiato! + share_link: + title: Copia link + succeed: Link copiato con successo! + +# Date time format. +# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: categoria + plural: categorie + post_measure: + singular: post + plural: posts diff --git a/_data/locales/ko-KR.yml b/_data/locales/ko-KR.yml new file mode 100644 index 0000000..4dd221b --- /dev/null +++ b/_data/locales/ko-KR.yml @@ -0,0 +1,84 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: 포스트 + category: 카테고리 + tag: 태그 + +# The tabs of sidebar +tabs: + # format: : + home: 홈 + categories: 카테고리 + tags: 태그 + archives: 아카이브 + about: 정보 + +# the text displayed in the search bar & search results +search: + hint: 검색 + cancel: 취소 + no_results: 검색 결과가 없습니다. + +panel: + lastmod: 최근 업데이트 + trending_tags: 인기 태그 + toc: 바로가기 + +copyright: + # Shown at the bottom of the post + license: + template: 이 기사는 저작권자의 :LICENSE_NAME 라이센스를 따릅니다. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: 일부 권리 보유 + verbose: >- + 명시되지 않는 한 이 사이트의 블로그 게시물은 작성자의 + Creative Commons Attribution 4.0 International(CC BY 4.0) 라이선스에 따라 사용이 허가되었습니다. + +meta: Powered by :PLATFORM with :THEME theme + +not_found: + statment: 해당 URL은 존재하지 않습니다. + +notification: + update_found: 새 버전의 콘텐츠를 사용할 수 있습니다. + update: 업데이트 + +# ----- Posts related labels ----- + +post: + written_by: By + posted: 게시 + updated: 업데이트 + words: 단어 + pageview_measure: 조회 + read_time: + unit: 분 + prompt: 읽는 시간 + relate_posts: 관련된 글 + share: 공유하기 + button: + next: 다음 글 + previous: 이전 글 + copy_code: + succeed: 복사되었습니다! + share_link: + title: 링크 복사하기 + succeed: 링크가 복사되었습니다! + +# Date time format. +# See: , +df: + post: + strftime: "%Y/%m/%d" + dayjs: "YYYY/MM/DD" + +# categories page +categories: + category_measure: 카테고리 + post_measure: 포스트 diff --git a/_data/locales/my-MM.yml b/_data/locales/my-MM.yml new file mode 100644 index 0000000..98848d5 --- /dev/null +++ b/_data/locales/my-MM.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: ပို့စ် + category: ကဏ္ဍ + tag: နာမ(တက်ဂ်) + +# The tabs of sidebar +tabs: + # format: : + home: အဓိကစာမျက်နှာ + categories: ကဏ္ဍများ + tags: နာမ(တက်ဂ်)များ + archives: မှတ်တမ်း​တိုက် + about: အကြောင်းအရာ + +# the text displayed in the search bar & search results +search: + hint: ရှာဖွေမည် + cancel: ဖျက်သိမ်းမည် + no_results: အိုး! 
ဘာမှမရှိပါ + +panel: + lastmod: မကြာသေးမီကမွမ်းမံထားသည် + trending_tags: ခေတ်စားနေသည့်တက်ဂ်များ + toc: အကြောင်းအရာများ + +copyright: + # Shown at the bottom of the post + license: + template: ဤပို့စ်သည်စာရေးသူ၏ :LICENSE_NAME လိုင်စင်ရထားသည်။ + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: မူပိုင်ခွင့်အချို့ကို လက်ဝယ်ထားသည်။ + verbose: >- + အခြားမှတ်သားထားချက်များမှလွဲ၍ ဤဆိုက်ရှိ ဘလော့ဂ်ပို့စ်များသည် စာရေးသူ၏ + Creative Commons Attribution 4.0 International (CC BY 4.0) အောက်တွင် လိုင်စင်ရထားပါသည်။ + +meta: Powered by :PLATFORM with :THEME theme + +not_found: + statment: ဝမ်းနည်းပါသည်၊ ကျွန်ုပ်တို့သည် အဆိုပါ URL ကို မှားယွင်းစွာ နေရာချထားခြင်း သို့မဟုတ် ၎င်းသည် မရှိသောအရာကို ညွှန်ပြနေပါသည်။ + +notification: + update_found: အကြောင်းအရာဗားရှင်းအသစ်ကို ရနိုင်ပါပြီ။ + update: အပ်ဒိတ် + +# ----- Posts related labels ----- + +post: + written_by: ကရေးသားခဲ့သည်။ + posted: တင်ထားခဲ့သည်။ + updated: မွမ်းမံထားခဲ့သည်။ + words: စကားလုံးများ + pageview_measure: အမြင်များ + read_time: + unit: မိနစ် + prompt: ဖတ်ပါမည် + relate_posts: နောက်ထပ်ဖတ်ရန် + share: မျှဝေရန် + button: + next: အသစ်များ + previous: အဟောင်းများ + copy_code: + succeed: ကူးယူလိုက်ပြီ။ + share_link: + title: လင့်ခ်ကို ကူးယူရန် + succeed: လင့်ခ်ကို ကူးယူလိုက်ပြီ။ + +# categories page +categories: + category_measure: ကဏ္ဍများ + post_measure: ပို့စ်များ diff --git a/_data/locales/pt-BR.yml b/_data/locales/pt-BR.yml new file mode 100644 index 0000000..4cef833 --- /dev/null +++ b/_data/locales/pt-BR.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Post + category: Categoria + tag: Tag + +# The tabs of sidebar +tabs: + # format: : + home: Home + categories: Categorias + tags: Tags + archives: Arquivos + about: Sobre + +# the text displayed in the search bar & search results +search: + hint: Buscar + cancel: Cancelar + no_results: Oops! Nenhum resultado encontrado. + +panel: + lastmod: Atualizados recentemente + trending_tags: Trending Tags + toc: Conteúdo + +copyright: + # Shown at the bottom of the post + license: + template: Esta postagem está licenciada sob :LICENSE_NAME pelo autor. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Alguns direitos reservados. + verbose: >- + Exceto onde indicado de outra forma, as postagens do blog neste site são licenciadas sob a + Creative Commons Attribution 4.0 International (CC BY 4.0) License pelo autor. + +meta: Feito com :PLATFORM usando o tema :THEME + +not_found: + statment: Desculpe, a página não foi encontrada. + +notification: + update_found: Uma nova versão do conteúdo está disponível. + update: atualização + +# ----- Posts related labels ----- + +post: + written_by: Por + posted: Postado em + updated: Atualizado + words: palavras + pageview_measure: visualizações + read_time: + unit: min + prompt: " de leitura" + relate_posts: Leia também + share: Compartilhar + button: + next: Próximo + previous: Anterior + copy_code: + succeed: Copiado! + share_link: + title: Copie o link + succeed: Link copiado com sucesso! 
+ +# categories page +categories: + category_measure: categorias + post_measure: posts diff --git a/_data/locales/ru-RU.yml b/_data/locales/ru-RU.yml new file mode 100644 index 0000000..4377300 --- /dev/null +++ b/_data/locales/ru-RU.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Публикация + category: Категория + tag: Тег + +# The tabs of sidebar +tabs: + # format: : + home: Домашняя страница + categories: Категории + tags: Теги + archives: Архив + about: О сайте + +# the text displayed in the search bar & search results +search: + hint: поиск + cancel: Отменить + no_results: Ох! Ничего не найдено. + +panel: + lastmod: Недавно обновлено + trending_tags: Популярные теги + toc: Содержание + +copyright: + # Shown at the bottom of the post + license: + template: Публикация защищена лицензией :LICENSE_NAME. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Некоторые права защищены. + verbose: >- + Публикации на сайте защищены лицензией Creative Commons Attribution 4.0 International (CC BY 4.0), + если в тексте публикации не указано иное. + +meta: Powered by :PLATFORM with :THEME theme + +not_found: + statment: Извините, эта ссылка указывает на ресурс который не существует. + +notification: + update_found: Доступна новая версия контента. + update: Обновлять + +# ----- Posts related labels ----- + +post: + written_by: Автор + posted: Время публикации + updated: Обновлено + words: слов + pageview_measure: просмотров + read_time: + unit: минут + prompt: чтения + relate_posts: Вам также может быть интересно + share: Поделиться + button: + next: Предыдущая публикация + previous: Следующая публикация + copy_code: + succeed: Скопировано успешно! + share_link: + title: Скопировать ссылку + succeed: Ссылка успешно скопирована! + +# categories page +categories: + category_measure: категории + post_measure: публикации diff --git a/_data/locales/sl-SI.yml b/_data/locales/sl-SI.yml new file mode 100644 index 0000000..7ab18b1 --- /dev/null +++ b/_data/locales/sl-SI.yml @@ -0,0 +1,91 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Objava #Post + category: Kategorija #Category + tag: Oznaka #Tag + +# The tabs of sidebar +tabs: + # format: : + home: Domov #Home + categories: Kategorije #Categories + tags: Oznake #Tags + archives: Arhiv #Archives + about: O meni #About + +# the text displayed in the search bar & search results +search: + hint: išči #search + cancel: Prekliči #Cancel + no_results: Ups! Vsebina ni bila najdena #Oops! No results found. + +panel: + lastmod: Nedavno Posodobljeno #Recently Updated + trending_tags: Priljubljene Oznake #Trending Tags + toc: Vsebina #Contents + +copyright: + # Shown at the bottom of the post + license: + template: Ta objava je licencirana pod :LICENCE_NAME s strani avtorja. #This post is licensed under :LICENSE_NAME by the author. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Nekatere pravice pridržane. #Some rights reserved. + verbose: >- + Razen kjer navedeno drugače, vse objave spletnega dnevnika so licencirane + pod Creative Commons Attribution 4.0 International (CC BY 4.0) s strani avtorja. + +meta: Uporabljena :PLATFORM tema :THEME #Using the :PLATFORM theme :THEME + +not_found: + statment: Oprostite, hiperpovezava je neustrezna ali vsebina ne obstajata. #Sorry, we've misplaced that URL or it's pointing to something that doesn't exist. 
+ +notification: + update_found: Novejša različica vsebine je na voljo. #A new version of content is available. + update: Posodobi #Update + +# ----- Posts related labels ----- + +post: + written_by: Od #By + posted: Objavljeno #Posted + updated: Posodobljeno #Updated + words: besede #words + pageview_measure: ogledi #views + read_time: + unit: min + prompt: beri #read + relate_posts: Nadaljnje branje #Further Reading + share: Deli #Share + button: + next: Novejše #Newer + previous: Starejše #Older + copy_code: + succeed: Kopirano! #Copied! + share_link: + title: Kopiraj povezavo #Copy link + succeed: Povezava uspešno kopirana! #Link copied successfully! + +# Date time format. +# See: , +df: + post: + strftime: "%e %b, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: kategorija #category + plural: kategorije #categories + post_measure: + singular: objava #post + plural: objave #posts diff --git a/_data/locales/sv-SE.yml b/_data/locales/sv-SE.yml new file mode 100644 index 0000000..7ec2ee2 --- /dev/null +++ b/_data/locales/sv-SE.yml @@ -0,0 +1,91 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Inlägg #Post + category: Kategori #Category + tag: Tagga #Tag + +# The tabs of sidebar +tabs: + # format: : + home: Hem #Home + categories: Kategorier #Categories + tags: Taggar #Tags + archives: Arkiv #Archives + about: Om #About + +# the text displayed in the search bar & search results +search: + hint: sök + cancel: Avbryt + no_results: Hoppsan! Hittade inga sökträffar. + +panel: + lastmod: Senast uppdaterad + trending_tags: Trendande taggar + toc: Innehåll + +copyright: + # Shown at the bottom of the post + license: + template: Den här posten är publicerad under licensen :LICENSE_NAME av författaren. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Vissa rättigheter är reserverade. + verbose: >- + Om inte annat anges är blogginläggen på denna webbplats licensierade + under Creative Commons Attribution 4.0 International (CC BY 4.0) av författaren. + +meta: Byggd med :PLATFORM och temat :THEME + +not_found: + statment: Ursäkta, vi har tappat bort den här webbadressen eller så pekar den på något som inte längre finns. + +notification: + update_found: Det finns en ny version av innehållet. + update: Uppdatera sidan + +# ----- Posts related labels ----- + +post: + written_by: Av + posted: Postad + updated: Uppdaterad + words: ord + pageview_measure: visningar + read_time: + unit: min + prompt: läsning + relate_posts: Mer läsning + share: Dela + button: + next: Nyare + previous: Äldre + copy_code: + succeed: Kopierat! + share_link: + title: Kopiera länk + succeed: Länken har kopierats! + +# Date time format. 
+# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: kategori + plural: kategorier + post_measure: + singular: inlägg + plural: inlägg diff --git a/_data/locales/th.yml b/_data/locales/th.yml new file mode 100644 index 0000000..22cb00a --- /dev/null +++ b/_data/locales/th.yml @@ -0,0 +1,91 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: โพสต์ + category: หมวดหมู่ + tag: แท็ก + +# The tabs of sidebar +tabs: + # format: : + home: หน้าแรก + categories: หมวดหมู่ + tags: แท็ก + archives: คลังเก็บ + about: เกี่ยวกับ + +# the text displayed in the search bar & search results +search: + hint: ค้นหา + cancel: ยกเลิก + no_results: โอ๊ะ! ไม่พบผลลัพธ์ + +panel: + lastmod: อัปเดตล่าสุด + trending_tags: แท็กยอดนิยม + toc: เนื้อหา + +copyright: + # Shown at the bottom of the post + license: + template: โพสต์นี้อยู่ภายใต้การอนุญาต :LICENSE_NAME โดยผู้เขียน + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: สงวนลิขสิทธิ์เป็นบางส่วน + verbose: >- + เว้นแต่ว่าจะระบุเป็นอย่างอื่น โพสต์บนเว็บไซต์นี้อยู่ภายใต้ + สัญญาอนุญาตครีเอทีฟคอมมอนส์แบบ 4.0 นานาชาติ (CC BY 4.0) โดยผู้เขียน + +meta: กำลังใช้ธีมของ :PLATFORM ชื่อ :THEME + +not_found: + statment: ขออภัย เราวาง URL นั้นไว้ผิดที่ หรือมันชี้ไปยังสิ่งที่ไม่มีอยู่ + +notification: + update_found: มีเวอร์ชันใหม่ของเนื้อหา + update: อัปเดต + +# ----- Posts related labels ----- + +post: + written_by: โดย + posted: โพสต์เมื่อ + updated: อัปเดตเมื่อ + words: คำ + pageview_measure: ครั้ง + read_time: + unit: นาที + prompt: อ่าน + relate_posts: อ่านต่อ + share: แชร์ + button: + next: ใหม่กว่า + previous: เก่ากว่า + copy_code: + succeed: คัดลอกแล้ว! + share_link: + title: คัดลอกลิงก์ + succeed: คัดลอกลิงก์เรียบร้อยแล้ว! + +# Date time format. +# See: , +df: + post: + strftime: "%b %e, %Y" + dayjs: "ll" + archives: + strftime: "%b" + dayjs: "MMM" + +# categories page +categories: + category_measure: + singular: หมวดหมู่ + plural: หมวดหมู่ + post_measure: + singular: โพสต์ + plural: โพสต์ diff --git a/_data/locales/tr-TR.yml b/_data/locales/tr-TR.yml new file mode 100644 index 0000000..851f5fc --- /dev/null +++ b/_data/locales/tr-TR.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Gönderi + category: Kategori + tag: Etiket + +# The tabs of sidebar +tabs: + # format: : + home: Ana Sayfa + categories: Kategoriler + tags: Etiketler + archives: Arşiv + about: Hakkında + +# the text displayed in the search bar & search results +search: + hint: Ara... + cancel: İptal + no_results: Hop! Öyle bir şey bulamadım. + +panel: + lastmod: Yeni Güncellendi + trending_tags: Yükselen Etiketler + toc: İçindekiler + +copyright: + # Shown at the bottom of the post + license: + template: Bu gönderi :LICENSE_NAME lisansı altındadır. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/deed.tr + + # Displayed in the footer + brief: Bazı hakları saklıdır. + verbose: >- + Aksi belirtilmediği sürece, bu sitedeki gönderiler Creative Commons Atıf 4.0 Uluslararası (CC BY 4.0) Lisansı altındadır. + Kısaca sayfa linkini de vererek paylaşabilir veya düzenleyip paylaşabilirsin. + +meta: :PLATFORM ve :THEME teması + +not_found: + statment: Üzgünüz, bu linki yanlış yerleştirdik veya var olmayan bir şeye işaret ediyor. + +notification: + update_found: İçeriğin yeni bir sürümü mevcut. 
+ update: Güncelle + +# ----- Posts related labels ----- + +post: + written_by: Yazan + posted: Gönderilme Tarihi + updated: Güncellenme Tarihi + words: sözcük + pageview_measure: görüntülenme + read_time: + unit: dakikada + prompt: okunabilir + relate_posts: Benzer Gönderiler + share: Paylaş + button: + next: İleri + previous: Geri + copy_code: + succeed: Kopyalandı. + share_link: + title: Linki kopyala + succeed: Link kopyalandı. + +# categories page +categories: + category_measure: kategori + post_measure: gönderi diff --git a/_data/locales/uk-UA.yml b/_data/locales/uk-UA.yml new file mode 100644 index 0000000..b605073 --- /dev/null +++ b/_data/locales/uk-UA.yml @@ -0,0 +1,77 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Публікація + category: Категорія + tag: Тег + +# The tabs of sidebar +tabs: + # format: : + home: Домашня сторінка + categories: Категорії + tags: Теги + archives: Архів + about: Про сайт + +# the text displayed in the search bar & search results +search: + hint: пошук + cancel: Скасувати + no_results: Ох! Нічого не знайдено. + +panel: + lastmod: Нещодавно оновлено + trending_tags: Популярні теги + toc: Зміст + +copyright: + # Shown at the bottom of the post + license: + template: Публікація захищена ліцензією :LICENSE_NAME. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Деякі права захищено. + verbose: >- + Публікації на сайті захищено ліцензією Creative Commons Attribution 4.0 International (CC BY 4.0), + якщо інше не вказано в тексті. + +meta: Powered by :PLATFORM with :THEME theme + +not_found: + statment: Вибачте, це посилання вказує на ресурс, що не існує. + +notification: + update_found: Доступна нова версія вмісту. + update: Оновлення + +# ----- Posts related labels ----- + +post: + written_by: Автор + posted: Час публікації + updated: Оновлено + words: слів + pageview_measure: переглядів + read_time: + unit: хвилин + prompt: читання + relate_posts: Вас також може зацікавити + share: Поділитися + button: + next: Попередня публікація + previous: Наступна публікація + copy_code: + succeed: Успішно скопійовано! + share_link: + title: Скопіювати посилання + succeed: Посилання успішно скопійовано! + +# categories page +categories: + category_measure: категорії + post_measure: публікації diff --git a/_data/locales/vi-VN.yml b/_data/locales/vi-VN.yml new file mode 100644 index 0000000..617431a --- /dev/null +++ b/_data/locales/vi-VN.yml @@ -0,0 +1,76 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: Bài viết + category: Danh mục + tag: Thẻ + +# The tabs of sidebar +tabs: + # format: : + home: Trang chủ + categories: Các danh mục + tags: Các thẻ + archives: Lưu trữ + about: Giới thiệu + +# the text displayed in the search bar & search results +search: + hint: tìm kiếm + cancel: Hủy + no_results: Không có kết quả tìm kiếm. + +panel: + lastmod: Mới cập nhật + trending_tags: Các thẻ thịnh hành + toc: Mục lục + +copyright: + # Shown at the bottom of the post + license: + template: Bài viết này được cấp phép bởi tác giả theo giấy phép :LICENSE_NAME. + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: Một số quyền được bảo lưu. + verbose: >- + Trừ khi có ghi chú khác, các bài viết đăng trên trang này được cấp phép bởi tác giả theo giấy phép Creative Commons Attribution 4.0 International (CC BY 4.0). 
+ +meta: Trang web này được tạo bởi :PLATFORM với chủ đề :THEME + +not_found: + statment: Xin lỗi, chúng tôi đã đặt nhầm URL hoặc đường dẫn trỏ đến một trang nào đó không tồn tại. + +notification: + update_found: Đã có phiên bản mới của nội dung. + update: Cập nhật + +# ----- Posts related labels ----- + +post: + written_by: Viết bởi + posted: Đăng lúc + updated: Cập nhật lúc + words: từ + pageview_measure: lượt xem + read_time: + unit: phút + prompt: đọc + relate_posts: Bài viết liên quan + share: Chia sẻ + button: + next: Mới hơn + previous: Cũ hơn + copy_code: + succeed: Đã sao chép! + share_link: + title: Sao chép đường dẫn + succeed: Đã sao chép đường dẫn thành công! + +# categories page +categories: + category_measure: danh mục + post_measure: bài viết diff --git a/_data/locales/zh-CN.yml b/_data/locales/zh-CN.yml new file mode 100644 index 0000000..f828134 --- /dev/null +++ b/_data/locales/zh-CN.yml @@ -0,0 +1,83 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: 文章 + category: 分类 + tag: 标签 + +# The tabs of sidebar +tabs: + # format: : + home: 首页 + categories: 分类 + tags: 标签 + archives: 归档 + about: 关于 + +# the text displayed in the search bar & search results +search: + hint: 搜索 + cancel: 取消 + no_results: 搜索结果为空 + +panel: + lastmod: 最近更新 + trending_tags: 热门标签 + toc: 文章内容 + +copyright: + # Shown at the bottom of the post + license: + template: 本文由作者按照 :LICENSE_NAME 进行授权 + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: 保留部分权利。 + verbose: >- + 除非另有说明,本网站上的博客文章均由作者按照知识共享署名 4.0 国际 (CC BY 4.0) 许可协议进行授权。 + +meta: 本站采用 :PLATFORM 主题 :THEME + +not_found: + statment: 抱歉,我们放错了该 URL,或者它指向了不存在的内容。 + +notification: + update_found: 发现新版本的内容。 + update: 更新 + +# ----- Posts related labels ----- + +post: + written_by: 作者 + posted: 发表于 + updated: 更新于 + words: 字 + pageview_measure: 次浏览 + read_time: + unit: 分钟 + prompt: 阅读 + relate_posts: 相关文章 + share: 分享 + button: + next: 下一篇 + previous: 上一篇 + copy_code: + succeed: 已复制! + share_link: + title: 分享链接 + succeed: 链接已复制! + +# Date time format. +# See: , +df: + post: + strftime: "%Y/%m/%d" + dayjs: "YYYY/MM/DD" + +# categories page +categories: + category_measure: 个分类 + post_measure: 篇文章 diff --git a/_data/locales/zh-TW.yml b/_data/locales/zh-TW.yml new file mode 100644 index 0000000..911253b --- /dev/null +++ b/_data/locales/zh-TW.yml @@ -0,0 +1,83 @@ +# The layout text of site + +# ----- Commons label ----- + +layout: + post: 文章 + category: 分類 + tag: 標籤 + +# The tabs of sidebar +tabs: + # format: : + home: 首頁 + categories: 分類 + tags: 標籤 + archives: 封存 + about: 關於 + +# the text displayed in the search bar & search results +search: + hint: 搜尋 + cancel: 取消 + no_results: 沒有搜尋結果 + +panel: + lastmod: 最近更新 + trending_tags: 熱門標籤 + toc: 文章摘要 + +copyright: + # Shown at the bottom of the post + license: + template: 本文章以 :LICENSE_NAME 授權 + name: CC BY 4.0 + link: https://creativecommons.org/licenses/by/4.0/ + + # Displayed in the footer + brief: 保留部份權利。 + verbose: >- + 除非另有說明,否則本網誌的文章均由作者按照姓名標示 4.0 國際 (CC BY 4.0) 授權條款進行授權。 + +meta: 本網站使用 :PLATFORM 產生,採用 :THEME 主題 + +not_found: + statment: 抱歉,您可能正在存取一個已被移動的 URL,或者它從未存在。 + +notification: + update_found: 發現新版本更新。 + update: 更新 + +# ----- Posts related labels ----- + +post: + written_by: 作者 + posted: 發布於 + updated: 更新於 + words: 字 + pageview_measure: 次瀏覽 + read_time: + unit: 分鐘 + prompt: 閱讀 + relate_posts: 相關文章 + share: 分享 + button: + next: 下一篇 + previous: 上一篇 + copy_code: + succeed: 已複製! + share_link: + title: 分享連結 + succeed: 已複製連結! 
+ +# Date time format. +# See: , +df: + post: + strftime: "%Y/%m/%d" + dayjs: "YYYY/MM/DD" + +# categories page +categories: + category_measure: 個分類 + post_measure: 篇文章 diff --git a/_data/origin/basic.yml b/_data/origin/basic.yml new file mode 100644 index 0000000..14d865a --- /dev/null +++ b/_data/origin/basic.yml @@ -0,0 +1,48 @@ +# fonts + +webfonts: /assets/lib/fonts/main.css + +# Libraries + +jquery: + js: /assets/lib/jquery/jquery.min.js + +bootstrap: + css: /assets/lib/bootstrap/bootstrap.min.css + js: /assets/lib/bootstrap/bootstrap.bundle.min.js + +toc: + css: /assets/lib/tocbot/tocbot.min.css + js: /assets/lib/tocbot/tocbot.min.js + +fontawesome: + css: /assets/lib/fontawesome-free/css/all.min.css + +search: + js: /assets/lib/simple-jekyll-search/simple-jekyll-search.min.js + +mermaid: + js: /assets/lib/mermaid/mermaid.min.js + +dayjs: + js: + common: /assets/lib/dayjs/dayjs.min.js + locale: /assets/lib/dayjs/locale/en.min.js + relativeTime: /assets/lib/dayjs/plugin/relativeTime.min.js + localizedFormat: /assets/lib/dayjs/plugin/localizedFormat.min.js + +magnific-popup: + css: /assets/lib/magnific-popup/magnific-popup.css + js: /assets/lib/magnific-popup/jquery.magnific-popup.min.js + +lazysizes: + js: /assets/lib/lazysizes/lazysizes.min.js + +clipboard: + js: /assets/lib/clipboard/clipboard.min.js + +polyfill: + js: /assets/lib/polyfill-v3-es6/polyfill.min.js + +mathjax: + js: /assets/lib/mathjax/tex-chtml.js diff --git a/_data/origin/cors.yml b/_data/origin/cors.yml new file mode 100644 index 0000000..2d28bba --- /dev/null +++ b/_data/origin/cors.yml @@ -0,0 +1,59 @@ +# CDNs + +cdns: + # Google Fonts + - url: https://fonts.googleapis.com + - url: https://fonts.gstatic.com + args: crossorigin + - url: https://fonts.googleapis.com + # jsDelivr CDN + - url: https://cdn.jsdelivr.net + +# fonts + +webfonts: https://fonts.googleapis.com/css2?family=Lato&family=Source+Sans+Pro:wght@400;600;700;900&display=swap + +# Libraries + +jquery: + js: https://cdn.jsdelivr.net/npm/jquery@3.7.0/dist/jquery.min.js + +bootstrap: + css: https://cdn.jsdelivr.net/npm/bootstrap@5.2.3/dist/css/bootstrap.min.css + js: https://cdn.jsdelivr.net/npm/bootstrap@5.2.3/dist/js/bootstrap.bundle.min.js + +toc: + css: https://cdn.jsdelivr.net/npm/tocbot@4.21.0/dist/tocbot.min.css + js: https://cdn.jsdelivr.net/npm/tocbot@4.21.0/dist/tocbot.min.js + +fontawesome: + css: https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.4.0/css/all.min.css + +search: + js: https://cdn.jsdelivr.net/npm/simple-jekyll-search@1.10.0/dest/simple-jekyll-search.min.js + +mermaid: + js: https://cdn.jsdelivr.net/npm/mermaid@9.4.3/dist/mermaid.min.js + +dayjs: + js: + common: https://cdn.jsdelivr.net/npm/dayjs@1.11.7/dayjs.min.js + locale: https://cdn.jsdelivr.net/npm/dayjs@1.11.7/locale/:LOCALE.min.js + relativeTime: https://cdn.jsdelivr.net/npm/dayjs@1.11.7/plugin/relativeTime.min.js + localizedFormat: https://cdn.jsdelivr.net/npm/dayjs@1.11.7/plugin/localizedFormat.min.js + +magnific-popup: + css: https://cdn.jsdelivr.net/npm/magnific-popup@1.1.0/dist/magnific-popup.min.css + js: https://cdn.jsdelivr.net/npm/magnific-popup@1.1.0/dist/jquery.magnific-popup.min.js + +lazysizes: + js: https://cdn.jsdelivr.net/npm/lazysizes@5.3.2/lazysizes.min.js + +clipboard: + js: https://cdn.jsdelivr.net/npm/clipboard@2.0.11/dist/clipboard.min.js + +polyfill: + js: https://polyfill.io/v3/polyfill.min.js?features=es6 + +mathjax: + js: https://cdn.jsdelivr.net/npm/mathjax@3.2.2/es5/tex-chtml.js diff --git a/_data/share.yml b/_data/share.yml new 
file mode 100644 index 0000000..c1d4d63 --- /dev/null +++ b/_data/share.yml @@ -0,0 +1,25 @@ +# Sharing options at the bottom of the post. +# Icons from + +platforms: + - type: Twitter + icon: "fab fa-twitter" + link: "https://twitter.com/intent/tweet?text=TITLE&url=URL" + + - type: Facebook + icon: "fab fa-facebook-square" + link: "https://www.facebook.com/sharer/sharer.php?title=TITLE&u=URL" + + - type: Telegram + icon: "fab fa-telegram" + link: "https://t.me/share/url?url=URL&text=TITLE" + + # Uncomment below if you need to. + # + # - type: Linkedin + # icon: "fab fa-linkedin" + # link: "https://www.linkedin.com/sharing/share-offsite/?url=URL" + # + # - type: Weibo + # icon: "fab fa-weibo" + # link: "http://service.weibo.com/share/share.php?title=TITLE&url=URL" diff --git a/_includes/comments.html b/_includes/comments.html new file mode 100644 index 0000000..39e521f --- /dev/null +++ b/_includes/comments.html @@ -0,0 +1,5 @@ + +{% if page.comments and site.comments.active %} + {% capture path %}comments/{{ site.comments.active }}.html{% endcapture %} + {% include {{ path }} %} +{% endif %} diff --git a/_includes/comments/disqus.html b/_includes/comments/disqus.html new file mode 100644 index 0000000..d2f59df --- /dev/null +++ b/_includes/comments/disqus.html @@ -0,0 +1,49 @@ + +
+  Comments powered by Disqus.
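The dispatch in `_includes/comments.html` (shown just above) builds the provider path from `site.comments.active`. A minimal sketch of how it resolves, assuming a hypothetical `comments.active: giscus` setting in `_config.yml` (that setting is not part of this diff):

```liquid
{% comment %} Sketch only — "giscus" is an assumed value of site.comments.active {% endcomment %}
{% capture path %}comments/{{ site.comments.active }}.html{% endcapture %}
{% comment %} path == "comments/giscus.html", so the matching provider include is rendered {% endcomment %}
{% include {{ path }} %}
```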
+ + diff --git a/_includes/comments/giscus.html b/_includes/comments/giscus.html new file mode 100644 index 0000000..ed918a9 --- /dev/null +++ b/_includes/comments/giscus.html @@ -0,0 +1,64 @@ + + diff --git a/_includes/comments/utterances.html b/_includes/comments/utterances.html new file mode 100644 index 0000000..afd7cd3 --- /dev/null +++ b/_includes/comments/utterances.html @@ -0,0 +1,51 @@ + + + + diff --git a/_includes/datetime.html b/_includes/datetime.html new file mode 100644 index 0000000..53258ba --- /dev/null +++ b/_includes/datetime.html @@ -0,0 +1,19 @@ + + +{% assign wrap_elem = include.wrap | default: 'em' %} +{% assign df_strftime = site.data.locales[include.lang].df.post.strftime | default: '%d/%m/%Y' %} +{% assign df_dayjs = site.data.locales[include.lang].df.post.dayjs | default: 'DD/MM/YYYY' %} + +<{{ wrap_elem }} + class="{% if include.class %}{{ include.class }}{% endif %}" + data-ts="{{ include.date | date: '%s' }}" + data-df="{{ df_dayjs }}" + {% if include.tooltip %} + data-bs-toggle="tooltip" data-bs-placement="bottom" + {% endif %} +> + {{ include.date | date: df_strftime }} + diff --git a/_includes/embed/twitch.html b/_includes/embed/twitch.html new file mode 100644 index 0000000..ab0419a --- /dev/null +++ b/_includes/embed/twitch.html @@ -0,0 +1,4 @@ + diff --git a/_includes/embed/youtube.html b/_includes/embed/youtube.html new file mode 100644 index 0000000..715063c --- /dev/null +++ b/_includes/embed/youtube.html @@ -0,0 +1,6 @@ + diff --git a/_includes/favicons.html b/_includes/favicons.html new file mode 100644 index 0000000..201f6d8 --- /dev/null +++ b/_includes/favicons.html @@ -0,0 +1,17 @@ + + +{% capture favicon_path %}{{ '/assets/img/favicons' | relative_url }}{% endcapture %} + + + + + + + + + + + diff --git a/_includes/footer.html b/_includes/footer.html new file mode 100644 index 0000000..3b36c4a --- /dev/null +++ b/_includes/footer.html @@ -0,0 +1,34 @@ + + +
+  {%- capture _platform -%}
+    Jekyll
+  {%- endcapture -%}
+  {%- capture _theme -%}
+    Chirpy
+  {%- endcapture -%}
+  {{ site.data.locales[include.lang].meta | replace: ':PLATFORM', _platform | replace: ':THEME', _theme }}
+  {{- '©' }}
+  {{ 'now' | date: '%Y' }}
+  {{ site.social.name }}.
+  {% if site.data.locales[include.lang].copyright.brief %}
+    {{- site.data.locales[include.lang].copyright.brief -}}
+  {% endif %}
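The footer's credit line comes from the locale string `meta: Using the :PLATFORM theme :THEME` (see `_data/locales/en.yml` earlier in this diff), with both placeholders substituted by the captured values. A minimal standalone sketch of the same substitution — in the real footer `Jekyll` and `Chirpy` are wrapped in links, which were stripped from this rendering:

```liquid
{% assign meta_tpl = 'Using the :PLATFORM theme :THEME' %}
{% capture _platform %}Jekyll{% endcapture %}
{% capture _theme %}Chirpy{% endcapture %}
{{ meta_tpl | replace: ':PLATFORM', _platform | replace: ':THEME', _theme }}
{% comment %} renders: Using the Jekyll theme Chirpy {% endcomment %}
```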
diff --git a/_includes/google-analytics.html b/_includes/google-analytics.html new file mode 100644 index 0000000..e5e5119 --- /dev/null +++ b/_includes/google-analytics.html @@ -0,0 +1,14 @@ + + + + diff --git a/_includes/head.html b/_includes/head.html new file mode 100644 index 0000000..e4bfcb6 --- /dev/null +++ b/_includes/head.html @@ -0,0 +1,95 @@ + + + + + + + + + + + {% capture seo_tags %} + {% seo title=false %} + {% endcapture %} + + {% if page.image %} + {% assign img = page.image.path | default: page.image %} + + {% unless img contains '://' %} + {% assign img_path = page.img_path | append: '/' | append: img | replace: '//', '/' %} + {% capture target %}"{{ img | absolute_url }}"{% endcapture %} + + {% if site.img_cdn contains '//' %} + + {% capture replacement %}"{{ site.img_cdn }}{{ img_path }}"{% endcapture %} + {% else %} + + {%- capture replacement -%} + "{{ site.img_cdn | append: '/' | append: img_path | replace: '//', '/' | absolute_url }}" + {%- endcapture -%} + {% endif %} + + {% assign seo_tags = seo_tags | replace: target, replacement %} + {% endunless %} + {% endif %} + + {{ seo_tags }} + + + {%- unless page.layout == 'home' -%} + {{ page.title | append: ' | ' }} + {%- endunless -%} + {{ site.title }} + + + {% include_cached favicons.html %} + + {% if site.resources.ignore_env != jekyll.environment and site.resources.self_hosted %} + + + {% else %} + {% for cdn in site.data.origin[type].cdns %} + + + {% endfor %} + + + {% endif %} + + + {% if jekyll.environment == 'production' and site.google_analytics.id != empty and site.google_analytics.id %} + + + + + + {% endif %} + + + + + + + + + + {% if site.toc and page.toc %} + + {% endif %} + + {% if page.layout == 'page' or page.layout == 'post' %} + + + {% endif %} + + + + {% unless site.theme_mode %} + {% include mode-toggle.html %} + {% endunless %} + + {% include metadata-hook.html %} + diff --git a/_includes/js-selector.html b/_includes/js-selector.html new file mode 100644 index 0000000..f6c8e9d --- /dev/null +++ b/_includes/js-selector.html @@ -0,0 +1,106 @@ + + + + +{% assign urls = site.data.origin[type].jquery.js + | append: ',' + | append: site.data.origin[type].bootstrap.js + | append: ',' + | append: site.data.origin[type].search.js +%} + + + +{% if page.layout == 'post' or page.layout == 'page' or page.layout == 'home' %} + {% assign urls = urls | append: ',' | append: site.data.origin[type].lazysizes.js %} + + {% unless page.layout == 'home' %} + + {% assign urls = urls + | append: ',' + | append: site.data.origin[type]['magnific-popup'].js + | append: ',' + | append: site.data.origin[type].clipboard.js + %} + {% endunless %} +{% endif %} + +{% if page.layout == 'home' + or page.layout == 'post' + or page.layout == 'archives' + or page.layout == 'category' + or page.layout == 'tag' +%} + {% assign locale = site.lang | split: '-' | first %} + + {% assign urls = urls + | append: ',' + | append: site.data.origin[type].dayjs.js.common + | append: ',' + | append: site.data.origin[type].dayjs.js.locale + | replace: ':LOCALE', locale + | append: ',' + | append: site.data.origin[type].dayjs.js.relativeTime + | append: ',' + | append: site.data.origin[type].dayjs.js.localizedFormat + %} +{% endif %} + +{% if page.content contains ' + +{% if page.math %} + + + + +{% endif %} + +{% if jekyll.environment == 'production' %} + + {% if site.pwa.enabled %} + + {% else %} + + {% endif %} + + + {% if site.google_analytics.id != empty and site.google_analytics.id %} + {% include google-analytics.html %} + {% endif %} +{% 
endif %} diff --git a/_includes/jsdelivr-combine.html b/_includes/jsdelivr-combine.html new file mode 100644 index 0000000..cffa699 --- /dev/null +++ b/_includes/jsdelivr-combine.html @@ -0,0 +1,26 @@ +{% assign urls = include.urls | split: ',' %} + +{% assign combined_urls = nil %} + +{% assign domain = 'https://cdn.jsdelivr.net/' %} + +{% for url in urls %} + {% if url contains domain %} + {% assign url_snippet = url | slice: domain.size, url.size %} + + {% if combined_urls %} + {% assign combined_urls = combined_urls | append: ',' | append: url_snippet %} + {% else %} + {% assign combined_urls = domain | append: 'combine/' | append: url_snippet %} + {% endif %} + + {% elsif url contains '//' %} + + {% else %} + + {% endif %} +{% endfor %} + +{% if combined_urls %} + +{% endif %} diff --git a/_includes/lang.html b/_includes/lang.html new file mode 100644 index 0000000..19558a0 --- /dev/null +++ b/_includes/lang.html @@ -0,0 +1,8 @@ +{% comment %} + Detect appearance language and return it through variable "lang" +{% endcomment %} +{% if site.data.locales[site.lang] %} + {% assign lang = site.lang %} +{% else %} + {% assign lang = 'en' %} +{% endif %} diff --git a/_includes/language-alias.html b/_includes/language-alias.html new file mode 100644 index 0000000..abfa7ba --- /dev/null +++ b/_includes/language-alias.html @@ -0,0 +1,70 @@ +{% comment %} + + Convert the alias of the syntax language to the official name + + See: + +{% endcomment %} + +{% assign _lang = include.language | default: '' %} + +{% case _lang %} + {% when 'actionscript', 'as', 'as3' %} + {{ 'ActionScript' }} + {% when 'applescript' %} + {{ 'AppleScript' }} + {% when 'brightscript', 'bs', 'brs' %} + {{ 'BrightScript' }} + {% when 'cfscript', 'cfc' %} + {{ 'CFScript' }} + {% when 'coffeescript', 'coffee', 'coffee-script' %} + {{ 'CoffeeScript' }} + {% when 'cs', 'csharp' %} + {{ 'C#' }} + {% when 'erl' %} + {{ 'Erlang' }} + {% when 'graphql' %} + {{ 'GraphQL' }} + {% when 'haskell', 'hs' %} + {{ 'Haskell' }} + {% when 'javascript', 'js' %} + {{ 'JavaScript' }} + {% when 'make', 'mf', 'gnumake', 'bsdmake' %} + {{ 'Makefile' }} + {% when 'md', 'mkd' %} + {{ 'Markdown' }} + {% when 'm' %} + {{ 'Matlab' }} + {% when 'objective_c', 'objc', 'obj-c', 'obj_c', 'objectivec' %} + {{ 'Objective-C' }} + {% when 'perl', 'pl' %} + {{ 'Perl' }} + {% when 'php','php3','php4','php5' %} + {{ 'PHP' }} + {% when 'py' %} + {{ 'Python' }} + {% when 'rb' %} + {{ 'Ruby' }} + {% when 'rs','no_run','ignore','should_panic' %} + {{ 'Rust' }} + {% when 'bash', 'zsh', 'ksh', 'sh' %} + {{ 'Shell' }} + {% when 'st', 'squeak' %} + {{ 'Smalltalk' }} + {% when 'tex'%} + {{ 'TeX' }} + {% when 'latex' %} + {{ 'LaTex' }} + {% when 'ts', 'typescript' %} + {{ 'TypeScript' }} + {% when 'vb', 'visualbasic' %} + {{ 'Visual Basic' }} + {% when 'vue', 'vuejs' %} + {{ 'Vue.js' }} + {% when 'yml' %} + {{ 'YAML' }} + {% when 'css', 'html', 'scss', 'ssh', 'toml', 'xml', 'yaml', 'json' %} + {{ _lang | upcase }} + {% else %} + {{ _lang | capitalize }} +{% endcase %} diff --git a/_includes/mermaid.html b/_includes/mermaid.html new file mode 100644 index 0000000..967cfb4 --- /dev/null +++ b/_includes/mermaid.html @@ -0,0 +1,58 @@ + + diff --git a/_includes/metadata-hook.html b/_includes/metadata-hook.html new file mode 100644 index 0000000..fd7e9bd --- /dev/null +++ b/_includes/metadata-hook.html @@ -0,0 +1 @@ + diff --git a/_includes/mode-toggle.html b/_includes/mode-toggle.html new file mode 100644 index 0000000..a347750 --- /dev/null +++ b/_includes/mode-toggle.html @@ 
-0,0 +1,143 @@ + + + diff --git a/_includes/no-linenos.html b/_includes/no-linenos.html new file mode 100644 index 0000000..8500693 --- /dev/null +++ b/_includes/no-linenos.html @@ -0,0 +1,10 @@ +{% comment %} + Remove the line number of the code snippet. +{% endcomment %} + +{% assign content = include.content %} + +{% if content contains '
' %}
+  {% assign content = content | replace: '
', '' %}
+{% endif %}
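The string literals inside `no-linenos.html` were lost when this diff was rendered, which is why the `contains`/`replace` arguments above appear empty. Going by the include's comment, it strips the line-number gutter from Rouge-highlighted snippets; a hedged sketch of what it plausibly does — the quoted markup is an assumption based on Rouge's table output, not taken from this diff:

```liquid
{% comment %} Sketch only; both quoted strings are assumptions, not the original literals. {% endcomment %}
{% assign content = include.content %}
{% if content contains '<table class="rouge-table">' %}
  {% assign content = content | replace: '<pre class="lineno">', '' %}
{% endif %}
```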
diff --git a/_includes/origin-type.html b/_includes/origin-type.html
new file mode 100644
index 0000000..7f72012
--- /dev/null
+++ b/_includes/origin-type.html
@@ -0,0 +1,13 @@
+{% comment %} Site static assets origin type {% endcomment %}
+
+{% assign type = 'cors' %}
+
+{% if site.assets.self_host.enabled %}
+  {% if site.assets.self_host.env %}
+    {% if site.assets.self_host.env == jekyll.environment %}
+      {% assign type = 'basic' %}
+    {% endif %}
+  {% else %}
+    {% assign type = 'basic' %}
+  {% endif %}
+{% endif %}
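`origin-type.html` only sets the `type` variable; templates elsewhere in this diff (for example `head.html` and `js-selector.html`) use it to pick asset URLs from `_data/origin/basic.yml` or `_data/origin/cors.yml`. A minimal sketch of that lookup, with the surrounding `<link>`/`<script>` markup simplified:

```liquid
{% include origin-type.html %}
{% comment %} type is now "basic" (self-hosted assets) or "cors" (CDN) {% endcomment %}
<link rel="stylesheet" href="{{ site.data.origin[type].bootstrap.css }}">
<script src="{{ site.data.origin[type].jquery.js }}"></script>
```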
diff --git a/_includes/post-nav.html b/_includes/post-nav.html
new file mode 100644
index 0000000..76bcd59
--- /dev/null
+++ b/_includes/post-nav.html
@@ -0,0 +1,37 @@
+  {% if page.previous.url %}
+    {{ page.previous.title }}
+  {% else %}
+    -
+  {% endif %}
+
+  {% if page.next.url %}
+    {{ page.next.title }}
+  {% else %}
+    -
+  {% endif %}
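The markup that wrapped these conditionals was stripped from this diff. As a rough, hedged reconstruction (element and class names here are assumptions, not taken from the source), the previous-post half of the navigation likely looked something like:

```liquid
{% comment %} Hypothetical reconstruction of the stripped markup {% endcomment %}
{% if page.previous.url %}
  <a href="{{ page.previous.url | relative_url }}" aria-label="older">
    <p>{{ page.previous.title }}</p>
  </a>
{% else %}
  <div class="disabled" aria-hidden="true"><p>-</p></div>
{% endif %}
```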
diff --git a/_includes/post-paginator.html b/_includes/post-paginator.html new file mode 100644 index 0000000..668b49f --- /dev/null +++ b/_includes/post-paginator.html @@ -0,0 +1,89 @@ + + +
+  {% if paginator.previous_page %}
+    {% assign prev_url = paginator.previous_page_path | relative_url %}
+  {% else %}
+    {% assign prev_url = '#' %}
+  {% endif %}
+
+  {% assign left_ellipsis = false %}
+  {% assign right_ellipsis = false %}
+
+  {% for i in (1..paginator.total_pages) %}
+    {% assign pre = paginator.page | minus: 1 %}
+    {% assign next = paginator.page | plus: 1 %}
+    {% assign pre_less = pre | minus: 1 %}
+    {% assign next_more = next | plus: 1 %}
+    {% assign show = false %}
+
+    {% if paginator.page == 1 %}
+      {% if i <= 3 or i == paginator.total_pages %}
+        {% assign show = true %}
+      {% endif %}
+    {% elsif paginator.page == paginator.total_pages %}
+      {% if i == 1 or i >= pre_less %}
+        {% assign show = true %}
+      {% endif %}
+    {% else %}
+      {% if i == 1 or i == paginator.total_pages %}
+        {% assign show = true %}
+      {% elsif i >= pre and i <= next %}
+        {% assign show = true %}
+      {% endif %}
+    {% endif %}
+
+    {% if show %}
+      {{- i -}}
+    {% else %}
+      {% if i < pre and left_ellipsis == false %}
+        ...
+        {% assign left_ellipsis = true %}
+      {% elsif i > next and right_ellipsis == false %}
+        ...
+        {% assign right_ellipsis = true %}
+      {% endif %}
+    {% endif %}
+  {% endfor %}
+
+  {{ paginator.page }} / {{ paginator.total_pages }}
+
+  {% if paginator.next_page_path %}
+    {% assign next_url = paginator.next_page_path | relative_url %}
+  {% else %}
+    {% assign next_url = '#' %}
+  {% endif %}
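As a concrete check of the windowing above: on a middle page, only the first page, the last page, and the pages adjacent to the current one are rendered, with a single ellipsis per side. A standalone sketch that mirrors the middle-page branch (the page numbers are hypothetical values):

```liquid
{% assign total_pages = 10 %}
{% assign current = 5 %}
{% assign pre = current | minus: 1 %}
{% assign next = current | plus: 1 %}
{% assign left_done = false %}
{% assign right_done = false %}
{% for i in (1..total_pages) %}
  {% if i == 1 or i == total_pages or i >= pre and i <= next %}
    {{ i }}
  {% elsif i < pre and left_done == false %}
    ...
    {% assign left_done = true %}
  {% elsif i > next and right_done == false %}
    ...
    {% assign right_done = true %}
  {% endif %}
{% endfor %}
{% comment %} renders: 1 ... 4 5 6 ... 10 {% endcomment %}
```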
+ diff --git a/_includes/post-sharing.html b/_includes/post-sharing.html new file mode 100644 index 0000000..f607ba2 --- /dev/null +++ b/_includes/post-sharing.html @@ -0,0 +1,35 @@ + + + diff --git a/_includes/read-time.html b/_includes/read-time.html new file mode 100644 index 0000000..9952410 --- /dev/null +++ b/_includes/read-time.html @@ -0,0 +1,37 @@ + + +{% assign words = include.content | strip_html | number_of_words: 'auto' %} + + + +{% assign wpm = 180 %} +{% assign min_time = 1 %} + +{% assign read_time = words | divided_by: wpm %} + +{% unless read_time > 0 %} + {% assign read_time = min_time %} +{% endunless %} + +{% capture read_prompt %} + {{- site.data.locales[include.lang].post.read_time.prompt -}} +{% endcapture %} + + + + + {{- read_time -}} + {{ ' ' }} + {{- site.data.locales[include.lang].post.read_time.unit -}} + + {%- if include.prompt -%} + {%- assign _prompt_words = read_prompt | number_of_words: 'auto' -%} + {%- unless _prompt_words > 1 -%}{{ ' ' }}{%- endunless -%} + {{ read_prompt }} + {%- endif -%} + diff --git a/_includes/refactor-content.html b/_includes/refactor-content.html new file mode 100644 index 0000000..655ecd6 --- /dev/null +++ b/_includes/refactor-content.html @@ -0,0 +1,286 @@ + + +{% assign _content = include.content %} + + + +{% if _content contains '', '' + | replace: '
', '' + | replace: '
', '
' + %} +{% endif %} + + + +{% if _content contains '
' %}
+  {% assign _content = _content
+    | replace: '
', '' + %} +{% endif %} + + +{% if _content contains '', + '' + | replace: + '', + '' + %} +{% endif %} + + +{% assign IMG_TAG = ' + {% if site.img_cdn %} + {% if site.img_cdn contains '//' %} + {% assign _path_prefix = site.img_cdn %} + {% else %} + {% assign _path_prefix = site.img_cdn | relative_url %} + {% endif %} + {% else %} + {% assign _path_prefix = site.baseurl %} + {% endif %} + + + {% if page.img_path %} + {% assign _path = page.img_path | append: '/' | replace: '//', '/' %} + {% assign _path_prefix = _path_prefix | append: _path %} + {% endif %} + + {% for _img_snippet in _img_snippets %} + {% if forloop.first %} + {% assign _img_content = _img_snippet %} + {% continue %} + {% endif %} + + {% assign _left = _img_snippet | split: '>' | first %} + {% assign _right = _img_snippet | remove: _left %} + + {% unless _left contains 'src=' %} + {% continue %} + {% endunless %} + + {% assign _left = _left | remove: ' /' | replace: ' w=', ' width=' | replace: ' h=', ' height=' %} + {% assign _attrs = _left | split: '" ' %} + + {% assign _width = nil %} + {% assign _height = nil %} + {% assign _lqip = nil %} + {% assign _class = nil %} + + {% for _attr in _attrs %} + {% unless _attr contains '=' %} + {% continue %} + {% endunless %} + + {% assign _pair = _attr | split: '="' %} + {% capture _key %}{{ _pair | first }}{% endcapture %} + {% capture _value %}{{ _pair | last | remove: '"' }}{% endcapture %} + + {% case _key %} + {% when 'width' %} + {% assign _width = _value %} + {% when 'height' %} + {% assign _height = _value %} + {% when 'src' %} + {% assign _src = _value %} + {% when 'lqip' %} + {% assign _lqip = _value %} + {% when 'class' %} + {% assign _class = _value %} + {% endcase %} + {% endfor %} + + + {% if _class %} + {% capture _old_class %}class="{{ _class }}"{% endcapture %} + {% assign _left = _left | remove: _old_class %} + {% endif %} + + {% assign _final_src = nil %} + + {% unless _src contains '//' %} + {% assign _final_src = _path_prefix | append: _src %} + {% capture _src_from %}"{{ _src }}"{% endcapture %} + {% capture _src_to %}"{{ _final_src }}"{% endcapture %} + {% assign _left = _left | replace: _src_from, _src_to %} + {% endunless %} + + {% if _lqip %} + {% unless _lqip contains ':' %} + {% assign _final_lqip = _path_prefix | append: _lqip %} + {% capture _lqip_from %}"{{ _lqip }}"{% endcapture %} + {% capture _lqip_to %}"{{ _final_lqip }}"{% endcapture %} + {% assign _left = _left | replace: _lqip_from, _lqip_to %} + {% endunless %} + {% endif %} + + + {% assign _left = _left | replace: 'src=', 'data-src=' %} + {% if _left contains 'class=' %} + {% assign _left = _left | replace: 'class="', 'class="lazyload '%} + {% else %} + {% assign _left = _left | append: ' class="lazyload"' %} + {% endif %} + + + {% if _lqip %} + {% assign _left = _left | replace: ' lqip=', ' data-lqip="true" src=' %} + {% else %} + {% if _width and _height %} + + {%- capture _svg -%} + src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 {{ _width }} {{ _height }}'%3E%3C/svg%3E" + {%- endcapture -%} + {% assign _left = _svg | append: ' ' | append: _left %} + {% assign _class = _class | append: ' shimmer' %} + {% endif %} + {% endif %} + + + {% assign _left = _left | append: ' data-proofer-ignore' %} + + {% if page.layout == 'home' %} + + {%- capture _wrapper_start -%} +
+ {%- endcapture -%} + {% assign _img_content = _img_content | append: _wrapper_start %} + {% assign _right = _right | prepend: '>` is wrapped by `` --> + {% assign _parent = _right | slice: 1, 4 %} + + {% if _parent == '' %} + + {% assign _size = _img_content | size | minus: 1 %} + {% capture _class %} + class="img-link{% unless _lqip %} shimmer{% endunless %}" + {% endcapture %} + {% assign _img_content = _img_content | slice: 0, _size | append: _class | append: '>' %} + {% else %} + + {%- capture _wrapper_start -%} + + {%- endcapture -%} + {% assign _img_content = _img_content | append: _wrapper_start %} + {% assign _right = _right | prepend: '> + {% assign _img_content = _img_content | append: debug | append: IMG_TAG | append: _left | append: _right %} + + {% endfor %} + + {% if _img_content %} + {% assign _content = _img_content %} + {% endif %} + +{% endif %} + + + +{% if _content contains '
' %} + {% assign _code_spippets = _content | split: '
' %} + {% assign _new_content = '' %} + + {% for _snippet in _code_spippets %} + + {% if forloop.last %} + {% assign _new_content = _new_content | append: _snippet %} + + {% else %} + + {% assign _left = _snippet | split: '><' | last%} + + {% if _left contains 'file="' %} + {% assign _label_text = _left | split: 'file="' | last | split: '"' | first %} + {% assign _label_icon = 'far fa-file-code fa-fw' %} + {% else %} + {% assign _lang = _left | split: 'language-' | last | split: ' ' | first %} + {% capture _label_text %}{% include language-alias.html language=_lang %}{% endcapture %} + {% assign _label_icon = 'fas fa-code fa-fw small' %} + {% endif %} + + {% capture _label %} + + {% endcapture %} + + {% assign _new_content = _new_content | append: _snippet + | append: '
' + | append: _label + | append: '
' + | append: '
' + %} + + {% endif %} + + {% endfor %} + + {% assign _content = _new_content %} + +{% endif %} + + + +{% assign heading_levels = '2,3,4,5' | split: ',' %} +{% assign _heading_content = _content %} + +{% for level in heading_levels %} + {% capture mark_start %}{% endcapture %} + + {% assign left = snippet | split: mark_end | first %} + {% assign right = snippet | slice: left.size, snippet.size %} + {% assign left = left | replace_first: '">', '">' | append: '' %} + + {% assign _new_content = _new_content | append: mark_start + | append: left | append: anchor | append: right + %} + + {% endfor %} + + {% assign _heading_content = _new_content %} + + {% endif %} +{% endfor %} + +{% assign _content = _heading_content %} + + +{{ _content }} diff --git a/_includes/related-posts.html b/_includes/related-posts.html new file mode 100644 index 0000000..8476a6d --- /dev/null +++ b/_includes/related-posts.html @@ -0,0 +1,104 @@ + + + +{% assign TOTAL_SIZE = 3 %} + + +{% assign TAG_SCORE = 1 %} + + +{% assign CATEGORY_SCORE = 0.5 %} + +{% assign SEPARATOR = ':' %} + +{% assign score_list = '' | split: '' %} +{% assign last_index = site.posts.size | minus: 1 %} + +{% for i in (0..last_index) %} + {% assign post = site.posts[i] %} + + {% if post.url == page.url %} + {% continue %} + {% endif %} + + {% assign score = 0 %} + + {% for tag in post.tags %} + {% if page.tags contains tag %} + {% assign score = score | plus: TAG_SCORE %} + {% endif %} + {% endfor %} + + {% for category in post.categories %} + {% if page.categories contains category %} + {% assign score = score | plus: CATEGORY_SCORE %} + {% endif %} + {% endfor %} + + {% if score > 0 %} + {% capture score_item %}{{ score }}{{ SEPARATOR }}{{ i }}{% endcapture %} + {% assign score_list = score_list | push: score_item %} + {% endif %} +{% endfor %} + +{% assign index_list = '' | split: '' %} + +{% if score_list.size > 0 %} + {% assign score_list = score_list | sort | reverse %} + {% for entry in score_list limit: TOTAL_SIZE %} + {% assign index = entry | split: SEPARATOR | last %} + {% assign index_list = index_list | push: index %} + {% endfor %} +{% endif %} + + +{% assign less = TOTAL_SIZE | minus: index_list.size %} + +{% if less > 0 %} + {% for i in (0..last_index) %} + {% assign post = site.posts[i] %} + {% if post.url != page.url %} + {% capture cur_index %}{{ i }}{% endcapture %} + {% unless index_list contains cur_index %} + {% assign index_list = index_list | push: cur_index %} + {% assign less = less | minus: 1 %} + {% if less <= 0 %} + {% break %} + {% endif %} + {% endunless %} + {% endif %} + {% endfor %} +{% endif %} + +{% if index_list.size > 0 %} + + +{% endif %} diff --git a/_includes/search-loader.html b/_includes/search-loader.html new file mode 100644 index 0000000..634325b --- /dev/null +++ b/_includes/search-loader.html @@ -0,0 +1,45 @@ + + +{% capture result_elem %} +
+ {title} + +

{snippet}

+
+{% endcapture %} + +{% capture not_found %}

{{ site.data.locales[include.lang].search.no_results }}

{% endcapture %} + + diff --git a/_includes/search-results.html b/_includes/search-results.html new file mode 100644 index 0000000..07981ff --- /dev/null +++ b/_includes/search-results.html @@ -0,0 +1,10 @@ + + +
+
+
+ {% include_cached trending-tags.html %} +
+
+
+
diff --git a/_includes/sidebar.html b/_includes/sidebar.html new file mode 100644 index 0000000..1c81685 --- /dev/null +++ b/_includes/sidebar.html @@ -0,0 +1,104 @@ + + + + diff --git a/_includes/toc.html b/_includes/toc.html new file mode 100644 index 0000000..1eb3dcd --- /dev/null +++ b/_includes/toc.html @@ -0,0 +1,13 @@ +{% assign enable_toc = false %} +{% if site.toc and page.toc %} + {% if page.content contains ' +
{{- site.data.locales[include.lang].panel.toc -}}
+ +
+{% endif %} diff --git a/_includes/topbar.html b/_includes/topbar.html new file mode 100644 index 0000000..0092f69 --- /dev/null +++ b/_includes/topbar.html @@ -0,0 +1,70 @@ + + +
+
+ + {% assign paths = page.url | split: '/' %} + + {% if paths.size == 0 or page.layout == 'home' %} + + {{ site.data.locales[include.lang].tabs.home | capitalize }} + + {% else %} + {% for item in paths %} + {% if forloop.first %} + + + {{ site.data.locales[include.lang].tabs.home | capitalize }} + + + + {% elsif forloop.last %} + {% if page.collection == 'tabs' %} + {{ site.data.locales[include.lang].tabs[item] | default: page.title }} + {% else %} + {{ page.title }} + {% endif %} + + {% elsif page.layout == 'category' or page.layout == 'tag' %} + + + {{ site.data.locales[include.lang].tabs[item] | default: page.title }} + + + {% endif %} + {% endfor %} + {% endif %} + + + + + +
+ {% if page.layout == 'home' %} + {{- site.data.locales[include.lang].title | default: site.title -}} + {% elsif page.collection == 'tabs' or page.layout == 'page' %} + {%- capture tab_key -%}{{ page.url | split: '/' }}{%- endcapture -%} + {{- site.data.locales[include.lang].tabs[tab_key] | default: page.title -}} + {% else %} + {{- site.data.locales[include.lang].layout[page.layout] | default: page.layout | capitalize -}} + {% endif %} +
+ + + + + + + {{ site.data.locales[include.lang].search.cancel }} +
+
diff --git a/_includes/trending-tags.html b/_includes/trending-tags.html new file mode 100644 index 0000000..6b1d732 --- /dev/null +++ b/_includes/trending-tags.html @@ -0,0 +1,46 @@ + + +{% assign MAX = 10 %} + +{% assign size_list = '' | split: '' %} +{% assign tag_list = '' | split: '' %} + +{% for tag in site.tags %} + {% assign size = tag | last | size %} + {% assign size_list = size_list | push: size %} + + {% assign tag_str = tag | first | append: '::' | append: size %} + {% assign tag_list = tag_list | push: tag_str %} +{% endfor %} + +{% assign size_list = size_list | sort | reverse %} + +{% assign tag_list = tag_list | sort_natural %} + +{% assign trending_tags = '' | split: '' %} + +{% for size in size_list limit: MAX %} + {% for tag_str in tag_list %} + {% assign tag = tag_str | split: '::' %} + {% assign tag_name = tag | first %} + {% assign tag_size = tag | last | plus: 0 %} + {% if tag_size == size %} + {% unless trending_tags contains tag_name %} + {% assign trending_tags = trending_tags | push: tag_name %} + {% break %} + {% endunless %} + {% endif %} + {% endfor %} +{% endfor %} + +{% if trending_tags.size > 0 %} +
+
{{- site.data.locales[include.lang].panel.trending_tags -}}
+
+ {% for tag_name in trending_tags %} + {% assign url = tag_name | slugify | url_encode | prepend: '/tags/' | append: '/' %} + + {% endfor %} +
+
+{% endif %} diff --git a/_includes/update-list.html b/_includes/update-list.html new file mode 100644 index 0000000..0ab7a45 --- /dev/null +++ b/_includes/update-list.html @@ -0,0 +1,39 @@ + + +{% assign MAX_SIZE = 5 %} + +{% assign all_list = '' | split: '' %} + +{% for post in site.posts %} + {% if post.last_modified_at and post.last_modified_at != post.date %} + {% capture elem %} + {{- post.last_modified_at | date: "%Y%m%d%H%M%S" -}}::{{- forloop.index0 -}} + {% endcapture %} + {% assign all_list = all_list | push: elem %} + {% endif %} +{% endfor %} + +{% assign all_list = all_list | sort | reverse %} + +{% assign update_list = '' | split: '' %} + +{% for entry in all_list limit: MAX_SIZE %} + {% assign update_list = update_list | push: entry %} +{% endfor %} + +{% if update_list.size > 0 %} +
+
{{- site.data.locales[include.lang].panel.lastmod -}}
+
    + {% for item in update_list %} + {% assign index = item | split: '::' | last | plus: 0 %} + {% assign post = site.posts[index] %} + {% assign url = post.url | relative_url %} +
  • + {{ post.title }} +
  • + {% endfor %} +
+
+ +{% endif %} diff --git a/_javascript/_copyright b/_javascript/_copyright new file mode 100644 index 0000000..dedc8ed --- /dev/null +++ b/_javascript/_copyright @@ -0,0 +1,3 @@ +Chirpy v<%= pkg.version %> (<%= pkg.homepage %>) +© 2019 <%= pkg.author %> +<%= pkg.license %> Licensed diff --git a/_javascript/categories.js b/_javascript/categories.js new file mode 100644 index 0000000..15d8251 --- /dev/null +++ b/_javascript/categories.js @@ -0,0 +1,7 @@ +import { basic, initSidebar, initTopbar } from './modules/layouts'; +import { categoryCollapse } from './modules/plugins'; + +basic(); +initSidebar(); +initTopbar(); +categoryCollapse(); diff --git a/_javascript/commons.js b/_javascript/commons.js new file mode 100644 index 0000000..05a9765 --- /dev/null +++ b/_javascript/commons.js @@ -0,0 +1,5 @@ +import { basic, initSidebar, initTopbar } from './modules/layouts'; + +basic(); +initSidebar(); +initTopbar(); diff --git a/_javascript/home.js b/_javascript/home.js new file mode 100644 index 0000000..70af328 --- /dev/null +++ b/_javascript/home.js @@ -0,0 +1,8 @@ +import { basic, initSidebar, initTopbar } from './modules/layouts'; +import { initLocaleDatetime, imgLazy } from './modules/plugins'; + +basic(); +initSidebar(); +initTopbar(); +initLocaleDatetime(); +imgLazy(); diff --git a/_javascript/misc.js b/_javascript/misc.js new file mode 100644 index 0000000..c7a19d6 --- /dev/null +++ b/_javascript/misc.js @@ -0,0 +1,7 @@ +import { basic, initSidebar, initTopbar } from './modules/layouts'; +import { initLocaleDatetime } from './modules/plugins'; + +basic(); +initSidebar(); +initTopbar(); +initLocaleDatetime(); diff --git a/_javascript/modules/components/back-to-top.js b/_javascript/modules/components/back-to-top.js new file mode 100644 index 0000000..777a659 --- /dev/null +++ b/_javascript/modules/components/back-to-top.js @@ -0,0 +1,20 @@ +/** + * Reference: https://bootsnipp.com/snippets/featured/link-to-top-page + */ + +export function back2top() { + const $window = $(window); + const $btn = $('#back-to-top'); + + $window.on('scroll', () => { + if ($window.scrollTop() > 50) { + $btn.fadeIn(); + } else { + $btn.fadeOut(); + } + }); + + $btn.on('click', () => { + $window.scrollTop(0); + }); +} diff --git a/_javascript/modules/components/category-collapse.js b/_javascript/modules/components/category-collapse.js new file mode 100644 index 0000000..d6027a1 --- /dev/null +++ b/_javascript/modules/components/category-collapse.js @@ -0,0 +1,36 @@ +/** + * Tab 'Categories' expand/close effect. + */ +const childPrefix = 'l_'; +const parentPrefix = 'h_'; +const collapse = $('.collapse'); + +export function categoryCollapse() { + /* close up top-category */ + collapse.on('hide.bs.collapse', function () { + /* Bootstrap collapse events. 
*/ const parentId = + parentPrefix + $(this).attr('id').substring(childPrefix.length); + if (parentId) { + $(`#${parentId} .far.fa-folder-open`).attr( + 'class', + 'far fa-folder fa-fw' + ); + $(`#${parentId} i.fas`).addClass('rotate'); + $(`#${parentId}`).removeClass('hide-border-bottom'); + } + }); + + /* expand the top category */ + collapse.on('show.bs.collapse', function () { + const parentId = + parentPrefix + $(this).attr('id').substring(childPrefix.length); + if (parentId) { + $(`#${parentId} .far.fa-folder`).attr( + 'class', + 'far fa-folder-open fa-fw' + ); + $(`#${parentId} i.fas`).removeClass('rotate'); + $(`#${parentId}`).addClass('hide-border-bottom'); + } + }); +} diff --git a/_javascript/modules/components/clipboard.js b/_javascript/modules/components/clipboard.js new file mode 100644 index 0000000..f803843 --- /dev/null +++ b/_javascript/modules/components/clipboard.js @@ -0,0 +1,123 @@ +/** + * Clipboard functions + * + * Dependencies: + * - popper.js (https://github.com/popperjs/popper-core) + * - clipboard.js (https://github.com/zenorocha/clipboard.js) + */ + +const clipboardSelector = '.code-header>button'; +const ICON_SUCCESS = 'fas fa-check'; +const ATTR_TIMEOUT = 'timeout'; +const ATTR_TITLE_SUCCEED = 'data-title-succeed'; +const ATTR_TITLE_ORIGIN = 'data-bs-original-title'; +const TIMEOUT = 2000; // in milliseconds + +function isLocked(node) { + if ($(node)[0].hasAttribute(ATTR_TIMEOUT)) { + let timeout = $(node).attr(ATTR_TIMEOUT); + if (Number(timeout) > Date.now()) { + return true; + } + } + return false; +} + +function lock(node) { + $(node).attr(ATTR_TIMEOUT, Date.now() + TIMEOUT); +} + +function unlock(node) { + $(node).removeAttr(ATTR_TIMEOUT); +} + +function getIcon(btn) { + let iconNode = $(btn).children(); + return iconNode.attr('class'); +} + +const ICON_DEFAULT = getIcon(clipboardSelector); + +function showTooltip(btn) { + const succeedTitle = $(btn).attr(ATTR_TITLE_SUCCEED); + $(btn).attr(ATTR_TITLE_ORIGIN, succeedTitle).tooltip('show'); +} + +function hideTooltip(btn) { + $(btn).tooltip('hide').removeAttr(ATTR_TITLE_ORIGIN); +} + +function setSuccessIcon(btn) { + let btnNode = $(btn); + let iconNode = btnNode.children(); + iconNode.attr('class', ICON_SUCCESS); +} + +function resumeIcon(btn) { + let btnNode = $(btn); + let iconNode = btnNode.children(); + iconNode.attr('class', ICON_DEFAULT); +} + +export function initClipboard() { + // Initial the clipboard.js object + if ($(clipboardSelector).length) { + const clipboard = new ClipboardJS(clipboardSelector, { + target(trigger) { + let codeBlock = trigger.parentNode.nextElementSibling; + return codeBlock.querySelector('code .rouge-code'); + } + }); + + const clipboardList = document.querySelectorAll(clipboardSelector); + [...clipboardList].map( + (elem) => + new bootstrap.Tooltip(elem, { + placement: 'left' + }) + ); + + clipboard.on('success', (e) => { + e.clearSelection(); + + const trigger = e.trigger; + if (isLocked(trigger)) { + return; + } + + setSuccessIcon(trigger); + showTooltip(trigger); + lock(trigger); + + setTimeout(() => { + hideTooltip(trigger); + resumeIcon(trigger); + unlock(trigger); + }, TIMEOUT); + }); + } + + /* --- Post link sharing --- */ + + $('#copy-link').on('click', (e) => { + let target = $(e.target); + + if (isLocked(target)) { + return; + } + + // Copy URL to clipboard + navigator.clipboard.writeText(window.location.href).then(() => { + const defaultTitle = target.attr(ATTR_TITLE_ORIGIN); + const succeedTitle = target.attr(ATTR_TITLE_SUCCEED); + // Switch tooltip title + 
target.attr(ATTR_TITLE_ORIGIN, succeedTitle).tooltip('show'); + lock(target); + + setTimeout(() => { + target.attr(ATTR_TITLE_ORIGIN, defaultTitle); + unlock(target); + }, TIMEOUT); + }); + }); +} diff --git a/_javascript/modules/components/img-lazyload.js b/_javascript/modules/components/img-lazyload.js new file mode 100644 index 0000000..edad9dd --- /dev/null +++ b/_javascript/modules/components/img-lazyload.js @@ -0,0 +1,27 @@ +/** + * Set up image lazy-load + */ + +function stopShimmer($node) { + $node.parent().removeClass('shimmer'); +} + +export function imgLazy() { + const $images = $('#core-wrapper img[data-src]'); + + if ($images.length <= 0) { + return; + } + + /* Stop shimmer when image loaded */ + document.addEventListener('lazyloaded', function (e) { + stopShimmer($(e.target)); + }); + + /* Stop shimmer from cached images */ + $images.each(function () { + if ($(this).hasClass('ls-is-cached')) { + stopShimmer($(this)); + } + }); +} diff --git a/_javascript/modules/components/img-popup.js b/_javascript/modules/components/img-popup.js new file mode 100644 index 0000000..7f78d99 --- /dev/null +++ b/_javascript/modules/components/img-popup.js @@ -0,0 +1,22 @@ +/** + * Set up image popup + * + * See: https://github.com/dimsemenov/Magnific-Popup + */ + +export function imgPopup() { + if ($('.popup') <= 0) { + return; + } + + $('.popup').magnificPopup({ + type: 'image', + closeOnContentClick: true, + showCloseBtn: false, + zoom: { + enabled: true, + duration: 300, + easing: 'ease-in-out' + } + }); +} diff --git a/_javascript/modules/components/locale-datetime.js b/_javascript/modules/components/locale-datetime.js new file mode 100644 index 0000000..214f2bf --- /dev/null +++ b/_javascript/modules/components/locale-datetime.js @@ -0,0 +1,51 @@ +/** + * Update month/day to locale datetime + * + * Requirement: + */ + +/* A tool for locale datetime */ +class LocaleHelper { + static get attrTimestamp() { + return 'data-ts'; + } + + static get attrDateFormat() { + return 'data-df'; + } + + static get locale() { + return $('html').attr('lang').substring(0, 2); + } + + static getTimestamp(elem) { + return Number(elem.attr(LocaleHelper.attrTimestamp)); // unix timestamp + } + + static getDateFormat(elem) { + return elem.attr(LocaleHelper.attrDateFormat); + } +} + +export function initLocaleDatetime() { + dayjs.locale(LocaleHelper.locale); + dayjs.extend(window.dayjs_plugin_localizedFormat); + + $(`[${LocaleHelper.attrTimestamp}]`).each(function () { + const date = dayjs.unix(LocaleHelper.getTimestamp($(this))); + const text = date.format(LocaleHelper.getDateFormat($(this))); + $(this).text(text); + $(this).removeAttr(LocaleHelper.attrTimestamp); + $(this).removeAttr(LocaleHelper.attrDateFormat); + + // setup tooltips + const tooltip = $(this).attr('data-bs-toggle'); + if (typeof tooltip === 'undefined' || tooltip !== 'tooltip') { + return; + } + + const tooltipText = date.format('llll'); // see: https://day.js.org/docs/en/display/format#list-of-localized-formats + $(this).attr('data-bs-title', tooltipText); + new bootstrap.Tooltip($(this)); + }); +} diff --git a/_javascript/modules/components/mode-watcher.js b/_javascript/modules/components/mode-watcher.js new file mode 100644 index 0000000..7b2298a --- /dev/null +++ b/_javascript/modules/components/mode-watcher.js @@ -0,0 +1,21 @@ +/** + * Add listener for theme mode toggle + */ +const $toggleElem = $('.mode-toggle'); + +export function modeWatcher() { + if ($toggleElem.length === 0) { + return; + } + + $toggleElem.off().on('click', (e) => { + 
const $target = $(e.target); + let $btn = + $target.prop('tagName') === 'button'.toUpperCase() + ? $target + : $target.parent(); + + modeToggle.flipMode(); // modeToggle: `_includes/mode-toggle.html` + $btn.trigger('blur'); // remove the clicking outline + }); +} diff --git a/_javascript/modules/components/search-display.js b/_javascript/modules/components/search-display.js new file mode 100644 index 0000000..7862f39 --- /dev/null +++ b/_javascript/modules/components/search-display.js @@ -0,0 +1,122 @@ +/** + * This script make #search-result-wrapper switch to unloaded or shown automatically. + */ +const $btnSbTrigger = $('#sidebar-trigger'); +const $btnSearchTrigger = $('#search-trigger'); +const $btnCancel = $('#search-cancel'); +const $content = $('#main>.row'); +const $topbarTitle = $('#topbar-title'); +const $searchWrapper = $('#search-wrapper'); +const $resultWrapper = $('#search-result-wrapper'); +const $results = $('#search-results'); +const $input = $('#search-input'); +const $hints = $('#search-hints'); +const $viewport = $('html,body'); + +// class names +const C_LOADED = 'loaded'; +const C_UNLOADED = 'unloaded'; +const C_FOCUS = 'input-focus'; +const C_FLEX = 'd-flex'; + +class ScrollBlocker { + static offset = 0; + static resultVisible = false; + + static on() { + ScrollBlocker.offset = window.scrollY; + $viewport.scrollTop(0); + } + + static off() { + $viewport.scrollTop(ScrollBlocker.offset); + } +} + +/*--- Actions in mobile screens (Sidebar hidden) ---*/ +class MobileSearchBar { + static on() { + $btnSbTrigger.addClass(C_UNLOADED); + $topbarTitle.addClass(C_UNLOADED); + $btnSearchTrigger.addClass(C_UNLOADED); + $searchWrapper.addClass(C_FLEX); + $btnCancel.addClass(C_LOADED); + } + + static off() { + $btnCancel.removeClass(C_LOADED); + $searchWrapper.removeClass(C_FLEX); + $btnSbTrigger.removeClass(C_UNLOADED); + $topbarTitle.removeClass(C_UNLOADED); + $btnSearchTrigger.removeClass(C_UNLOADED); + } +} + +class ResultSwitch { + static on() { + if (!ScrollBlocker.resultVisible) { + // the block method must be called before $(#main) unloaded. 
+ ScrollBlocker.on(); + $resultWrapper.removeClass(C_UNLOADED); + $content.addClass(C_UNLOADED); + ScrollBlocker.resultVisible = true; + } + } + + static off() { + if (ScrollBlocker.resultVisible) { + $results.empty(); + if ($hints.hasClass(C_UNLOADED)) { + $hints.removeClass(C_UNLOADED); + } + $resultWrapper.addClass(C_UNLOADED); + $content.removeClass(C_UNLOADED); + + // now the release method must be called after $(#main) display + ScrollBlocker.off(); + + $input.val(''); + ScrollBlocker.resultVisible = false; + } + } +} + +function isMobileView() { + return $btnCancel.hasClass(C_LOADED); +} + +export function displaySearch() { + $btnSearchTrigger.on('click', function () { + MobileSearchBar.on(); + ResultSwitch.on(); + $input.trigger('focus'); + }); + + $btnCancel.on('click', function () { + MobileSearchBar.off(); + ResultSwitch.off(); + }); + + $input.on('focus', function () { + $searchWrapper.addClass(C_FOCUS); + }); + + $input.on('focusout', function () { + $searchWrapper.removeClass(C_FOCUS); + }); + + $input.on('input', () => { + if ($input.val() === '') { + if (isMobileView()) { + $hints.removeClass(C_UNLOADED); + } else { + ResultSwitch.off(); + } + } else { + ResultSwitch.on(); + if (isMobileView()) { + $hints.addClass(C_UNLOADED); + } + } + }); +} diff --git a/_javascript/modules/components/sidebar.js b/_javascript/modules/components/sidebar.js new file mode 100644 index 0000000..9d8567e --- /dev/null +++ b/_javascript/modules/components/sidebar.js @@ -0,0 +1,25 @@ +/** + * Expand or close the sidebar in mobile screens. + */ + +const $body = $('body'); +const ATTR_DISPLAY = 'sidebar-display'; + +class SidebarUtil { + static isExpanded = false; + + static toggle() { + if (SidebarUtil.isExpanded === false) { + $body.attr(ATTR_DISPLAY, ''); + } else { + $body.removeAttr(ATTR_DISPLAY); + } + + SidebarUtil.isExpanded = !SidebarUtil.isExpanded; + } +} + +export function sidebarExpand() { + $('#sidebar-trigger').on('click', SidebarUtil.toggle); + $('#mask').on('click', SidebarUtil.toggle); +} diff --git a/_javascript/modules/components/toc.js b/_javascript/modules/components/toc.js new file mode 100644 index 0000000..dd46994 --- /dev/null +++ b/_javascript/modules/components/toc.js @@ -0,0 +1,13 @@ +export function toc() { + if (document.querySelector('#core-wrapper h2,#core-wrapper h3')) { + // see: https://github.com/tscanlin/tocbot#usage + tocbot.init({ + tocSelector: '#toc', + contentSelector: '.post-content', + ignoreSelector: '[data-toc-skip]', + headingSelector: 'h2, h3', + orderedList: false, + scrollSmooth: false + }); + } +} diff --git a/_javascript/modules/components/tooltip-loader.js b/_javascript/modules/components/tooltip-loader.js new file mode 100644 index 0000000..a906600 --- /dev/null +++ b/_javascript/modules/components/tooltip-loader.js @@ -0,0 +1,12 @@ +/** + * Initial Bootstrap Tooltip. 
+ */ +export function loadTooptip() { + const tooltipTriggerList = document.querySelectorAll( + '[data-bs-toggle="tooltip"]' + ); + + [...tooltipTriggerList].map( + (tooltipTriggerEl) => new bootstrap.Tooltip(tooltipTriggerEl) + ); +} diff --git a/_javascript/modules/layouts.js b/_javascript/modules/layouts.js new file mode 100644 index 0000000..28f7962 --- /dev/null +++ b/_javascript/modules/layouts.js @@ -0,0 +1,3 @@ +export { basic } from './layouts/basic'; +export { initSidebar } from './layouts/sidebar'; +export { initTopbar } from './layouts/topbar'; diff --git a/_javascript/modules/layouts/basic.js b/_javascript/modules/layouts/basic.js new file mode 100644 index 0000000..fb36a8b --- /dev/null +++ b/_javascript/modules/layouts/basic.js @@ -0,0 +1,7 @@ +import { back2top } from '../components/back-to-top'; +import { loadTooptip } from '../components/tooltip-loader'; + +export function basic() { + back2top(); + loadTooptip(); +} diff --git a/_javascript/modules/layouts/sidebar.js b/_javascript/modules/layouts/sidebar.js new file mode 100644 index 0000000..8795693 --- /dev/null +++ b/_javascript/modules/layouts/sidebar.js @@ -0,0 +1,7 @@ +import { modeWatcher } from '../components/mode-watcher'; +import { sidebarExpand } from '../components/sidebar'; + +export function initSidebar() { + modeWatcher(); + sidebarExpand(); +} diff --git a/_javascript/modules/layouts/topbar.js b/_javascript/modules/layouts/topbar.js new file mode 100644 index 0000000..cfcd0ed --- /dev/null +++ b/_javascript/modules/layouts/topbar.js @@ -0,0 +1,5 @@ +import { displaySearch } from '../components/search-display'; + +export function initTopbar() { + displaySearch(); +} diff --git a/_javascript/modules/plugins.js b/_javascript/modules/plugins.js new file mode 100644 index 0000000..fa7a7dd --- /dev/null +++ b/_javascript/modules/plugins.js @@ -0,0 +1,6 @@ +export { categoryCollapse } from './components/category-collapse'; +export { initClipboard } from './components/clipboard'; +export { imgLazy } from './components/img-lazyload'; +export { imgPopup } from './components/img-popup'; +export { initLocaleDatetime } from './components/locale-datetime'; +export { toc } from './components/toc'; diff --git a/_javascript/page.js b/_javascript/page.js new file mode 100644 index 0000000..7b31813 --- /dev/null +++ b/_javascript/page.js @@ -0,0 +1,9 @@ +import { basic, initSidebar, initTopbar } from './modules/layouts'; +import { imgLazy, imgPopup, initClipboard } from './modules/plugins'; + +basic(); +initSidebar(); +initTopbar(); +imgLazy(); +imgPopup(); +initClipboard(); diff --git a/_javascript/post.js b/_javascript/post.js new file mode 100644 index 0000000..9a5a61b --- /dev/null +++ b/_javascript/post.js @@ -0,0 +1,17 @@ +import { basic, initSidebar, initTopbar } from './modules/layouts'; +import { + imgLazy, + imgPopup, + initLocaleDatetime, + initClipboard, + toc +} from './modules/plugins'; + +basic(); +initSidebar(); +initTopbar(); +imgLazy(); +imgPopup(); +initLocaleDatetime(); +initClipboard(); +toc(); diff --git a/_layouts/archives.html b/_layouts/archives.html new file mode 100644 index 0000000..18e95f5 --- /dev/null +++ b/_layouts/archives.html @@ -0,0 +1,36 @@ +--- +layout: page +# The Archives of posts. +--- + +{% include lang.html %} + +{% assign df_strftime_m = site.data.locales[lang].df.archives.strftime | default: '/ %m' %} +{% assign df_dayjs_m = site.data.locales[lang].df.archives.dayjs | default: '/ MM' %} + +
+ +{% for post in site.posts %} + {% capture cur_year %}{{ post.date | date: "%Y" }}{% endcapture %} + + {% if cur_year != last_year %} + {% unless forloop.first %}{% endunless %} +
{{ cur_year }}
+
    + {% assign last_year = cur_year %} + {% endif %} + +
  • + {% assign ts = post.date | date: '%s' %} + {{ post.date | date: "%d" }} + + {{ post.date | date: df_strftime_m }} + + {{ post.title }} +
  • + + {% if forloop.last %}
{% endif %} + +{% endfor %} + +
diff --git a/_layouts/categories.html b/_layouts/categories.html new file mode 100644 index 0000000..0515097 --- /dev/null +++ b/_layouts/categories.html @@ -0,0 +1,138 @@ +--- +layout: page +# All the Categories of posts +--- + +{% include lang.html %} + +{% assign HEAD_PREFIX = 'h_' %} +{% assign LIST_PREFIX = 'l_' %} + +{% assign group_index = 0 %} + +{% assign sort_categories = site.categories | sort %} + +{% for category in sort_categories %} + {% assign category_name = category | first %} + {% assign posts_of_category = category | last %} + {% assign first_post = posts_of_category | first %} + + {% if category_name == first_post.categories[0] %} + {% assign sub_categories = '' | split: '' %} + + {% for post in posts_of_category %} + {% assign second_category = post.categories[1] %} + {% if second_category %} + {% unless sub_categories contains second_category %} + {% assign sub_categories = sub_categories | push: second_category %} + {% endunless %} + {% endif %} + {% endfor %} + + {% assign sub_categories = sub_categories | sort %} + {% assign sub_categories_size = sub_categories | size %} + +
+ +
+ + + + {% capture _category_url %}/categories/{{ category_name | slugify | url_encode }}/{% endcapture %} + {{ category_name }} + + + {% assign top_posts_size = site.categories[category_name] | size %} + + {% if sub_categories_size > 0 %} + {{ sub_categories_size }} + {% if sub_categories_size > 1 %} + {{ + site.data.locales[lang].categories.category_measure.plural + | default: site.data.locales[lang].categories.category_measure + }} + {% else %} + {{ + site.data.locales[lang].categories.category_measure.singular + | default: site.data.locales[lang].categories.category_measure + }} + {% endif -%} + , + {% endif %} + + {{ top_posts_size }} + + {% if top_posts_size > 1 %} + {{ + site.data.locales[lang].categories.post_measure.plural + | default: site.data.locales[lang].categories.post_measure + }} + {% else %} + {{ + site.data.locales[lang].categories.post_measure.singular + | default: site.data.locales[lang].categories.post_measure + }} + {% endif %} + + + + + {% if sub_categories_size > 0 %} + + + + {% else %} + + + + {% endif %} +
+ + + + {% if sub_categories_size > 0 %} +
+
    + {% for sub_category in sub_categories %} +
  • + + + {% capture _sub_ctg_url %}/categories/{{ sub_category | slugify | url_encode }}/{% endcapture %} + {{ sub_category }} + + {% assign posts_size = site.categories[sub_category] | size %} + + {{ posts_size }} + + {% if posts_size > 1 %} + {{ + site.data.locales[lang].categories.post_measure.plural + | default: site.data.locales[lang].categories.post_measure + }} + {% else %} + {{ + site.data.locales[lang].categories.post_measure.singular + | default: site.data.locales[lang].categories.post_measure + }} + {% endif %} + +
  • + {% endfor %} +
+
+ {% endif %} +
+ + + {% assign group_index = group_index | plus: 1 %} + {% endif %} +{% endfor %} diff --git a/_layouts/category.html b/_layouts/category.html new file mode 100644 index 0000000..84fa487 --- /dev/null +++ b/_layouts/category.html @@ -0,0 +1,24 @@ +--- +layout: page +# The Category layout +--- + +{% include lang.html %} + +
+

+ + {{ page.title }} + {{ page.posts | size }} +

+ +
    + {% for post in page.posts %} +
  • + {{ post.title }} + + {% include datetime.html date=post.date wrap='span' class='text-muted small' lang=lang %} +
  • + {% endfor %} +
+
diff --git a/_layouts/compress.html b/_layouts/compress.html new file mode 100644 index 0000000..bb34487 --- /dev/null +++ b/_layouts/compress.html @@ -0,0 +1,10 @@ +--- +# Jekyll layout that compresses HTML +# v3.1.0 +# http://jch.penibelst.de/ +# © 2014–2015 Anatol Broder +# MIT License +--- + +{% capture _LINE_FEED %} +{% endcapture %}{% if site.compress_html.ignore.envs contains jekyll.environment or site.compress_html.ignore.envs == "all" %}{{ content }}{% else %}{% capture _content %}{{ content }}{% endcapture %}{% assign _profile = site.compress_html.profile %}{% if site.compress_html.endings == "all" %}{% assign _endings = "html head body li dt dd optgroup option colgroup caption thead tbody tfoot tr td th" | split: " " %}{% else %}{% assign _endings = site.compress_html.endings %}{% endif %}{% for _element in _endings %}{% capture _end %}{% endcapture %}{% assign _content = _content | remove: _end %}{% endfor %}{% if _profile and _endings %}{% assign _profile_endings = _content | size | plus: 1 %}{% endif %}{% for _element in site.compress_html.startings %}{% capture _start %}<{{ _element }}>{% endcapture %}{% assign _content = _content | remove: _start %}{% endfor %}{% if _profile and site.compress_html.startings %}{% assign _profile_startings = _content | size | plus: 1 %}{% endif %}{% if site.compress_html.comments == "all" %}{% assign _comments = "" | split: " " %}{% else %}{% assign _comments = site.compress_html.comments %}{% endif %}{% if _comments.size == 2 %}{% capture _comment_befores %}.{{ _content }}{% endcapture %}{% assign _comment_befores = _comment_befores | split: _comments.first %}{% for _comment_before in _comment_befores %}{% if forloop.first %}{% continue %}{% endif %}{% capture _comment_outside %}{% if _carry %}{{ _comments.first }}{% endif %}{{ _comment_before }}{% endcapture %}{% capture _comment %}{% unless _carry %}{{ _comments.first }}{% endunless %}{{ _comment_outside | split: _comments.last | first }}{% if _comment_outside contains _comments.last %}{{ _comments.last }}{% assign _carry = false %}{% else %}{% assign _carry = true %}{% endif %}{% endcapture %}{% assign _content = _content | remove_first: _comment %}{% endfor %}{% if _profile %}{% assign _profile_comments = _content | size | plus: 1 %}{% endif %}{% endif %}{% assign _pre_befores = _content | split: "" %}{% assign _pres_after = "" %}{% if _pres.size != 0 %}{% if site.compress_html.blanklines %}{% assign _lines = _pres.last | split: _LINE_FEED %}{% capture _pres_after %}{% for _line in _lines %}{% assign _trimmed = _line | split: " " | join: " " %}{% if _trimmed != empty or forloop.last %}{% unless forloop.first %}{{ _LINE_FEED }}{% endunless %}{{ _line }}{% endif %}{% endfor %}{% endcapture %}{% else %}{% assign _pres_after = _pres.last | split: " " | join: " " %}{% endif %}{% endif %}{% capture _content %}{{ _content }}{% if _pre_before contains "
" %}{% endif %}{% unless _pre_before contains "
" and _pres.size == 1 %}{{ _pres_after }}{% endunless %}{% endcapture %}{% endfor %}{% if _profile %}{% assign _profile_collapse = _content | size | plus: 1 %}{% endif %}{% if site.compress_html.clippings == "all" %}{% assign _clippings = "html head title base link meta style body article section nav aside h1 h2 h3 h4 h5 h6 hgroup header footer address p hr blockquote ol ul li dl dt dd figure figcaption main div table caption colgroup col tbody thead tfoot tr td th" | split: " " %}{% else %}{% assign _clippings = site.compress_html.clippings %}{% endif %}{% for _element in _clippings %}{% assign _edges = " ;; ;" | replace: "e", _element | split: ";" %}{% assign _content = _content | replace: _edges[0], _edges[1] | replace: _edges[2], _edges[3] | replace: _edges[4], _edges[5] %}{% endfor %}{% if _profile and _clippings %}{% assign _profile_clippings = _content | size | plus: 1 %}{% endif %}{{ _content }}{% if _profile %}
Step Bytes
raw {{ content | size }}{% if _profile_endings %}
endings {{ _profile_endings }}{% endif %}{% if _profile_startings %}
startings {{ _profile_startings }}{% endif %}{% if _profile_comments %}
comments {{ _profile_comments }}{% endif %}{% if _profile_collapse %}
collapse {{ _profile_collapse }}{% endif %}{% if _profile_clippings %}
clippings {{ _profile_clippings }}{% endif %}
{% endif %}{% endif %} diff --git a/_layouts/default.html b/_layouts/default.html new file mode 100644 index 0000000..0047570 --- /dev/null +++ b/_layouts/default.html @@ -0,0 +1,76 @@ +--- +layout: compress +# Default layout +--- + + + +{% include origin-type.html %} + +{% include lang.html %} + +{% capture prefer_mode %} + {% if site.theme_mode %} + data-mode="{{ site.theme_mode }}" + {% endif %} +{% endcapture %} + + + + {% include head.html %} + + + {% include sidebar.html lang=lang %} + +
+
+ {% include topbar.html lang=lang %} + {{ content }} + {% include_cached search-results.html lang=lang %} +
+
+ + {% include_cached footer.html lang=lang %} + +
+ + + + {% if site.pwa.enabled %} + + {% endif %} + + {% include js-selector.html %} + + {% if page.mermaid %} + {% include mermaid.html %} + {% endif %} + + {% include_cached search-loader.html %} + + diff --git a/_layouts/home.html b/_layouts/home.html new file mode 100644 index 0000000..4cda9e4 --- /dev/null +++ b/_layouts/home.html @@ -0,0 +1,110 @@ +--- +layout: page +refactor: true +--- + +{% include lang.html %} + +{% assign pinned = site.posts | where: 'pin', 'true' %} +{% assign default = site.posts | where_exp: 'item', 'item.pin != true and item.hidden != true' %} + +{% assign posts = '' | split: '' %} + + + +{% assign offset = paginator.page | minus: 1 | times: paginator.per_page %} +{% assign pinned_num = pinned.size | minus: offset %} + +{% if pinned_num > 0 %} + {% for i in (offset..pinned.size) limit: pinned_num %} + {% assign posts = posts | push: pinned[i] %} + {% endfor %} +{% else %} + {% assign pinned_num = 0 %} +{% endif %} + + + +{% assign default_beg = offset | minus: pinned.size %} + +{% if default_beg < 0 %} + {% assign default_beg = 0 %} +{% endif %} + +{% assign default_num = paginator.posts | size | minus: pinned_num %} +{% assign default_end = default_beg | plus: default_num | minus: 1 %} + +{% if default_num > 0 %} + {% for i in (default_beg..default_end) %} + {% assign posts = posts | push: default[i] %} + {% endfor %} +{% endif %} + +
+ {% for post in posts %} + +
+ {% if post.image %} + {% if post.image.lqip %} + {% capture lqip %}lqip="{{ post.image.lqip }}"{% endcapture %} + {% endif %} + + {% assign src = post.image.path | default: post.image %} + {% unless src contains '//' %} + {% assign src = post.img_path | append: '/' | append: src | replace: '//', '/' %} + {% endunless %} + + {% assign alt = post.image.alt | xml_escape | default: 'Preview Image' %} + + {{ alt }} + {% endif %} + +
+

+ {{ post.title }} +

+ +
+

+ {% include no-linenos.html content=post.content %} + {{ content | markdownify | strip_html | truncate: 200 | escape }} +

+
+ + + +
+ +
+
+ {% endfor %} +
+ + +{% if paginator.total_pages > 1 %} + {% include post-paginator.html %} +{% endif %} diff --git a/_layouts/page.html b/_layouts/page.html new file mode 100644 index 0000000..148f873 --- /dev/null +++ b/_layouts/page.html @@ -0,0 +1,68 @@ +--- +layout: default +--- + +{% include lang.html %} +{% include origin-type.html %} + +{% if layout.tail_includes %} + {% assign has_tail = true %} +{% endif %} + +
+ +
+ {% capture padding %} + {% unless page.layout == 'home' %}px-1{% endunless %} + {% endcapture %} + +
+ {% capture _content %} + {% if layout.refactor or page.layout == 'page' %} + {% include refactor-content.html content=content lang=lang %} + {% else %} + {{ content }} + {% endif %} + {% endcapture %} + + {% if page.layout == 'page' or page.collection == 'tabs' %} + {% assign tab_key = page.title | downcase %} + {% assign title = site.data.locales[lang].tabs[tab_key] | default: page.title %} +

+ {{ title }} +

+
+ {{ _content }} +
+ {% else %} + {{ _content }} + {% endif %} +
+
+ + + +
+
+ {% include_cached update-list.html lang=lang %} + {% include_cached trending-tags.html lang=lang %} +
+ + {% for _include in layout.panel_includes %} + {% assign _include_path = _include | append: '.html' %} + {% include {{ _include_path }} lang=lang %} + {% endfor %} +
+
+ + +{% if has_tail %} +
+
+ {% for _include in layout.tail_includes %} + {% assign _include_path = _include | append: '.html' %} + {% include {{ _include_path }} lang=lang %} + {% endfor %} +
+
+{% endif %} diff --git a/_layouts/post.html b/_layouts/post.html new file mode 100644 index 0000000..77822a6 --- /dev/null +++ b/_layouts/post.html @@ -0,0 +1,133 @@ +--- +layout: page +refactor: true +panel_includes: + - toc +tail_includes: + - related-posts + - post-nav + - comments +--- + +{% include lang.html %} + +

{{ page.title }}

+ + + +
+ {{ content }} +
+ +
+ + + {% if page.categories.size > 0 %} + + {% endif %} + + + {% if page.tags.size > 0 %} + + {% endif %} + +
+
+ + {% if site.data.locales[lang].copyright.license.template %} + + {% capture _replacement %} + + {{ site.data.locales[lang].copyright.license.name }} + + {% endcapture %} + + {{ site.data.locales[lang].copyright.license.template | replace: ':LICENSE_NAME', _replacement }} + + {% endif %} +
+ + {% include post-sharing.html lang=lang %} + +
+ +
diff --git a/_layouts/tag.html b/_layouts/tag.html new file mode 100644 index 0000000..3b90b8c --- /dev/null +++ b/_layouts/tag.html @@ -0,0 +1,23 @@ +--- +layout: page +# The layout for Tag page +--- + +{% include lang.html %} + +
+

+ + {{ page.title }} + {{ page.posts | size }} +

+
    + {% for post in page.posts %} +
  • + {{ post.title }} + + {% include datetime.html date=post.date wrap='span' class='text-muted small' lang=lang %} +
  • + {% endfor %} +
+
diff --git a/_layouts/tags.html b/_layouts/tags.html new file mode 100644 index 0000000..7800ca0 --- /dev/null +++ b/_layouts/tags.html @@ -0,0 +1,22 @@ +--- +layout: page +# All the Tags of posts. +--- + +
+ {% assign tags = '' | split: '' %} + {% for t in site.tags %} + {% assign tags = tags | push: t[0] %} + {% endfor %} + + {% assign sorted_tags = tags | sort_natural %} + + {% for t in sorted_tags %} + + {% endfor %} +
diff --git a/_plugins/posts-lastmod-hook.rb b/_plugins/posts-lastmod-hook.rb new file mode 100644 index 0000000..1fd6ecf --- /dev/null +++ b/_plugins/posts-lastmod-hook.rb @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby +# +# Check for changed posts + +Jekyll::Hooks.register :posts, :post_init do |post| + + commit_num = `git rev-list --count HEAD "#{ post.path }"` + + if commit_num.to_i > 1 + lastmod_date = `git log -1 --pretty="%ad" --date=iso "#{ post.path }"` + post.data['last_modified_at'] = lastmod_date + end + +end diff --git a/_posts/2023-07-18-spring.md b/_posts/2023-07-18-spring.md new file mode 100644 index 0000000..29a33a8 --- /dev/null +++ b/_posts/2023-07-18-spring.md @@ -0,0 +1,2217 @@ +--- +title: Spring +--- + +## Java and Maven Installation Steps (Ubuntu) + +- java 17 is needed for spring framework 6 / spring boot 3 +- download deb file from [here](https://www.oracle.com/java/technologies/javase/jdk17-archive-downloads.html) +- run `sudo apt install ./jdk-17_linux-x64_bin.deb` +- download binary tar.gz file from [here](https://maven.apache.org/download.cgi) +- run `tar xzvf apache-maven-3.9.3-bin.tar.gz` +- add the following to ~/.bashrc - + ```shell + export JAVA_HOME="/usr/lib/jvm/jdk-17" + export PATH="$PATH:$JAVA_HOME/bin/" + + export M2_HOME="~/apache-maven-3.9.3" + export MAVEN_OPTS="-Xms256m -Xmx512m" + export PATH="$PATH:$M2_HOME/bin/" + ``` +- note - when creating projects using start.spring.io, it comes bundled with the maven wrapper + +## Rest + +- evolution of http - http1 ➙ http1.1 ➙ http2 ➙ http3 +- tls is the newer standard and ssl is old (e.g. http3 only supports / uses tls) +- **safe methods** - only fetch information and do not cause changes. e.g. - GET, HEAD (like GET but requests for metadata), OPTIONS (supported http methods by the url), TRACE (echoes the request, helps understand if the request was altered by intermediate servers) +- **idempotent methods** - safe methods, PUT, DELETE (POST is not idempotent) +- status codes - 100 series for informational purpose, 200 series for success, 300 series for redirects, 400 series for client side errors and 500 series for server side errors +- rest - representational state transfer. it is stateless +- **richardson maturity model** - maturity of restful resources. this was probably needed because unlike soap, rest doesn't really have as many standards + - level 0 - swamp of pox - e.g. soap. pox here stands for plain old xml. typically uses just one url and one kind of method + - level 1 - resources - use multiple uris for identifying specific resources. e.g. /products/123 + - level 2 - use http verbs in conjunction with level 1. e.g. POST for creating a product + - level 3 - hateoas - hypermedia as the engine of application state. server returns links in the response to indicate what other actions are available. this helps with the idea of self discovery / self documenting of apis +- **marshalling** (pojo to json) / **unmarshalling** (json to pojo) is done with the help of jackson +- so far, finding [this pdf](https://docs.spring.io/spring-boot/docs/3.2.x/reference/pdf/spring-boot-reference.pdf) good for reference +- spring was introduced by rod johnson as a simpler **alternative to j2ee, thus replacing xml with pojos** +- spring boot is a wrapper around spring, which can do things like auto-configuration e.g. 
autoconfigure h2 if it is on the classpath, starter dependencies and so on +- **convention over configuration** - there are reasonable defaults, which we can override as needed +- spring boot **has an embedded tomcat server**, which can route requests to the application. earlier, the idea used to be to build war applications (we build jar applications now) and manually deploy them to tomcat servers. tomcat is also called the "servlet container" +- mvc - model view controller. a `DispatcherServlet` running underneath directs requests to / handles responses from the controller +- the controller calls a service, which has the business logic (interacting with db) and returns a model (pojo) +- servlet api is abstracted away from us, but that is what gets used underneath i.e. our requests are sent to servlets that can then forward these requests to our business logic +- the "servlet container" i.e. tomcat is responsible for **converting http requests / response to corresponding servlet request / servlet response** +- we can optionally add **filters** - these can **perform pre / post processing on our servlet requests / servlet responses** - e.g. spring security filters +- so entire flow according to my understanding -
+ ![webmvc architecture](/assets/img/spring/webmvc-architecture.drawio.png) +- `@Service` for service, `@Controller` for controllers +- extend the `CommandLineRunner` interface for initial bootstrapping +- by default in spring boot, package scan happens for any components that are in the same package or inside of any nested packages +- spring context creates components (i.e. instances) via this package scan and holds on to it + ```java + @SpringBootApplication + public class Spring6WebappApplication { + + public static void main(String[] args) { + ApplicationContext ctx = SpringApplication.run(Spring6WebappApplication.class, args); + BookController bookController = ctx.getBean(BookController.class); + } + } + ``` +- we can also autowire the `ApplicationContext` as well +- dependency injection - needed dependency is automatically injected for us. this can be achieved via (3 ways) - + - constructor (instantiation) + - setters + - using field injection i.e. `@Autowired` +- favoured method is using constructor injection with properties marked `private final`. this means the class cannot be instantiated (aka application fails) if the dependency is not available, instead of the dependency causing null pointer exceptions later +- dependency injection works with concrete classes / interfaces (think interface segregation in the i of solid principles) +- inversion of control (2 points) - + - it is the underlying framework that does the heavy lifting for us so that we can focus on the business logic. heavy lifting includes things like instantiation of objects + - allows dependencies to be injected at runtime. the dependencies are not predetermined +- primary beans - if we have two different concrete classes implementing an interface, and we try to use dependency injection for this interface, we get the error **expected single matching bean but found 2**. using `@Primary`, we can ask spring to prefer one of the implementations over another +- we can use `@Qualifier` to specify the bean name explicitly as well. useful when for e.g. we have multiple implementations as described above +- we can also "name" the parameters we want to use dependency injection for correctly. e.g. we have two concrete classes `EnglishGreetingService` and `SpanishGreetingService`. we can use the former using the correct name for the constructor arg + ```java + public Il8NController(GreetingService englishGreetingService) { + this.greetingService = englishGreetingService; + } + ``` +- by default, unless we name the bean, the name used for e.g. for `HelloService` would be `helloService`. we can name beans explicitly as well, e.g. `@Service("bonjourService")` +- profiles - we can annotate a bean with `@Profile` + ```java + @Service + @Profile("EN") + public EnglishHelloService implements GreetingService { } + ``` +- this means that the bean would only be instantiated when that particular profile is active. e.g. - + ```java + @SpringBootTest + @ActiveProfiles("EN") + class IL8NControllerTest { } + ``` +- a bean can be available in multiple profiles - `@Profile({ "EN", "English" })` +- we can also add a bean to be available by default - `@Profile({"EN", "default"})`. this means that if no bean is available, add this bean to the application context. e.g. 
in this case, use the `EnglishHelloService` implementation when any other bean for the `GreetingService` is not available +- so, we have discussed different techniques to resolve conflicts / to achieve inversion of control - `@Primary`, `@Service`, `@Qualifier`, naming the fields "correctly", `@Profile` (named and default), etc +- bean lifecycle methods - we can hook into the various lifecycle stages that a bean goes through, e.g. when the bean properties are set, when its instantiation is over and so on. we can either implement interfaces like `InitializingBean`, `DisposableBean` or annotations like `@PreDestroy` and `@PostConstruct` +- bean scopes - we can set scope via for e.g. `@Scope(BeanDefinition.SCOPE_PROTOTYPE)`. the different options are - + - **singleton** - it is the default scope of beans, one object per application context + - **prototype** - a new instance is returned every time it is referenced. so, the instance isn't stored in the container. this also means that once an instance is no longer used / referenced, it gets garbage collected + - **web scopes** - for web environments, the instance isn't stored in the container + - **session** - one instance per user per session + - **request** - one instance per http request + - **global session** - one instance per application lifecycle, like singleton +- three lifecycle phases - **initialization**, **use** and **destruction**. steps 1-7 below are for initialization +- note: steps 5 and 6 are done by us manually if we use `@Bean` inside `@Configuration` + 1. **application context is created** + 2. **bean factory is created** + 3. then, **bean definitions are loaded** into the bean factory from all different sources like component scan. the bean factory only contains metadata & references to the beans & has not instantiated them yet + 4. **bean factory post processors** act on the beans to configure them, e.g. fields annotated with `@Value` are set via `PropertySourcesPlaceholderConfigurer`. we can implement `BeanFactoryPostProcessor` if we want, the idea is to configure beans before they are instantiated + 5. **beans are instantiated**, and we do dependency injection using constructors. beans have to be instantiated in the correct order because of the dependency graph + 6. we use **setters** after initialization, e.g. we do dependency injection for setters. in general for good development practice, optional dependencies should use dependency injection via setters while required dependencies should use dependency injection via constructors + 7. **bean post processing** can happen, which is further broker down into 3 steps. note - this is **bean post processing**, step 4 was **bean factory post processing** + 1. pre-init bean post processor - implement `BeanPostProcessor` to call `postProcessBeforeInitialization` + 2. initializer - calls method annotated with `@PostConstruct` + 3. post-init bean post processor - implement `BeanPostProcessor` to call `postProcessAfterInitialization` + 8. **use phase** - application context maintains references to the beans with scope singleton, so they don't get garbage collected etc. we can look into the context anytime by implementing `ApplicationContextAware` and using `setApplicationContext` + 9. **destruction phase** - when close is called on application context. `@PreDestroy` method is called on beans before they are marked for garbage collection +- spring mvc - based on java servlet api, which is blocking. remember **servlet** (servlet container i.e. 
tomcat, dispatcher servlet, servlet request / servlet response, etc) +- spring webflux uses project reactor and not java servlet api, so it is non blocking +- similarly, `RestTemplate` is the older standard and is on the way to deprecation unlike `WebClient` +- spring works using **proxies** +- proxies wrap a class to add behavior, e.g. transaction proxies +- proxies help in adding behavior without modifying code +- proxies don't act on internal logic like calling private methods +- aspect oriented programming - helps in adding common behavior to many locations +- usually used for **cross cutting concerns** +- spring aop is easier to implement, does runtime weaving +- aspectj is a bit more difficult to implement, does compile time weaving, and has more features +- performance of compile time weaving > runtime weaving +- `JoinPoint` is the code +- `PointCut` is what selects a `JoinPoint` +- `Advice` is what gets applied to `JoinPoint`. three advices have been discussed here - `@Before`, `@AfterReturning` and `@Around` +- example - all methods annotated with `@AspectDebugger` should generate logs + - AspectDebugger.java - + ```java + @Target(ElementType.METHOD) + @Retention(RetentionPolicy.RUNTIME) + public @interface AspectDebugger { + } + ``` + - DebuggingAspect.java - + ```java + @Slf4j + public class DebuggingAspect { + + @Pointcut("@annotation(AspectDebugger)") + public void executeLogging() { + } + + @Before("executeLogging()") + public void logMethodCall(JoinPoint joinPoint) { + log.debug("started executing method: %s, with args: %s\n", + joinPoint.getSignature().getName(), Arrays.toString(joinPoint.getArgs())); + } + + @AfterReturning(value = "executeLogging()", returning = "retVal") + public void logMethodCall(JoinPoint joinPoint, Object retVal) { + log.debug("finished executing method: %s, with return value: %s\n", + joinPoint.getSignature().getName(), retVal); + } + + @Around("executeLogging()") + public Object trackExecutionTime(ProceedingJoinPoint joinPoint) throws Throwable { + Long startTime = System.currentTimeMillis(); + Object retVal = joinPoint.proceed(); + Long endTime = System.currentTimeMillis(); + log.debug("method: %s took: %dms to execute\n", + joinPoint.getSignature().getName(), endTime - startTime); + return retVal; + } + } + ``` +- lombok - code generation at compile time +- enable "annotation processing" in intellij for it to work with lombok +- `@Data` - shortcut for `@Getter`, `@Setter`, `@EqualsAndHashCode`, `@ToString`, `@RequiredArgsConstructor` +- `@NonNull` - throw an exception if null value is passed for field +- `@Value` - immutable variant (i.e. `private final`) of `@Data` +- `@SneakyThrows` - throw checked exceptions without declaring it in the throws clause +- `@Synchronized` - better version of `synchronized` +- `@Log` for java util logger. this is not usually recommended +- `@Slf4j` for slf4j logger. slf4j is actually a generic logging facade which uses logback bts in spring +- we can see the generated implementation inside the target folder (intellij has a decompiler that can parse this .class file for us) +- delombok - with the help of lombok plugin in intellij, we can generate the code for an annotation. this provides us with a starting point +- get list can be done by annotating controller method with `@RequestMapping("/api/v1/beer")` +- get by id - make use of path variable + ```java + @RequestMapping("/api/v1/beer") + public class BeerController { + // ... 
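+ // a minimal sketch of the "get list" endpoint from the previous bullet, placed here for illustration;
+ // it assumes the controller has a beerService field (elided above) exposing a listBeers() method -
+ // neither is shown in the original snippet
+ @RequestMapping(method = RequestMethod.GET)
+ public List<Beer> listBeers() {
+ return beerService.listBeers();
+ }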
+ @RequestMapping(value = "/{beerId}", method = RequestMethod.GET) + public Beer getBeerById(@PathVariable UUID beerId) { + // ... + ``` +- spring-boot-dev-tools - live reload +- using request body for e.g. for create requests. also, it is a good practice to add the location header, which specifies the id of the newly created object - + ```java + @PostMapping + public ResponseEntity saveBeer(@RequestBody Beer beer) { + Beer savedBeer = beerService.saveBeer(beer); + HttpHeaders headers = new HttpHeaders(); + headers.add(HttpHeaders.LOCATION, "/api/v1/beer/" + savedBeer.getId()); + return new ResponseEntity(headers, HttpStatus.CREATED); + } + ``` +- unit test - test specific sections of code, called code coverage. should execute very fast and in unity i.e. not have external dependencies +- integration test - include the spring context, database and message brokers +- functional test - these tests run against a running instance of the service +- testing pyramid - large number of unit tests, fewer integration and even fewer functional tests +- mock mvc - helps us unit test our controllers +- `@WebMvcTest` - create test splices so that the entire context is not brought up. only the controllers specified are instantiated and not even their dependencies. if we do not specify the controller explicitly, all controllers are instantiated +- we mock the dependencies of the controller using mockito +- **mocks** - predefined answers to the method calls. can assert on executions, e.g. assert it was called with a specific parameter +- **spy** - wrapper around the actual object +- the assertion of execution can be done using `verify` +- **argument matchers** - match the arguments of the execution of mocks. e.g. disallow the predefined response if the matching fails +- **argument captors** - capture the arguments of the execution of mocks +- apart from stubbing response, we can also perform assertions on executions of mocks - + ```java + verify(beerService).updateBeerById(eq(beer.getId()), any(Beer.class)); + ``` +- we can use `ArgumentCaptor` from mockito to help us capture arguments passed to mocks + ```java + ArgumentCaptor id_ = ArgumentCaptor.forClass(UUID.class); + verify(beerService).deleteBeerById(id_.capture()); + assertEquals(id, id_.getValue()); + ``` +- use `@MockBean` for injecting the service mocks into the controller +- we use `jsonpath`, which comes from [jayway jsonpath](https://github.com/json-path/JsonPath) +- we use hamcrest matchers e.g. 
notice the use of `is` + ```java + @WebMvcTest(controllers = {BeerController.class}) + class BeerControllerTest { + + @Autowired + MockMvc mockMvc; + + @MockBean + BeerService beerService; + + @Test + void getBeerById() throws Exception { + Beer beer = Beer.builder().id(UUID.randomUUID()).build(); + when(beerService.getBeerById(beer.getId())).thenReturn(beer); + + mockMvc.perform(get("/api/v1/beer/" + beer.getId()) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().isOk()) + .andExpect(content().contentType(MediaType.APPLICATION_JSON)) + .andExpect(jsonPath("$.id", is(beer.getId().toString()))); + } + } + ``` +- using json path capabilities in assertions - + ```java + .andExpect(jsonPath("$.length()", is(2))) + .andExpect(jsonPath("$[?(@.id == '%s')]", one.getId().toString()).exists()) + .andExpect(jsonPath("$[?(@.id == '%s')]", two.getId().toString()).exists()); + ``` +- spring boot does configure an object mapper for us by default which we should prefer using in our test by autowiring instead of creating a new one so that our tests are closer to the real word scenario. we use this object mapper for creating request body in post requests +- if the request body contains json, we need to provide the content type header as well + ```java + mockMvc.perform(post("/api/v1/beer") + .accept(MediaType.APPLICATION_JSON) + .contentType(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(req))) + .andExpect(status().isCreated()) + .andExpect(header().exists("Location")) + .andExpect(header().string("Location", "/api/v1/beer/" + beer.getId())); + ``` +- when testing using mock mvc, `delete("/api/v1/beer/" + id.toString())` can be written as `delete("/api/v1/beer/{beerId}", id.toString())` to make use of positional binding +- we can also auto-configure mock mvc in a non-`@WebMvcTest` (such as `@SpringBootTest`) by annotating it with `@AutoConfigureMockMvc` +- the default error handling mechanism uses `DefaultHandlerExceptionResolver`, `ResponseStatusExceptionResolver` (maybe more?), which extends `AbstractHandlerExceptionResolver` +- we can annotate the methods inside controllers with `@ExceptionHandler` to handle specific exceptions i.e. we provide the annotation the exception it should handle. we can use this in the methods of controllers. the downside of this is that it is scoped to a single controller +- so, we can annotate a class with `@ControllerAdvice` to handle exceptions globally and continue to use `@ExceptionHandler` on the methods of this class + ```java + public class NotFoundException extends RuntimeException {} + + @ControllerAdvice + public class ErrorHandler { + + @ExceptionHandler(NotFoundException.class) + public ResponseEntity handleMethodNotFound() { + return ResponseEntity.notFound().build(); + } + } + ``` +- `@ResponseStatus` - we can annotate "custom exceptions" with this annotation to use a specific status for that exception. understand we cannot change code of existing pre-built exceptions, so this only works for custom exceptions. this way, we can skip the controller advice shown above + ```java + @ResponseStatus(HttpStatus.NOT_FOUND) + public class NotFoundException extends RuntimeException { + } + ``` +- to prevent having too many custom exceptions / no point of having custom exceptions that are only used once, we can use `ResponseStatusException`. 
it allows us to throw exceptions with a response status + ```java + catch (Exception e) { + throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Foo", e); + } + ``` +- spring boot's `ErrorController` defines how to handle errors, e.g. respond with whitelabel pages in browsers vs json for rest requests. we can configure it using following properties - + ```properties + # whether to include errors attribute - think this includes validation errors? + server.error.include-binding-errors=never + # whether to include exception attribute + server.error.include-exception=false + # whether to include message attribute - think this is for exception message? + server.error.include-message=never + # whether to include stack trace + server.error.include-stacktrace=never + # whether to display error page in browsers + server.error.whitelabel.enabled=true + ``` +- i observed that by setting the `server.error` properties to as verbose as possible, the errors property in the response was pretty decent (i.e. include the error message, field name, etc) +- however, when testing via mock mvc, something like this was not working - + ```java + .andExpect(jsonPath("$.errors.length()", is(2))) + .andExpect(jsonPath("$.errors[?(@.defaultMessage == '%s')]", "must not be blank").exists()) + .andExpect(jsonPath("$.errors[?(@.defaultMessage == '%s')]", "must not be nullable").exists()) + ``` +- i think this is more to do with how mock mvc isn't actually like a full blown integration test. so, to test the validation handling via mock mvc, i did the below - + ```java + MvcResult result = mockMvc.perform(post("/api/v1/beer") + .accept(MediaType.APPLICATION_JSON) + .contentType(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(beer))) + .andExpect(status().isBadRequest()) + .andReturn(); + + MethodArgumentNotValidException e = (MethodArgumentNotValidException) result.getResolvedException(); + assertNotNull(e); + List defaultMessages = e.getBindingResult().getFieldErrors("beerName").stream() + .map(DefaultMessageSourceResolvable::getDefaultMessage) + .toList(); + assertEquals(2, defaultMessages.size()); + assertTrue(defaultMessages.contains("must not be null")); + assertTrue(defaultMessages.contains("must not be blanker")); + ``` +- error handling - already discussed earlier - if the exception thrown is annotated with `@ResponseStatus`, it can be handled by `ResponseStatusExceptionResolver`. however, if its not, spring will wrap it around `ServletException`. this is not something mock mvc can handle. so basically, below will not work in such cases - + ```java + MvcResult result = mockMvc.perform(put("/api/v1/beer/{beerId}", beerDto.getId()) + // ... + .andReturn(); + result.getResolvedException() + ``` +- unit testing spring services example - + ```java + @ContextConfiguration(classes = {BeerCSVServiceImpl.class}) + @ExtendWith(SpringExtension.class) + class BeerCSVServiceTest { + + @Autowired + BeerCSVService beerCSVService; + + // ... + ``` +- now, we can use `@MockBean` etc. note how we configure `BeerServiceImpl` but autowire `BeerService` +- rest template - spring automatically autowires a RestTemplateBuilder with sensible defaults for us +- use uri component builder - as we add things like query parameters, we don't have to worry about things like encoding special characters etc, unlike when we directly provide the string url by performing concatenations ourselves +- here we expect the server to return an object of type jpa's Page, and so, we want to deserialize the response into this. 
now Page is an interface, so we can instead use PageImpl. jackson cannot directly convert to PageImpl (i think this happens because PageImpl does not have the right constructor etc) so we use our own wrapper like below based on ([this](https://stackoverflow.com/a/77316854/11885333)) - + ```java + @JsonIgnoreProperties("pageable") // ignore the pageable property in the response + public class JacksonPage extends PageImpl { + + public JacksonPage(List content, int number, int size, long totalElements) { + super(content, PageRequest.of(number, size), totalElements); + } + } + ``` +- rest template code - note `UriComponentsBuilder`, `ParameterizedTypeReference` + ```java + @Service + @Slf4j + public class BeerClientServiceImpl implements BeerClientService { + + @Override + public Page listBeers(String beerName) { + + UriComponentsBuilder uriComponentsBuilder = UriComponentsBuilder.fromPath("/v1/beer"); + if (beerName != null) uriComponentsBuilder.queryParam("beerName", beerName); + + return restTemplate.exchange( + uriComponentsBuilder.toUriString(), + HttpMethod.GET, + null, + new ParameterizedTypeReference>() { + } + ) + .getBody(); + } + + @Override + public BeerDto getBeerById(UUID beerId) { + + UriComponents uriComponents = UriComponentsBuilder.fromPath("/v1/beer/{beerId}") + .buildAndExpand(beerId); + + return restTemplate.exchange( + uriComponents.toUriString(), + HttpMethod.GET, + null, + new ParameterizedTypeReference() { + } + ) + .getBody(); + } + } + ``` +- note - if we don't really have the need for mapping to a full blown pojo, we can use Map or better JsonNode. JsonNode has methods to parse json and extract different attributes from it etc + ```java + List beerNames = new ArrayList<>(); + response.getBody() + .get("content") + .elements() + .forEachRemaining(beerNode -> beerNames.add(beerNode.get("beerName").asText())); + log.info("response body = [{}]", beerNames); + ``` +- creating a beer - note `HttpEntity` + ```java + @Override + public BeerDto createBeer(BeerDto beerDto) { + ResponseEntity response = restTemplate.exchange( + "/api/v1/beer", + HttpMethod.POST, + new HttpEntity<>(beerDto), + Void.class + ); + URI location = response.getHeaders().getLocation(); + return getBeer(location.getPath()); + } + ``` +- there is a way to unit test rest template using `@RestClientTest`. i am not a fan of so many annotations, so i prefer `@SpringBootTest`, unless i want to do unit testing of services, where i can use `@ExtendWith(SpringExtension.class)`. 
[my full so answer](https://stackoverflow.com/a/77339935/11885333) + ```java + @Slf4j + @SpringBootTest + class BeerClientServiceImplTest { + + @Autowired + BeerClientService beerClientService; + + @Autowired + ObjectMapper objectMapper; + + @Autowired + RestTemplate beerServiceRt; + + MockRestServiceServer mockServer; + + @BeforeEach + void setUp() { + mockServer = MockRestServiceServer.createServer(beerServiceRt); + } + + @Test + @SneakyThrows + void listBeers() { + Page stubbedResponse = new PageImpl<>( + List.of(BeerDtoMocks.two), PageRequest.of(1, 1), 1 + ); + mockServer.expect(method(HttpMethod.GET)) + .andExpect(requestTo(containsString("/api/v1/beer"))) + .andRespond(withSuccess() + .body(objectMapper.writeValueAsString(stubbedResponse)) + .contentType(MediaType.APPLICATION_JSON)); + + Page response = beerClientService.listBeers(null); + assertEquals(BeerDtoMocks.two.getBeerName(), response.getContent().get(0).getBeerName()); + } + } + ``` +- similarly, to mock post calls (we need to return id in location header) - + ```java + UUID id = UUID.randomUUID(); + URI location = UriComponentsBuilder.fromPath("/api/v1/beer/{beerId}") + .buildAndExpand(id) + .toUri(); + mockServer.expect(method(HttpMethod.POST)) + .andExpect(requestTo(containsString("/api/v1/beer"))) + .andRespond(withAccepted().location(location)); + ``` +- spring 6 introduced [`RestClient`](https://spring.io/blog/2023/07/13/new-in-spring-6-1-restclient/) as an alternative to `RestTemplate`, with fluent api like `WebClient` +- actuator helps us in monitoring and managing our applications through http endpoints +- we can see all available endpoints [here](https://docs.spring.io/spring-boot/docs/current/reference/html/actuator.html#actuator.endpoints) +- adding actuator in spring boot + ```xml + + org.springframework.boot + spring-boot-starter-actuator + + ``` +- by default, all endpoints are enabled but not exposed, only the health endpoint is exposed. to expose all endpoints, use `management.endpoints.web.exposure.include=*` +- we can see the health at /actuator/health +- it would return `{ status: "UP" }` if it works fine +- this endpoint can for e.g. be useful for configuring readiness probe of spring boot applications deployed on kubernetes +- add property `management.endpoint.health.show-details=ALWAYS`, [docs](https://docs.spring.io/spring-boot/docs/current/reference/html/actuator.html#actuator.endpoints.health) to show more details +- we can also add custom health checks to show up when we hit the health endpoint (not discussed) +- we can see arbitrary information about the app at /actuator/info +- inside pom.xml inside `spring-boot-maven-plugin`, add below - + ```xml + + + + build-info + + + + ``` +- this gives build time, version, maven coordinates of the project, etc +- it generates a file at target/classes/META-INF/build-info.properties +- add the plugin below - + ```xml + + pl.project13.maven + git-commit-id-plugin + + ``` +- to enable all git related information like branches, last commit, etc., [add below](https://docs.spring.io/spring-boot/docs/2.6.6/reference/html/actuator.html#actuator.endpoints.info.git-commit-information) + ```properties + management.info.git.mode=full + ``` +- it generates a file at target/classes/git.properties +- we can add custom endpoints to actuator as well (not discussed) +- we can secure the health endpoints using spring security! - e.g. 
allow all users to access the health endpoint and only users with a role of admin to access other endpoints + ```java + @Configuration + public class SecurityConfig extends WebSecurityConfigurerAdapter { + + @Override + protected void configure(HttpSecurity http) throws Exception { + http.authorizeRequests() + .requestMatchers(EndpointRequest.to(HealthEndpoint.class)).permitAll() + .requestMatchers(EndpointRequest.toAnyEndpoint()).hasRole("ADMIN"); + + http.csrf().and().httpBasic(); + } + } + ``` +- metrics - can integrate with many other monitoring systems like cloudwatch, datadog, prometheus, etc. by using micrometer which is vendor neutral, just like slf4j for logging +- it would return information like jvm memory usage, system cpu usage, etc +- hitting `/actuator/metrics/` will show what all endpoints we can hit, then we can hit them via for instance `/actuator/metrics/application.ready.time` +- opencsv - convert csv records to pojo. define pojo as such - + ```java + @Data + @AllArgsConstructor + @NoArgsConstructor + @Builder + public class BeerCSVRecordDto { + + @CsvBindByName + private Integer row; + + @CsvBindByName(column = "count.x") // specify column name explicitly + private Integer countX; + } + ``` +- now, use the code below - + ```java + File file = ResourceUtils.getFile("classpath:data/beers.csv"); + List records = new CsvToBeanBuilder(new FileReader(file)) + .withType(BeerCSVRecordDto.class) + .build() + .parse(); + ``` +- note - `ResourceUtils` comes from spring, can be used for reading files in classpath easily + +## JPA + +- ddl - data definition language - creating / dropping tables, indices, etc +- dml - data manipulation language - insert, update and delete data +- dql - data query language - retrieving data, joins, aggregations, etc +- dcl - data control language - grant / revoke access +- at its core, jdbc (java database connectivity) is used to interact with sql databases +- jdbc is used to prepare sql statements, bind arguments, scroll through results, etc +- clearly, this is low level api and therefore tedious to work with +- idea is to work with java objects instead of `java.sql.ResultSet` +- **object / relational paradigm mismatch** / **impedance mismatch** - object models and relational models do not work well together out of the box. some examples are - + - granularity - e.g. let us say user has an address (one to one). in java, there would be a separate address class to represent this, and the user class will contain a reference to the address class. in sql, the same user table might have multiple columns for address like state, city, zip code, etc + - inheritance - e.g. we have multiple billing details, credit card and bank account. in java, there would be separate classes representing credit card and bank account, both extending a common super class billing details. sql doesn't support inheritance like this + - identity - == in java is for instance identity. equals in java is for instance equality, where all fields can be compared. equality of two rows in database is done by database identity i.e. comparing only the primary key. all three things are different + - association - in java, we can represent them using object references, e.g. for one to many, the one side would have a list as an object reference, while the many side will only have a single object reference. 
in sql however, we just have a foreign key constraint +- hibernate is basically an orm (object relational mapper) +- so, this helps with interoperability between java objects and underlying rdbms using metadata +- jpa - jakarta persistence api is a specification. hibernate implements jpa +- other hibernate components - + - hibernate validator - implementation of bean validation (jsr 303) + - hibernate envers - audit trail of data + - hibernate search - uses apache lucene underneath to add text search capabilities + - hibernate ogm (object grid mapper) - reusing hibernate for no sql databases including key value, graph, document, etc + - hibernate reactive - non blocking way of interacting with the database + - hibernate jpamodelgen - static metamodel (discussed later) +- spring data commons - helps unify access to different kinds of data stores, be it relational or no sql, and makes code even more concise +- spring data jpa is a jpa specific implementation of spring data, adding functionality like generating implementations based on interface method names +- other spring data components - + - spring data jdbc - sits on top of spring data. so, it eliminates the magic that spring data jpa might have, but at the same time eliminates boilerplate unlike when interacting with jdbc directly + - spring data rest - exposing spring data repositories as rest resources + - spring data mongodb - for mongodb (document database) + - spring data redis - for redis (key value database) + - spring data neo4j - for neo4j (graph database) +- simple class example with id - + ```java + @Entity + @Data + @AllArgsConstructor + @NoArgsConstructor + public class Message { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + private String text; + } + ``` +- `EntityManagerFactory` / `EntityManager` are jpa, while `SessionFactory` / `Session` are specific to hibernate, so i assume we should always try using the former. note the syntax below of starting and committing a transaction + ```java + @Test + public void loadFromStorage() throws Exception { + List messages; + + try (EntityManagerFactory emf = Persistence.createEntityManagerFactory("jpa-one")) { + try (EntityManager em = emf.createEntityManager()) { + em.getTransaction().begin(); + Message message = Message.builder().text("hello world!").build(); + em.persist(message); + em.getTransaction().commit(); + + em.getTransaction().begin(); + messages = em.createQuery("select m from Message m", Message.class).getResultList(); + messages.get(0).setText("updated hello!"); + em.getTransaction().commit(); + } + } + + assertAll( + () -> assertEquals(1, messages.size()), + () -> assertEquals("updated hello!", messages.get(0).getText()) + ); + } + ``` +- using spring data jpa, this is even simpler - + ```java + @Test + public void loadFromStorage() { + Message message = Message.builder().build(); + message.setText("hello spring data jpa!"); + messageDao.save(message); + + Iterable messages = messageDao.findAll(); + assertEquals("hello spring data jpa!", messages.iterator().next().getText()); + } + ``` +- note - performance of spring data is considerably slower than regular hibernate when dealing with very huge amounts of data +- for the most part, we should use / be able to use jpa annotations, coming from jakarta.persistence. we should have to use ones coming from hibernate for specific use cases only +- we can have global annotations which do not need to be put into a specific file, like `@NamedQuery`. 
we can keep global metadata inside a file package-info.java +- for rapid prototyping, we can set `spring.jpa.hibernate.ddl-auto=update` but for production, prefer using `validate` instead +- to log the sql statements, use `spring.jpa.show-sql=true` or `logging.level.org.hibernate.SQL=DEBUG` (the later will use the logger i.e. have package name etc. before to help maintain the standard log format). for debugging purpose, we can log the values as well i.e. without the property `logging.level.org.hibernate.orm.jdbc.bind=TRACE` set to trace like this, logs will show the sql but not the actual values in statements like insert +- hikari - maintains a connection pool to the database. establishing a connection to the database is a complex / resource intensive operation +- database migration - prior to or in conjunction with the application. help track history, successful vs unsuccessful scripts etc. and thus avoid data loss +- two popular solutions - liquibase (more complex and robust) and flyway +- both have integrations with spring boot (preferred since automated?), maven / gradle plugins and have clis as well +- flyway commands - + - migrate - migrate to latest version + - clean - drop all database objects (NOT FOR PRODUCTION) + - info - print information about migrations + - validate - validate available migrations with applied migrations + - undo - undo the most recently applied migration + - baseline - baseline an existing database i.e. we start using flyway from an intermediary state and not from get go + - repair - repair the schema history tables maintained by flyway +- add the flyway dependency for mysql (version comes from spring boot starter parent) + ```xml + + org.flywaydb + flyway-mysql + + ``` +- files should be inside of resources/db/migration and have the format `V1__init-beer.sql` +- note - if encountering too many problems with h2 vs mysql (e.g. i encountered one with uuid described above), we can use db/migration/\ folder - is it better to just use test containers instead? +- flyway automatically creates the `flyway_schema_history` table for us the first time around and adds these scripts to it as rows + + | installed_rank | version | description | type | script | checksum | installed_by | installed_on | execution_time | success | + | -------------- | ------- | ----------- | ---- | ------------------- | ---------- | ------------ | -------------------------- | -------------- | ------- | + | 1 | 1 | init-beer | SQL | V1\_\_init-beer.sql | -978541020 | SA | 2023-07-22 20:38:03.365998 | 4 | TRUE | + +- my doubt - hopefully, there is some "serious" locking / transaction level that flyway uses. e.g. what if i have horizontally scaled instances - i would not want there to be any consistency issues +- validation - defensive programming +- e.g. do not allow null / white spaces for name - + ```java + @NotNull + @NotBlank + private String beerName; + ``` + and add `@Valid` to the method arguments like so + ```java + public ResponseEntity saveBeer(@Valid @RequestBody BeerDto beer) { + ``` +- we can also apply hibernate validations on our entities (which i don't think is a good practice) and the database type constraints themselves (e.g. column length limits) act as a validation layer as well +- accessing metadata at runtime - we can access the **metadata of our models** at runtime. two options - + - **dynamic metamodel** - using jakarta we get the `EntityManagerFactory` - remember only this - `emf.getMetamodel()`. 
notice how we get access to the entity and its attributes -
+ ```java
+ Metamodel metamodel = emf.getMetamodel();
+ Set<ManagedType<?>> managedTypes = metamodel.getManagedTypes();
+ ManagedType<?> itemType = managedTypes.iterator().next();
+ SingularAttribute<?, ?> idAttribute = itemType.getSingularAttribute("id");
+ ```
+ - **static metamodel** - hibernate's jpa metamodel generator, via the `hibernate-jpamodelgen` dependency. use case - type safe query builder -
+ ```java
+ CriteriaBuilder cb = em.getCriteriaBuilder();
+ CriteriaQuery<Item> query = cb.createQuery(Item.class);
+ Root<Item> fromItem = query.from(Item.class);
+ Path<String> namePath = fromItem.get(Item_.name);
+ query.where(cb.like(namePath, cb.parameter(String.class, "pattern")));
+
+ List<Item> items = em.createQuery(query)
+ .setParameter("pattern", "%Item 1%")
+ .getResultList();
+ ```
+- note - with spring 6, the javax persistence namespace has been renamed to jakarta
+- all annotations like `@Id`, `@GeneratedValue`, `@Entity`, etc. come from jakarta.persistence now
+- beauty of `CrudRepository` - we can change spring-data-jpa to spring-data-mongodb without any changes required inside code. this is because it comes from spring-data-commons i believe
+- `JpaRepository` extends both `CrudRepository` and `PagingAndSortingRepository` for us, so people usually use this variant
+- jpa can generate implementations based on interface method names. some things it supports include `Like`, `IgnoreCase`, `OrderBy` (with `Asc` / `Desc`), `Distinct`, `LessThan`, `First` / `Top`
+- we can return `List`, `Optional`, etc
+- the syntactic correctness of these methods is verified when the application context loads up
+- `@Query` - the method name in this case can be anything
+- we can bind parameters by position or by name, and use `@Param` if we bind by name
+- we can add the `nativeQuery` attribute to write native sql, but we lose out on portability (swapping the underlying relational database easily, e.g. integration test vs production)
+- `@Query` issue - while this does give more flexibility around writing complex jpql, the correctness of the query is not verified like it is for interface methods i.e. the query will only fail at execution time when called. maybe because unlike here, jpa has to generate the corresponding concrete implementation in case of interface methods?
+- **projections** - spring data jpa can also help change the shape of the return type instead of using the persistent class as the return type. e.g. we want to fetch less data from the database for optimization / expose fewer fields to the service layer, etc
+- we can use interfaces or classes for this custom projection
+- interface projection - the underlying "proxy class" would be generated by jpa
+- **interface projection** has two types - **closed projections** and **open projections**
+- **closed projections** - names of interface methods match the names of the persistent class attributes
+ ```java
+ public interface EmployeeView {
+
+ String getFirstName();
+
+ String getLastName();
+ }
+ ```
+- **open projections** - when we want to do more complex things.
notice how we use spel inside `@Value` + ```java + public interface EmployeeView { + + @Value("#{target.firstName} #{target.lastName}") + String getFullName(); + } + ``` +- issue - spring cannot optimize closed projections since it does not know in advance what columns might be required unlike in open projections +- **class projection** - the names of the constructor arguments should match the field names of the persistent class exactly + ```java + @Data + public class EmployeeDto { + private String fullName; + + public EmployeeDto(String firstName, String lastName, String email) { + this.fullName = firstName + " " + lastName; + } + } + ``` +- issue - nesting of projections (e.g. one to many) is not supported by class based projections unlike interface based projections +- for insert, update, delete operations, we can continue using `@Query`, but we also need to add `@Modifying` on top of it +- the automatic generation of implementation based on method names is also supported for delete operations, e.g. `deleteByLevel` +- `deleteByLevel` vs `deleteBulkByLevel` - `deleteByLevel` will first run a query and then delete all objects one by one. this will also thus call "registered callbacks" if any. `deleteBulkByLevel` will run a single jpql query i.e. not load all the elements first, and skip all callbacks +- qbe - **query by example** - allows for dynamic query creation - something we cannot do using techniques like `@Query` / interface method names +- it has three parts - + - **probe** - we set the values used by `ExampleMatcher` in the persistent class + - **`ExampleMatcher`** - provides the rules for matching the properties + - **`Example`** - combines the `ExampleMatcher` and probe +- example of qbe. note - if we do not use `withIgnorePaths`, default values of the probe (e.g. 0 for primitive integer) would be put in the where clause of the sql / jpql for those properties + ```java + User user = new User(); + user.setEmail("@someotherdomain.com"); + + ExampleMatcher matcher = ExampleMatcher.matching() + .withIgnorePaths("level", "active") + .withMatcher("email", match -> match.endsWith()); + + List users = userRepository.findAll(Example.of(user, matcher)); + ``` +- doubt - based on how we are manually setting properties inside for e.g. `withIgnorePaths`, is this a good use case for introducing hibernate-jpamodelgen? +- request param - note how we pass required as false, since it is true by default. use case - e.g. providing pagination related parameters + ```java + public List listBeers(@RequestParam(required = false) Integer pageNumber) { + ``` +- a neat trick - right click on a method -> refactor -> change signature. we can for e.g. add a new argument to the method, e.g. String beerName. we can also provide a default value, e.g. null. this means that the method and all its usage will be appropriately refactored, without us doing this manually in every place +- implementing paging and sorting - + - to repository methods, add an argument of PageRequest - constructed using page number, size, sort object + - repository methods return a Page - contains the content (list of objects), utility methods to go to next / previous page, etc +- implementation - + ```java + // repository + Page findAllByBeerStyle(BeerStyle beerStyle, PageRequest pageRequest); + + // service + PageRequest pageRequest = PageRequest.of( + pageNumber != null && pageNumber > 0 ? pageNumber - 1 : DEFAULT_PAGE_NUMBER, + pageSize != null && pageSize > 0 ? 
pageSize : DEFAULT_PAGE_SIZE, + Sort.by(Sort.Order.by("beerName"), Sort.Order.by("beerStyle")) + ); + + Page beers = beerRepository.findAllByBeerStyle(beerStyle, pageRequest); + return beers.map(beerMapper::map); // returns new Page by calling map on all elements of page + + // tests - for instance, create a Page object to stub return values + Page beers = new PageImpl<>(List.of(one, two)); + ``` +- **entity type** - they are the persistent classes we use. they have ids (key constraint, identity constraint) and foreign keys for referencing other entity types (referential constraint). they have their own lifecycle and exist independently of other entity types +- **value type** - they belong to another entity type and do not have their own lifecycle. they would not have an identity of their own. some examples of value types - + - address in user. can be represented as **embeddable classes** in jpa + - recall the idea of **weak identities** and **identifying relationships**. e.g. a bid is a weak identity and its **identifying relations** are item and user. so, value types can be represented as a table inside our database as well +- recall - instance identity != instance equality != database identity +- primary keys - should not be null (entity constraint), should be unique (key constraint) and should not be updatable (hibernate does not work well with updatable primary keys) +- due to the restrictions above, and the fact that databases do not "perform optimally" with all types when indexing, it is better to have **surrogate keys** over **natural keys** +- for taking help from jpa to generate surrogate keys, we use `@GeneratedValue` along with `@Id`. otherwise, we will have to take care of assigning identifiers ourselves + - `GenerationType.AUTO` - the default. jpa talks to the underlying database to decide which strategy is the best + - `GenerationType.IDENTITY` - auto incremented primary key column + - `GenerationType.SEQUENCE` - a table is maintained separately, and this is called every time before an insert + - `GenerationType.TABLE` - an extra table called `HIBERNATE_SEQUENCES` is maintained, where there is one row for each entity. this table would be referred to before every insert +- sequence vs auto increment - why we should consider sequence - in case of auto increment, we need to wait for response from the database for ids. in case of sequence, hibernate is "aware" of the id. so, our instances would have an id assigned to them even if the actual insert inside the db has not happened yet (multiple inserts can be batched, which is when this might be useful) +- another option - uuid - for globally unique ids. advantage - is random and fairly unique across systems and databases. disadvantage - more space and is thus less efficient compared to the incremented ids + ```java + @Data + @Builder + @Entity + @AllArgsConstructor + @NoArgsConstructor + public class PersistentBeer { + + @Id + @GeneratedValue + @UuidGenerator // org.hibernate.annotations.UuidGenerator + @Column(columnDefinition = "binary(16)") + private UUID id; + + @Version + private Integer version; + + // ... + } + ``` +- note - had to add the `columnDefinition` because without it, h2 was failing when `ddl-auto` was set to `validate` but mysql works without this as well +- calling methods, like `repo.save(obj)` doesn't always guarantee obj will be updated by jpa, so always use `obj = repo.save(obj)` instead. 
remember how first level cache is used by jpa etc, so that is where these things probably become important +- override table name using `@Table`. by default, our camel cased classes are converted to snake case. note - sql is case insensitive +- we can also pass properties like schema etc to `@Table` +- `hibernate.auto_quote_keyword` - have hibernate automatically add quotes to reserved keywords which might be used as table / column names. remember that for spring boot, the prefix of `spring.jpa.properties` might come into picture, i.e. `spring.jpa.properties.hibernate.auto_quote_keyword=true` +- we can also use backticks / double quotes explicitly, e.g. `@Table("\"User\"")` +- if for e.g. we need a naming strategy, e.g. prefix all tables names with `CE_`. we can use naming strategy for this - + ```java + public class CENamingStrategy extends PhysicalNamingStrategyStandardImpl { + + @Override + public Identifier toPhysicalTableName(Identifier name, JdbcEnvironment context) { + return new Identifier("CE_" + name.getText(), name.isQuoted()); + } + } + + // ... + properties.put("hibernate.physical_naming_strategy", CENamingStrategy.class.getName()); + ``` +- **dynamic sql generation** - even when we update some columns, we see all columns being updated ie. previous column values itself are used. when using hibernate, when we load our application, hibernate generates crud statements for all our persistent classes and caches them. this way, it does not have to regenerate them entirely every time 🤯. this behavior can be disabled as well. use case - we only update one column, but our sql will try updating all columns by reusing the previous value, but this can become very slow if the table has a lot of columns +- some classes are never updated once created, e.g. bid. hibernate can avoid dirty checking for such classes, thus making it faster. for this, annotate the persistent class with `@Immutable` +- we can create views using `@Subselect` +- we can also have the regular repositories for these to use them - + ```java + @Entity + @Immutable + @Subselect( + value = "select i.ID as ITEMID, i.NAME as NAME, " + + "count(b.ID) as NUMBEROFBIDS " + + "from ITEM i left outer join BID b on i.ID = b.ITEM_ID " + + "group by i.ID, i.NAME" + ) + @Synchronize({ "ITEM", "BID" }) + public class ItemBidSummary { + + @Id + private Long itemId; + + private String name; + + private long numberOfBids; + } + ``` +- why we should mention table names inside `@Synchronize` - this way, hibernate knows to **flush the updates** for these views before running the query +- so, remember the three annotations along with `@Entity` for views - `@Immutable`, `@Subselect`, `@Synchronize` +- primitive java types, their corresponding wrapper types and most java datetime related types can be directly converted by hibernate to corresponding sql types +- otherwise, if the property extends java.io.Serializable, the property is stored in its serialized form. this can have many issues - + - serialization / deserialization is costly + - if the application is demised, the class is no longer available and therefore the data in the database can no longer be interpreted +- **transient** - some properties need not be persisted. e.g. we might want to store `initialPrice` but not `initialPriceAfterTax`. we can use either the java `transient` keyword, or `@Transient` +- checks can be done using multiple ways. just stay consistent - + - hibernate validator, e.g. `@NotNull`. can help us validate at presentation layer. 
also, if using hibernate for ddl generation, this annotation would be ignored + - jpa / hibernate annotations, e.g. `@Column(nullable = false)`. exception would be thrown by jpa before the insert / update statement is executed. also, if using hibernate for ddl generation, this annotation would be factored in + - advantage - exception is thrown by hibernate itself without hitting database, thus performant + - disadvantage - duplication of logic if similar constraints are present in ddl as well + - relying on database having `not null` defined for columns. in this case, a constraint violation exception would be thrown by the database + - disadvantage - we lose out on flexibility, since changing constraints requires ddl + - advantage - data integrity guarantees for consumers using this data directly +- annotate properties with `@Generated`, so that hibernate knows that these values are generated by the database, and that hibernate needs to make "an extra round trip" after inserting / updating these entities to fetch the new value, by calling a new select +- jpa / hibernate handle usual java to sql type mapping, e.g. Integer / int in java to integer in sql, long / Long in java to bigint in sql, etc +- the idea is while there are some defaults, we can provide more specific values, for e.g. precision and scale for numeric types, length of string for varchar types, etc. not only that, based on what length we specify, hibernate can also decide the corresponding type for mysql - longtext, mediumtext. similarly, for byte[], it can choose tinyblob, mediumblob and longblob +- my understanding - we can lazy load large data types by annotating using `@Basic(fetch = FetchType.LAZY)`! +- to adjust whether we want to save only date, only timestamp or both date and timestamp, we can use `@Temporal`. default is `@Temporal(TemporalType.TIMESTAMP)`, but we can use just `DATE` / `TIME` +- enums - by default, if we don't add the annotation `@Enumerated(EnumType.STRING)`, the ordinal position will be used. issue - if we introduce a new value, it might affect the position of the existing enum values, thus making our data go haywire +- **property access** - jpa can either access the properties directly via fields, or via getter and setter methods. good practice - let everything use fields. if we need the persistence layer to go via getters and setters, we can do it as follows - + ```java + @Access(AccessType.PROPERTY) // the other one is AccessType.FIELD + private String name; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name.startsWith("AUCTION: ") ? name : "AUCTION: " + name; + } + ``` +- my understanding - the above can also be achieved using `@ColumnTransformer`, in which case we deal with sql instead of java code +- derived properties - calculated at runtime using sql. these are calculated every time the item is "retrieved from the database". so, do consider values getting outdated. doubt - can `@Synchronize` discussed earlier help with this? also, obviously these properties would be ignored in insert and update statements + ```java + @Formula("(select avg(b.amount) from bid b where b.item_id = id)") + private BigDecimal averageBidAmount; + ``` +- **custom converter** - e.g. we want to support a special type for currencies in our object model, but this of course might not be supported by the relational database we use. 
so, we can use custom converters (remember `@Convert`, `AttributeConverter` and `@Converter`) - + ```java + // target, as seen by object model + class MonetaryAmount implements Serializable { + + private BigDecimal value; + + private Currency currency; + } + + // object model type to relation model type interconversion + @Converter + class MonetaryAmountConverter implements AttributeConverter { + + @Override + public String convertToDatabaseColumn(MonetaryAmount monetaryAmount) { + return monetaryAmount.toString(); + } + + @Override + public MonetaryAmount convertToEntityAttribute(String s) { + String[] split = s.split(" "); // 35.61 USD + return new MonetaryAmount( + new BigDecimal(split[0]), + Currency.getInstance(split[1]) + ); + } + } + + // declaring the attribute + @Convert(converter = MonetaryAmountConverter.class) + @Column(name = "price", length = 63) + private MonetaryAmount buyNowPrice; + ``` +- create and update timestamps - + ```java + @CreationTimestamp(source = SourceType.DB) + private LocalDateTime createdDate; + + @UpdateTimestamp(source = SourceType.DB) + private LocalDateTime updateDate; + ``` +- my understanding - the default is using jvm's time, which might be an issue, since for e.g. for a horizontally scaled application the clocks might not be synchronized. disadvantage here is every insert would then not be "buffered" and have to be flushed immediately, just like generation strategy of identity vs sequence? +- embeddable - recall two kinds of **association** - **composition** and **aggregation**. _embeddable means composition_ +- so, embeddable entities - + - do not have their own identity. primary key is owning entity's primary key + - when owning entity is deleted or saved, same operation is carried out on embeddable entity + - it does not have a lifecycle of its own +- e.g. user (owning) and address - + ```java + @Embeddable + public class Address { + + private String street; + } + + @Entity + public class User { + + @Id + @GeneratedValue + private Long id; + + private String username; + + // note - no annotation needed here + private Address homeAddress; + } + ``` +- different approaches for inheritance have been discussed now - +- **mapped superclass** - mapping all subclasses to different tables + ```java + @MappedSuperclass + public class BillingDetails { + + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private Long id; + + private String owner; + } + + @Entity + public class BankAccount extends BillingDetails { + + private String account; + + private String bankName; + } + + @Entity + public class CreditCard extends BillingDetails { + + private String number; + + private String exp; + } + ``` +- output -
+ ![mapped superclass](/assets/img/spring/mapped-superclass.png)
+- optionally, we could have made `BillingDetails` abstract
+- also, to override properties of the superclass from the subclass, we can use `@AttributeOverride`, e.g. modify the column name `owner` to `cc_owner` for the credit card table -
+ ```java
+ @AttributeOverride(
+ name = "owner",
+ column = @Column(name = "cc_owner")
+ )
+ ```
+- this logic around mapped superclass can be extended to repositories as well. note how we use 1. generics and 2. `@NoRepositoryBean`. then, we can have specific methods in the subclass dao / generic methods in the superclass dao
+ ```java
+ @NoRepositoryBean
+ public interface BillingDetailsDao<T extends BillingDetails> extends JpaRepository<T, Long> {
+
+ Optional<T> findByOwner(String owner);
+ }
+
+ public interface CreditCardDao extends BillingDetailsDao<CreditCard> {
+
+ Optional<CreditCard> findByNumber(String number);
+ }
+
+ public interface BankAccountDao extends BillingDetailsDao<BankAccount> {
+ }
+ ```
+- tips with mapped superclass -
+ - problem - doesn't work with polymorphic associations - we cannot have other entities reference `BillingDetails` / `BillingDetails` cannot reference other entities. this is because `BillingDetails` itself is not a concrete table
+ - when to use - for top level classes, when further modifications / changes in future are unlikely
+- we can instead use **table per class**
+- minute changes to code
+ - add `@Entity` to `BillingDetails`
+ - replace `@MappedSuperclass` with `@Inheritance(strategy = InheritanceType.TABLE_PER_CLASS)`
+ ```java
+ @Entity
+ @Inheritance(strategy = InheritanceType.TABLE_PER_CLASS)
+ public abstract class BillingDetails {
+ // ...
+ ```
+ - remove `@NoRepositoryBean` from `BillingDetailsDao`
+- advantage of table per class - supports foreign keys
+- my understanding - internally, table per class can do a "union of the tables of the subclasses" when querying the superclass. this is not supported when using mapped superclass. e.g. a user has a list of messages - and a message can be of type sms, email, etc. so, we can use table per class for the message class, and this way, while we see different tables in the relational database for different subclasses, we can still have associations to our message class
+- what the above means i think is that in jpql, we can write `select * from BillingDetails` in table per class, but not in mapped superclass
+- remember to create `BillingDetails` as an abstract class, otherwise a separate table for `BillingDetails` gets created as well
+- probably because of how things work, another feature - we can now have foreign keys for a generic `BillingDetails`. i could see a common sequence table - billing_details_seq - for both bank_account and credit_card. so, important - does this mean that there can be foreign keys to `BillingDetails` i.e. the abstract class when using table per class, but not when using mapped superclass?
+- so, it feels like table per class is desirable for actual polymorphism cases, while mapped superclass is better when we are just trying to move common properties like create / update timestamps, id, etc. to a shared class
+- **single table** hierarchy - a single table is used for representing the superclass, which has all the columns from all the subclasses
+- a column for discriminating is used (default is dtype) - this helps determine which subclass a row belongs to
+- code - only change is strategy
+ ```java
+ @Entity
+ @Inheritance(strategy = InheritanceType.SINGLE_TABLE)
+ public abstract class BillingDetails {
+ // ...
+ ```
+- output -<br />
+ ![single table](/assets/img/spring/single-table.png)
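+- the discriminator column / values can be customized as well - a small sketch for the same hierarchy (the column name `billing_type` and the values `CC` / `BA` here are made up for illustration) -
+ ```java
+ @Entity
+ @Inheritance(strategy = InheritanceType.SINGLE_TABLE)
+ @DiscriminatorColumn(name = "billing_type") // instead of the default dtype
+ public abstract class BillingDetails {
+ // ...
+ }
+
+ @Entity
+ @DiscriminatorValue("CC") // stored in billing_type for credit card rows
+ public class CreditCard extends BillingDetails {
+ // ...
+ }
+
+ @Entity
+ @DiscriminatorValue("BA") // stored in billing_type for bank account rows
+ public class BankAccount extends BillingDetails {
+ // ...
+ }
+ ```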
+- advantage - reporting, gains in performance since no unions etc. are involved, schema evolution is straightforward, etc
+- disadvantage - data integrity, e.g. cannot enforce not null for columns of subclasses at the database level (we can use validation techniques however). there is also denormalization involved here
+- when using repositories of subclasses, hibernate will automatically add filtering logic - `where dtype = 'BankAccount'` - for us bts
+- we can of course use the base class in jpql (since the table is of the base class after all)
+- **joined** - this strategy will have tables for all subclasses and superclasses
+- so, there would be joins involved - the id column in the subclasses (e.g. bank_account below) is both a primary key and a foreign key reference to the superclass (e.g. billing_details below)
+- hibernate knows how to perform the joins for us
+- code - only change is strategy
+ ```java
+ @Entity
+ @Inheritance(strategy = InheritanceType.JOINED)
+ public abstract class BillingDetails {
+ // ...
+ ```
+- output -<br />
+ ![joined](/assets/img/spring/joined.png) +- e.g. if i run `billingDetailsDao.findAll()`, the sql run is as below. note the left join and the `case when` clause which helps hibernate determine which subclass it might map to + ```sql + select + b1_0.id, + case + when b1_1.id is not null then 1 + when b1_2.id is not null then 2 + end, + -- other columns + from + billing_details b1_0 + left join bank_account b1_1 on b1_0.id = b1_1.id + left join credit_card b1_2 on b1_0.id = b1_2.id + ``` +- e.g. if i run `bankAccountDao.findAll()`, the sql run is as below. note the normal (inner?) join + ```sql + select + b1_0.id, + -- other columns + from + bank_account b1_0 + join billing_details b1_1 on b1_0.id = b1_1.id + ``` +- disadvantage - joins are involved, thus taking a performance hit +- imagine our legacy system has two tables - author and author_details. however, in our new domain models, we would like to see it as one class + ![secondary table](/assets/img/spring/secondary-table.png) +- we can map the above using `@SecondaryTable`. note how we mention the `PrimaryKeyJoinColumn`, because the default was otherwise id i.e. the same column name as that of author table + ```java + @Entity + @SecondaryTable( + name = "author_details", + pkJoinColumns = @PrimaryKeyJoinColumn(name = "author_id") + ) + @Data + @AllArgsConstructor + @NoArgsConstructor + public class Author { + + @Id + @GeneratedValue + private Long id; + + private String name; + + @Column(table = "author_details") + private Instant dob; + + @Column(table = "author_details") + private String countryOfOrigin; + } + ``` +- java collections framework works well with hibernate +- we can use `ElementCollection`. i think that the point is that the child entity is owned by the parent i.e. "composition". features like cascading of persistence, deletion, etc follow. the child object need not be marked with `@Entity` itself. i do not see any real upside of this over the usual `OneToMany` etc annotations by making the child as an `@Entity`, so skipping it for now. we get much more fine grained control this way +- considerations when writing implementations for associations - + - we should always (as a best practice and as a requirement by jpa) use interfaces like `java.util.Set` instead of concrete implementations + - hibernate has its own collection classes for associations like one to many, which helps it with **dirty checking**. so basically, our collection instances are wrapped with these hibernate collections to help with dirty checking etc + - we should consider initializing with an empty collection's concrete implementation to avoid null checks / null pointer exceptions for newly created entities + - when creating bidirectional links, we need to carry out two steps for linking both sides, so, we can also add convenience methods like so - + ```java + public void addBid(Bid bid) { + bids.add(bid); + bid.setItem(this); + } + ``` +- many to one - this is the simplest, directly maps to the foreign key column. default column name used by jpa below is `item_id`. also, notice how we override the fetch type, since the default is eager + ```java + @ManyToOne(fetch = FetchType.LAZY) + private Item item; + ``` +- we can override the foreign key column name using `@JoinColumn` +- we can make this bidirectional, by mapping the one to many side as well. 
`getBids` will automatically fetch all the bids for an item for us +- one to many - using the `mappedBy` column, we tell hibernate that "load using the foreign key already specified inside the `item` property of `Bid`". the default fetch type is lazy. + ```java + @OneToMany(mappedBy = "item") + private Set bids = new HashSet<>(); + ``` +- it is common to set the cascade option on the `OneToMany`. in this case, we would want to cascade persist and remove +- `orphanRemoval = true` (false by default) tells hibernate that a bid should be deleted if it is removed from an item's collection. understand how this is different from remove cascade - cascade only ensures calls to delete bids are made when we call delete item + ```java + @OneToMany( + mappedBy = "item", + cascade = {CascadeType.PERSIST, CascadeType.REMOVE}, + orphanRemoval = true + ) + private Set bids = new HashSet<>(); + ``` +- note - my understanding - another difference between using `ElementCollection` vs `OneToMany` is that when we do for e.g. collection.clear() in the prior, a single database statement is issued, while deletes happen one by one in the later. so is it safe to assume that relying on cascade when deleting huge chunks of data is not a feasible option, and we should use some custom jpql / try using `deleteBulk` variants? +- another option - when specifying foreign keys, some sql databases support the `on delete cascade` clause. this way, when an item is deleted, its bids are deleted automatically by the database itself. we can tell hibernate about this using - + ```java + @OneToMany( + mappedBy = "item", + cascade = {CascadeType.PERSIST, CascadeType.REMOVE}, + orphanRemoval = true + ) + @OnDelete(action = OnDeleteAction.CASCADE) + private Set bids = new HashSet<>(); + ``` +- as soon as i comment out the OnDelete line, i see a delete statement for each bid of an item, but with that, i only see one delete statement in the output. is my assumption wrong - i can get rid of the `CascadeType.REMOVE` line with `OnDelete`? +- **cascading state transitions** - entities are independent by default. however, we might want for e.g. bids to be persisted when an item is persisted, bids to be deleted when an item is deleted. for this, we already saw - `CascadeType.PERSIST`, `CascadeType.REMOVE`. along with that, we have `orphanRemoval` to delete a bid removed from `item#bids` and finally, remember our ddl can contain `on delete cascade` +- some lombok specific annotations worth adding to one to many - + ```java + @Builder.Default + @EqualsAndHashCode.Exclude + @ToString.Exclude + ``` +- it might be more feasible to use `@Embeddable` for one to one associations. use one to one when we need to track the entity lifecycle separately i.e. if there are **shared references**. meaning - if a user just has a billing address and a shipping address, address can be marked as an embeddable. lets say another entity shipment has an address as well. we might want a shipment and a user to maintain reference to the same address instance. in this case, OneToOne becomes more feasible +- sometimes, when having one to one mapping, people end up using the same primary key for both tables. in this case, we can use the `@PrimaryKeyJoinColumn` +- normally, we would map one to one mapping using a separate foreign key / surrogate key combination, which is when we can use `@OneToOne` +- lastly, if we would like to track one to one relationships via a separate table, we can use the `@JoinTable` annotation. 
some use cases i can think of -
+ - the relation itself (and not one of the entities) has some attributes
+ - storing nulls for foreign keys can be troublesome sometimes. so, it might be better to store all possible relations, if any, in a separate table altogether
+- for the one to many side, when defining the field, our options are (recall how it is advisable to use the java collections interface on the lhs, and not concrete implementations) -
+ - sets (`Set`) - no duplicates, no order
+ - lists (`List`) - duplicates, order
+ - bags (`Collection`) - duplicates, no order
+- so based on the above, for performance, the best type to use is bags. both de-duping and maintaining order are expensive operations for hibernate
+ ```java
+ private Collection<Bid> bids = new ArrayList<>();
+ ```
+- disadvantage - we cannot eager fetch two or more bag collections simultaneously, because it results in a **cartesian product** (discussed later)
+- again, to customize column names etc., the many side of a one to many relation can have the `@JoinColumn`, while the one side will have the `mappedBy` to indicate it is not the owning side of the relationship
+- my understanding of list - probably, using `List` instead of `Collection` never makes sense, unless we want to use `@OrderColumn`. this annotation orders elements inside the list and maintains the index of the element in a separate column of the table, via the column name specified in the `@OrderColumn` (note - of course, `@OrderColumn` would be present on the field having the `@OneToMany`). now, this results in a performance degradation - hibernate will reorder the collection every time we insert an element into the list etc. (e.g. inserting / deleting an element not at the ends of the list can be an o(n) operation). so, we might be better off treating order as a separate field using `@Column`, forgetting about `@OrderColumn`, and letting the ui do the grunt work of sorting / maintaining this order. now, we can use `Collection` instead of `List`. however, if one must -
+ ```java
+ // ...
+ @OneToMany(mappedBy = "item")
+ @OrderColumn(name = "bid_rank")
+ private List<Bid> bids = new ArrayList<>();
+
+ // ...
+ @ManyToOne
+ private Item item;
+ ```
+- output -<br />
+ ![order column](/assets/img/spring/order-column.png)
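+- for the alternative suggested above - keeping the order as a plain column and sorting outside of hibernate - a rough sketch could look like this (the `position` field and the `findByItemOrderByPositionAsc` method are made-up names for illustration, not from the notes above) -
+ ```java
+ // on the bid side - just a regular column, hibernate never reorders anything for us
+ @ManyToOne
+ private Item item;
+
+ @Column(name = "bid_rank")
+ private Integer position; // maintained by the application / ui
+
+ // repository - fetch the bids of an item already sorted whenever the order matters
+ public interface BidDao extends JpaRepository<Bid, Long> {
+
+ List<Bid> findByItemOrderByPositionAsc(Item item);
+ }
+ ```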
+- again, we can have a `@JoinTable` in case the one to many is optional / the relationship itself has some attributes, and moving them to the many side is logically incorrect
+ ```java
+ // ...
+ @OneToMany(mappedBy = "item")
+ @OrderColumn(name = "bid_rank")
+ private List<Bid> bids = new ArrayList<>();
+
+ // ...
+ @ManyToOne
+ @JoinTable(
+ name = "item_bids",
+ joinColumns = {@JoinColumn(name = "bid_id")},
+ inverseJoinColumns = {@JoinColumn(name = "item_id")}
+ )
+ private Item item;
+ ```
+- output -<br />
+ ![join table one to many](/assets/img/spring/join-table-one-to-many.png)
+- many to many - one side can just have `mappedBy` for the `@ManyToMany`, the other side can define the `@JoinTable`
+ ```java
+ // ...
+ @ManyToMany
+ @JoinTable(
+ name = "item_categories",
+ joinColumns = {@JoinColumn(name = "item_id")},
+ inverseJoinColumns = {@JoinColumn(name = "category_id")}
+ )
+ private Collection<Category> categories = new ArrayList<>();
+
+ // ...
+ @ManyToMany(mappedBy = "categories")
+ private Collection<Item> items = new ArrayList<>();
+ ```
+- output -<br />
+ ![many to many](/assets/img/spring/many-to-many.png) +- cascading options of remove might not make sense for many to many +- using an intermediate table to track the join table using a separate entity altogether. we can use `@EmbeddedId` to track the composite key. jpa does not pass without setting insertable / updatable to false and specifying column name explicitly inside the `Id` class + ```java + @Entity + @Data + @AllArgsConstructor + @NoArgsConstructor + @Builder + public class ItemCategories { + + @EmbeddedId + private Id id; + + @ManyToOne + @JoinColumn(insertable = false, updatable = false) + private Item item; + + @ManyToOne + @JoinColumn(insertable = false, updatable = false) + private Category category; + + private String linkedBy; + + @Data + @AllArgsConstructor + @NoArgsConstructor + @Builder + private static class Id implements Serializable { + + @Column(name = "category_id") + private Long categoryId; + + @Column(name = "item_id") + private Long itemId; + } + } + + // ... + + @OneToMany(mappedBy = "item") + private Collection itemCategories = new ArrayList<>(); + + // ... + + @OneToMany(mappedBy = "category") + private Collection itemCategories = new ArrayList<>(); + ``` +- output of `show create table item_categories` -
+ ![many to many with entity](/assets/img/spring/many-to-many-with-entity.png) +- note - we do not have to touch the id column for the most part - we will just call `setItem` / `setCategory`, and let hibernate do the rest for us +- **entity states** - + - **transient** - when we create a new instance using the `new` operator, the instance is in transient state i.e. it would be lost when no longer referenced. a transient instance will become persistent in multiple ways - e.g. `EntityManager#persist` is called on it directly, or there is a cascading operation from another instance which references this transient instance, etc + - **persistent** - it has a representation in the database. it has a primary key / id set. an instance can become persistent in multiple ways - via `EntityManager#persist`, or it is fetched using a query directly, fetched due to for e.g. lazy loading, etc. persistent instances are always associated with a persistent context + - **removed** - an entity can be deleted from the database in multiple ways - via `EntityManager#remove`, removed via orphan removal, etc + - **detached** - e.g. we find an entity using `EntityManager#find`, and then close the persistence context. our application logic still has a handle to this instance. the instance is now in detached state. we can make modifications on this instance and call `merge` later using a new `EntityManager` i.e. a detached instance from one persistence context can be merged into another persistence context +- **persistence context** - a persistence context is created when we call `EntityManager em = emf.createEntityManager()`, and closed when we call `em.close()` +- when persistence context is closed (`em.getTransaction().commit()`?), hibernate performs **dirty checking** to get the changes made by application +- then, it performs a sync with the underlying database using right dml. this sync process is called **flushing**. we can also call `em.flush()` manually when needed to achieve the same? +- e.g. hibernate can perform the flush before a query to ensure the updated data is reflected in the query +- the persistence context also represents a *unit of work* +- the persistence context also acts as a **first level of cache** - if an entity is queried "again" in a persistence context, the same instance is returned again instead of hitting the database again. this way, during the entire unit of work i.e. inside the persistence context, the entity seen is the same everywhere, and then after the end, the entity can be safely written to the database +- recall impedance mismatch - so, based on above, hibernate guarantees instance identity, therefore instance equality and database identity both automatically. to validate - will this be true then - `repo.findById(1) == repo.findAll().findFirst(where id = 123)` + - does this mean we do not have to override equals? we should, and that too using a **business key** (discussed later) +- persistence context is scoped to a thread +- my understanding, tying things together - when we call `EntityManager#persist`, the instance goes into persistent state. **during this, hibernate has to assign an identity to the instance**. now, if we use something like auto increment, hibernate has to actually perform the insert into the database. if we do not use sequence generator etc, hibernate can delay this execution till flushing! 
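+- tying the states above together in code - a minimal sketch (assuming an `EntityManagerFactory emf` like in the earlier example and a simple `Item` entity with a generated id - illustrative only) -
+ ```java
+ EntityManager em = emf.createEntityManager();
+ em.getTransaction().begin();
+
+ Item item = new Item();                        // transient - not tracked by any persistence context
+ em.persist(item);                              // persistent - id gets assigned, the insert itself may be delayed until flush
+
+ Item same = em.find(Item.class, item.getId()); // served from the first level cache - no extra sql fired
+ // same == item is true - instance identity is guaranteed within one persistence context
+
+ item.setName("updated");                       // no explicit save call - dirty checking picks this up at flush time
+
+ em.getTransaction().commit();                  // flush + commit - the dml actually hits the database here
+ em.close();                                    // persistence context closed - item is now detached
+
+ // later, in a new persistence context, the detached instance can be merged back
+ EntityManager em2 = emf.createEntityManager();
+ em2.getTransaction().begin();
+ Item managedAgain = em2.merge(item);           // copies the detached state onto a managed instance
+ em2.getTransaction().commit();
+ em2.close();
+ ```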
+- by techniques like delaying flushing dml to the end, batching, etc, hibernate ensures that the database locks are acquired for a short duration (database locks are needed for write operations) +- **lazy** - further, when we for e.g. run `Item item = em.getReference(Item.class, itemId);`, hibernate does not immediately run the sql. the id of the item instance is initialized (since we provided it) but other properties are not. the item object is like a proxy, and the sql is not run until another property is accessed, e.g. `item.getName()` +- if for e.g. we try to access `item.getName()` after closing the persistence context, we will get a `LazyInitializationException` +- **refresh** - e.g. "someone else" makes changes to the database. we can cause hibernate to refetch our instance using `em.refresh(item)` +- one seemingly clever approach - override the equals method to use the database identifier for equality. disadvantages - + - multiple transient instances added to a set will coalesce into one, since all have their id set as null + - when we call save on transient instances in a set, since their id changes, their hash code changes, and therefore they break the collection +- solution - use a **business key** i.e. a combination of other attributes which make it unique +- therefore, **do not use the surrogate key for equals** - hibernate already uses it for its **first level of cache** as discussed earlier +- we can use the foreign entity association for equals and hash code - e.g. for the bid entity, the business key can be a combination of item and its amount. this might mean using the business key of the foreign entity association internally +- initial databases had **2 phase locking**, while modern databases have **mvcc** +- **mvcc** - **multi version concurrency control** - with this, the locking is reduced even further, so that - + - readers do not block writers + - writers do not block readers + - multiple writers can however still not access a record +- for this to work, **multiple versions** of the same record need to exist +- some common problems have been discussed now - +- **the lost update** problem - + - transaction one starts to add 10 to our balance + - so, transaction one reads the balance as 100 + - transaction two starts to add 20 to our balance + - so, transaction two also reads the balance as 100 + - transaction one commits 110 to the database + - transaction two commits 120 to the database + - so the final state is 120, which should have ideally been 130, i.e. 
the update of transaction one is lost +- **unrepeatable read** problem - + - transaction one tries finding current balance and reads 100 + - transaction two comes in, adds 10 and commits changes to database + - transaction one tries finding current balance again and reads 110 this time + - so, transaction one has read different values for the same row during its execution +- **phantom read** problem - + - transaction one tries generating a statement and finds 110 _transactions_ for the month of february + - transaction two comes in, adds 10 and commits changes to database + - transaction one tries generating a statement and finds 111 _transactions_ for the month of february + - so, transaction one has read different result sets for the same query during its execution + - my understanding - basically, it is like unrepeatable read, but instead of just the values, the amount of rows increase or decrease, so its due to insert or delete, unlike unrepeatable read which is due to update +- so, both jpa and sql have **isolation levels** (recall i of acid!). remember - as we increase isolation level, performance degrades. in multi user concurrent systems like today, we might have to sacrifice some amount of isolation for better performance and scalability. just remember the name, the definition will become obvious - + - **read uncommitted isolation** - all problems are allowed + - **read committed isolation** - dirty reads are not permitted + - **repeatable read isolation** - nothing is permitted except phantom reads + - **serializable isolation** - emulates serial execution i.e. transactions are executed one after another and not concurrently. none of the four problems are permitted. this relies on table locks and not just row level locks +- my understanding 😠 - despite what i wrote above, apparently, due to the change in industry standard from 2 phase locking to mvcc, at least in mysql, lost update is not prevented by an isolation level of repeatable read as well. it is prevented by serializable isolation level, which does not use mvcc at all, and uses 2 phase locking!! this is why, we should use `@Version` always, or at least that is what i understand from [this](https://stackoverflow.com/a/53564708/11885333) answer +- jpa uses the isolation level of database connection - most resolve to read committed, but mysql uses repeatable read +- however, recall how persistence context cache is used when we attempt to retrieve the same row twice. this means that while isolation level is read committed, we are effectively using repeatable read +- **optimistic concurrency control** - hibernate supports maintaining **version columns** for us automatically, using which ensures _first commit wins_ in case of parallel transactions. it is easy to use, so probably use it always +- note - use optimistic concurrency control only when it is acceptable to detect conflicts late in a unit of work. concurrent updates should not be a frequent scenario, otherwise a lot of cpu cycles would be wasted i.e. the computation would be performed and then the update would have to be rejected +- to enable versioning, we use `@Version` +- we should not have to set version manually, it should be handled by hibernate for us automatically - if hibernate feels that the entity has changed during dirty checking, it would automatically bump up the version number for us bts +- when updating, instead of the where clause having `where id = ?`, the where clause now has `where id = ? 
and version = ?` +- we can use int, short, long, and hibernate will wrap again from 0 if the version limit is reached +- `OptimisticLockException` is raised if version is changed by another concurrent transaction +- we might not like the extra version column. hibernate can use the timestamp fields like last modified by to help achieve optimistic locking + ```java + @Version + private LocalDateTime lastUpdatedAt; + ``` +- tip - due to jvms being possibly deployed on different operating systems, the time might not be guaranteed to be accurate / synchronized in all of them (clock skew). so, we can tell hibernate to ask the database for the timestamp. disadvantage - a database hit is required every time, just like when using auto incremented ids +- how i tested if optimistic locking is working in my application - try updating using same version twice - the second update should throw an exception. also note how i disable the transaction on the test method so that this test is executed "out of a transaction". finally, recall how exception would be wrapped by `ServletException` when using mock mvc + ```java + @Test + @SneakyThrows + @Transactional(propagation = Propagation.NOT_SUPPORTED) + void updateBeerByIdOptimisticLockingCheck() { + PersistentBeer beer = beerRepository.findAll().get(0); + BeerDto beerDto = beerMapper.map(beer); + + beerDto.setBeerName("updated beer name"); + mockMvc.perform(put("/api/v1/beer/{beerId}", beerDto.getId()) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(beerDto))) + .andExpect(status().isNoContent()); + + beerDto.setBeerName("updated beer name again"); + ServletException e = assertThrows( + ServletException.class, + () -> mockMvc.perform(put("/api/v1/beer/{beerId}", beerDto.getId()) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON) + .content(objectMapper.writeValueAsString(beerDto))) + .andExpect(status().is5xxServerError()) + ); + assertTrue(e.getMessage().contains("ObjectOptimisticLockingFailureException")); + } + ``` +- **optimistic lock mode** - imagine item to category is many to one. we have many categories and items, and we would like to find the sum of prices for all items for each category. however, when we were iterating through categories, midway through, category for an item was changed, thus making us consider an item into two (or maybe no) categories +- basically, we have the unrepeatable read problem (category_id of item has been updated). note - recall how we discussed that hibernate has default of read committed, and with the help of hibernate persistence context cache, it kind of becomes repeatable read. so, why do we still have the problem? in our case, a result set is returned for every query. so, while hibernate persistence context cache would contain the older version of the item, it would load this item in the result set. yes, the older version of the item is loaded but it can still happen that multiple result sets contain an item / no result sets contain an item +- so, we can set lock mode = optimistic. this way, after performing all the operations (during commit), **for each item that we loaded**, hibernate would rerun a select and match the version column. 
if it has changed, it would throw the `OptimisticLockException` + ```java + EntityManager em = emf.createEntityManager(); + em.getTransaction().begin(); + + for (Long categoryId : CATEGORIES) { + List items = em.createQuery("select i from Item i where i.category.id = :catId", Item.class) + .setLockMode(LockModeType.OPTIMISTIC) + .setParameter("catId", categoryId) + .getResultList(); + + for (Item item : items) + totalPrice = totalPrice.add(item.getBuyNowPrice()); + } + + em.getTransaction().commit(); + em.close(); + ``` +- my understanding - why do i even need `LockModeType.OPTIMISTIC` if i already added `@Version` - e.g. understand in above example that we actually never modified Items for our query! our problem was that items that we read were modified! by default, jpa will only perform version checking using `@Version` for updates (maybe deletes as well, not sure). here, we want it to perform the version checking for the items we selected as well! so, we use `LockModeType.OPTIMISTIC` +- of course, for `LockModeType.OPTIMISTIC` to work, we need to have a `@Version` column, otherwise what will it check! +- note - i think we can annotate jpa repository methods with `@Lock(LockModeType.OPTIMISTIC)` as well +- disadvantage of lock mode - if we use 100 locks, we will get 100 additional queries for checking the version as described earlier +- i think that the point is that while transaction's isolation applies to the whole unit of work, the lock would apply to particular operations inside that transaction +- **optimistic force increment lock mode** - another problem - e.g. we want to find an item's highest bid. while performing the calculation, someone concurrently added a new bid. so, essentially our highest bid might be wrong. this cannot be caught by adding a version to bid as well +- a trick to solve this - enforce that when the item is read, its version is incremented. this way, when there is a flush, it would be noticed that the item version had changed (because a new bid was added to it) + ```java + Item item = em.find(Item.class, itemId, LockModeType.OPTIMISTIC_FORCE_INCREMENT); + bid.setItem(item); + bidRepo.save(bid); + // saving bid increments item version as well + // even though item did not change (bid has item_id, bid to item is many to one) + ``` +- this is a common operation - forceful increment of a root instance when child data is modified +- another advantage of optimistic force increment lock mode - recall how in optimistic lock mode, the version checking happens and then the transaction is committed. it can happen that during this phase itself, there is an update to the database! this is what optimistic force increment lock mode helps solve - i think because the root item's version needs to be incremented, it needs to be locked, just "reading" the version is not enough +- **pessimistic locks** - optimistic locks (we discussed two of them above) are implemented by jpa / hibernate using the version column, but pessimistic locks take help of the actual underlying database locks +- the difference between optimistic locks and pessimistic locks - + - **optimistic locks use version checks in for e.g. where clause of dml statements**, e.g. update only when version = 2 + - **pessimistic locks use database locks**. they can be shared (read locks are usually shared) or exclusive (e.g. write locks are usually exclusive). e.g. of doing this in mysql etc is `select ... for update`. 
**the idea is the rows which match the select clause cannot be touched till the lock is released / update is over** +- **pessimistic force increment lock mode** - just the like its optimistic counterpart. the only difference is that here, we increment the version at the beginning of the transaction, and not at the end. we now have a db lock on that record as well till the transaction gets over, so concurrent transactions cannot write to that row. whether they can read or not depends on whether the database uses is 2 phase locking or mvcc. syntax - `LockMode.PESSIMISTIC_FORCE_INCREMENT` +- **pessimistic read** - acquire a read (recall how it is implemented as shared) lock +- **pessimistic write** - acquire a write (recall how it is implemented as exclusive) lock +- so five locks have been discussed - **optimistic**, **optimistic force increment**, **pessimistic force increment**, **pessimistic read**, **pessimistic write** +- deadlocks - deadlocks can happen easily in concurrent applications, e.g. one thread tries updating item 1 and then item 2, while another thread tries updating item 2 then item 1. thread 1 waits for lock 2, thread 2 waits for lock 1. "underlying dbms" have capabilities around realizing this and aborting one of the transactions +- one solution - set `hibernate.order_updates` property to true, so that updates are processed in order by all applications +- spring data jpa has an "implicit transactional context" that kicks in for the repository methods we call if there is no existing transaction. however, when we use for e.g. `@DataJpaTest`, it has its own `@Transactional`. so, behavior of test (using explicit transaction provided by the jpa test) might not be the same as the actual service layer code (using implicit transaction of repositories). so, we should try using explicit transactions as a best practice +- both spring and jakarta have the transactional annotations, i believe either can be used +- we can also use `@Transactional` on repository methods +- because of how spring proxies / aop works, `@Transactional` would not kick in when calling internal methods +- tests - annotate classes with `@DataJpaTest`, it does have its own `@Transactional`. reason for writing jpa tests - e.g. we use jpa's query dsl. while it does have compile time checking, we should assert the functionality of our query +- note - the `@DataJpaTest` annotation wasn't picking up the properties file, where i had configured h2 url and parameters like MODE=MYSQL (otherwise flyway migration scripts were failing). so, i had to add the below based on [this](https://stackoverflow.com/a/57345507/11885333) + ```java + @DataJpaTest + @AutoConfigureTestDatabase(replace = AutoConfigureTestDatabase.Replace.NONE) + ``` +- **if we annotate our test class with `@Transactional`, it rolls back the transaction at the end of each test method by default**. caveat - remember when using `RANDOM_PORT`, `DEFINED_PORT`, etc. a real servlet environment is used bts. thus, client and server run on different threads. therefore, only client side transaction is rolled back +- if a method in bean 1 calls a method in bean 2, which transaction is the method in bean 2 executed? this is defined via **transaction propagation** - + - **required** - if a transaction exists, the process is continued in that transaction. else, a new transaction is created + - **supports** - if a transaction exists, the process is continued in that transaction. 
else, no transaction is created + - **mandatory** - if a transaction exists, the process is continued in that transaction. else, `TransactionRequiredException` is thrown + - **requires new** - if a transaction exists, it is suspended and a new transaction is created. else, a new transaction is created + - **not supported** - if a transaction exists, it is suspended. else, no transaction is created + - **never** - if a transaction exists, `IllegalTransactionStateException` is thrown. else, no transaction is created + - **nested** - if a transaction exists, a sub transaction would be created. this means a **save point** is created and then the processes continues. if there is an error in the sub transaction, the changes would be rolled back up to the save point and then continued. if no transaction was present, a new transaction would be created +- optionally, we can specify `rollbackFor` to rollback the transaction for certain exceptions, or `noRollbackFor` to not rollback the transaction for certain exceptions +- inside `@Transactional` apart from propagation, isolation, (rollback for / no rollback for), etc. we can specify - + - **time out** - after this, the transaction will automatically rollback + - **read only** - marking transactions as read only allows jpa to make optimizations. so, remember parameters like this, `@Immutable`, etc +- using `@Transactional` is the **declarative**, preferred approach. we can use an imperative approach via `TransactionTemplate` + ```java + TransactionTemplate transactionTemplate = ...; + transactionTemplate.setIsolationLevel(...); + transactionTemplate.setPropagationBehavior(...); + transactionTemplate.execute((status) -> { + return ""; + }); + ``` +- we can load data by navigating the entity graph - `item.getSeller().getAddress().getCity()` - the focus of the next few points +- **fetch plan** - what to load +- **fetch strategy** - how to load +- **fetch profile** - store the fetch plan and fetch strategy as a fetch profile to reuse it later +- we define the default - **lazy** or **eager** in the domain models mapping +- we should try defaulting to lazy when possible, so that data is loaded on demand +- again, hibernate proxies are used to implement this functionality for us +- if for e.g. our entity is in detached state, we might get a `LazyInitializationException` when trying to access the lazily loaded fields +- my understanding - e.g. we want to find the size of a collection in one to many. if we run `item.getBids().size()`, i think the entire collection would be loaded due to the proxy nature. we can instead use `Hibernate.size(item.getBids())` to avoid this full query. this way, only the `count(*)` query would be run, and the `item.getBids()` still remains uninitialized. similarly, we have `Hibernate.contains` etc +- issues - + - lazy loading leads to **n + 1 selects problem** + - eager loading can lead to **cartesian product problem** +- we should avoid both extremes, and try finding a middle ground between both +- n + 1 selects problem - 1 query for fetching all items, then n queries for each item's seller + ```java + List items = em.createQuery("select i from Item i").getResultList(); + for (Item item : items) { + assertNotNull(item.getSeller.getUsername()); + } + ``` +- cartesian product problem - when we try eager loading of two collections with one sql query. e.g. an item has 3 images and 3 bids. it would result in an sql table with 9 rows. 
while it is automatically deduped for us if we use `Set`, this is not a desirable outcome, since a lot of duplicated rows are sent across the network from database to application. it is more performant to break the query into smaller individual parts
+- apart from the above problem, we can have a lot of nested eager fetch statements, e.g. item has bids, which can have seller, which can have address and so on. hibernate has a `hibernate.max_fetch_depth` property. my understanding - after this depth is reached, hibernate will start issuing individual select statements like in lazy loading. by default, there is no preset limit for this property, while sql dialects like mysql set it to 2 by default
+- **batch size** is one possible solution for the n + 1 selects problem. we annotate the User entity with `@BatchSize` like below -
+  ```java
+  @Entity
+  @BatchSize(size = 10)
+  public class User {
+  }
+  ```
+- refer the item example above, where each `item.getSeller().getUsername()` was resulting in a separate db call. with the current method, there would be a call like below - 10 user proxies would be initialized in one go -
+  ```sql
+  select * from users where id in (?, ?, ...)
+  ```
+- apparently, hibernate is more optimized than i thought it is! it will internally create several batch loaders, which i assume _hopefully_ run in parallel, i.e. if i specify batch size to be 32, and i have to load 31 items, there would be three fetches of sizes 16, 10 and 5, instead of one big fetch of 32. this behavior is configurable via `batch_fetch_style`
+- `@BatchSize` can also be set on collections -
+  ```java
+  @BatchSize(size = 10)
+  private Set<Bid> bids = new HashSet<>();
+  ```
+- **fetch mode - subselect** is another solution for the n + 1 selects problem. we annotate with `@Fetch` like below -
+  ```java
+  @Fetch(FetchMode.SUBSELECT)
+  private Set<Bid> bids = new HashSet<>();
+  ```
+- refer the item example above, but this time assume each `item.getBids()` was resulting in a separate db call. with the current method, there would be a call like below - fetch all bids for all items in one go -
+  ```sql
+  select * from bid where item_id in (
+    select id from item where id in (?, ?, ...)
+  )
+  ```
+- of course, such optimizations are restricted to a persistence context, because after that, probably hibernate discards the entities it stores in memory, and they are garbage collected
+- **fetch mode - select** is a solution for the cartesian product problem. we annotate with `@Fetch` like below -
+  ```java
+  @Fetch(FetchMode.SELECT)
+  private Set<Bid> bids = new HashSet<>();
+
+  @Fetch(FetchMode.SELECT)
+  private Set<Image> images = new HashSet<>();
+  ```
+- with the current method, there would be separate calls for bids and images
+- now, the cartesian product of course happens when setting fetch type as eager. since it is a global setting, it is not a recommended approach. 
the best approach is to dynamically fetch eagerly as and when needed +- dynamic eager fetching in jpql - `select i from Item i left join fetch i.bids` +- same support is present in criteria builder as well (not discussed) +- **fetch profiles** - global metadata, so while we can place it on a class, the best place for them is inside package-info.java + ```java + @FetchProfiles({ + @FetchProfile( + name = "fetch_bids", + fetchOverrides = @FetchProfile.FetchOverride( + entity = Item.class, + association = "bids", + mode = FetchMode.JOIN + ) + ), + @FetchProfile( + name = "fetch_images", + fetchOverrides = @FetchProfile.FetchOverride( + entity = Image.class, + association = "images", + mode = FetchMode.JOIN + ) + ) + }) + ``` +- since fetch profile is a hibernate specific feature, entity manager by itself is not enough for it. this technique of using unwrap to obtain a hibernate session from jpa entity manager is common - + ```java + em.unwrap(Session.class).enableFetchProfile("fetch_bids"); + Item item = em.find(Item.class, 123); + ``` +- jpa also has **entity graphs** for similar functionality +- **filtering data** - examples - + - when data is read from database by hibernate, restrict some data + - when data is written to database by hibernate, add some audit logs +- we can execute **side effects** using **event listeners**, which help hook into the lifecycle of hibernate +- `@PostPersist` - invoked after the entity is stored inside the database +- we can anotate any method with this, the class need not extend any special interface etc +- we can use the argument as `Object` to capture for all entities, or specify the type of the entity to capture it only for specific entities + ```java + public class PersistEntityListener { + + @PostPersist + public void logMessage(Object entityInstance) { + User currentUser = CurrentUser.INSTANCE.get(); + log.save("Entity instance persisted by " + + currentUser.getUsername() + + ": " + + entityInstance + ); + } + } + ``` +- we have many more annotations like `@PostPersist` for different points in the lifecycle +- for the entity listener above to work, the entity must be annotated with the right listeners - + ```java + @EntityListeners(PersistEntityListener.class) + @Entity + public class Item { + // ... + ``` +- we can also place it directly inside the entity itself, in which case the method will not have any arguments - we would use `this` instead + ```java + @Entity + public class User { + // ... + + + @PostPersist + public void logMessage() { + User currentUser = CurrentUser.INSTANCE.get(); + log.save("Entity instance persisted by " + + currentUser.getUsername() + + ": " + + this + ); + } + } + ``` +- this was all jpa i.e. annotations like `@PostPersist`, `@PreRemove`, etc. hibernate has an even more powerful api - **hibernate interceptors** (skipping for now since code is a bit more involved) +- envers - helps maintain multiple versions of the data +- we need to annotate entity we would like to audit using `@Audited`, and the properties we would like to skip for auditing using `@NotAudited` +- whenever we modify the data in some way, a new record is inserted in the **revinfo** table. this contains a **primary key (rev)** and a **timestamp**. use of timestamp - "give me a list of items as they were on last friday" +- now, each audited table will have a corresponding **foreign key (rev)** pointing to the revinfo table, and a **revtype** column which indicates whether the item was inserted, updated or deleted
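+- a minimal sketch of the envers annotations described above (the `internalNotes` field is just an illustrative placeholder, and the `hibernate-envers` dependency is assumed to be on the classpath) -
+  ```java
+  @Entity
+  @Audited                      // every insert / update / delete of an item is recorded
+  public class Item {
+
+    @Id
+    @GeneratedValue
+    private Long id;
+
+    private String name;
+
+    @NotAudited                 // changes to this field are not versioned
+    private String internalNotes;
+  }
+  ```
+- with this in place, hibernate maintains a separate audit table for the entity (suffixed with `_aud` by default), containing the **rev** foreign key and **revtype** column discussed above, as in the diagram below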
+ ![envers](/assets/img/spring/envers.png) + +## Spring Security + +- security is a non functional requirement i.e. it isn't a part of business concerns, but it is critical +- includes https, firewalls, and application security (the focus of spring security) +- when we add the spring security dependencies, we get a session based authenticated app by default, where the default user name is user and the password is printed in console +- why spring security - + - supports a lot of different mechanisms like basic username / password authentication, oauth, jwt, etc + - supports lot of features like path or method level security with authorization etc +- recall flow - user <-> servlet container <-> filters <-> dispatcher servlet <-> controller handler +- **spring security** adds a lot of its own **filters** as well +- spring security architecture - + - user sends their details + - spring security filters will populate the "authentication object" with the user auth details - in spring security, this "authentication object" is the standard responsible to hold details related to current user + - then, this "authentication object" is forwarded to "authentication manager" + - the "authentication manager" talks to different "authentication providers". it tries all the "authentication providers" our application has configured, and selects the one that is successful + - the "authentication provider" takes the "authentication object" populated with credentials as input, and returns the "authentication object" populated with principal, authorities, etc as output + - we can have different "authentication provider"s - like ldap, oauth, username and password, etc + - "authentication providers" can take help of classes like - + - "user details service" / "user details manager" (which can retrieve users from the given principal) + - note how the communication between "user details service" and "authentication provider" is using "user details" object, and not "authentication object" like the rest of the flow + - "password encoder" + - finally, the authentication object is stored in the "security context" +- diagram -
+ ![spring security architecture](/assets/img/spring/spring-security-architecture.drawio.png) +- some concrete implementations of classes discussed above - no need to remember these, this just validates our understanding of the above diagram + - `UsernamePasswordAuthenticationToken` is an implementation of the `Authentication` object + - `ProviderManager` is an implementation of `AuthenticationManager` + - `DaoAuthenticationProvider` is an implementation of `AuthenticationProvider` + - `InMemoryUserDetailsManager` is an implementation of `UserDetailsManager` + - `User` is an implementation of `UserDetails` +- by default, the following `SecurityFilterChain` is configured for us, visible inside `SpringBootWebSecurityConfiguration` + ```java + @Bean + @Order(SecurityProperties.BASIC_AUTH_ORDER) + SecurityFilterChain defaultSecurityFilterChain(HttpSecurity http) throws Exception { + http.authorizeHttpRequests((requests) -> requests.anyRequest().authenticated()); + http.formLogin(withDefaults()); + http.httpBasic(withDefaults()); + return http.build(); + } + ``` +- this says - + - any request should be authenticated + - for ui as in when hitting endpoints from browser, show the basic form + - when hitting endpoints from postman etc, use basic authentication +- when we specify our own `SecurityFilterChain`, this bean would not be used +- for e.g. protecting all paths except some - + ```java + @Bean + public SecurityFilterChain securityFilterChain(HttpSecurity http) throws Exception { + http.authorizeHttpRequests((requests) -> requests + .requestMatchers("/notices", "/contact").permitAll() + .requestMatchers("/**").authenticated() + ); + http.formLogin(Customizer.withDefaults()); + http.httpBasic(Customizer.withDefaults()); + return http.build(); + } + ``` +- recall how authentication providers use `UserDetailsManager`. there are multiple implementations of `UserDetailsManager` like - + - `InMemoryUserDetailsManager` + - `JdbcUserDetailsManager` + - `LdapUserDetailsManager` +- all the `UserDetailsManager` implementations we discussed deal with the `UserDetails` object, which has functionality for getting authorities, username, password, etc +- recall we discussed that we use `Authentication` for communication between spring security classes. so, since the `UserDetailsManager` deals with `UserDetails`, the `AuthenticationProvider` converts the `UserDetails` object into `Authentication` object +- one of the `UserDetailsManager` implementations is `JdbcUserDetailsManager`. it expects tables to be present in a certain way e.g. tables for users, groups, authorities, etc. e.g. [refer the ddl here](https://docs.spring.io/spring-security/reference/servlet/authentication/passwords/jdbc.html) +- then, after ensuring the database has these tables, we can add a few records to the users and authorities tables +- then, we just add spring-data-jpa and correct driver for the database connection to the dependencies +- finally add the bean below - + ```java + @Bean + public UserDetailsManager userDetailsManager(DataSource dataSource) { + return new JdbcUserDetailsManager(dataSource); + } + ``` +- what if JdbcUserDetailsManager is not good for us due to the schema rigidity, and we want something custom, we can implement our own `UserDetailsService`. what is `UserDetailsService` 😫 - it is `UserDetailsManager` with only `loadByUsername`. 
our goal is to map the user representation in our system (customer in this case) that our data source understands to `UserDetails` object, which is implemented by `User` + ```java + @Bean + public UserDetailsService userDetailsService() { + return (username) -> customerDao.findByEmail(username) + .map(customer -> new User( + customer.getEmail(), // username + customer.getPassword(), // password + List.of(new SimpleGrantedAuthority(customer.getRole())) // authorities + )) + .orElseThrow(() -> new UsernameNotFoundException("customer with email " + username + " not found")); + } + + @Bean + public PasswordEncoder passwordEncoder() { + return new BCryptPasswordEncoder(); + } + ``` +- notice how with so less lines of code, we have a custom authentication + authorization built! - all we did was + - specify the `UserDetailsManager` slice to use via `UserDetailsService` + - the password encoder to use + - authenticate endpoints using a bean of `SecurityFilterChain` +- why did we not have to do any password validation? because `AuthenticationProvider` (concrete implementation is `DaoAuthenticationProvider`) does it for us automatically based on the password encoder we configure! remember, we configured user details manager, not authentication provider +- password encoder - + - encoding - e.g. base64. an algorithm is used to encode. this doesn't involve any secret. we can usually use decoding to retrieve the actual value. so, it is not ideal for passwords + - encryption - a secret key is used, so it is more secure than encoding. however, we can still use decryption to get back the original value, if the secret is leaked + - hashing (1 way) - e.g. bcrypt. use a function to obtain a hash value. it is not reversible, so it is very secure. to validate, we pass the input and **match it** with the stored hashed value. now what does match it actually mean - + - every time the hash is generated for the **same input**, the output is different! this way, if two users have the same password, the same representation is **not** stored inside the database, thus making it even more secure. the hashing algorithm knows if the raw input **matches** the stored hash value +- since i used the bcrypt password encoder, the stored value looks like this - `$2a$10$aj6zt3F9zLr9U39kwVUCxusnd.DvqakuP9/lxp8n8yFHnKrOvIuIK`. here, the beginning i.e. $2a gives the version of bcrypt used, and after that, $10 gives the number of rounds used +- for brcypt (or generally any hashing algorithm?) 
we can configure - + - **strength** + - **number of rounds** + - **salt** +- a simple registration process based on the `UserDetailsService` and `AuthenticationProvider` we configured above - + ```java + @PostMapping("/register") + @ResponseStatus(HttpStatus.CREATED) + public void registerUser(@RequestBody PersistentCustomer customer) { + customerDao.findByEmail(customer.getEmail()).ifPresent((existing) -> { + throw new RuntimeException("customer with email " + existing.getEmail() + " already exists"); + }); + customer.setPassword(passwordEncoder.encode(customer.getPassword())); + customerDao.save(customer); + } + ``` +- if we wanted more customization, **instead of** providing implementation of `UserDetailsManager` via `UserDetailsService#loadByUsername`, we can provide a bean of `AuthenticationProvider` +- understand how based on flow diagram we saw, unlike returning `UserDetails` object via concrete implementation `User`, we now have to return `Authentication` object via concrete implementation `UsernamePasswordAuthenticationToken` + ```java + @Component + @RequiredArgsConstructor + public class CustomAuthenticationProvider implements AuthenticationProvider { + + private final CustomerDao customerDao; + + private final PasswordEncoder passwordEncoder; + + @Override + public Authentication authenticate(Authentication authentication) throws AuthenticationException { + PersistentCustomer customer = customerDao.findByEmail(authentication.getName()) + .orElseThrow(() -> new BadCredentialsException("customer with email " + authentication.getName() + " does not exist")); + if (!passwordEncoder.matches(authentication.getCredentials().toString(), customer.getPassword())) { + throw new BadCredentialsException("passwords do not match for customer with email " + authentication.getName()); + } + return new UsernamePasswordAuthenticationToken( + customer.getEmail(), + customer.getPassword(), + List.of(new SimpleGrantedAuthority(customer.getRole())) + ); + } + + @Override + public boolean supports(Class authentication) { + return (UsernamePasswordAuthenticationToken.class.isAssignableFrom(authentication)); + } + } + ``` +- cors - cross origin resource sharing +- origin = protocol (http) + domain + port +- communication is stopped across origins **by browsers** to prevent security issues +- so, for e.g. a different website cannot use our api unless our apis allow this website's domain explicitly +- browsers make a **preflight request** - the request is made by the browser, to which the backend responds with what methods and endpoints are allowed +- we can either configure cors using `@CrossOrigin(domain)` on a per controller basis (usually not ideal), or use the below - + ```java + // configure the SecurityFilterChain bean like so + http.cors(Customizer.withDefaults()); + + @Bean + public CorsConfigurationSource corsConfigurationSource() { + CorsConfiguration configuration = new CorsConfiguration(); + configuration.setAllowedOrigins(List.of("http://localhost:4200/")); + configuration.setAllowedMethods(List.of("*")); + configuration.setAllowedHeaders(List.of("*")); + configuration.setAllowCredentials(true); + UrlBasedCorsConfigurationSource source = new UrlBasedCorsConfigurationSource(); + source.registerCorsConfiguration("/**", configuration); + return source; + } + ``` +- something i didn't know - for e.g. recall the action method on forms? from my understanding, this is not protected by cors, i.e. 
if a website evil.com has its action set to netflix.com, even if netflix configures cors correctly, this form action would go through! this concept is important in csrf discussed below +- also my understanding of where csrf might be important - cors depends on browser the client uses, what if the client uses a browser that does not have cors functionality? +- csrf - security vulnerability (unlike cors, which is a guard rail provided by browsers) +- csrf - cross site request forgery +- example - + - we log into netflix.com, and netflix stores a cookie in our browser - recall how cookies are scoped to a domain + - assume we click on a malicious link, which actually makes a put api call to netflix.com, to for e.g. change the password of the current user + - since netflix had already stored a cookie in our browser, the request goes through, and netflix thinks it is a request from a legitimate user, and the password of our account is changed easily! +- solution - a **secure random csrf token** is generated, which is **unique per session** +- so, assume with csrf implemented correctly, our ui receives a csrf token inside a cookie / response header, etc along with a separate cookie for authentication +- for further requests, we forward this csrf token inside the request header / request body along with the authentication cookie. do not send csrf token as a cookie, since then we are back to the same problem as authentication cookie! we can receive the csrf token as a cookie, but then we need to parse it and send it as a request body / header. this parsing cannot be done by evil.com, since it is a different domain, so it does not have access to cookies +- disabling csrf - `http.csrf(csrf -> csrf.disable());` / `http.csrf(AbstractHttpConfigurer::disable);` +- configuring csrf correctly - we can use `CookieCsrfTokenRepository`, which writes the csrf token to a cookie named `XSRF-TOKEN` and reads it from an http request header named `X-XSRF-TOKEN` or the request parameter `_csrf` +- [this documentation](https://docs.spring.io/spring-security/reference/servlet/exploits/csrf.html) seems to have a good explanation for csrf, skipping for now +- my doubt - if we for e.g. send jwt not as a cookie but as a header, wouldn't we automatically be protected by csrf? 
because the malicious website cannot "parse" or "access" the jwt, just like it cannot access or parse the csrf cookie +- authentication error - 401, authorization error - 403 +- authentication happens before authorization +- authorities are stored via interface `GrantedAuthority` and concrete implementation `SimpleGrantedAuthority` +- these authorities are available on both `UserDetails` (used between `UserDetailsManager` and `AuthenticationProvider`) and `Authentication` object (used between `AuthenticationProvider` and `AuthenticationManager`) +- code example - + ```java + http.authorizeHttpRequests((requests) -> requests + .requestMatchers("/myAccount").hasAuthority("view_account") + .requestMatchers("/myBalance").hasAnyAuthority("view_account", "view_balance") + .requestMatchers("/user").authenticated() + .requestMatchers("/contact").permitAll() + ); + ``` +- like authority, we have hasRole and hasAnyRole as well +- my understanding - spring requires that roles have the `ROLE_` prefix + - so when using hasRole etc, do not specify the `ROLE_` prefix + ```java + .requestMatchers("/myBalance").hasAnyRole("user", "admin") + .requestMatchers("/myLoans").hasRole("user") + ``` + - either save to the database with the `ROLE_` prefix, or when mapping to `GrantedAuthority` inside `UserDetailsService`, add the `ROLE_` prefix (internally, our schema stores one to many for `PersistentCustomer` and `PersistentAuthority`) + ```java + @Entity + @Data + @AllArgsConstructor + @NoArgsConstructor + @Table(name = "authorities") + public class PersistentAuthority { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Integer id; + + private String name; + + @ManyToOne + @JoinColumn(name = "customer_id") + private PersistentCustomer customer; + + public GrantedAuthority map() { + return new SimpleGrantedAuthority("ROLE_" + name); + } + } + ``` +- authority - individual actions like "view account", "view balance", etc +- role - group of authorities +- one practice used at my firm - + - think of privilege as action + resource combination - "view balance", "view card", etc - these map to authorities + - different roles have different authorities - admins and ops can have "edit card", all users will have "view account" etc + - allow assigning multiple roles to users +- **filters** - we can write our own filters and inject them into the spring security flow +- **filter chain** - represents a collection of filters which have to be executed in a defined order +- so, on `HttpSecurity http`, we can call `http.addFilterBefore`, `http.addFilterAfter` and `http.addFilterAt` + ```java + @Slf4j + public class UserLoggingFilter implements Filter { + + @Override + public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws IOException, ServletException { + + // typically this typecasting might be needed, not used here though + HttpServletRequest request = (HttpServletRequest) servletRequest; + HttpServletResponse response = (HttpServletResponse) servletResponse; + + Authentication authentication = SecurityContextHolder.getContext().getAuthentication(); + if (authentication != null) { + log.info("user {} with authorities {} has logged in", authentication.getName(), authentication.getAuthorities()); + } + + filterChain.doFilter(servletRequest, servletResponse); + } + } + + http.addFilterAfter(new UserLoggingFilter(), BasicAuthenticationFilter.class); + ``` +- we implemented `Filter` above. 
we can **instead** use -
+  - `GenericFilterBean` - has access to a lot of other things like context, environment, etc
+  - `OncePerRequestFilter` - to ensure that the filter is executed only once, even if it is invoked multiple times by the underlying logic
+- tokens - when clients log in successfully, they are returned a token from the backend. the clients should then attach this token to every request to access protected resources
+- advantages of using tokens -
+  - we do not share our credentials for every request every time like in for e.g. basic auth, we just pass around the token every time
+  - if tokens are compromised we can easily regenerate them. credentials cannot be changed easily for every user
+  - tokens can have an expiry attached to them, post which they have to be regenerated
+  - tokens allow storing of other user related information like name, email, roles, etc. this way, the backend can simply use these without having to "fetch" this information every time
+  - we can reuse tokens for different kinds of applications like maps, email, etc
+  - statelessness - for horizontally scaled applications, since it doesn't need sessions
+- jwt tokens - they have the format `<header>.<payload>.<signature>`
+- header - metadata like the algorithm used for generating the token, e.g. hs256 (hmac-sha256). it is in base64 encoded format
+- payload - name, email, roles, who issued the token, expiry, etc. it is also in base64 encoded format
+- e.g. someone can easily decode the payload using base64 and add a role to it and encode it back again using base64. solution - signature
+- signature - a digital signature for tokens. it helps ensure that the token has not been tampered with
+- the algorithm in the header is used to generate this signature - `hmacsha256(base64(header) + '.' + base64(payload), secret)`. the secret here is only known to the backend
+- on receiving the token, the backend can recompute the signature using the provided header and payload. if the signatures do not match, the backend can conclude that the token is invalid
+- try to compare how jwt matches all the advantages we had mentioned for using tokens
+- add [these](https://github.com/jwtk/jjwt#maven) maven dependencies -
+  ```xml
+  <dependency>
+    <groupId>io.jsonwebtoken</groupId>
+    <artifactId>jjwt-api</artifactId>
+    <version>${jjwt.version}</version>
+  </dependency>
+
+  <dependency>
+    <groupId>io.jsonwebtoken</groupId>
+    <artifactId>jjwt-impl</artifactId>
+    <version>${jjwt.version}</version>
+    <scope>runtime</scope>
+  </dependency>
+
+  <dependency>
+    <groupId>io.jsonwebtoken</groupId>
+    <artifactId>jjwt-jackson</artifactId>
+    <version>${jjwt.version}</version>
+    <scope>runtime</scope>
+  </dependency>
+  ```
+- disable spring security's session creation
+  ```java
+  http.sessionManagement(session -> session.sessionCreationPolicy(SessionCreationPolicy.STATELESS));
+  ```
+- we generate the jwt using a `OncePerRequestFilter`. notes -
+  - we should do this when we can be sure that the authentication is successful, so we use `addFilterAfter`
+  - using `shouldNotFilter`, we ensure that this token is generated only when the user logs in, which happens using the /user path
+
+  ```java
+  // secret can come from application.properties
+  http.addFilterAfter(new JWTTokenGeneratorFilter(secret), BasicAuthenticationFilter.class);
+
+  @RequiredArgsConstructor
+  public class JWTTokenGeneratorFilter extends OncePerRequestFilter {
+
+    private final String secret;
+
+    @Override
+    protected void doFilterInternal(HttpServletRequest request, HttpServletResponse response, FilterChain filterChain) throws ServletException, IOException {
+
+      Authentication authentication = SecurityContextHolder.getContext().getAuthentication();
+
+      if (authentication != null) {
+
+        SecretKey key = Keys.hmacShaKeyFor(secret.getBytes(StandardCharsets.UTF_8));
+
+        String serializedAuthorities = authentication
+          .getAuthorities()
+          .stream()
+          .map(GrantedAuthority::getAuthority)
+          .collect(Collectors.joining(","));
+
+        String jwt = Jwts.builder()
+          .claim("username", authentication.getName())
+          .claim("authorities", serializedAuthorities)
+          .issuedAt(new Date())
+          .expiration(new Date(new Date().getTime() + (24 * 60 * 60 * 1000)))
+          .signWith(key)
+          .compact();
+
+        response.setHeader(HttpHeaders.AUTHORIZATION, jwt);
+      }
+
+      // continue with the rest of the filter chain
+      filterChain.doFilter(request, response);
+    }
+
+    @Override
+    protected boolean shouldNotFilter(HttpServletRequest request) throws ServletException {
+      return !request.getServletPath().equals("/user");
+    }
+  }
+  ```
+- verifying the token - this time, we use `addFilterBefore` and also invert the condition inside `shouldNotFilter`
+  ```java
+  http.addFilterBefore(new JWTTokenValidatorFilter(secret), BasicAuthenticationFilter.class);
+
+  @RequiredArgsConstructor
+  public class JWTTokenValidatorFilter extends OncePerRequestFilter {
+
+    private final String secret;
+
+    @Override
+    protected void doFilterInternal(HttpServletRequest request, HttpServletResponse response, FilterChain filterChain) throws ServletException, IOException {
+
+      String jwt = request.getHeader(HttpHeaders.AUTHORIZATION);
+
+ if (jwt != null) { + try { + SecretKey key = Keys.hmacShaKeyFor(secret.getBytes(StandardCharsets.UTF_8)); + + Claims payload = Jwts.parser() + .verifyWith(key) + .build() + .parseSignedClaims(jwt) + .getPayload(); + + Authentication authentication = new UsernamePasswordAuthenticationToken( + payload.get("username"), + null, + AuthorityUtils.commaSeparatedStringToAuthorityList(payload.get("authorities", String.class)) + ); + + SecurityContextHolder.getContext().setAuthentication(authentication); + } catch (Exception e) { + throw new BadCredentialsException("invalid token received"); + } + } + } + + @Override + protected boolean shouldNotFilter(HttpServletRequest request) throws ServletException { + return request.getServletPath().equals("/user"); + } + } + ``` +- method level security - add `@EnableMethodSecurity` on any `@Configuration` / `@SpringBootApplication` class + ```java + @Configuration + @RequiredArgsConstructor + @EnableMethodSecurity + public class SecurityConfig { + ``` +- in the pre and post annotations, we can also use spel (spring expression language) +- `@PreAuthorize` - decide if a user is authorized to call a method before actually invoking the method + ```java + @PreAuthorize("hasAnyRole('user', 'admin')") + @PreAuthorize("hasAuthority('view_details')") + @PreAuthorize("#username == authentication.principal.username") + public void preAuthorizeExample(String username) { + } + ``` +- for complex requirements - we can call custom methods, methods on beans, etc afaik from inside these annotations. then we can for e.g. pass the authentication object from inside the annotation to these methods as well +- `@PostAuthorize` - would not stop the method from being executed, but would run after the invocation +- spring aop is used for implementing these annotations bts +- `@PreFilter` and `@PostFilter` - works on objects of type collections. helps filter inputs / outputs. i don't see its use case as of now + +### OAuth + +- oauth2 - authentication and authorization standard +- e.g. there is a third party app called tweet analyzer that uses tweet data to show analytics to a front user +- option 1 - we give tweet analyzer our credentials. this option is insecure, since - we share our credentials with a third party app, thus we compromise our credentials. tweet analyzer can now do everything that we as an account owner can do, e.g. create tweets, i.e. there is no restricted access +- option 2 - twitter gives temporary access to tweet analyzer app +- oauth2 is a specification / protocol, which we need to implement +- it has various "grant types" - "authorization code" and "client credentials" are the two most important grant type flows for now +- "resource owner" - the end user i.e. us +- end users own "resources", e.g. *tweets* in our case +- "client" - *tweet analyzer* in our case. it is the third party application trying to get restricted access to "resource" +- "authorization server" - "resource owners" should have an account inside this "authorization server" +- "resource server" - the application which maintains the "resources", e.g. *twitter* in our case +- sometimes "resource server" and "authorization server" can be clubbed into one +- "scopes" - the granular permission that the "client" wants, that "authorization server" gives +- general flow - + - first, the tweet analyzer apps needs to register itself with twitter. 
this gives them the "client credentials" - the "client id" and the "client secret" + - the resource owners then tries logging in with twitter and not our application + - the resource owners are prompted to provide consent to the client to perform the actions specified via scopes on the resources + - if the resource owners consent to this, the client is provided with an "access token" and a "refresh token" to issue api calls to twitter on behalf of the client +- "authorization code" grant type flow - + - resource owner goes to client + - client redirects to authorization server with - + - client id - helps authorization server identify the client + - scope + - redirect uri - where the authentication server redirects post successful authentication + - response type - code to specify that it is an authorization code type flow + - state - help with csrf like attacks + - resource owners enter their credentials here + - assume successful authentication here + - client receives an authorization code + - client goes to authorization server with - + - authorization code + - client id + - client secret - to prove itself + - grant type - specify that it is an authorization code grant type flow + - redirect uri + - client gets an access token for the resource owner from the authorization server + - client can then use this access token to make requests to the resource server on behalf of resource owner +- this architecture of first getting an authorization code and then getting the access token helps with better security. the first step helps with verifying the resource owner and the second step with verifying the client +- "implicit" grant type flow (deprecated, removed from oauth 2.1) - + - resource owner goes to client + - client redirects to authorization server with - + - client id - helps authorization server identify the client + - scope + - redirect uri - where the authentication server redirects post successful authentication + - response type - specify that it is an authorization code grant type flow + - state - help with csrf like attacks + - resource owners enter their credentials here + - assume successful authentication here + - client receives the access token directly i.e. the entire flow authorization code onwards is skipped +- so, the obvious drawback - the client itself is not verified. anyone can mimic the url where they redirect to authorization server. remember it does not have the client secret, only the client id. and in exchange, they can directly obtain the correct access code +- "password" grant type flow (deprecated, removed from oauth 2.1) - + - resource owner goes to client + - resource owner "directly gives" the credentials to the client + - client goes to the authorization server with - + - resource owner's actual credentials + - client id + - client gets an access token for the resource owner from the authorization server +- so, the obvious drawback is compromise of credentials +- use case of client credentials grant type flow - no resource owner is involved, so useful for service to service communication. 
organization a (client) interacts with organization b (resource server and authorization server)
+- "client credentials" grant type flow -
+  - client sends request to authorization server with -
+    - client id
+    - client secret
+    - grant type - specify that it is a client credentials grant type flow
+    - scope
+  - client gets back access token
+  - client uses this access token to request resource server
+- "refresh token" helps avoid resource owners having to initiate the entire login flow again after the access token expires
+  - client sends request to resource server with expired access token, hence gets a 401
+  - client sends request to authorization server with -
+    - client id
+    - client secret
+    - grant type - specify it is refresh token type flow
+    - scope
+    - refresh token
+  - client receives back a fresh access and refresh token
+  - the client can use this new access token now to make requests to the resource server
+- my understanding - refresh tokens do not typically have an expiration, but can have one
+- also, refresh tokens can be "rolling" i.e. they are single use and should be replaced with the new refresh token received every time a request for a fresh access token is made
+- how can a resource server verify the access token provided by the client? - three options -
+  - api interaction between authorization server and resource server. drawback - an additional api call from resource server to authorization server every time
+  - both authorization server and resource server can have access to the same shared storage. drawback - shared storage
+  - recommended - when the resource server boots up, it gets a public certificate from the authorization server. this public certificate is used to validate that the access token has not been tampered with. also called "jwk endpoint"
+- oidc - openid connect - oauth helped with authorization. by adding openid on top of it, we can use it for authentication as well
+- a specific scope called "openid" is added to the list of scopes to get the identity details of the resource owner
+- this way, we additionally get an id token along with access and refresh tokens
+- the id token is in the form of jwt
+- unlike the access token, the id token contains things like user name, email, etc - this is what helps with authentication
+- add the below dependency -
+  ```xml
+  <dependency>
+    <groupId>org.springframework.boot</groupId>
+    <artifactId>spring-boot-starter-oauth2-resource-server</artifactId>
+  </dependency>
+  ```
+- **specify conversion logic for jwt to GrantedAuthority** -
+  ```java
+  import lombok.NonNull;
+  import org.springframework.core.convert.converter.Converter;
+  import org.springframework.security.core.GrantedAuthority;
+  import org.springframework.security.core.authority.SimpleGrantedAuthority;
+  import org.springframework.security.oauth2.jwt.Jwt;
+
+  import java.util.Collection;
+  import java.util.List;
+  import java.util.Map;
+  import java.util.stream.Collectors;
+
+  public class JWTToGrantedAuthorityConverter implements Converter<Jwt, Collection<GrantedAuthority>> {
+
+    @Override
+    public Collection<GrantedAuthority> convert(@NonNull Jwt source) {
+      Map<String, List<String>> realmAccess = source.getClaim("realm_access");
+      if (realmAccess == null || realmAccess.isEmpty()) {
+        return List.of();
+      }
+      return realmAccess.get("roles").stream()
+        .map(role -> "ROLE_" + role)
+        .map(SimpleGrantedAuthority::new)
+        .collect(Collectors.toList());
+    }
+  }
+
+  // ...
+ JwtAuthenticationConverter jwtAuthenticationConverter = new JwtAuthenticationConverter(); + jwtAuthenticationConverter.setJwtGrantedAuthoritiesConverter(new JWTToGrantedAuthorityConverter()); + http.oauth2ResourceServer(configurer -> configurer.jwt( + jwtConfigurer -> jwtConfigurer.jwtAuthenticationConverter(jwtAuthenticationConverter) + )); + ``` +- recall - specify the path that our application should use to download the public certificate, to help it verify the access and id tokens - + ``` + spring.security.oauth2.resourceserver.jwt.jwk-set-uri=http://localhost:8080/realms/client-credentials-demo/protocol/openid-connect/certs + ``` +- so, two things are being done by our resource server - + - it is verifying the access token using the certificate + - it is parsing the token to get user roles, and this is possible because the token is in jwt format. recall how payload and header are just base64 encoded +- so, now assuming we are doing client credentials flow, we can try getting access token from postman i.e. postman here can act like the client application. it is the `token_endpoint` property in the config url earlier + ``` + url - http://localhost:8080/realms/spring-6-security/protocol/openid-connect/token + + x-www-form-url-encoded + grant_type - client_credentials + scope - openid + client_id - ... + client_secret - ... + ``` +- we can see the decoded and parsed version of access token and id token obtained above in the website [jwt.io](https://jwt.io/). it would explain why the convertor maps roles using `realm_access.roles` +- add the token using `Authorization: Bearer <>` +- authorization code grant type flow by itself would only work when we use jsp, thymeleaf, etc i.e. server side templating languages +- however, we cannot hide the client secret in spa applications, since the entire source code is accessible from the browser +- so, we use pkce - proof key for code exchange +- so, the client generates + - "code verifier" - a random cryptic string + - "code challenge" - base64(sha256(code verifier)) +- the ui first when asking for the authorization code in the "authorization code" grant type flow sends the code challenge +- bts, the authorization server stores this code challenge, and returns the authorization code +- the ui then sends a request for an access token. this request unlike in the regular "authorization code" grant type flow which includes the client secret, includes the code verifier +- the authorization server then compares this code verifier with the code challenge which it had stored +- if the values match, the authorization server returns the right tokens +- so my understanding - "authorization code" grant type flow is almost same as "pkce", except + - the first request from the client includes the code challenge + - the second request from the client does not include the client secret, but includes the code verifier +- so, with a mitm kind of attack - if someone gets access to authorization code, it is not enough, they need the code verifier as well, and they cannot predict code verifier from the code challenge, since it is encrypted using sha256 +- so, in oauth2.1, they have started clubbing authorization code + pkce grant types together +- my understanding - if someone gains access to our client id, why cant they self generate the code verifier and code challenge and ask for a new access token? they can, but the redirect uri might help us here by redirecting to our own website! 
(note - there is some component of specifying valid redirect uris when registering clients with the authorization server) +- now is there not a second issue above - redirecting to a legitimate app from an illegitimate app? - solved by the "state" parameter, which helped us with csrf attacks diff --git a/_posts/2023-07-23-relational-databases.md b/_posts/2023-07-23-relational-databases.md new file mode 100644 index 0000000..447ab40 --- /dev/null +++ b/_posts/2023-07-23-relational-databases.md @@ -0,0 +1,565 @@ +--- +title: Relational Databases +--- + +## Downsides of File Based Systems + +- data redundancy - data repeated at different places +- data inconsistency - data update at one place might not be reflected at another place +- difficult data access - searching through records can be difficult +- security problems - granular control to allow access to databases +- difficult concurrent access - erroneous updates if people try editing files simultaneously, file locks allow only one person to edit files at a time +- integrity constraints - we can't enforce constraints like ensuring a specific data type for an attribute +- databases backup and recovery features are less efficient + +## Entity Relationship Data Model + +- er model is a high-level conceptual data model +- they are used in documentations via er diagrams +- entity - an object like a particular employee or project e.g. an employee jack +- entity type - type of the entity e.g. Employee +- entity set - group of all entities (not entity types) +- attribute - an entity has attributes like age, name +- an entity type is represented as a rectangle +- an attribute is represented as an oval. it can be of following types - + - simple attribute + - composite attribute - composed of multiple attributes e.g. name from first name and last name. it is represented as a tree of ovals + - multivalued attribute - can take an array of values e.g. phone number. the oval has a double outline + - derived attribute - calculated from other attributes e.g. age from birthdate. the oval has a dotted outline +- key attribute - has a value which is distinct for each entity, also called primary key e.g. ssn (social security number) of an employee. represented by an underline on the attribute +- composite key - multiple keys combine to uniquely identify an entity. e.g. vin (vehicle identification number) using state and a number. represent as a composite attribute and underline the key attribute as well +- natural key - use an attribute to uniquely identify an entity. e.g. isbn of book +- relationship - an association between two entities e.g. jack works on project xyz +- relationship type - type of relation e.g. works_on +- relationship set - group of all relationships (not relationship types), just like entity set +- a relationship type is represented as a diamond +- degree - defined on a relationship type, it represents the number of participating entities. it can be of the following types - + - **unary** (recursive) - an entity type is linked to itself, e.g. an employee supervises another employee + - **binary** - two entity types are linked, e.g. employee works on a project + - **ternary** - three entity types are linked, e.g. supplier supplies parts to project +- binary relationship constraints - + - cardinality - represent by writing 1 / N on the arrow + - **one to one** - an entity in set a can be associated to at most one entity in set b and vice versa as well e.g. 
an employee manages a department + - **one to many** - an entity in set a can be associated to many entities in set b but an entity in set b can be associated to at most one entity in set a e.g. employees are a part of a department + - **many to many** - an entity in set a can be associated to many entities in set b and vice versa e.g. employees work on a project + - participation - + - **total participation** - each entity must participate at least once in the relation, e.g. in employees working on a project, a project has total participation, represented as a double line + - **partial participation** - an entity need not participate in the relation, e.g. in employees working on a project, an employee has partial participation (e.g. hr), represented as a single line +- attributes on relation types - unless cardinality is many to many, since a table is created for many to many, we should try and move attributes of relationships to one of the tables +- weak entity - they cannot exist independently e.g. a course cannot exist without a program. they don't have key attributes (look above) of their own. they are identified via their owner or identifying entity type, and the relation between the weak and identifying entity is called identifying relationship. the attribute which helps in differentiating between the different weak entities of an identifying entity is called a **partial key**. e.g. dependents of an employee. weak entity is represented as a double line for the rectangle and identifying relationship is represented as a double line for the diamond. partial key is represented as a dotted underline. weak entity should of course, have a total participation +- strong entity - have their own key attributes + +## ER Diagram Example + +- entities - + - students have a name, a student identifier, one or more contact numbers + - programs have a name, a program identifier + - courses have a name, a course identifier +- relationships - + - student takes up one or more courses + - student must enroll in a program + - program contains courses + +![er diagram example](/assets/img/relational-databases/er-diagram-example.drawio.png) + +## Relational Model + +- relation - collection of related data, represented as a table +- tuple - also called records, represented as a row, an instance of the type of object stored in the table +- attribute - represented as a column, describe the record +- relation schema - relation name with its attributes' names e.g. employee(id, name, phone number) +- database schema - combination of all relation schemas +- database instance - information stored in a database at a particular time +- domain - set of acceptable values an attribute can contain +- in a relation, sequence of rows and columns are insignificant +- keys - we need keys to fetch tuples easily and to establish a connection across relations +- different types of keys are - + - **super key** - set of attributes that can uniquely identify any row. super key is like a power set. e.g. in employee, (id), (phone), (id, name), (name, phone), (id, phone), (id, name, phone) are all super keys + - **candidate key** - minimal set of attributes that can uniquely identify any row e.g. id, phone number. (id, name) is not a candidate key as id itself can uniquely identify any row + - **primary key** - one out of all the candidate keys is chosen as the primary key e.g. id of employee + - **composite key** - candidate keys that have two or more attributes e.g. 
vehicle(state, number) + - **alternate key** - any candidate key not selected as the primary key + - **foreign key** - the primary key of a relation when used in another relation is called a foreign key. it helps in connecting the two relations, the referencing and referenced relation +- **integrity constraints** - to maintain the integrity of database i.e. maintain quality of information as crud keeps happening, following rules are present - + - **domain constraint** - each value of an attribute must be within the domain + - **entity constraint** - all relations must have primary key, it cannot be null + - **referential constraint** - foreign key must either reference a valid tuple or be null + - **key constraint** - primary key must be unique +- common relational database operations - crud i.e. create, read, update, delete + +## Functional Dependency + +- X ➔ Y means given X, we can determine Y e.g. in student(id, name), id ➔ name but reverse is not true +- X is called **determinant** while Y is called **dependent** +- **armstrong's axioms** are a set of inference rules to determine all functional dependencies + - axiom of reflexivity - if Y ⊆ X, then X ➔ Y + - axiom of augmentation - if X ➔ Y, then XZ ➔ YZ + - axiom of transitivity - if X ➔ Y and Y ➔ Z, then if X ➔ Z +- prime attribute - a part of any candidate key +- partial dependency - when a non-prime attribute is dependent on a prime attribute +- transitive dependency - when a non-prime attribute is dependent on another non-prime attribute + +## Normalization + +- normalization helps in determining the level of redundancy in a database and providing fixes for them +- there are six normal forms, but only 1nf, 2nf, 3nf and bcnf have been discussed +- sometimes, we do not normalize our database entirely. it not only improves performance for analytics, but if data is duplicated, it works like a double check, thus **reducing chances of corrupt data** + +### First Normal Form + +for being in first normal form or 1nf, relation shouldn't have a multivalued attribute. e.g. + +| id | name | phone | +|-----|------|------------------------| +| 1 | jack | 8745784547, 6587784512 | +| 2 | jane | 3412478452 | + +should be converted to + +| id | name | phone | +|-----|------|------------| +| 1 | jack | 8745784547 | +| 1 | jack | 6587784512 | +| 2 | jane | 3412478452 | + +### Second Normal Form + +for being in second normal form or 2nf, relation should be in 1nf and shouldn't have partial dependencies. e.g. + +| student_id | course_id | course_fee | +|------------|-----------|------------| +| 1 | 1 | 120 | +| 2 | 2 | 150 | +| 1 | 2 | 150 | + +this has partial dependency course_id ➔ course_fee since primary key is (student_id, course_id). +so, it should be split into two tables + +| student_id | course_id | +|------------|-----------| +| 1 | 1 | +| 2 | 2 | +| 1 | 2 | + +| course_id | course_fee | +|-----------|------------| +| 1 | 120 | +| 2 | 150 | + +note how this also reduced data redundancy by storing the course_fee values only once + +### Third Normal Form + +for being in third normal form or 3nf, relation should be in 2nf and shouldn't have transitive dependencies. e.g. + +| student_id | country | capital | +|------------|---------|-----------| +| 1 | india | delhi | +| 2 | nepal | kathmandu | +| 3 | nepal | kathmandu | + +this has transitive dependency country ➔ capital since the capital can be derived from country, and the primary key is student_id. 
so, it should be split into + +| student_id | country | +|------------|---------| +| 1 | india | +| 2 | nepal | +| 3 | nepal | + +| country | capital | +|---------|-----------| +| india | delhi | +| nepal | kathmandu | + +### Boyce Codd Normal Form + +- for being in boyce-codd normal form or bcnf, relation should be in 3nf and a dependency A ➔ B is allowed only if A is a super key, doesn't matter what B is which make sense, as super keys should be able to find everything. so to check for bcnf, only check if lhs of dependency is super key or not +- e.g. - AB ➔ C and C ➔ B. candidate keys are AB and AC. neither of the dependencies are partial or transitive, so it is in 3nf already. however, C is not a super key, yet we have C ➔ B. so, it is not in bcnf +- my understanding - for bcnf, split into two tables - AC (AC is candidate key) and BC (C is candidate key) +- basically, since prime ➔ non-prime was covered in 2nf, non-prime ➔ non-prime was covered in 3nf, we wanted to remove (prime / non-prime) ➔ prime in bcnf + +## About SQL + +- sql is a standard that has been adopted by various vendors for their implementations. the implementations include db2 by ibm, oracle rdbms by oracle, sql server by microsoft, postgresql and mysql which are opensource, etc. this blog is about mysql implementations of concepts, so things can be different for other distributions +- application / client layer - helps in client connections, authentication and authorization +- server layer - it parses, analyzes and optimizes queries. it also maintains cache and buffers. it makes an execution plan which gets fed into the storage engine layer +- storage engine layer - this layer actually writes and retrieves data from the underlying physical storage. mysql supports different storage engine layers like InnoDB, MyISAM, etc. which we can view by `show engines`. InnoDB is the default. e.g. the way transactions are carried out in them can be different + +## Database Commands + +- `show databases` - list all the database. it would only show the databases that we are authorized to view +- `use database_name` - selecting the database with name database_name. future queries would be performed on the selected database +- `show create database mysql` - shows the command using which the database was created +- `show tables` - display the tables in the current database +- `create database if not exists movie_industry` - create the database if it doesn't exist +- `drop database if exists movie_industry` - drop the database if it exists + +## Table Commands + +- we have a lot of data types in mysql, look [here](https://dev.mysql.com/doc/refman/8.0/en/data-types.html), categorized into numeric data types, date and time data types, string data types, spatial data types, json data type. e.g. numeric data type can have int, bigint, tinyint, decimal +- `describe user` - describe the structure of a table +- `show create table user` - shows the command using which the table was created +- we can provide a constraint for non-nullable fields using `not null` +- we can provide a default value using `default` +- we can automatically assign the next integer using `auto_increment`. 
auto increment has a few restrictions - + - there can be only one column in a table marked as auto increment + - the auto increment column should be indexed + - the auto increment column cannot have a default value +- create table example - + ```sql + create table if not exists actors ( + id int auto_increment, + first_name varchar(20) not null, + second_name varchar(20) not null, + dob date not null, + gender enum("male", "female", "other") not null, + marital_status enum("married", "divorced", "single") not null default "unknown", + net_worth_in_millions decimal not null, + primary key (id) + ); + ``` +- we can use `default` while inserting data to instruct mysql to use the default value. it would work for auto increment id as well. we can also not specify the column name altogether +- insert into table by not specifying id - + ```sql + insert into actors (first_name, second_name) values ("jennifer", "aniston"); + ``` +- insert into table by specifying id which is auto increment - + ```sql + insert into + actors (first_name, second_name, id) + values + ("jennifer", "aniston", default), + ("johnny", "depp", default); + ``` +- querying in tables by selecting all columns - + ```sql + select * from actors; + ``` +- select specific columns and filter results using `where` clause - + ```sql + select first_name, second_name from actors where first_name = "tom"; + ``` +- we have a lot of operators in mysql, look [here](https://dev.mysql.com/doc/refman/8.0/en/non-typed-operators.html) +- we can use the `like` operator with where clause for pattern matching. `_` can be used to match exactly one character, `%` can be used to match 0 or more characters - + ```sql + select * from actors where first_name like '_enn%'; -- matches jennifer + ``` +- we can use `cast` to change data type +- e.g. order query results by number, but number would be treated as strings i.e. 2 > 10 + ```sql + select * from actors order by cast(age as char); + ``` +- we can `limit` the number of results returned, and `offset` it from a certain point. note: sql will automatically handle even if our limit or offset goes beyond the number of rows by giving back sensible results + ```sql + select first_name from actors order by age desc limit 4 offset 3; + ``` +- delete selective rows - + ```sql + delete from actors where gender = "male" order by age desc limit 3; + ``` +- for deleting all rows, a faster method is `truncate actors`, it would delete the table entirely and recreate it +- update selective rows - + ```sql + update actors set age = 25 order by first_name limit 3; + ``` +- we can alter name and data type of column, provide a default value. note: while altering data type, the new and old data types should be compatible - + ```sql + alter table actors change first_name firstName varchar(20) default "anonymous"; + ``` +- adding a column - + ```sql + alter table actors add first_name varchar(20); + ``` +- deleting a column - + ```sql + alter table actors drop first_name; + ``` +- indices help in querying data efficiently, just like we search for words in a dictionary. downside is the overhead of creating, storing and maintaining these indices. internally, mysql uses b / b+ trees with the keys of the nodes as primary indices. 
this helps in efficient querying of data +- we can create an index on name to speed up queries - + ```sql + alter table actors add index index_name (first_name); + ``` +- we can also drop that created index - + ```sql + alter table actors drop index index_name; + ``` +- alter table name - + ```sql + alter table actors rename Actors; + ``` +- delete table - + ```sql + drop table if exists actors; + ``` +- aliases can be used to give temporary names, as they help us write queries that are more readable + ```sql + select + t1.first_name as a, t2.first_name as b + from + actors as t1, actors as t2 + where + t1.net_worth_in_millions = t2.net_worth_in_millions and t1.id > t2.id; + ``` +- distinct is a post-processing filter i.e. works on the resulting rows of a query & can be used on multiple columns + ```sql + select distinct first_name, last_name from actors; + ``` +- aggregate methods like `min`, `max`, `sum`, `count` can be used - + ```sql + select count(*) from actors; + ``` +- group by - helps group rows based on a particular column. we cannot use columns **not** present in group by for select, having, or order by clauses + ```sql + select gender, avg(net_worth_in_millions) from actors group by gender; + ``` +- while the where clause helps us filter rows, the having clause helps us filter groups + ```sql + select + marital_status, avg(net_worth_in_millions) as avg_net_worth_in_millions + from + actors + group by + marital_status having avg_net_worth_in_millions > 200 + ``` +- adding a foreign key constraint - + ```sql + alter table digital_assets + add constraint digital_assets_actor + foreign key (actor_id) references actors(id); + ``` + +## Joins + +- **cross join** - cartesian product of the rows of the two tables +- **inner join** - all rows of both the tables where the condition (called the join predicate) is satisfied +- **left outer join** - result of inner join + all rows of the left table, with null for the columns of the right table +- **right outer join** - result of inner join + all rows of the right table, with null for the columns of left table +- **full outer join** - result of inner join + all rows of the left table, with null for the columns of the right table + all rows of the right table, with null for the columns of the left table +- **self join** - using the same table on both sides of the join +- inner join example - assume digital_assets table contains social media links, where the asset_type is an enum containing twitter etc. and url is the link + ```sql + select + actors.first_name, actors.second_name, digital_assets.asset_type, digital_assets.url + from + actors inner join digital_assets + on + actors.id = digital_assets.actor_id; + ``` + if the same column name is not there in the two tables, the "table." prefix can be removed e.g. `first_name` in place of `actors.first_name`, though i prefer being explicit +- the above query can be rewritten as below, with **no** performance impact + ```sql + select + actors.first_name, actors.second_name, digital_assets.asset_type, digital_assets.url + from + actors, digital_assets + where + actors.id = digital_assets.actor_id; + ``` +- union clause - merely clubs results together, doesn't join the tables. e.g. 
the following query will display a list of all actress names, followed by all male actor names + ```sql + select concat(first_name, ' ', last_name) from actors where gender = 'female' + union + select concat(first_name, ' ', last_name) from actors where gender = 'male' + ``` + note: duplicates are automatically removed since it is a "union", which can be prevented using `union all` +- left outer join syntax (right join would have similar syntax, not discussed). e.g. in the below query, actors without social media handles would be displayed too, with the columns for `asset_type` and `url` holding null - + ```sql + select + actors.first_name, actors.second_name, digital_assets.asset_type, digital_assets.url + from + actors left outer join digital_assets + on + actors.id = digital_assets.actor_id; + ``` +- natural join - syntactic sugar, no need to explicitly specify the columns to use for join, i won't use it + +## Nested Queries + +- nested queries are slower but sometimes the only way to write a query +- the following is an example of **nested scalar query**, since the nested query returns a single value. e.g. find all actors who had updated their digital assets most recently + ```sql + select + first_name + from + actors inner join digital_assets on digital_assets.actor_id = actors.id + where + digital_assets.last_updated = ( + select max(digital_assets.last_updated) from digital_assets + ); + ``` +- e.g. find all actors who are on facebook + ```sql + select * from actors where id in ( + select actor_id from digital_assets where asset_type = 'facebook' + ) + ``` +- e.g. find actors who updated their social handles on their birthday + ```sql + select + actors.first_name + from + actors inner join digital_assets + on + actors.id = digital_assets.actor_id and + actors.dob = digital_assets.last_updated + ``` +- the following is an example of a nested query where it returns a collection of columns. the query returns the same results as the example as above + ```sql + select first_name from actors where (id, dob) in + (select actor_id, last_updated from digital_assets); + ``` + +## Correlated Queries + +- the subquery references columns from the main query +- note: we can use the `exists` operator to check if the subquery returns any rows +- e.g. find actors with their names in their twitter handles - + ```sql + select + actors.first_name + from + actors inner join digital_assets + on + actors.id = digital_assets.actor_id + where + digital_assets.url like concat('%', actors.first_name, '%') and + digital_assets.asset_type = 'twitter' + ``` +- the query returns the same results as the example as above + ```sql + select first_name from actors where exists ( + select + * + from + digital_assets + where + digital_assets.actor_id = actors.id and + digital_assets.url like concat('%', actors.first_name, '%') and + digital_assets.asset_type = 'twitter' + ) + ``` +- difference between nested queries and correlated queries - in nested queries, the subquery runs first and then the main query runs. 
in correlated queries, the subquery runs for every row of the main query, and the subquery runs after the main query + +## Multi Table Operations + +- multi table delete use case - delete related data from multiple tables + ```sql + delete + actors, digital_assets -- tables to delete rows from + from + actors, digital_assets + where + actors.id = digital_assets.actor_id and + digital_assets.asset_type = 'twitter' + ``` + we mention the tables to delete rows from, note how this isn't required when deleting from one table +- we can similarly have multi table updates - + ```sql + update + actors inner join digital_assets + on + actors.id = digital_assets.actor_id + set + actors.first_name = upper(actors.first_name) + where + digital_assets.asset_type = 'facebook' + ``` +- note: a subquery cannot have select for tables being updated or deleted in the outer query +- copy a table **without the data** and just the structure - `create table copy_of_actors like actors` +- insert data from one table into another - `insert into copy_of_actors(name) select first_name from actors` + +## Views + +- views can be created by combining multiple tables +- we can filter out rows and columns +- now, a complex query becomes a simple single table query +- we can create views from other views as well, and we can perform the same joins and filtering on views that we would otherwise perform on a table +- when we do `show tables`, we see the views as well, we can see the type of table i.e. whether it is a normal table (also referred to as base table) or a view by using the command `show full tables` +- e.g. of creating a view - + ```sql + create view actors_twitter_accounts as + select + first_name, second_name, url + from + actors inner join digital_assets + on + actors.id = digitalassets.actor_id + where + asset_type = 'twitter' + ``` +- views are basically like stored queries, so they get updated whenever the tables get updated +- we can use `create or replace` to either create a view or replace it if one already exists. e.g. for single actors + ```sql + create or replace view single_actors as + select * from actors where marital_status = 'single'; + ``` +- we can update or delete rows from the underlying base tables using views. however, there are conditions e.g. it shouldn't have specific types of joins, group by statements or aggregation functions, etc. + ```sql + insert into single_actors + (first_name, second_name, dob, gender, marital_status, net_worth_in_millions) + values + ('charlize', 'theron', '1975-08-07', 'female', 'single', 130); + ``` +- e.g. i try inserting a row into this view, which fails the filtering clause used to create the view + ```sql + insert into single_actors + (first_name, second_name, dob, gender, marital_status, net_worth_in_millions) + values + ('tom', 'hanks', '1956-07-09', 'male', 'married', 350); + ``` +- now, since views can update their base tables, this went through and updated the table. however, since the view's query filters out married actors, we don't see the row in the view. we have essentially updated a row in a table through a view which will not be visible in the view. if this behavior is not desirable, we can use the check option while creating the view + ```sql + create or replace view single_actors + as select * from actors where marital_status = 'single' + with check option; + ``` +- now the insert statement for tom hanks will fail +- if we create views using other views, the check option can have scopes of **local** and **cascade**. 
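- a rough sketch of how the two scopes would be declared, assuming mysql syntax and a hypothetical view layered on top of the single_actors view from above; the difference between the two is explained below -
  ```sql
  -- local - only this view's own filtering clause is checked on insert / update
  create or replace view rich_single_actors as
  select * from single_actors where net_worth_in_millions > 200
  with local check option;

  -- cascaded - the filtering clauses of the underlying views are checked as well
  create or replace view rich_single_actors as
  select * from single_actors where net_worth_in_millions > 200
  with cascaded check option;
  ```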
local means that only the check option of the view being used for the update will be considered, while cascade looks at the check option of the views being used by this view itself as well +- we can drop views using `drop view single_actors` + +## Triggers + +- triggers are statements that get invoked when we perform an operation like insert, update or delete +- note: if we perform an operation like truncate which is equivalent to delete, triggers won't be invoked +- triggers can be **row level** or **statement level** +- row level triggers are invoked once per row, e.g. if a statement updated 25 rows then it gets invoked 25 times, while statement level triggers are invoked once per statement +- triggers can be invoked at 6 phases - (before, after) * (insert, update, delete) +- e.g. of trigger - + ```sql + delimiter ** + create trigger net_worth_check + before insert on actors + for each row + if new.net_worth_in_millions < 0 or new.net_worth_in_millions is null then + set new.net_worth_in_millions = 0; + end if; + ** + delimiter ; + + insert into actors (first_name, net_worth_in_millions) values ('tom', 350); + insert into actors (first_name, net_worth_in_millions) values ('young', null); + insert into actors (first_name, net_worth_in_millions) values ('old', -540); + + select * from actors; -- actors young and old will have net_worth_in_millions adjusted to 0 + ``` +- show triggers - `show triggers;` +- drop triggers - `drop trigger if exists net_worth_check;` +- we can also include multiple statements by enclosing statements after `for each row` inside a begin-end block + +## Transactions + +- we use transactions since we want either all the statements or none of them to go through +- there can be storage engines which don't support transactions / apply locking using different methods +- irrespective of whether transactions are supported, databases should have some form of locking to disallow concurrent access from modifying the data. e.g. InnoDB supports row level locking so that multiple users can modify the data in the same table. this also makes it a little slower +- we can start and commit a transaction using - + ```sql + start transaction; + -- statements + commit; + ``` +- we can roll back a transaction using + ```sql + start transaction; + -- statements + rollback; + ``` diff --git a/_posts/2023-07-23-warehouse-and-snowflake.md b/_posts/2023-07-23-warehouse-and-snowflake.md new file mode 100644 index 0000000..db6b088 --- /dev/null +++ b/_posts/2023-07-23-warehouse-and-snowflake.md @@ -0,0 +1,826 @@ +--- +title: Warehouse and Snowflake +--- + +## Warehouse + +- we need data for - + - operational data keeping - oltp (online transactional processing). we do not need a very long history of the data + - analytical decision making - olap (online analytical processing). millions of records are analyzed at a time, so we need fast query processing +- data warehouse - + - centralized location for all data sources + - optimized for analytic processing + - should also be user friendly for decision makers + - must load data consistently and repeatedly using etl +- data lake - + - used for storing raw (unstructured) data. data is not processed (structured) unlike in warehouse, where we have data thats user friendly and optimized for performance + - because of this, technology used by data lake is big data, while warehouses use databases + - used when the use case is not properly defined yet. 
data can be taken out of lake + +### Technologies in Warehouses + +- relational databases - uses popular sql, primary key + foreign key to easily perform joins, etc +- in memory databases - for high query performance, used in for e.g. data marts. in traditional databases, data is stored in hdd / ssd in disk and loaded into memory for querying. this is eliminated here to increase performance. challenge - lack of durability, resolve via snapshots / images to help restore to a specific point. e.g. sap hana, amazon memory db, etc +- cubes - data is stored in a multidimensional array. my understanding - e.g. imagine we want sales for a particular customer and product, the value will already be aggregated across remaining dimensions like time. it uses the mdx language for querying +- ods - operational data storage - used for operational decision making and not analytical / strategic decision making. there is a need for realtime unlike in data warehouses. now, we can have a separate warehouse and an ods, or optionally, treat ods as the **staging layer for our warehouse** along with using it for operational decision making +- elt - we extract and load the data into the warehouse directly. then, we use the compute of the warehouse to perform transformations. this is possible with architectures like snowflake. we can also perform the e and l of the elt using realtime streaming. elt allows for much more flexible transformations as compared to etl, since in etl, we perform transformations to load data into the core layer, while the transformations move towards the client in elt +- indexes - make reads faster, writes slower. useful when we join, filter by, etc. on the data based on that column +- b tree index - this is the default / most common type of index. we can break data into a multi level tree. helpful when column has high cardinality
+ ![b tree](/assets/img/warehouse-and-snowflake/b-tree.drawio.png) +- bitmap index - useful when the column has a low cardinality. the position of bit corresponds to the row number, and if its 1, it means that row has that value + + | pk | payment type | + | -- | ------------ | + | 1 | visa | + | 2 | mastercard | + | 3 | mastercard | + | 4 | visa | + + | payment type | bitmap | + | ------------ | ------ | + | visa | 1001 | + | mastercard | 0110 | + +- fact table indexing example - using b tree index on surrogate key (automatically setup if we use primary key?) and bitmap index on the dimension column foreign keys +- massive parallel processing - a task can be broken down into multiple subtasks. this way, these subtasks can run in parallel, thus optimizing performance. till now, we talked about breaking down "compute". for scaling storage, the underlying architecture can be shared disk architecture (underlying storage is one) or shared nothing architecture (underlying storage is also broken down) +- columnar storage - traditionally, queries are processed row wise. if we use columnar storage, data is stored column wise. so, if we need to process only a small subset of columns, we do not have to process entire rows and go through all the data like in traditional relational databases + +### Warehouse Architecture + +- we put the data **as is** into the staging layer of the warehouse +- we can do some minute transformations like renaming and adjusting positions of columns when performing a union between employee datasets from different sources +- we put the data into the core / access / warehouse layer by performing transformations from the staging layer. recall this is done to make data user friendly and optimized for querying +- why do we need a staging layer i.e. why not load into the warehouse layers directly from the sources - + - we risk burdening our oltp systems + - also, data from sources can be in different formats like crm, files like xml, etc. we get all the data from these different sources into for e.g. a relational db, which helps us use sql / a standard way to perform transformations, e.g. perform joins easily which would not be possible otherwise when data comes from different sources +- staging layer can be temporary (more common) or permanent +- temporary - after the data is loaded into the warehouse layer, we truncate the staging layer. this way, the next time we want to load data into the warehouse layer, we need to perform a diff - we need to know the last inserted row inside the warehouse layer, and calculate the new rows inside the sources since then, and accordingly add the new data to the warehouse. this diff checking can be done based on the surrogate key, created date column if maintained, etc +- persistent staging layer - maybe easier since the entire warehouse layer can be rewritten every time? +- why data marts i.e. why not use core layer directly - + - core layer has a lot of tables - we can have data marts on top of this. now, users will only have access to tables which they need + - core layer isn't burdened with queries from all consumers, and consumers only consume capacity of their data marts + - allow us have different types of databases, e.g. in memory vs cubes based on use case +- watermark - we keep track of the last record that we load from the sources into the core layer. based on the value of the watermark, we need to put the data into the staging layer and accordingly append data to the warehouse layer +- initial load - first extraction. 
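- a minimal sketch of loading on top of the watermark idea above (the schema, table and column names here are made up for illustration) -
  ```sql
  -- the watermark i.e. the newest record already present in the core layer
  select max(last_modified) from core.orders;

  -- append only the rows from staging that are newer than that watermark
  insert into core.orders
  select * from staging.orders
  where last_modified > '2023-08-05 10:30:00';
  ```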
it is much slower and puts load on sources, so it is best if done during weekends etc. so that the systems are not impacted +- delta load - incrementally loading data. usually, timestamp fields like created / modified date. the same workflow as the initial load can be reused, but now we run it on a schedule regularly instead of just once, and we also run it on a subset of data by filtering out and retaining only the new / modified data +- sometimes, for fact tables a delta load is possible due to timestamp like columns, but for dimension tables which might be relatively smaller, we can just do a full load every time +- when loading data from (temporary) staging to core layer, we can either just insert / append or update as well. we usually do not process deletes, but can use delete markers to go about this + + ![warehouse architecture](/assets/img/warehouse-and-snowflake/data-warehouse.png) + +- basic transformations when loading to core layer - deduplication, filtering out rows, filtering out columns, cleaning data (e.g. when performing a union, one source contains m and f, while another contains male / female), key generation (surrogate key) +- advanced transformations when loading to core layer - joining, splitting (split address into city, state, etc), aggregations / groupings, deriving columns (calculate profit using sales, costs, discounts) + +### Dimensional Modelling + +- dimensional modelling - method of organizing data, which again helps in 🥁... usability and performance +- facts - measurements e.g. profit +- dimensions - give context to the measurements e.g. product category +- note - dimensions modelling is organizing tables into facts and dimensions. but, based on use case, we can also rather organize data as flat tables in our warehouse, e.g. join and group data to produce the final aggregated form directly, instead of expecting our end users to perform aggregations +- dimensional modelling e.g. - we shouldn't have very wide tables - instead of duplicating product and category information in all rows, which can make scanning data slower, extract them to a dimension table, and only maintain the product id as a foreign key in the fact table, and product + category in a separate table. similarly with date - extract date into a different dimension table and store the date dimension id in the fact table. optionally, store pre calculated values in the date dimension table like day of week, month, etc +- e.g. if we have data like this - + + | date | product | category | profit | + | -------- | -------------- | ---------- | ------ | + | 5/8/2023 | tangy tomato | vegetables | 23 | + | 6/8/2023 | cheese cracker | snacks | 19 | + | 5/8/2023 | chocolate cake | snacks | 99 | + +- it can be dimension modelled this way - + + | date_id | product_id | profit | + | -------- | ---------- | ------ | + | 05082023 | 1 | 23 | + | 06082023 | 2 | 19 | + | 05082023 | 3 | 99 | + + | date_id | month | day_of_week | + | -------- | ------ | ----------- | + | 05082023 | august | saturday | + | 06082023 | august | sunday | + + | product_id | product | category | + | ---------- | -------------- | ---------- | + | 1 | tangy tomato | vegetables | + | 2 | cheese cracker | snacks | + | 3 | chocolate cake | snacks | + +- identifying facts - are measurable / can be aggregated as compared to dimensions which are descriptive. 
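- going back to the dimension modelled example above, a rough ddl sketch could look like the below (the table and column names are assumptions, not prescriptions) -
  ```sql
  create table dim_date (
    date_id int primary key,        -- e.g. 05082023
    month varchar(20),
    day_of_week varchar(20)
  );

  create table dim_product (
    product_id int primary key,
    product varchar(50),
    category varchar(50)
  );

  create table fact_sales (
    date_id int,
    product_id int,
    profit decimal,
    foreign key (date_id) references dim_date(date_id),
    foreign key (product_id) references dim_product(product_id)
  );
  ```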
facts can also mark events sometimes, which is why it is accompanied by a date +- parts of a fact table - primary key to uniquely identify a row, foreign keys to dimension tables and then the actual facts themselves +- grain - the most atomic level of a fact table - what does a row in the fact table actually represent. keep it fine level for flexibility, so that various aggregations can be made +- dimensions help in grouping / filtering facts for our use case +- we usually have multiple dimensions clustered around the fact, thus it is called a star schema - the structure looks like that of a star, with the fact table at the center and the center and the dimension tables at the tips of the star spiking out of the fact table +- fact tables to dimension tables are generally many to one +- denormalization or data redundancy might be used in warehouses to help with query performance, e.g. the column category in the product dimension table discussed above, notice the repeating category snacks +- snowflake schema - a star schema is a snowflake schema with one level. if the denormalization above is actually degrading our performance / affecting our consistency, we can also have multiple levels e.g. the category column can be extracted into its own dimension table in the product dimension we discussed. so, when we have multiple levels of dimension tables, it is also called a snowflake schema. note it can result in multiple joins, thus degrading performance +- surrogate keys - use auto generated integer ids instead of natural keys for **both** primary keys and foreign keys. they are more performant compared to natural keys which are usually strings. we can also use -1 for dummy dimensions discussed below to make our intent clearer. exception - date dimensions - instead of an auto incremented integer, represent the actual date time as an integer, e.g. 050820232232 (first 8 characters represent date, remaining 4 time) +- it is common to keep pre calculated aggregates in the fact table, e.g. instead of expecting users to perform calculations to get the profit earned per order line item, just maintain it as a separate column in the fact table which can be used. this way users do not perform erroneous calculations +- if changing to surrogate keys (recall why it is a best practice) in dimension tables, remember to correctly map the values in the fact table foreign key column to use the surrogate key instead of the natural key +- date dimension - pre calculated values like day of week, month name, etc +- foreign key / surrogate key in date dimension does not have to be meaningless i.e. auto incremented integer, it can be of the format 06082023 +- optionally, since date dimension is very predictable, we can pre-populate the date dimension table for the next 10 years or so in advance +- consider populating all variations, e.g. month full name, month abbreviation, month as integer in case of date dimension + +### Additivity in Facts + +- additive facts - can be added across all dimensions. e.g. adding units sold across the date dimension tells us the number of units sold for a particular product, adding the units sold across the product dimension tells us the number of units sold at a particular date. note - i get confused in terminology. 
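- to make the "adding across a dimension" wording concrete, two illustrative queries, assuming a fact_sales table shaped like the one shown just below (column names tweaked to be sql friendly) -
  ```sql
  -- "across the date dimension" - total units sold per product
  select product_id, sum(units_sold) from fact_sales group by product_id;

  -- "across the product dimension" - total units sold per date
  select date_id, sum(units_sold) from fact_sales group by date_id;
  ```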
across product means group by date + + | product_id | units sold | date | price | + | ---------- | ---------- | -------- | ----- | + | 1 | 2 | 06082023 | 23 | + | 2 | 2 | 06082023 | 19 | + | 1 | 5 | 10082023 | 11 | + +- semi-additive facts - only added across some dimensions. e.g. imagine a fact table where the grain is our balance on a particular date for a particular folio + + | portfolio_id | date | balance | + | ------------ | -------- | ------- | + | 1 | 06082023 | 100 | + | 2 | 06082023 | 50 | + | 1 | 10082023 | 110 | + +- adding balance across the date dimension does not make sense, since balance is a cumulative number + + | portfolio_id | balance | + | ------------ | ------- | + | 1 | 210 | + | 2 | 50 | + +- but adding it across portfolios tells us our total balance on a particular date + + | date | balance | + | -------- | ------- | + | 06082023 | 150 | + | 10082023 | 110 | + +- non-additive facts - cannot be added across any dimension. e.g. price of a product (refer the example in additive facts). there is no meaning of that fact by itself, unless multiplied with the number of units sold +- nulls for facts in fact tables - usually in tools, average ignores null, sums will treat nulls as a 0, etc. but sometimes, we might want to replace nulls with 0, it depends on our use case +- nulls for foreign keys in fact tables - nulls can result in problems, therefore introduce a row in the dimension table with a dummy value, and have the rows with null as foreign key in the fact table point to this dummy value's primary key instead +- my understanding - year to date facts - we often want year to date calculations i.e. things like month to date (days from beginning of month to last business date of that month / today, depending on use case) and so on. however, these should not be stored, because then people will start performing aggregations on these calculated values. so, its is better to just store the underlying value and perform this calculation on the fly + +### Types of Fact Tables + +- transactional fact table - one row / grain indicates one event / transaction. e.g. one row represents one customer support call. characteristic - will have **many dimensions** (foreign keys). disadvantage - **grow very rapidly** in size, and often need to be aggregated +- periodic snapshot fact table - one row is a **summary of multiple events over a day**, etc. e.g. number of customer support calls over a day. note that a fact table can **contain multiple facts**, like number of calls, average call duration, etc. characteristic - because of its nature, it **will not (cannot) have many dimensions**, since one row is an aggregation across a dimension. advantage - they grow slower compared to the transactional fact table +- accumulation snapshot fact table - one row summarizes many events. my understanding - unlike periodic fact table where a grain is an accumulation of the same type of event, a grain in accumulation snapshot fact table is the **accumulation of the lifetime of events of that fact**. e.g. one row has production date, shipping date, return date, etc. characteristic - it **has many date dimensions**. so, this is also an example of **a role playing dimension**. this too grows slower in size as compared to the transactional fact table +- factless fact table - a fact table can have multiple facts i.e. measurements. sometimes, there is no such fact and we just have dimensions. e.g. a fact table where a new record for every new employee that is registered. 
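- circling back to the "nulls for foreign keys" point above, a small sketch of the dummy dimension row fix (reusing the assumed fact_sales / dim_product names from earlier) -
  ```sql
  -- add a dummy member to the dimension with a well known surrogate key
  insert into dim_product (product_id, product, category) values (-1, 'unknown', 'unknown');

  -- point fact rows that have no product at this dummy member instead of null
  update fact_sales set product_id = -1 where product_id is null;
  ```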
it will have dimensions like department id, employee id, position id, date, etc, but no fact. we can perform aggregations like number of employees who joined last month + +### Types of Dimensions + +- conformed dimensions - dimensions shared across multiple facts, e.g. date dimension. this helps combine the facts using the shared dimension. e.g. if we have two different facts for sales and profits, we can combine them using the date dimension. this helps us compare the cost and sales side by side +- degenerate dimension - e.g. we have a sales fact table, with a foreign (surrogate) key for the category dimension. the category dimension only has two columns - the surrogate key and the category name. so, we can instead directly store the category name in the sales fact table. this usually occurs in the transactional fact table +- junk dimensions - e.g. imagine we have a lot of indicators that are eating up a lot of width (therefore space) of the fact table, thus impacting its performance. so, we can instead extract these dimensions to its own table. note - the cardinality of these dimensions should be low. also, the number of rows in this junk dimension grows exponentially, e.g. m values for one column and n values for another column basically mean m * n combinations. so, we can only store the dimensions we come across in this junk dimension instead of storing all combinations in a precomputed fashion. another idea could be to split junk dimensions i.e. group related junk dimensions together + + | amount | payment_method | incoming / outgoing | + | ------ | -------------- | ------------------- | + | 23 | credit card | incoming | + | 12 | cash | outgoing | + + | amount | flag | + | ------ | ---- | + | 23 | 1 | + | 12 | 3 | + + | pk | payment_method | incoming / outgoing | + | -- | -------------- | ------------------- | + | 1 | credit card | incoming | + | 3 | cash | outgoing | + +- role playing dimension - referenced in the fact table multiple times. e.g. date dimension for order date vs shipping date. an additional optimization for our users - use views - data is not duplicated, but users see different dimension tables for the different "roles" the date dimension might be playing, thus increasing readability + +### Slowly Changing Dimensions + +this suggests various solutions with how we should handle changing of dimensions. dimensions are considered to fairly static as compared to facts + +- type 0 - only retain the original data. only works when our dimensions do not change. e.g. date dimension +- type 1 - we overwrite the old with the new values in the dimension table. this way, we loose the old value. e.g. a new category for biscuits was introduced. so, the category changes from snacks to biscuits. issue - this can suddenly kind of maybe show reduced sales for dashboards which were monitoring the category for snacks. also, imagine if the category of snacks was replaced altogether with its more specific counterparts. any dashboards for e.g. grouping or filtering based on this dimension would suffer. understand that the idea is that for the same surrogate key in the dimension table, we overwrote its value +- type 2 - add a new row instead of update existing. any new biscuits related products will point to this new category for biscuits, while older biscuits products will continue pointing to snacks. 
this way, we maintain the history accurately + + | pk | product | category_fk | + | --- | ------------------ | ----------- | + | 1 | oatmeal biscuits | 1 | + | 2 | sunglasses | 2 | + | *3* | *oatmeal biscuits* | *3* | + + | pk | category | + | --- | ----------- | + | 1 | snacks | + | 2 | accessories | + | *3* | *biscuits* | + +- the issue with type 2 and its fix - we do not have a way of telling for e.g. the current categories in our system, since we now have multiple rows - snacks and biscuits, while we know that snacks is no longer a valid category in our systems. so, we can introduce new columns for effective and expiry dates in the category table. e.g. notice the values in the date dimensions below + + | pk | category | effective_date | expiry_date | + | --- | ----------- | -------------- | ----------- | + | 1 | snacks | 06082023 | 08082023 | + | 2 | accessories | 06082023 | 31129999 | + | *3* | *biscuits* | *08082023* | *31129999* | + +- we can also mix types, e.g. mix type 1 and type 2. use type 1 for product name, type 2 for category +- type 3 - we introduce a new column, so we will have two columns - previous category and current category in the category dimension table. this way, we can lay out a clear demise plan in place with maintaining backwards compatibility. it is not for unpredictable / frequent changes, use type 2 for that. this is for a more structured change like a reorg in a company + +## Snowflake + +- we can create and run queries inside worksheets +- we can see the snowflake_sample_data database by default with some sample data + ```sql + select * from snowflake_sample_data.tpch_sf1.customer + ``` +- we use snowflake's virtual warehouses for mpp (massive parallel processing). this allows the query to be processed in parallel in small chunks +- virtual warehouse sizes - xs (1 server), s (2 servers), m (4 servers),...4xl (128 servers) +- when not in use, a warehouse can be suspended. configure this behavior automatically using auto suspend and auto resume +- creating warehouses can be done by using the ui / even issuing sql statements in the worksheet +- in the worksheet, we can set the context i.e. 
the current database, schema and warehouse or use commands like `use warehouse` or `use database` +- multi clustering - when creating a warehouse, we can create it as a cluster and set the minimum and maximum number of warehouses allowed +- i think multi clustering is an enterprise feature, i do not see the option for it in the ui +- based on when load is high, this cluster will automatically add or remove warehouses for us (think asg in aws) +- multi clustering is better for more number of queries, for more complex queries we might need to consider vertical scaling (increase size of warehouse) +- notice difference between **more number of queries** vs **more complex queries** +- so basically there is a queue of the queries, which gets assigned one by one to the warehouses +- scaling policy - standard is the default whereas economy preserves the cost + - standard - add additional virtual warehouses if there is a task queued + - economy - add additional virtual warehouses if the estimated time for the current cluster is at least 6 minutes +- optimize virtual warehouse usage - + - have dedicated virtual warehouses for different use cases since they have different workload types + - understand horizontal scaling (more concurrent queries) vs vertical scaling (more complex queries), and choose the right one based on use case +- snowflake editions - **standard**, **enterprise** (has all features of standard with additional features like multi cluster warehouse, time travel upto 90 days as opposed to the 1 day inside standard, materialized views, etc), **business critical** (all features of enterprise with extended support etc) and **virtual private** (think dedicated hosts in aws ec2) +- we are charged for storage (after compression) and compute (warehouse) +- for storage, we have two options to choose between - **on demand** (pay for what you use) and **capacity** (pay upfront) +- 40$ per tb per month for on demand storage, 23$ per tb per month for capacity storage +- for xs virtual warehouse, we consume 1 credit per hour consumed by second i.e. if we consume for half an hour, we use half a credit (minimum is one minute). number of credits consumed by a warehouse depends on size (1 credit per server, so medium would consume 4 credits per hour) +- for virtual warehouse, we are charged in terms of credits. there is a conversion of credit and dollars associated with it. e.g. for cloud provider as aws and in the us-east-1 region - 2$ per credit for compute if using standard edition +- methods of loading data in snowflake - + - **bulk / batch loading** - uses our compute. e.g. copy command + - **continuous loading** - doesn't use our compute, serverless. e.g. snowpipe +- stages - location from where data can be loaded + - **external** - maintains url, access credentials, etc. e.g. 
s3 buckets + - **internal** - local storage maintained by snowflake +- note - there are costs considerations around data transfer when moving data from different regions or different clouds vs same cloud and same region +- creating a stage - + ```sql + create or replace database our_first_db; + + create or replace database manage_db; + create or replace schema manage_db.external_stages; + + create or replace file format manage_db.external_stages.csv_format + type = csv field_delimiter = ',' skip_header = 1; + + create or replace stage manage_db.external_stages.bucketsnowflakes3 + url = 's3://bucketsnowflakes3'; -- the bucket is unprotected + + list @manage_db.external_stages.bucketsnowflakes3; -- lists files + + create or replace table our_first_db.public.orders ( + order_id varchar(30), + amount int, + profit int, + quantity int, + category varchar(30), + subcategory varchar(30) + ); + + copy into our_first_db.public.orders + from @manage_db.external_stages.bucketsnowflakes3 + file_format = manage_db.external_stages.csv_format + files = ('OrderDetails.csv'); + ``` +- doing some transformations before loading data - + ```sql + copy into orders_ex (order_id, profit, profitable_flag) from ( + select + s3.$1, + s3.$2, + iff(cast(s3.$3 as int) > 0, 'profitable', 'non profitable') + from + @manage_db.external_stages.bucketsnowflakes3 s3 + ) + file_format = manage_db.external_stages.csv_format + files = ('OrderDetails.csv'); + ``` +- instead of `files` where we specify the full names of the files in an array like structure, we can specify a regex to match file names using the `pattern` keyword +- lets say we have a column of type integer in the create table statement, but the data in the csv inside s3 is bad and one of the rows in the csv has a string for the corresponding column. we can configure the behavior on encountering an error as follows - + ```sql + -- ... + files = ('OrderDetails_error.csv') + on_error = skip_file; + ``` + the options for `on_error` are - + - **abort_statement** - the default. abort the copying and rollback the rows copied + - **continue** - skip the row where the error happened and continue the loading of data + - **skip_file** - skip the file where the error happened but continue loading other files. we can also configure the error limit per file in this case, e.g. **skip_file_3** would mean skip the file if three or more errors happen (so skip_file actually means skip_file_1?) +- before actually copying over the data, we can also do a dry run of the copy - this way we can know beforehand if the copying will go through without actually executing it. we configure this using **validation_mode** i.e. if we provide this option, the data is not actually copied + ```sql + -- ... + files = ('OrderDetails_error.csv') + validation_mode = return_errors; + ``` + the two options are - + - **return_errors** - returns all errors if any during the execution of the entire thing. the output will contain the files where the error occurred, the row number, the reason of the error, etc + - **return_n_rows** - e.g. return_5_rows would mean perform the validation on only the first 5 rows, and if a failure occurs, throw the exception, and if no, return these 5 rows. note - the difference is this returns the processed rows while the above returns files where exceptions occurred +- if column has type `varchar(10)` but the source csv column has values of larger lengths, the copy command will fail. 
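  as a hedged sketch, the copy we ran earlier could be made to tolerate such oversized values with the `truncatecolumns` option (explained right below) - everything except that option reuses the stage, file format and table created above -

  ```sql
  copy into our_first_db.public.orders
    from @manage_db.external_stages.bucketsnowflakes3
    file_format = manage_db.external_stages.csv_format
    files = ('OrderDetails.csv')
    truncatecolumns = true; -- silently truncate values that are longer than the column length
  ```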
we can prevent this failure by using `truncatecolumns = true`, so that columns with greater lengths are just truncated i.e. electronics will become electronic +- by default, if we rerun the same copy command more than once, the rows will not be duplicated 🤯. we can change this behavior by providing `force = true`. note that this can lead to duplicates +- to view the history of copy commands i.e. source stage, success vs failure count, etc, use - + ```sql + select * from copy_db.information_schema.load_history; + ``` +- note that the command above was for a single database. to view the same thing across databases, use the snowflake db + ```sql + select * from snowflake.account_usage.load_history; + ``` +- for loading unstructured data (e.g. json), we might not be able to load it directly like above i.e. csv rows were easily mapping one to one with table rows +- so, we first load the json to a new table which has only one column of type `variant` +- we then transform this data (e.g. flatten) to load into our own tables + ```sql + create or replace stage manage_db.public.s3_json + url = 's3://bucketsnowflake-jsondemo'; + + create or replace file format manage_db.public.json + type = json; + + create or replace table our_first_db.public.json_demo ( + raw_json variant + ); + + copy into our_first_db.public.json_demo + from @manage_db.public.s3_json + file_format = manage_db.public.json + files = ('HR_data.json'); + ``` +- now, assume the json has the format as below - + ```json + { + "city": "Louny", + "first_name": "Dag", + "gender": "Male", + "id": 2, + "job": { + "salary": 43000, + "title": "Clinical Specialist" + }, + "last_name": "Croney", + "prev_company": [ + "MacGyver, Kessler and Corwin", + "Gerlach, Russel and Moen" + ], + "spoken_languages": [ + { "language": "Assamese", "level": "Basic" }, + { "language": "Papiamento", "level": "Expert" }, + { "language": "Telugu", "level": "Basic" } + ] + } + ``` +- we can for e.g. query city as follows - + ```sql + select raw_json:city from our_first_db.public.json_demo; + ``` +- recall raw_json was the variant column in our table. the output for e.g. of above would be a column containing cells of the format `"Bakersfield"`. so, now to convert this to a string i.e. `Bakersfield` (without quotes), we can do the below - + ```sql + select raw_json:city::string from our_first_db.public.json_demo; + ``` +- for nested object e.g. refer job in the json, this would work - + ```sql + raw_json:job.salary::int job_salary + ``` +- for nested arrays e.g. 
refer languages in the json, this would work - note how we can only grab one language at a time since this is like one to many + ```sql + raw_json:spoken_languages[0].language::string first_language + ``` +- so, the above solution works for arrays if we are fine with introducing new columns like first_language, second_language, etc +- but what if we want a table that is like if we had to perform a join between employee data and spoken languages - + ```sql + select + json_demo.raw_json:first_name::string first_name, + flattened.value:language::string language + from + our_first_db.public.json_demo json_demo, + table(flatten(raw_json:spoken_languages)) flattened; + ``` +- the output of this command would look like this - + + | first_name | language | + | ---------- | ---------- | + | Portia | Kazakh | + | Portia | Lao | + | Dag | Assamese | + | Dag | Papiamento | + | Dag | Telugu | + +- now theoretically we could have done as below - + ```sql + select + raw_json:first_name::string first_name, + raw_json:spoken_languages[0].language::string language + from + our_first_db.public.json_demo + union all + select + raw_json:first_name::string first_name, + raw_json:spoken_languages[1].language::string language + from + our_first_db.public.json_demo + union all + select + raw_json:first_name::string first_name, + raw_json:spoken_languages[2].language::string language + from + our_first_db.public.json_demo; + ``` +- _notice the index of spoken_languages above_. the downside is the output would be as follows i.e. there would be nulls inside the language row for people having less than three languages + + | first_name | language | + | ---------- | ----------- | + | Portia | Kazakh | + | Portia | null | + | Portia | null | + | Dag | Assamese | + | Dag | Papiamento | + | Dag | Telugu | + +- caching - snowflake has caching enabled by default, and it is cached for 24 hrs. to ensure this however, ensure that queries go on the **same warehouse**. this is why having dedicated virtual warehouses for dedicated groups can help +- we can confirm if the cache was used by clicking on the query id - it shows table scan + aggregation for the first time, and shows query result reuse the second time onwards +- clustering - snowflake creates cluster keys for columns to create micro partitions. this prevents full table scans +- we can explicitly do clustering + - do it for columns which are usually used in where clauses + - do it for columns frequently used in joins (similar to above) + - avoid extremes - + - not useful for columns which have too many unique values, e.g. id + - not useful for columns which have too less unique values, e.g. gender +- we can confirm clustering performance by clicking on the query id - it shows how many total partitions are there and how many partitions are used +- connecting to s3 securely using integration objects - + - create an iam role - + - select the trusted entity as the same account id in which this role is being created + - select the requires external id parameter and enter a random value here for now + - above steps result in a trust policy like below. 
note that both values entered above are placeholders for now - + ```json + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "AWS": "arn:aws:iam::8502136:root" + }, + "Action": "sts:AssumeRole", + "Condition": { + "StringEquals": { + "sts:ExternalId": "something-random" + } + } + } + ] + } + ``` + - create an integration object inside snowflake - + ```sql + create or replace storage integration snowflake_s3_demo + type = external_stage + storage_provider = s3 + enabled = true + storage_aws_role_arn = 'arn:aws:iam::8502136:role/SnowflakeDemo' + storage_allowed_locations = ('s3://snowflake-demo-3b98x97') + ``` + - run `describe storage integration snowflake_s3_demo` and copy the values under `STORAGE_AWS_IAM_USER_ARN` and `STORAGE_AWS_EXTERNAL_ID`. replace the values in the trust policy for principal and external id with this + - now, we can use the integration object when creating a stage - + ```sql + create or replace stage manage_db.external_stages.csv_folder + url = 's3://snowflake-demo-3x7' + storage_integration = snowflake_s3_demo + file_format = manage_db.file_formats.csv_fileformat + ``` +- snowpipe - enables loading of data automatically when for e.g. a new file is added to the s3 bucket +- snowpipe is serverless i.e. our compute is not used for this +- this near realtime ability is achieved via s3 notifications sent to snowflake managed sqs queue +- setting up a snowpipe - + - create a pipe - + ```sql + create pipe snowpipe_demo.public.s3 + auto_ingest = true as + copy into snowpipe_demo.public.employee + from @snowpipe_demo.public.s3_csv + file_format = snowpipe_demo.public.csv + pattern = '.*employee.*\.csv'; + ``` + - run the describe command to grab the queue arn - `describe pipe snowpipe_demo.public.s3` + - set up event notification on the s3 bucket with this sqs arn as the destination +- to view pipes, use `show pipes` or we can specify database as well using `show pipes in database snowpipe_demo` +- to make changes to the pipe, pause it first - + ```sql + alter pipe snowpipe_demo.public.s3 set pipe_execution_paused = true; + ``` +- even if we want to make changes to data, e.g. to have existing files picked up the snowpipe, pause the snowpipe before running the copy command manually to load the data of existing files +- time travel - e.g. we make an erroneous update like this - + ```sql + update test set first_name = 'Shameek'; + ``` +- we can now go back in time to look at what the data looked like before the erroneous update - + ```sql + -- go back a specific amount of seconds + select * from test at (offset => -60 * 2); + -- OR go back to a certain timestamp + alter session set timezone = 'UTC'; + select current_timestamp; + select * from test at (timestamp => '2023-07-28 03:36:21.779'::timestamp); + -- OR before a certain query (the erroneous update in this case) was executed + select * from test before (statement => '01adeb9c-0604-af37-0000-007bd70792b5'); + ``` +- note - for the `before` statement query issued above, snowflake has a history of all queries executed which we can see in the ui +- e.g. 
of restoring -
  ```sql
  truncate table test;
  insert into test (
    select * from test before (statement => '01adebc9-0604-af9c-0000-007bd707b315')
  );
  ```
- optionally, load the time traveled data into a backup table and then load the original table from this backup, instead of loading the data into the original table directly as described above
- if we accidentally drop a table / schema / database, we can run the undrop command, e.g. `undrop table test` to restore it
  - optionally, if we accidentally run `create or replace table test...`, we can restore the test table to its state before the replace command was executed by first renaming the current wrongly instantiated table, e.g. `alter table test rename to test_aux`, and then running the undrop command to bring back the version of the test table from before the replace
- we can go back upto 90 days in editions enterprise and above, and upto 1 day in standard edition. however, the default is 1 day, so we have to increase the retention period manually (upto 90 days for editions other than standard) -
  ```sql
  alter table test_tt set data_retention_time_in_days = 2;
  ```
- failsafe - protection of historical data in case of a disaster
- the failsafe period **starts after the time travel period ends**
- the failsafe period is for 7 days
- this is not queryable / usable by end users like in time travel. the idea is to reach out to snowflake support after a disaster occurs to restore the table to a previous state
- the failsafe period cannot be configured like time travel
- table type - table type is a property of the table. the different table types are -
  - permanent tables - this is the default. we have both time travel (0-90 days) and failsafe
  - transient tables - we have time travel (0-1 day). but no failsafe
    ```sql
    create or replace transient table -- ...
    ```
  - temporary - we have time travel (0-1 day) but no failsafe. note - this is only scoped to a session i.e. we lose this table when the session is closed / cannot view it from other sessions
    ```sql
    create or replace temporary table -- ...
    ```
- the types above are not only scoped to a table, but to databases / schemas as well
- we would pay for additional storage for failsafe / time travel, so use transient tables for "reproducible" data like the staging layer of a warehouse?
- **zero copy cloning** - when we use the clone command, the new table reuses the data and metadata of the older table. this way, it is cost efficient. the additional updates however do not affect one another
- we can clone storage objects (databases, tables, schemas) and stages, file formats, tasks, etc
- we can use time travel with cloning as well -
  ```sql
  create table cloned
  clone source
  before (timestamp => ...)
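  -- a couple of hypothetical variations of the same clone (the offset and query id below are placeholders) -
  -- create table cloned clone source at (offset => -60 * 5);
  -- create table cloned clone source before (statement => '<query-id>');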
+ ``` +- swap table / schemas - swaps the underlying metadata and data as well + ```sql + alter table swap_demo.public.development + swap with swap_demo.public.production; + ``` +- data sharing - data is not copied again, so it is automatically immediately up to date for the consumer +- snowflake users it is shared with have to use their own compute resources for this +- creating a share - + ```sql + create or replace share orders_share; + grant usage on database data_share_demo to share orders_share; + grant usage on schema data_share_demo.public to share orders_share; + grant select on table data_share_demo.public.orders to share orders_share; + ``` +- add account to share - + ```sql + alter share orders_share add account = <>; + ``` +- create a database from the share inside the consumer account - + ```sql + create database orders_db from share <>.orders_share; + ``` +- now, the consumer can start consuming the data from this newly created database +- till now, we assumed that the consumers have their own snowflake account when sharing data. non snowflake users can access shares via a reader account. however, our compute is used in this case +- create a reader account + ```sql + create managed account analytics + admin_name = analytics + admin_password = 'P4$$w0rcl' + type = reader; + ``` +- add the reader account to the share - + ```sql + show managed accounts; -- use the value of "locator" for the value below + alter share orders_share add account = QBB35692; + ``` +- in the reader account, create database from share - + ```sql + show shares; + create database orders_db from share <>.orders_share; + ``` +- create a virtual warehouse inside the reader account (looks like parent account virtual warehouses and reader account virtual warehouses are not exposed to each other?) +- for granting select on all tables in a database / schema - + ```sql + -- instead of + grant select on table data_share_demo.public.orders to share orders_share; + -- do + grant select on all tables in database data_share_demo to share orders_share; + -- or + grant select on all tables in schema data_share_demo.public to share orders_share; + ``` +- views - e.g. instead of sharing all data, we want to share some restricted data. we can do this via views. e.g. - + ```sql + create or replace view data_share_demo.public.loan_payments_cpo as ( + select loan_id, principal + from data_share_demo.public.loan_payments + where loan_status = 'COLLECTION_PAIDOFF' + ); + ``` +- however, the issue with the above is for e.g. if we grant a role select on this view, and if a user with that role runs the command `show views`, they can view things like view definition. ideally, i would not have wanted to expose the fact that loan_status maintains an enum, since it is not even present in the projection clause of the view creation statement +- creating a secure view - `create or replace secure view...` +- note - we cannot use shares with normal views, we have to use secure views +- data sampling - use a subset of dataset when for e.g. testing workflows out +- two methods of sampling in snowflake - + - row or bernoulli method - every row is chosen with a probability of percentage p. so, it maybe more random since continuous rows are not chosen + ```sql + select * from snowflake_sample_data.tpcds_sf10tcl.customer_address + sample row (1) seed (25); -- seed helps reproduce same results when using randomness + ``` + - block or system method - every block is chosen with a probability of percentage p. 
so, it maybe a bit more quicker, since it uses micro partitions + ```sql + select * from snowflake_sample_data.tpcds_sf10tcl.customer_address + sample system (1) seed (25); + ``` +- tasks - it stores an sql statement that can be scheduled to be executed at a certain time or interval + ```sql + create or replace task task_db.public.customer_insert + warehouse = compute_wh + schedule = '1 minute' + as + insert into customers (created_date) values (current_timestamp); + ``` +- notice how tasks use our compute unlike snowpipe, materialized views, etc? +- on running `show tasks`, feels like tasks are suspended by default. so, run the following - + ```sql + alter task task_db.public.customer_insert resume; + ``` +- for crons, - `schedule = 'USING CRON * * * * * UTC'` +- tree of tasks - a root task, which can then have children (multiple levels are allowed). one child task can have one parent task, but one parent task can have multiple children. when declaring a child task, instead of `schedule`, we use `after task_db.public.parent_task` +- note - i think the parent task needs to be suspended first i.e. we first suspend the parent task, create and resume the child task and then finally resume the parent task, else we get an error. even as a best practice that feels right +- getting execution history of tasks like errors, completion time, etc. it also has records for the next queued execution + ```sql + select * from table(task_db.information_schema.task_history(task_name => 'customer_insert')); + ``` +- tasks can also have a `when` clause, and the task is executed only if the condition evaluates to true, else the task is skipped +- streams - helps with cdc (change data capture) to capture the delta (changes) of the source data. so, streams help capture dml (i.e. crud) changes +- we only pay for the storage of metadata columns of the stream that helps determine whether the row was deleted, updated, etc. the rows in streams reference the original source for the actual data +- create a stream - + ```sql + create or replace stream streams_demo.public.sales_raw_stream + on table streams_demo.public.sales_raw; + ``` +- we can run select on the stream table just like we would on a normal table + ```sql + select * from streams_demo.public.sales_raw_stream; + ``` +- the stream has three additional columns - `METADATA$ACTION`, `METADATA$ISUPDATE`, `METADATA$ROW_ID` +- once we process the stream, the data in the stream is deleted. it feels like stream is like an "auto generated temporary staging layer" of the warehouse. e.g. if i insert into a table by running a select on the stream table, the stream table clears up +- an update corresponds to two rows in streams - an insert and a delete for `METADATA$ACTION`, and true for `METADATA$ISUPDATE` in both rows. so, `METADATA$ACTION` is always either insert or delete, and we need to determine if the change is due to an update using `METADATA$ISUPDATE` +- e.g. of using streams - imagine store is a static reference table. we want to process the changes in sales table to a table used for analytics, that is like a join between sales and store tables. so, we can assume that for every record in the sales table, there would be a record in this sales analytics table, with added information about the store. 
so, the stream is needed for the sales table, and not the store table, and we update the final table used for analytics by joining the sales stream table and store reference table + ```sql + create or replace stream streams_demo.public.sales_raw_stream + on table streams_demo.public.sales_raw; + + merge into streams_demo.public.sales_final sf + using ( + select sa.*, st.employees, st.location + from streams_demo.public.sales_raw_stream sa + join streams_demo.public.store_raw st + on sa.store_id = st.store_id + ) src + on src.id = sf.id + when + matched + and src.METADATA$ACTION = 'DELETE' + and not src.METADATA$ISUPDATE + then delete + when + matched + and src.METADATA$ACTION = 'INSERT' + and src.METADATA$ISUPDATE + then update set + sf.product = src.product, + sf.price = src.price, + sf.amount = src.amount, + sf.store_id = src.store_id, + sf.location = src.location, + sf.employees = src.employees + when + not matched + and src.METADATA$ACTION = 'INSERT' + and not src.METADATA$ISUPDATE + then insert values ( + src.id, + src.product, + src.price, + src.amount, + src.store_id, + src.location, + src.employees + ); + ``` +- we can use streams in the `when` clause of tasks! so, we can pretty much build an entire etl pipeline just using snowflake - + ```sql + when system$stream_has_data('stream-name') + as -- the entire sql for stream processing defined above + ``` +- stream types - standard and append-only. append-only captures only inserts while standard captures inserts, updates and deletes. default is standard as seen above +- change tracking - tables have a change tracking property. we can set it to true as follows - + ```sql + alter table names set change_tracking = true; + ``` +- now, with change tracking enabled, we can basically see the changes in a table in the same format as we saw in streams - + ```sql + select * from names + changes (information => default) + at (offset => -240); + ``` +- my understanding - **the difference is that unlike streams, this does not get deleted. its almost like we have a rolling window of cdc until the time travel / retention period** +- again - notice the use of default in the changes clause above. we can also use append_only instead +- materialized view - if we run an expensive query frequently, it can lead to bad user experience. so, we can instead use materialized views + ```sql + create or replace materialized view orders_mv as + -- ... + ``` +- so, materialized views are updated automatically when its base tables are updated. this updating is maintained by snowflake itself. when we query using materialized view, data is always current +- this means that if the materialized view has not been updated completely by the time we initiate a query, snowflake will use the up to date portions of the materialized view and fetch the remaining data from the base tables +- since background services of snowflake are being used for updating materialized views, it adds to the cost independent of our virtual warehouses +- use materialized views if data is not changing frequently and view computation is expensive. if data is changing frequently, use change tracking / streams + tasks +- [has a lot of limitations i think 😭](https://docs.snowflake.com/en/user-guide/views-materialized#limitations-on-creating-materialized-views) - joins, some aggregation functions, having clause, etc are not supported at the time of writing +- dynamic data masking - returns masked results for security purpose, e.g. 
pii (personally identifiable information) + ```sql + create or replace masking policy phone + as (val varchar) returns varchar -> + case + when current_role() in ('ACCOUNTADMIN') then val + else '#####' + end; + + alter table customers + modify column phone_number + set masking policy phone; + ``` +- some more masking policy examples - + - we just want to see the domain of the emails - + ```sql + when current_role() not in ('ACCOUNTADMIN') then regexp_replace(val, '+\@', '****@') + ``` + - we want to be able to do comparisons, e.g. we want to join by name, but we do not want to allow seeing of the names. we can use `sha2(val)`, so that while users see an encrypted value, it is a consistent hash, so running it on the same value will produce the same result + +### Access Management + +- rbac (role based access control) i.e. privileges are assigned to roles, which are inturn assigned to users +- in snowflake we have dac (discretionary access control) i.e. every object has an owner, who can grant access to that resource. so, all objects have an owner, which is a role, and this role has all privileges on that object by default. the objects on which we can grant privileges are also called securable objects, e.g. warehouses, databases, tables, etc +- role hierarchy - the parent role will automatically have the privileges of all of its child roles +- my understanding + - a user can have multiple roles + - the public role is assigned to all new users by default + - the default role is the one that determines what role to use when for e.g. a new worksheet is opened by the user, or maybe like when no role is specified + - for all roles to be used, set secondary role to all. e.g. we have a system account, which has warehouse access via a different role, and access to tables via yet another role. we cannot specify both roles in for e.g. the jdbc url. so, we can instead set the secondary role to all for permissions from all roles to kick in for a user anytime the user makes a query +- system defined roles - + - account admin - + - the top level role + - can manage things like reader accounts + - avoid using this, and users using this should use mfa + - do not create objects using this, as otherwise we would have to manually add privileges to users that need it (it is at the top of hierarchy so no role inherits "from" it) + - only account admin can view things like usage / billing information + - security admin - + - can manage any object grant globally - my doubt - does this mean it can do this for objects that it (or its nested children) do not own as well? + - can be used to create and manage roles but thats usually done by useradmin? + - example - (note the hierarchy i.e. sales_user is a child of sales_admin, which is inturn a child of sysadmin. this is a best practice) + ```sql + create or replace role sales_admin; + create or replace role sales_user; + + grant role sales_user to role sales_admin; + grant role sales_admin to role sysadmin; + + create or replace user simon_sales_user + password = 'p@$$worcl' + default_role = sales_user; + grant role sales_user to user simon_sales_user; + + create or replace user olivia_sales_admin + password = 'p@$$worcl' + default_role = sales_admin; + grant role sales_admin to user olivia_sales_admin; + ``` + - sysadmin - + - create warehouses, databases, etc + - custom roles should be attached to sysadmin as a best practice. this way, the objects created by these custom roles can be managed by sysadmin. 
otherwise, this would not be possible + - example - we run the below from inside sysadmin. despite us granting ownership to sales_admin, sysadmin can still perform all the operations on these objects since sysadmin inherits permissions from sales_admin. refer above, this setup was basically done by security admin + ```sql + create or replace database sales_db; + grant ownership on database sales_db to role sales_admin; + grant ownership on schema sales_db.public to role sales_admin; + ``` + - now, from inside sales_admin, we can run the below - + ```sql + grant usage on database sales_db to role sales_user; + grant usage on schema sales_db.public to role sales_user; + grant select on table sales_db.public.customers to role sales_user; + ``` + - useradmin - + - used to create / manage users and roles + - unlike securityadmin, it does not have ability to grant privileges to all objects, only on objects that it owns + - public role + - every user is granted this role by default + +![role hierarchy](/assets/img/warehouse-and-snowflake/role-hierarchy.png) diff --git a/_posts/2023-08-12-messaging-systems.md b/_posts/2023-08-12-messaging-systems.md new file mode 100644 index 0000000..ffa3f37 --- /dev/null +++ b/_posts/2023-08-12-messaging-systems.md @@ -0,0 +1,259 @@ +--- +title: Messaging Systems +--- + +## Kafka + +### Setup + +- note - environment should have java 8+ installed +- download the zip from [here](https://www.apache.org/dyn/closer.cgi?path=/kafka/3.5.0/kafka_2.13-3.5.0.tgz) +- unzip it - `tar -xzf kafka_2.13-3.5.0.tgz` +- note - the 2.13... here is not the kafka, but the scala version? + +### Staring using Zookeeper + +- in one terminal, start zookeeper - `zookeeper-server-start.sh ~/kafka_2.13-3.5.0/config/zookeeper.properties` +- in another terminal, start kafka - `kafka-server-start.sh ~/kafka_2.13-3.5.0/config/server.properties` + +### Starting using Kraft + +- generate a cluster uuid - `KAFKA_CLUSTER_ID="$(~/kafka_2.13-3.5.0/bin/kafka-storage.sh random-uuid)"` +- format log directories - `kafka-storage.sh format -t $KAFKA_CLUSTER_ID -c ~/kafka_2.13-3.5.0/config/kraft/server.properties` +- start kafka - `kafka-server-start.sh ~/kafka_2.13-3.5.0/config/kraft/server.properties` + +### Concepts + +- helps with system integrations. sources produce data into kafka, and targets consume from kafka +- distributed, resilient, fault tolerant +- created by linkedin, now maintained by ibm, cloudera, confluent, etc +- works with spark, flink, hadoop, etc +- a sequence of messages is called a data stream +- kafka topic - a particular stream of data +- a topic is identified by topic name +- topics support any kind of message format like json, avro, binary, etc +- we can produce data using kafka producers, and consume data using kafka consumers +- topics are split into partitions +- **messages within a partition are ordered** +- **messages in a partition get an id called offset**. note - so offsets are specific to a partition +- so, order is only guaranteed inside one partition +- **offsets are not reused in a partition even if previous messages are deleted from it** +- immutability - once data is written into a partition, it cannot be updated / deleted, we can just append (add) data to it +- my understanding - we basically interact with kafka producers and consumers in our code, and they internally do things like batching, where we provide network configuration, security parameters, etc +- producers can optionally send a key along with the message. 
this key can be a string, number, binary, etc +- if this key is null, then the message can end up in any partition +- if this key is not null, this key is hashed to produce the partition number. this partition number then determines the partition the message should go to. use case - e.g. we have a delivery service, where our trucks send its coordinates every 5 seconds. we should ensure that a truck sends its coordinates to the same partition to ensure ordering, therefore the truck can use its id as the kafka message key. messages with the same key end up in the same partition +- internally kafka partitioner determines the partition using murmur2 algorithm +- parts of a message - key, body, compression (e.g. gzip, snappy, etc or even none), headers (key value pairs) and a timestamp (can be set by the system or by the user) +- kafka message serializer - help in serializing our messages which are objects into bytes. e.g. if our key is an integer and our value is a string, kafka will use its inbuilt integer and string serializer respectively for this +- consumers - pull model i.e. consumers request for data from the brokers, and not the broker pushing data into the consumers +- consumers can deserialize using deserializers similar to serializers +- best practice - do not change serializer in the producer, since that will break the deserializers in the consumers. so, create a new topic instead and have the consumers to start pulling from this new topic +- **consumers in kafka read as a consumer group** +- **consumers in a group read from exclusive partitions** i.e. multiple consumers of the same group cannot read from the same partition +- so, if we have more consumers in a consumer group than the number of partitions, (number of consumers - number of partitions) consumers remain idle +- however, a consumer in a consumer group can read from multiple partitions (e.g. when number of partitions > number of consumers) +- of course consumers from different consumer groups can read from the same partition +- if suppose a consumer from a consumer group is removed, the partitions that consumer was responsible for is automatically distributed among the other members of that consumer group +- a consumer group id is used to help identify the consumers part of the same group +- consumer offset - **consumers store the offsets they have read up till in a topic called __consumer_offsets periodically**. this way, if they die and come back up, they can continue reading from the same position in the partition where they left off +- a kafka cluster has multiple kafka brokers. each broker is identified by an id +- **each broker only contains some partitions of a topic** - so data is distributed. understand the implication of this - **this way, our topic is not limited to scale by the capability of only one worker node** in our kafka cluster +- broker discovery mechanism - consumers do not need to connect to all brokers in advance. they only need to connect to one broker, and by that they are automatically able to connect to all brokers since on initiating connection with one broker, all metadata related to the other brokers, partitions, etc is sent +- topic replication factor - if a broker is down, another broker is still available to produce data to and receive data from. **replication factor = how many copies i.e. 
how many brokers will have the same partition's copy** +- in sync replicas (isr) - all replica brokers that have caught up with the broker +- since there are multiple partitions, there is a leader among these partitions, and producers can only send data to this leader +- consumers by default only consume from the leader. so i think the replication factor only helps with disaster recovery in this case +- however, in newer versions, kafka consumers can read from replica brokers as well, if the replica broker is closer to them (e.g. we should have the consumer read from the isr in same az and not the leader / another isr in a different az to help reduce network latency and costs). this feature is called rack awareness, and for this to work, `rack.id` on the broker should have the same value as `client.rack` on the consumer +- producer acknowledgements - + - acks = 0 means producer will not wait for acknowledgement + - acks = 1 means producer will wait for acknowledgements from leader. data can be lost if leader goes down unexpectedly before replication goes through to other brokers. it was the default earlier + - acks = all (or -1) means producer will wait for acknowledgement from all replicas along with the master as well. default kafka 3.x onwards + - this option goes hand in hand with the `min.insync.replicas` option, which states how many replicas should acknowledge the data. if its value is 1, it means that only the leader has to acknowledge the data + - so, one ideal configuration to start with would be setting min isr to 2, acknowledgement mode to -1 and setting replication factor to be 3. this way, at least one replica and the leader have the write before the producer can consider the message successfully written into kafka +- topic durability - if replication factor is m, and say we want isr to be n, then we can tolerate m - n brokers going down. so, for e.g. don't over optimize i.e. if min in sync replicas are 3, (acknowledgement mode is all) and replication factor is 3, that means we cannot withstand any broker going down, which might be too much +- retries - note - this is producer retries not consumer, don't confuse with concepts like dlq here 😅. retries here refer to transient failures like kafka saves the message but acks fail, required number of brokers (min insync replicas) are unavailable at the time so kafka cannot save the message, etc. focussing on the newer versions here - + - retries (`retries`) are set to infinite (2147483647) by default. so, after the producer sends the message and if there is a failure for some of the transient reasons discussed above, the producer would again retry sending the message + - idempotence (`enable.idempotence`) is set to true by default. imagine that kafka was able to save the message i.e. write it to the replication factor number of partitions, but the ack failed. so, the producer thinks that some stuff have failed and will retry sending. so, since this property is set to true, kafka would know not to re add this message to the partitions, and would just try sending the ack again. this helps with exactly once semantics (and not duplicating thus resulting in at least once). now, from what i understood, it also helps with ordering. so, if for example the producer sends the first batch and kafka fails to commit it, when the second batch is received by kafka, kafka would throw an out of order exception to the producer. with this property, its almost like a sequence number is sent with each batch. 
this way, both ordering and exactly once semantics are ensured + - max in flight requests (`max.in.flight.requests.per.connection`) is set to 5 by default. **this is basically how many concurrent requests producer will send without receiving the acknowledgements for them**. after this number, if our application calls send on the producer, it will start blocking. this needed to be 1 in older versions to maintain ordering, but with idempotence now, it is enough to keep this <= 5 based on what we discussed above and [this](https://docs.confluent.io/platform/current/installation/configuration/producer-configs.html#max-in-flight-requests-per-connection) + - delivery timeout (`delivery.timeout.ms`) is set to 120000 i.e. 2 minutes by default. now retries is infinite does not mean producer would just keep retrying endlessly in case of failure, since the time it first sent the message, it would keep retrying until this timeout occurs. again remember that this retrying decision is being done by the producer which we write, so we can configure it in the properties +- zookeeper - helps with managing multiple brokers. so, helps with issues like leader election, sending notifications to other brokers if a brokers goes down, etc +- kafka up to 2.x cannot work without zookeeper. however, kafka from 3.x can work without zookeeper using kraft, and kafka 4.x onwards will not use zookeeper at all +- zookeeper itself too runs in master slave mode, runs odd number of servers underneath +- because of this change of migrating away from zookeeper, we should not mention zookeeper configuration inside our connections, but only mention broker endpoints. this change can even be seen in the kafka cli etc, e.g. when running kafka-topics.sh, we do not specify the zookeeper endpoint. this way when we change from 3.x to 4.x, there would be slim to no change required from us +- understand how the offsets are associated to a consumer group on a per partition basis +- as we add / remove more consumers to a group, the existing consumers are notified of this and they accordingly adjust the partitions that they listen to +- when a new partition is added to a topic, this new partition also needs to be assigned to one of the consumers of a group subscribed to the topic +- partition rebalance - moving of partitions between consumers - can happen due to adding new partitions to the topic / adding or removing consumers in a group +- there are different strategies to partition rebalance (`partition.assignment.strategy`) - + - **eager rebalance** - all consumers give up their ownership i.e. the partition they were responsible for. then a fresh calculation is made and the consumers are randomly assigned the partitions again. issue - it might happen that an existing consumer now starts listening to a new partition. also, for albeit a brief period when the rebalancing is happening, there would be no consumers at all, this phenomenon where there are no consumers at all during a brief period is called stop the world event + - **cooperative rebalance / incremental rebalance** - process is uninterrupted for unaffected partitions, e.g. imagine consumer 1 was subscribed to partition 1, and consumer 2 was subscribed to partitions 2 and 3. if a new consumer is added, only for e.g. partition 3 would be reassigned to this new consumer, but data from partitions 1 and 2 continues flowing uninterrupted +- **static group membership** - by default, when a consumer leaves a group, the partition they owned is reassigned. 
we can specify a `group.instance.id` which makes the consumer a static member. this way there is no rebalance until `session.timeout.ms` (heartbeat mechanism discussed later), so the consumer has this much time to be able to come back up, otherwise the partition would be rebalanced. use case - consumers for e.g. maintain a cache and this way, a rebuilding of that cache is not required by the new consumer. feels like without this property, the partition would be reassigned to another consumer and not wait for the session timeout? +- quick question - how to implement a fan out pattern in kafka - do not assign the consumer group id / specify a different value for the consumer group id for each of your horizontally scaled instances - this way all the instances will receive the message +- producer compresses the batch of messages before sending it to the broker +- this helps with things like better utilization of disk on kafka, better throughput, etc +- compression can be specified at producer / topic level +- compression can be specified at producer level or the broker level as well using `compression.type` - + - producer - the default. use the compressed batch from the producer as is and write directly without recompression + - none - all batches are decompressed by the broker + - specify a type like lz4 explicitly. if the compression format is the same as done by the producer then store as is, else decompress and recompress using the specified format +- so, the summary of above according to my understanding is, leave compression type at broker level to be producer (it is the default), and set the compression type to be snappy or something at the producer config (default is none) +- batching settings - increasing batch sizes improves throughput, means lesser network calls, compression becomes more effective, etc. but of course it introduces latency for downstream consumers + - `linger.ms` - how long the producer should wait before sending the message to kafka. default is 0 + - `batch.size` - if the batch fills to this value before `linger.ms` is over, send the batch. default is 16 kb +- `partitioner.class` - in earlier versions of kafka, if we specify no key for our message, the messages are sent to partitions in round robin fashion using **round robin partitioner**. disadvantage - for e.g. remember batching happens at partition level, so this means we cannot utilize batching effectively, since there is a batch being created for every partition. **sticky partitioner** is the default in newer versions of kafka. this means that instead of round robbin, producer would fill one batch (until `linger.ms` or `batch.size`) and then send to one partition. after this, a new batch is started. so we can leave this property untouched in newer versions +- delivery semantics - this is for consumers + - at least once - default and usually preferred. commit offset after processing of message is over. if processing of message fails or imagine consumer crashes after receiving messages, message will be read again and reprocessed since the offset was not committed. so, the processing logic must be idempotent + - at most once - commit offset as soon as message is received. if processing of message fails or imagine that after receiving messages, the consumer crashes, messages will be lost and not read again. this case ensures a message would not be processed multiple times + - exactly once - this would only be possible if both source and sink is kafka. we use the transactional api in this case. e.g. 
when using kafka streams for transformations, we can use this +- to make our processing idempotent with at least once semantics, for a given message, we should add an id, e.g. imagine how we know for an object if it needs to be updated or created in the database based on its id property. otherwise, we can use kafka coordinates - every message will have a unique (topic + partition + offset) combination, so for e.g. we could generate an id like this - `__` (understand why a separator like _ is needed - otherwise there is no way to differentiate between partition 2 offset 22 and partition 22 offset 2) +- offsets are committed after at least `auto.commit.interval.ms` time has passed since us calling poll(). the default value of this is 5 seconds. my understanding - e.g. we poll every 7 seconds, and auto commit interval is 5 seconds. when the second poll is called, the first poll would be committed. however, if we poll every 5 seconds, and auto commit interval is 7 seconds, **the first poll would be committed when the third poll is called** +- for staying inside at least once semantics, because of what was described above, our processing should be synchronous - before we call poll the next time, our current batch should have been successfully processed, so that if by chance the next poll has to commit, it can be sure that we have already successfully processed our current batch. in auto commit, commitAsync is called +- we can disable auto committing as well, and instead manually commit offsets using `consumer.commitSync()` / `consumer.commitAsync()` +- the auto offset reset (`auto.offset.reset`) property defines how to consume from a topic if there is no initial offset i.e. a new consumer group has just started listening - the default is latest i.e. start consuming from the end of the partition. we can set it to earliest. my understanding - earliest corresponds to the `--from-beginning` flag in the cli for kafka console consumer +- we can also reset consumer offsets. internally, feels like this might be possible since it is as simple as adding a message to the __consumer_offsets topic, due to the cleanup policy being compact? (discussed later) +- consumers send a heartbeat every `heartbeat.interval.ms` seconds (3 seconds by default), and if no heartbeats are received for `session.timeout.ms` seconds (45 seconds by default), the consumer is considered dead. this heartbeat related functionality is carried out by the heartbeat thread +- if a new poll call is not made in `max.poll.interval.ms`, the consumer is considered to have failed processing of that message. my understanding - this is important because all offset commits are done by newer poll calls for the previous polls? so maybe this way, kafka can know that for some reason, message processing has been stuck or has failed, and it has to re send the message for processing? +- for replicating data across kafka clusters, e.g. if cluster is across regions, or for e.g. when we are hitting performance limits with one kafka cluster and need multiple kafka clusters, etc, we can use tools like mirror maker 2. replication can be active active (two way replication, e.g. data producers in multiple regions) or active passive (one way, e.g. for global resiliency) +- when we try to connect to kafka, kafka brokers have a setting called `advertise.listeners`. 
this way, when the client tries connecting to the kafka broker, the broker returns this value and the client instead tries connecting using this value if the value it initially tried connecting using was different. e.g. imagine client tries connecting using a public ip, but the value returned by the broker using `advertise.listeners` is the private ip address +- partition count - if we change the partition count suddenly, understand it would affect ordering of messages with same keys etc +- more partitions = more parallelism +- partitions should be usually 3 times the number of brokers, so 3 partitions per broker +- replication factor - if we change this, we increase load on our kafka custer, since there is more network calls etc involved for the replicas +- replication factor should be usually 3 +- [topic naming guide](https://cnr.sh/essays/how-paint-bike-shed-kafka-topic-naming-conventions) - `..`. for message type, all possible values are mentioned in the link, some common ones are `queuing` for classic use cases, `etl` for cdc, etc. dataset name is like database name and data name is like table name. also use snake case +- [debezium](https://github.com/debezium/debezium) uses kafka connectors and kafka ecosystem underneath, and helps do realtime cdc by using database's transaction logs +- so, two common patterns with kafka - + - use applications like spark, flink, (or even kafka itself) etc to read from kafka and generate realtime analytics + - use kafka connect to write to s3, hdfs, etc from kafka and generate batch analytics from this +- kafka metrics - monitor a lot of things like how many under replicated partitions exist i.e. how many partitions have issues with in sync replicas +- we can enable in flight encryption ssl, authentication and authorization +- kafka has data retention for 7 days by default +- but until then, everything is internally in file formats, e.g. i tried poking around in the log.dir folder on my local i.e. inside /tmp/kraft-combined-logs/ +- partitions are internally made up of segments +- so, there is one (the latest) active segment, and other segments can be consider obsolete +- a segment is closed means it is available for log cleanup - this helps delete obsolete data from the disk of kafka +- how to cleanup logs - there are two possible values for `cleanup.policy` on a topic - `compact` (default for __consumer_offsets) and `delete` (default for all user defined topics) +- a segment is closed and a new one is started when either the `log.segment.bytes` size is reached, or if `log.retention.hours` is reached +- if we set cleanup policy to be compact - a new segment is created, and only the values for the latest keys for a topic is retained, and others are discarded. so e.g. segment 1 has value a for key x and value b for key y, and segment 2 has value c for key y, the newly created segment would have value a for key x and value c for key y. 
this behavior also makes sense for the consumer offsets topic if i think about it +- for very large messages, either tweak configuration parameters to increase maximum limits, or better, use something like sqs extended client of aws is possible + +## RabbitMQ + +- messaging systems - + - used for application to application communication + - they are near realtime - messages can be processed by consumers instantly + - helps establish a standard - both producers and consumers would have to obey this messaging system specifications, instead of each source having integration logic for each target +- rabbitmq features - + - rabbitmq is open source + - multiple instances can be deployed into a cluster for high availability + - web interface for management and monitoring + - built in user access control + - built in rest apis (mostly for diagnostic purposes but can be used for messaging, not recommended) +- running rabbitmq - + ``` + docker container run -d -p 5672:5672 -p 15672:15672 rabbitmq:3.13.1-management + ``` +- publisher / producer - sends message on the exchange +- subscriber / consumer - consumes message from the queues +- queue - the buffer tht stores messages before the consumers consume from this queue +- exchange - routes messages to the right queue +- routing key - the exchange uses this parameter of the messages to decide how to route it to the queues +- binding - link between exchanges and queues +- message durability - guarantees that messages survive server restarts and failures +- by default, everything is "transient" i.e. lost on rabbitmq server restarts! +- to ensure message durability, we need to set two parameters - + - mark queues as durable - we need to set this when creating queues + - use persistent delivery mode when publishing messages. spring does this by default for us +- rabbitmq also has two types of queues - + - "classic" - the default. has good performance, but cannot withstand node failure, since it is only present on the primary node + - "quorum" - replicated across different servers. maintains consistency using quorum +- rabbitmq can store messages either in memory or on disk +- the "default exchange" is used if we do not specify the exchange and just specify the routing key + ```java + rabbitTemplate.convertAndSend("example.rabbitmq", "hello world"); + ``` +- some consumers - + ```java + @Component + @Slf4j + public class Consumer { + + @RabbitListener(queues = "example.rabbitmq") + public void consume(String message) { + log.info("consumed: [{}]", message); + } + } + ``` +- assume our producer is faster than the consumer. using below, 3 threads are created, one for each consumer. this way, our slow consumers can keep up with the fast producer, without us having spun up additional instances of the consumer + ```java + @RabbitListener(queues = "example.rabbitmq", concurrency = "3") + ``` +- spring rabbitmq uses jackson for serialization / deserialization of pojos +- a naming convention example - x.name for exchanges, q.name.sub_name for queues +- "fan out exchange" - implements the publish subscribe pattern - it broadcasts the message to all queues bound to it + - e.g. we have a direct exchange x.hr + - it has bindings for two queues - q.hr.marketing and q.hr.accounting + - when binding, the binding key can be empty + - similarly, when producing, the routing key can be empty + - now, any messages put on the exchange x.hr will flow to both the queues + - in the snippet below, we specify the exchange name. 
the routing key is ignored, hence it is set to an empty string + ```java + rabbitTemplate.convertAndSend("x.hr", "", employee); + ``` +- "direct exchange" - send messages to selective queues instead of broadcasting to all queues + - e.g. we have a direct exchange x.picture + - we have two queues - q.picture.image and q.picture.vector + - q.picture.image is bound using two binding keys to the exchange - png and jpg + - q.picture.vector is bound using one binding key to the exchange - svg + - now, when our routing key is png / jpg, it goes to the image queue + - when our routing key is svg, it goes to the vector queue + - so, exchange sends the message to queues where routing key = binding key + - note - if the routing key does not match any rule, the message would be discarded +- "topic exchange" - + - with direct exchange, we can only route messages using a single criteria - e.g. we only used image type above + - using topic exchange, we can route messages based on multiple criteria + - note about wildcards - + - `*` can substitute for 1 word + - `#` can substitute for 0 or more words + - e.g. we have a topic exchange x.picture + - we can send images to different queues based on image size, image type, source of image, etc + - the producer will just produce the messages using routing keys like source.size.type, e.g. mobile.large.png, desktop.small.svg and so on + - e.g. we have queues for different purposes. e.g. we want an image queue like earlier. we can have binding keys of the form either `#.png` and `#.jpg` or `*.*.png` and `*.*.jpg` + - this is true decoupling - the producer just tells the kind of messages being published, while the consumer selectively decides the messages it wants to receive based on the binding key + - similarly, if we need a consumer to consume messages for all large svg, we would use `*.large.svg` +- dead letter exchanges - + - in case of an error during consumption, spring will by default requeue the message + - we could be stuck in an infinite loop during this consumption and requeueing + - thus, we can use a "dead letter exchange" - the message after failure is forwarded to this dead letter exchange, which in turn forwards it to another queue depending on how we set the binding for this dead letter exchange + - then, from this special queue bound to the dead letter exchange, we can notify the consumers of the error + - configuring the dead letter exchange for a queue - just click on "Dead letter exchange ?" and enter the exchange name beside "x-dead-letter-exchange" + ![dead letter exchange](/assets/img/messaging-systems/dead-letter-exchange.png) + - note - we can change the routing key of the queue when moving a message to the dead letter exchange + - note - we cannot throw any exception for this too work - we need to throw `AmqpRejectAndDontRequeueException` +- time to live - + - if a message is present in a queue for longer than this timeout, it is declared "dead" + - the message from the actual queue would be moved into the dead letter exchange if configured after this timeout + - along with configuring dead letter exchange like we saw above, we can configure the queue with this ttl as well. 
it will then automatically move the messages to dead letter exchange in bot scenarios - timeouts and errors +- retry mechanism - + - some errors can be intermittent + - so, we might want to retry after x seconds for n times, before moving a message to dlq + - say we have three exchanges and three corresponding queues - work, wait and dead + - wait exchange is the dead letter exchange for work queue - when there is a failure in our consumer, the message is sent to wait exchange for "backoff" like functionality + - work exchange is the dead letter exchange for wait queue - when the message has been sat in wait queue for sometime, it is moved to work exchange for retrying + - finally, if our consumer notices that it has already tried reprocessing the message 3 times or so, it would move the message into the dead exchange which then goes into the dead queue + - we can get metadata around retires etc from rabbitmq headers +- retry mechanism in spring - + - on the above approach, there is a lot of manual code and configuration from our end + - using spring, we do not need all this logic - spring can automatically handle the retry and backoff for us, and it will move the failed messages to the dead letter exchange + - we only to ensure our queue has the right dead letter exchange configured on it + - apart from that, we can configure the retry logic (exponential backoff) like so - + ``` + spring.rabbitmq.listener.simple.retry.enabled=true + spring.rabbitmq.listener.simple.retry.initial-interval=3s + spring.rabbitmq.listener.simple.retry.max-interval=10s + spring.rabbitmq.listener.simple.retry.max-attempts=5 + spring.rabbitmq.listener.simple.retry.multiplier=2 + ``` + - retry at 3s, then 6s (refer multiplier), and remaining 2 retries at 10s gaps +- diff --git a/_posts/2023-08-19-hadoop.md b/_posts/2023-08-19-hadoop.md new file mode 100644 index 0000000..c685d9c --- /dev/null +++ b/_posts/2023-08-19-hadoop.md @@ -0,0 +1,1020 @@ +--- +title: Hadoop +--- + +## Introduction + +- big data is for terabytes or petabytes of data +- explosion of data - rate at which data is being generated is very high +- 3 vs of big data - + - data volume - as the resolution of camera has increased, so has the size of the media it generates + - data velocity - speed at which data is generated. earlier, batch jobs i.e. at a period were more common. the shift is towards near realtime / realtime now + - data variety - data used to just be in the form of tables, where rdbms systems worked great. now, we have unstructured data in the form of media etc as well i.e. variety of data has increased + - **structured data** - row column format in a table. e.g. rdbms + - **semi structured data** - well defined structure, but not necessarily structured in a tabular format, e.g. json, xml + - **unstructured data** - e.g. text files, audio, etc + - some new vs - veracity (trustworthiness of data, e.g. user input might not be as trustworthy?), value (should be able to drive business value) +- vertical scaling vs horizontal scaling - + - horizontal scaling is more scalable + - horizontal scaling is more available / fault tolerant + - horizontal scaling is more cost effective +- shared nothing - each processing has its own storage. relatively faster +- shared disk - each processing unit works on the same underlying architecture. 
time taken for data movement is high, since unlike in shared nothing where the storage can be local / closely located to the processing, it has to be located far away +- partitioning - file is broken down (partitioned) and stored in smaller parts in different nodes. also called distributed +- replication - the parts are stored in different nodes so that a node failure does not stop our processing. also called redundancy. number of total copies is determined by "replication factor" +- **4 points** - hadoop allows for horizontal scaling, follows shared nothing architecture, has partitioning, has replication +- seek time - time required for head to point to the right data on hard disk +- transfer rate - time required to move data from head of hard disk to ram +- hadoop is and isn't good at - **4 points** - + - processing large files - it is not for small files + - processing sequentially - it is not good for random access since there is no indexing like in rdbms + - handling unstructured data - it not for acid like / 3nf etc properties like in rdbms + - processing frequently changing data + +## Evolution + +- why do we need hadoop at all - e.g. when using rdbms, we can create indexes on our frequently used columns like name. however, when for e.g. google has to search its database by our search term, there is no such way easy of indexing. so, it would process our query on its data that is distributed in parallel +- so around 2004, google published a paper on gfs (google file system) and google map reduce +- in parallel around the same time, doug cutting was working on nutch +- yahoo hired doug and hadoop was created from nutch +- hadoop's hdfs = google's gfs and hadoop's map reduce = google's map reduce +- facebook launched hive, big query is google's equivalent of this +- with hive (and pig), we write sql to query or add data to hdfs, thus making writing complex operations much easier. this translates to map reduce underneath +- hbase - nosql database system on top of hdfs to store unstructured data. big table is google's equivalent of this. we store data in a denormalized format for better performance +- sqoop - data transfer (to and from) between database (mostly rdbms) and hdfs +- flume - streaming logs from distributed systems into hdfs +- [spark](/posts/spark) - complete package +- cloudera, hortonworks, etc. bundle different tools like hadoop together and distribute them + +## Hadoop Components + +- yarn - yet another resource negotiator. it is a "cluster manager". it is needed because recall how hadoop makes use of horizontal scaling, while abstracting away all the complexities underneath away from us. refer [hadoop 2.x architecture](#hadoop-2x) below for how "resource manager", "node manager" and "application master" work +- hdfs - stores large amounts of data in small chunks to allow processing them in parallel. refer [hdfs architecture](#hdfs-architecture) below for how "name node", "data node" etc work +- map reduce framework - we write simple map reduce programs discussed in this post. this is automatically run in a distributed fashion with the help of yarn, on distributed data with the help of hdfs. 
note - writing map reduce directly is not common, so tools like hive etc came into picture + +## Theory + +- my understanding - **hadoop = map reduce + hdfs + yarn** in todays world +- hadoop operating modes + - standalone - doesn't use hdfs and reads and writes directly to hard disk + - pseudo distributed - only one machine that can run both master and slave, uses hdfs + - distributed - minimum 4 nodes are needed, for production workloads +- map is run on all slave nodes, reduce is run to aggregate the results from all these slave nodes +- each machine is said to hold a split of the data +- the mapper function would be called once per split - so mappers of different splits would run in parallel +- for hadoop to work, each row of data should be processable independently and out of order +- the mapper outputs a key value pair +- while map is called for all rows, reduce is called once for each key, which is why the input of reduce contains an iterable +- one confusion i had cleared? - don't think of this map and reduce like in arrays (or even spark?). we are using `context.write`, so output of both map and reduce can contain as many elements as we want, just that map would be called once per data element, while reduce once per key along with all the values for that key. the data structure which allows multiple items for the same key is called multi bag +- so, in between the map and reduce, there is a shuffle that happens bts to help group results of map by key +- since a reduce can only run on one slave node at a time, all values for a key need to be first brought into one slave node during shuffle +- understand that output type of key / value of map = input type of key / value of reduce +- all the keys that go to a reducer are sorted by default +- number of mappers = number of splits of data. we cannot configure the number of mappers +- number of reducers by default is 1. in this case, outputs of all mappers are collected, sorted by key and then sent grouped by key to send one by one on a key wise basis to the reducer +- internally, after map process, each key is assigned to a partition +- number of partitions = number of reducers +- so, basically after map, the assigning of a partition to a key helps determine which reducer a key should go to +- the partition that an item should go to is determined based on its key - something like (consistent_hash(key) % number of partitions). so, items with the same key cannot go to different reducers +- while doing this, we should avoid skews / hot partitions +- after the partition is determined via partitioning, the shuffle phase helps get the output of map to the right partition +- finally, the items that arrive at a partition are sorted and then grouped by key, so that the reducer can get (key, iterable of values) +- remember that while the same key cannot go to different partitions, multiple keys can go to the same partition. this is why we need the sort + group operations +- we can hook into partitioning, sorting and grouping phase - helps achieve secondary sorting, joining, etc. discussed later + +## Combiners + +- to reduce the overhead of shuffle, we can add a combiner - this means before shuffling, first combine the outputs of map on a single node +- e.g. if for word count, instead of shuffling, we can first ensure we reduce at the slave node level. this way, a key would be present at most once in one slave node. 
this reduces the amount of data to shuffle +- we can use the same class for combiner and reducer if we want +- combine may or may not run. e.g. if hadoop feels the amount of data is too less, the combine operation might not run. so, following points are important - + - our combine operation should be optional i.e. we should be sure that even if our combine operation does not run, our results stay the same. e.g. we want to find out all the words that occur 200 or more times. we can only add the values for a key in a combiner. writing the word to the context based the condition that it occurs 200 or more times can only stay inside the reducer since at that point, the reducer has all the values. basically, it might happen that one worker's combine sees count as 150 for a particular word and another worker's combiner sees count as 60 for the same word + - input and output format of combine operation should be same so that it whether it runs or not makes no difference (and of course these types should also be the same as output of map and input of reduce) +- so, the entire process looks like this? - map -> combine -> partition -> shuffle -> sort -> group -> reduce + +## HDFS Commands + +- hdfs - hadoop distributed file system +- to list all folders and files in hdfs recursively - `hdfs dfs -ls -R /`. this command works with folders as well i.e. at the end, specify a custom path instead of / +- use `hdfs dfs -put first-speech.txt` to put a file into hadoop. it is placed in /user/shameek (inside hdfs) by default, else specify the custom path at the end of the command +- get a file from hdfs into local - `hdfs dfs -get first-speech.txt` +- read the output from hdfs directly instead of copying it to local first - `hdfs dfs -cat output/part-r-00000` +- change permissions - `hdfs dfs -chmod 777 ExamScores.csv` +- cp copy a file from one location to another inside hdfs - `hdfs dfs -cp ExamScores.csv ExamScores2023.csv` +- moving file from one location to another inside hdfs - `hdfs dfs -mv ExamScores.csv ExamScores2021.csv` +- browse the file system using gui - go to http://localhost:9870/ -> utilities -> browse the file system + +## HDFS Architecture + +- hdfs - hadoop distributed file system +- hdfs is used for terabytes and petabytes of data +- name node is a daemon running on master +- data nodes are daemons running on slave nodes +- name node maintains metadata e.g. which file is stored where. recall how file is stored in distributed mode, replicated mode, etc. these records are maintained in the form of metadata in the name node +- e.g. if we have a file of 300mb. we submit it to the name node, which would then break the file into **splits** of 128mb (default), so 128mb + 128mb + 44mb and stored in different slave nodes, so that they can be processed in parallel +- secondary name node and name node - secondary name node has something called "edit logs". to me, this feels like transaction logs in database i.e. all changes are continuously recorded in the edit logs of the secondary name node. the "fs image" is present on the name node, which is like the current snapshot of the system, e.g. the chunks of file described above is present in data node 1, data node 2 and data node 7. as changes happen continuously, e.g. we add / remove / modify files etc, the changes come up in the edit logs of the secondary name node. 
the secondary name node then periodically merges these edit logs into the fs image of the name node so that it reflects the current state of the system +- hadoop 2.x onwards, a standby name node is present as well. so, the name node is a single point of failure in hadoop 1.x but not in hadoop 2.x +- hdfs client - gets the metadata from the name node and accordingly requests the data nodes for the data i.e. - + - hdfs client asks the name node where the data is / tells the name node it wants to store file x + - name node responds with how to store the file / where the file is stored + - hdfs client then interacts with the data nodes accordingly + ![hdfs architecture](/assets/img/hadoop/hdfs.drawio.png) +- my understanding - why this split of responsibilities is needed - e.g. if the name node itself gathered the data from the data nodes and responded to the hdfs client, it would become a bottleneck and the entire point of distributing the data would be lost +- because of this distributed nature of the data, a checksum is stored in the name node's metadata, and the hdfs client itself calculates a checksum from the data it gathers from the data nodes. these two checksums are compared to verify the integrity of the data
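+- note - the snippet below is not from the original notes, just a small illustrative sketch of driving this client flow from code. `FileSystem.get` resolves the name node from the configured `fs.defaultFS`, while the streams it hands back read / write the actual blocks on the data nodes. the paths and contents are just examples
+  ```java
+  import org.apache.hadoop.conf.Configuration;
+  import org.apache.hadoop.fs.FSDataInputStream;
+  import org.apache.hadoop.fs.FSDataOutputStream;
+  import org.apache.hadoop.fs.FileSystem;
+  import org.apache.hadoop.fs.Path;
+
+  public class HdfsClientExample {
+
+      public static void main(String[] args) throws Exception {
+          // picks up core-site.xml / hdfs-site.xml from the hadoop configuration directory
+          Configuration configuration = new Configuration();
+          FileSystem hdfs = FileSystem.get(configuration);
+
+          // roughly what `hdfs dfs -put` does - the name node decides block placement,
+          // the client then streams the bytes to the chosen data nodes
+          try (FSDataOutputStream out = hdfs.create(new Path("/user/shameek/example.txt"))) {
+              out.writeUTF("hello hdfs");
+          }
+
+          // roughly what `hdfs dfs -cat` does - the name node returns block locations,
+          // the client reads the blocks from the data nodes directly
+          try (FSDataInputStream in = hdfs.open(new Path("/user/shameek/example.txt"))) {
+              System.out.println(in.readUTF());
+          }
+      }
+  }
+  ```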
+- data nodes also send heartbeats to the name node periodically + +## Resource Management Architecture + +### Hadoop 1.x + +- job tracker - daemon located in master, this is where we submit the map reduce jobs via hdfs client +- the job tracker then breaks the job into multiple tasks and submits to task tracker +- task trackers run on the slave nodes. there can be multiple instances of task trackers running on a single slave node +- rack awareness - name node is rack aware i.e. for e.g. client is directed to the closest data node where the data might be present out of all the data nodes having the replicated data. (recall kafka had something similar) +- just like name node vs data node in hdfs, here, the job tracker is a daemon running on master node while the task tracker is a daemon running on slave nodes +- multiple slots can be present on a slave node - understand how a server can be multi core and therefore, perform multiple tasks at a time +- so, these slots are basically jvms which run slices of work + +![hadoop1.x](/assets/img/hadoop/hadoop1.x.drawio.png) + +### Issues + +- hadoop 2.x and 3.x are similar, just performance improvements +- hadoop 1.x vs hadoop 2.x - in hadoop 1.x, cluster resource management and data processing both is done by map reduce framework. in hadoop 2.x, cluster resource management has been delegated to yarn (yet another resource negotiator), while map reduce framework is only responsible for data processing. the underlying storage continues to be hdfs in both versions +- so, map reduce in hadoop 1.x = map reduce (data processing) + yarn (resource management) in hadoop 2.x + +### Hadoop 2.x + +- so now, map reduce is just used for data processing, while cluster resource management is done by yarn +- so, map reduce, spark, etc sit on top of yarn, while hive, pig, etc sit on top of map reduce +- it does things like - + - resource management + - assigning tasks to nodes that have sufficient resources + - rescheduling failed tasks to new nodes +- yarn has two components - resource manager and node manager +- resource manager runs on master +- resource manager has two components - resource scheduler and application manager +- resource scheduler does not deal with any logic around retrying etc, it just cares about assigning of resources (like ram, etc) based on availability +- application manager is responsible for spinning up application masters +- now, when we submit a job, the resource manager with the help of its two components, spins up an application master +- understand that application master is like another container, it is not like a daemon running on master node perennially. so, the application master is scoped to the lifecycle of the application +- now, the application master coordinates with resource scheduler of resource manager to spawn containers that can execute our map / reduce tasks +- containers execute our actual tasks +- a node can have multiple containers, just like in hadoop1.x, multiple slots could be present on a slave node +- the node manager sends heartbeats for health monitoring of node (recall how in hdfs, data nodes do the same thing) +- note - master node is also called as the controller node +- are all the components listed below - resource manager, node manager, resource scheduler, application master, application manager, container, basically components of yarn and map reduce? 
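+- as a rough illustration (the values below are made up, not from these notes), the resources that the resource scheduler can hand out to containers, and where the resource manager lives, come from yarn configuration, e.g. a minimal `yarn-site.xml` along these lines -
+  ```xml
+  <configuration>
+    <!-- where node managers and clients find the resource manager daemon -->
+    <property>
+      <name>yarn.resourcemanager.hostname</name>
+      <value>localhost</value>
+    </property>
+    <!-- ram this node manager advertises to the resource scheduler for running containers -->
+    <property>
+      <name>yarn.nodemanager.resource.memory-mb</name>
+      <value>8192</value>
+    </property>
+    <!-- bounds on what a single container can be allocated -->
+    <property>
+      <name>yarn.scheduler.minimum-allocation-mb</name>
+      <value>512</value>
+    </property>
+    <property>
+      <name>yarn.scheduler.maximum-allocation-mb</name>
+      <value>4096</value>
+    </property>
+  </configuration>
+  ```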
+ +![hadoop2.x](/assets/img/hadoop/hadoop2.x.drawio.png) + +- location constraint - to avoid a lot of data transfer over the network, execute the tasks on the node which is the closest to the data +- so two different things? - location constraint - schedule work on node having data and rack awareness - if for e.g. there is replication, direct node to closest replica +- now, we know that there can be multiple containers being concurrently executed on this node until all its resources are not used up. if more jobs are spawned, the jobs would have to wait in a queue +- how these containers / tasks get scheduled on the node is determined by the scheduling policy - + - fifo scheduler - first in first out scheduler. e.g. assume a job takes 5 minutes, and uses up all the resources of this node. a new job that is submitted almost immediately after this job, and takes say 10 seconds, will still have to wait for the entire duration of 5 minutes till the first job is complete, since there are no resources available for this second job to execute + - capacity scheduler - divide all the resources into multiple parts, e.g. give 30% of the resources to promotional and remaining 70% to searching. this way, both these parts will individually act as fifo schedulers, but a short promotional workload will not be stalled by long running searching and indexing jobs. this is the default and usually preferred one. by default, only one queue is present - default, with 100% of the capacity + - fair scheduler - accept all jobs, and as more jobs come in / go out, allocate each of them equal amount of resources + +## Hadoop Streaming + +- a utility that helps write map reduce programs in non java languages like python, r, etc +- e.g. of using hadoop streaming on my local - `hadoop jar ~/hadoop-3.3.6/share/hadoop/tools/lib/hadoop-streaming-3.3.6.jar -files wordcount_mapper.py,wordcount_reducer.py -mapper wordcount_mapper.py -reducer wordcount_reducer.py -input wordcount_input -output output`. here wordcount_mapper and wordcount_reducer are just simple python programs. we read from the input file wordcount_input, mapper outputs to stdout which is then used as input for wordcount_reducer and finally the reducer's output is stored inside output/part-00000 +- wordcount_mapper.py - + ```py + #!/usr/bin/python3 + import sys + + for line in sys.stdin: # for all lines + words = line.split() # grab all words + for word in words: # for all words + print ('{0}\t{1}'.format(word, 1)) # output (word, 1) + ``` +- wordcount_reducer.py + ```py + #!/usr/bin/python3 + import sys + + prev_word = None + prev_count = 0 + word = None + + for line in sys.stdin: # for all (word, 1) + + line = line.strip() + word, count = line.split('\t') + count = int(count) + + if word == prev_word: + prev_count += count # add to previous word count + else: # if current word is not the same as last word + if prev_word: + print('{0}\t{1}'.format(prev_word, prev_count)) # print previous word + prev_word = word # update previous word + prev_count = count + + if prev_word == word: + print('{0}\t{1}'.format(prev_word, prev_count)) + ``` + +### mrjob + +- developed by yelp +- makes it much easier to write and work with map reduce in python - things like chaining jobs etc. become much easier +- we just write one file using clean coding principles unlike using two files like specified in hadoop streaming +- allows writing tests locally (i.e. 
without support around hdfs etc) +- even aws emr etc work with mrjob + +## WordCount Example + +- initial [pom.xml](https://gist.github.com/shameekagarwal/71f127eb24ffe9997c3488cdf8364313) +- run `mvn clean package` +- command to submit job - `~/hadoop-3.3.6/bin/hadoop jar ./target/hadoop-1.0-SNAPSHOT.jar org.example.One input output` +- visit status of job at http://localhost:8088/cluster/apps +- note - for most classes, i find there are two packages we can import from - mapred and mapreduce. we should try using mapreduce where possible +- a basic example for word count - + ```java + public class One { + + public static class MapClass extends Mapper { + + @Override + protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { + String body = value.toString().toLowerCase().replaceAll("[^a-z\\s]", ""); + String[] words = body.split(" "); + for (String word : words) { + if (word.length() >= 7) { + context.write(new Text(word), new LongWritable(1)); + } + } + } + } + + public static class Reduce extends Reducer { + + @Override + protected void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + long sum = 0L; + for (LongWritable longWritable : values) { + sum += longWritable.get(); + } + context.write(key, new LongWritable(sum)); + } + } + + public static void main(String[] args) throws Exception { + + Path in = new Path(args[0]); + Path out = new Path(args[1]); + + Configuration configuration = new Configuration(); + Job job = Job.getInstance(configuration); + + try { + FileSystem hdfs = FileSystem.get(configuration); + hdfs.delete(out, true); + } catch (Exception ignored) { + } + + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(LongWritable.class); + + job.setMapperClass(MapClass.class); + job.setCombinerClass(Reduce.class); + job.setReducerClass(Reduce.class); + + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + FileInputFormat.setInputPaths(job, in); + FileOutputFormat.setOutputPath(job, out); + + job.setJarByClass(One.class); + job.submit(); + } + } + ``` +- the mapper and reducer classes that we extend are generics, where the types are for the key and value of input and output respectively +- we also recursively delete the output folder because if we rerun jobs without doing this, there is a failure around folder already exists +- the output format has files like part-r-00000, where r indicates that the output is due to a reduce operation and the last number is the partition id +- recall how by default, number of reducers is 1. to change the number of reducers, simply write `job.setNumReduceTasks(2)` +- e.g. in this case, i see two files in the output folder - part-r-00000 and part-r-00001 +- built in functions - e.g. map reduce ships with a `LongSumReducer` which we could have used here - sum for each key, where the value is long +- my confusion cleared - `setOutputKeyClass` and `setOutputValueClass` are used for reducer outputs, while `setMapOutputKeyClass` and `setMapOutputValueClass` are used for map outputs. i think there are some times when we do not need to include the map ones, but i think i might as well just include all of them every time tbh + +## Constructing Map Reduce Logic + +### Numeric Summary Metrics + +- e.g. imagine we have a list of rows, where each row has a subject name and score obtained by any student. 
we want to calculate the average score for each subject + + | subject | marks | + | ----------- | ----- | + | chemistry | 75 | + | mathematics | 81 | + | chemistry | 79 | + +- constructing map reduce logic - **since we want to group based on subject, output key of map should be subject**. the numerical statistic that we want to perform, e.g. average in this case, can be done inside the reducer +- so, remember - map's job is to output the right key, and reduce's job is to output the right value based on all the values available for a key +- our map would try to make a key for the subject name, and output the marks as the value +- our reduce would just run (sum of all values / size of list of values) +- if we use the combiner as the same function that was used for reducer - e.g. if one node had 55 and 65 for chemistry, and another node had 75 for chemistry, the right average would be 65, but in our case, the combiner would output be 60 on node 1 and 75 for node 2, thus making the reducer output to be 67.5 +- recall how output of map = input of combiner = output of combiner = input of reducer. so, we can instead output a tuple as the value from the map as (marks, 1). combiner can then output (sum of marks, size). this way, the reducer now receives a list of tuples, and it has to add the first value of tuples for the total and divide it by the sum of second values of the tuple for the final average +- if we want to use custom data types - for keys, we must implement the `WritableComparible` interface, while the data types used for values must implement the `Writable` interface +- we need to write implementation of things like serialization and deserialization. hadoop input and output classes have helpers for this, e.g. `readUTF` / `writeUTF` for strings, `readDouble` / `writeDouble` for doubles, etc + - remember to keep the order of serialization and deserialization to be the same + - remember to keep a no args constructor (used by hadoop internally) +- so, we would need an extra class to store the total marks and number of students with that marks, if we want to use combiners + ```java + public static class MapClass extends Mapper { + + @Override + protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { + String[] record = value.toString().split(","); + context.write(new Text(record[0]), new AverageWritable(Long.parseLong(record[1]), 1L)); + } + } + + public static class Combine extends Reducer { + + @Override + protected void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + long count = 0; + long score = 0; + for (AverageWritable value: values) { + score += value.getTotal(); + count += value.getNoOfRecords(); + } + context.write(key, new AverageWritable(score, count)); + } + } + + public static class Reduce extends Reducer { + + @Override + protected void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + long count = 0; + long totalScore = 0; + for (AverageWritable value: values) { + totalScore += value.getTotal(); + count += value.getNoOfRecords(); + } + context.write(key, new DoubleWritable((totalScore * 1.0) / count)); + } + } + ``` +- the custom data type AverageWritable looks like below - + ```java + @NoArgsConstructor + @AllArgsConstructor + @Data + public class AverageWritable implements Writable { + + private long total; + + private long noOfRecords; + + @Override + public void write(DataOutput out) throws IOException 
{ + out.writeLong(total); + out.writeLong(noOfRecords); + } + + @Override + public void readFields(DataInput in) throws IOException { + total = in.readLong(); + noOfRecords = in.readLong(); + } + } + ``` + +### Filtering + +- e.g. if we want to filter the data based on a condition, we can perform the filtering in the map function, and the reduce can just be an identity function +- e.g. if we make the output key of the map function as null, all the items would be received by the reducer in one go and it can write out all the items at once +- notice the use of singleton for `NullWritable` to reduce memory used + ```java + public class MapClass extends Mapper { + + @Override + protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { + String row[] = value.toString().split(","); + if (row[2].equalsIgnoreCase("Books")) { + context.write(NullWritable.get(), value); + } + } + } + ``` +- we do not call `setReducerClass` so that the identity reducer can kick in. identity reducer = a reducer that will just call `context.write(key, value)` for all the values that it receives - + ```java + job.setMapperClass(MapClass.class); + ``` + +### Distinct Values + +- if we want the distinct values, e.g. something that works like the `distinct` clause in sql +- we have a file with a word in every new line, and we would like to find a list of all the distinct words +- we can again use null writable instead of outputting dummy values like 1 for performance +- map class - + ```java + public class MapClass extends Mapper { + + @Override + protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { + context.write(value, NullWritable.get()); + } + } + ``` +- understand how the reducer here is not exactly identity - it would output one value for a key, not multiple like in the above example of filtering. reducer / combiner - + ```java + public class Reduce extends Reducer { + + @Override + protected void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + context.write(key, NullWritable.get()); + } + } + ``` +- note - the output was in sorted order - recall why this happens due to the sorting after the shuffle process + +### Top N Records + +- e.g. each row has user id and their number of followers, and we want to show the top n users + + | user_id | followers | + |---------|-----------| + | 1 | 30 | + | 2 | 30000 | + | 3 | 20 | + | 5 | 50 | + | 6 | 6000 | + +- my understanding - solution 1 - output key as null for all rows, so one reducer gets all the rows. there is a bottleneck here, since we cannot have more than one reducer for top n records +- all mappers work on subsets of data +- e.g. we can get all mappers to find the top n of the data they are responsible for +- note - it can happen that the mappers output less than n if the data that they have is small +- for a mapper to output top n records, it can do so only after all records in the partition it is responsible for have been processed, because mappers are called once per record for all records in the split it is responsible for - `cleanup` +- note - we have written the user for ascending order - priority queue will have the user with the lowest number of followers at the top. so, we just try to ensure priority queue size doesn't go over three, and that incoming element just needs to be larger than that whats at the top of the priority queue (i.e. 
smallest in the priority queue) +- we use User as output of map, so we could have just implemented writable, but we implement writable comparable so that we can use its compare to function, used by priority queue - + ```java + @Data + @AllArgsConstructor + @NoArgsConstructor + public class User implements WritableComparable { + + private String userId; + + private Integer numberOfFollowers; + + @Override + public void write(DataOutput out) throws IOException { + out.writeUTF(userId); + out.writeInt(numberOfFollowers); + } + + @Override + public void readFields(DataInput in) throws IOException { + userId = in.readUTF(); + numberOfFollowers = in.readInt(); + } + + @Override + public int compareTo(User o) { + return numberOfFollowers - o.getNumberOfFollowers(); + } + } + ``` +- map - + ```java + @Slf4j + public class MapClass extends Mapper { + + private final PriorityQueue pq = new PriorityQueue<>(); + + @Override + protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { + String[] row = value.toString().split("\t"); + User user = new User(row[0], Integer.parseInt(row[1])); + + if (pq.size() < 3 || pq.peek().getNumberOfFollowers() < user.getNumberOfFollowers()) pq.add(user); + if (pq.size() > 3) pq.poll(); + + log.info("pq is [{}], user is [{}]", pq, user); + } + + @Override + protected void cleanup(Mapper.Context context) throws IOException, InterruptedException { + while (!pq.isEmpty()) { + log.info("writing user [{}]", pq.peek()); + context.write(NullWritable.get(), pq.poll()); + } + } + } + ``` +- **in mapper** - above, we used cleanup of mapper. this technique is called in mapper. it is an alternative to, and sometimes more optimal than combiners + - in case of combiner, the mapper would write to files, then the combiner would read from and again write to the files + - in case of in mapper, we do everything in memory using for e.g. priority queue here. so while there is memory overhead, it is more optimal from performance pov +- lets say for all these n values, the mappers output the same key, say null +- now, all map outputs can come into the same list into a reducer this way +- so, the reducer basically receives the combination of top n outputs of all mappers +- note - for this to work, we had to use a single reducer +- here cleanup is not needed like in map, since reducer itself will get all the values +- note - a weird thing i have experienced here - `pq.add(value)` changes everything in priority queue to whats added the last time to the priority queue - like a pass by reference vs value thing, but why? however, cloning the user i.e. 
`pq.add(new User(value.getUserId(), value.getNumberOfFollowers()));` fixed the issue + ```java + @Slf4j + public class Reduce extends Reducer { + + @Override + protected void reduce(NullWritable key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + + PriorityQueue pq = new PriorityQueue<>(); + + for (User value : values) { + if (pq.size() < 3 || pq.peek().getNumberOfFollowers() < value.getNumberOfFollowers()) { + pq.add(new User(value.getUserId(), value.getNumberOfFollowers())); + } + if (pq.size() > 3) pq.poll(); + log.info("pq is [{}], user is [{}]", pq, value); + } + + while (!pq.isEmpty()) { + log.info("writing user [{}]", pq.peek()); + context.write(NullWritable.get(), pq.poll()); + } + } + } + ``` +- so, the obvious bottleneck is that we are limited to using just one reducer +- we know that one reducer receives all the keys that it is responsible for in sorted order +- however, this order breaks across reducers - e.g. reducer 1 receives (a,5), (d,6), (w,5), while reducer 2 receives (b,2), (c,5), (e,7). the output from the two reducers are sorted at an individual level, but this order breaks when combined +- with "total order partitioning" (not discussed here), the idea is that the reducer 1 receives (a,5), (b,2), (c,5), while reducer 2 receives (d,6), (e,7), (w,5), i.e. we are ensuring keys received across reducers are ordered as well +- if we implement a custom partitioner, a naive way would be send letters a-j to partition 1, k-r to partition 2 and s-z to partition 3. while this does ensure even distribution in terms of the number of keys, this can mean uneven distribution since there can be hot keys. all of this is handled by the total order partitioner + +### Indexes + +- search engines periodically visit websites and store the text in their own database - they create an index +- web pages are crawled repeatedly for all the data to build an index and keep it updated +- then, when a user initiates a search, these engines search through their own index instead of going to the websites +- inverted indexing - search engines generate an index based on the contents of the websites. e.g. mango is contained in files 1 and 3, war in files 1 and 5 and so on. the input was just files, while the output has the key as word, the value as the files containing this word. this structure is called an inverted index +- analogy behind inverted index - website themselves are an index - we type in a url and get back the content. the key is the url and the value the content. however, we generate an inverted index by using content as keys and the urls as values, so that for a search term, we know what urls may contain relevant information to it +- tf - term frequency - number of times a word appears in a document / total number of words in the document. e.g. if mango appears 5 times in a document with 1000 words, tf = 0.005 +- while calculating the tf, all words are considered equally important, so to help scale the rare words up, we use idf i.e. rare words across documents are bumped up +- idf - inverse document frequency - log (total number of documents / number of documents having the word). e.g. 
if 1,000 files have the word we are searching for out of 1,000,000, idf = 3 +- so, we would want the value of tf * idf to be high for our website to come up on the top +- so, all these calculations around building indexes from huge amounts of raw data (websites) very fast using distributed processing is what big data helps with +- a simple way of achieving this - we know that our output should contain the word as key and list of urls containing it as output. so, the map should output for all words on the page, that word as the key the url as value. now, the reducer receives all the urls for a word + +## File Formats + +- file formats - used when we wrote the following bit of code - + ```java + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + ``` +- the different possible options for input formats are - + - `TextInputFormat` - file contains only values. key is line number, which is why we were using `LongWritable` for key of map till now everywhere + - `KeyValueTextInputFormat` - used when input file contains key as well + - `SequenceFileInputFormat` - uses compression, useful when we chain map and reduce jobs i.e. input of the second job is the output from the first job + - `NLineInputFormat` - recall how by default file is split into segments of 128mb each. this way, for e.g. if we have 6 slave nodes and only a 500mb file, we cannot use all our slave nodes properly. this is where this option is useful, whereby we can specify the number of lines that should go into per split, thus helping us utilize our cluster more effectively +- the different possible options for output formats are - + - `TextOutputFormat` - each line has the key and value separated by a tab + - `SequenceFileOutputFormat` - uses compression, useful when we chain map and reduce jobs +- so for e.g. for the exams example discussed in the section before, the format of a line was for e.g. Chemistry,79. so, we can use the `KeyValueTextInputFormat` class for it as follows i.e. note how the map doesn't have to extract the key by using split on the value like earlier. note - specify the separator as well, since tab is the default - + ```java + public static class MapClass extends Mapper { + + @Override + protected void map(Text key, Text value, Mapper.Context context) throws IOException, InterruptedException { + context.write(key, new AverageWritable(Long.parseLong(value.toString()), 1L)); + } + } + + // ... + configuration.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ","); + job.setInputFormatClass(KeyValueTextInputFormat.class); + ``` + +## Chaining Jobs + +- e.g. imagine we have data in the following format i.e. each row has marks obtained for a student - the school that student is from and the subject. 
for all subjects, we would like to obtain the school with the highest average, and the actual average + + | school | subject | marks | + | ------------------ | --------- | ----- | + | Bigtown Academy | Chemistry | 44 | + | Bigtown Academy | French | 69 | + | Mediumtown College | Biology | 61 | + | Largetown School | French | 67 | + +- so, we can break the problem as follows into two separate map reduce jobs - +- first job's map output - key = (school, subject), value = (marks, 1) (recall the value is this strange tuple because of the constraint when using combiners around types) + ```java + public static class MapClass extends Mapper { + + @Override + protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException { + String[] record = value.toString().split(","); + ExamScoresV2KeyWritable newKey = new ExamScoresV2KeyWritable(record[0], record[1]); + AverageWritable averageWritable = new AverageWritable(Long.parseLong(record[2]), 1L); + context.write(newKey, averageWritable); + } + } + ``` +- first job's combiner output - key = (school, subject), value = (sum of marks, total students) + ```java + public static class Combine extends Reducer { + + @Override + protected void reduce(ExamScoresV2KeyWritable key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + long count = 0; + long score = 0; + for (AverageWritable value: values) { + score += value.getTotal(); + count += value.getNoOfRecords(); + } + context.write(key, new AverageWritable(score, count)); + } + } + ``` +- first job's reducer output - key = (school, subject), value = average of the school in the subject + ```java + public static class Reduce extends Reducer { + + @Override + protected void reduce(ExamScoresV2KeyWritable key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + long count = 0; + long score = 0; + for (AverageWritable value: values) { + score += value.getTotal(); + count += value.getNoOfRecords(); + } + context.write(key, new DoubleWritable(score * 1.0 / count)); + } + } + ``` +- second job's map output - key = subject, value = (school, its average for that subject). however, notice how it can read directly the key from the output of the earlier job, so we can also set the input format on the job directly as `job.setInputFormatClass(KeyValueTextInputFormat.class)` + ```java + public static class MapClass extends Mapper { + + @Override + protected void map(Text key, Text value, Mapper.Context context) throws IOException, InterruptedException { + String[] record = key.toString().split(","); + context.write(new Text(record[1]), new SchoolAverageWritable(record[0], Double.parseDouble(value.toString()))); + } + } + ``` +- second job's combiner output - key = subject, value = (school with maximum average for the subject, the average) + ```java + public static class Reduce extends Reducer { + + @Override + protected void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + SchoolAverageWritable max = new SchoolAverageWritable(null, -1); + for (SchoolAverageWritable value: values) { + max = max.getAverage() > value.getAverage() ? 
max : new SchoolAverageWritable(value.getSchool(), value.getAverage()); + } + context.write(key, max); + } + } + ``` +- second job's reducer output - _same as above_ +- so, the entire thing has been broken down into two jobs, which can be run one after another +- while we can run manually, hadoop can help achieve this via code using "job control" + ```java + Configuration configurationOne = new Configuration(); + Configuration configurationTwo = new Configuration(); + + ControlledJob controlledJobOne = new ControlledJob(configurationOne); + ControlledJob controlledJobTwo = new ControlledJob(configurationTwo); + + // notice how input of second job = output of first job + // these static calls of getJob do stuff like setting types on job, + // setting inputs and outputs, setting mappers, calling jarByClass, etc + // all of which we have seen earlier + Job jobOne = FiveJobOne.getJob(configurationOne, new Path(args[0]), new Path(args[1])); + Job jobTwo = FiveJobTwo.getJob(configurationTwo, new Path(args[1]), new Path(args[2])); + + controlledJobOne.setJob(jobOne); + controlledJobTwo.setJob(jobTwo); + + // adding dependency + controlledJobTwo.addDependingJob(controlledJobOne); + + JobControl jobControl = new JobControl("SchoolWithHighestAverage"); + jobControl.addJob(controlledJobOne); + jobControl.addJob(controlledJobTwo); + + // some thread stuff we have to do + // when running controlled jobs + Thread thread = new Thread(jobControl); + thread.setDaemon(true); + thread.start(); + + while (!jobControl.allFinished()) { + Thread.sleep(500); + } + ``` +- now recall how if chaining jobs, we can make use of compression - we can notice this if we try to run `cat` on the intermediate outputs (i.e. what we specify using `arg[1]` above) + ```java + // inside job 1 - + job.setOutputFormatClass(SequenceFileOutputFormat.class); + + // inside job 2 - + job.setInputFormatClass(SequenceFileInputFormat.class); + ``` +- the caveat of the above is output format of key / value of reduce of first job = input format of key / value of map of second job, which was not really needed otherwise if not using compression i.e. we could write double from reduce, and while reading read as string and parse this string into double +- my doubt - since max is needed, could we have used secondary sorting? is secondary sorting usually more optimal for finding maximum? + +## Pre and Post Processing + +- pre and post processing - to perform some steps after and before the job +- these pre and post processing steps work just like map tasks +- so the effective structure of hadoop can be said to be as follows + - multiple maps in the form of pre processing + - the actual map + - an optional combiner + - the shuffle step done by hadoop internally which then helps run reduce + - the reduce on a per key basis + - multiple maps in the form of post processing +- so, the structure when using pre and post processing looks like follows i.e. this replaces the `job.setMapper` etc calls - (the 4 types in between are for input key class, input value class, output key class and output value class). 
note - i think for adding combiner however, like stated below, i had to go back to `job.setCombinerClass` + ```java + // pre processing + ChainMapper.addMapper(job, PreProcessing.class, Text.class, Text.class, Text.class, Text.class, confOne); + + // the actual map, but syntax is same + ChainMapper.addMapper(job, MapClass.class, Text.class, Text.class, Text.class, AverageWritable.class, confTwo); + + // combiner + job.setCombinerClass(Combine.class); + + // reducer (note how it is setReducer and not addReducer like addMapper, since only one reducer can be used) + ChainReducer.setReducer(job, Reduce.class, Text.class, AverageWritable.class, Text.class, DoubleWritable.class, confTwo); + + // post processing + ChainReducer.addMapper(job, PostProcessing.class, Text.class, DoubleWritable.class, Text.class, DoubleWritable.class, confTwo); + ``` + +## Optimization + +- optimizing disk io in hadoop - in hadoop, the file is read from / written to disk at each step + - reduce size using pre processing - e.g. drop extraneous data + - use sequence file formats + - optimize the file itself before sending it to hadoop - e.g. xml would be much worse to process due to extra lines of tags compared to something like csv +- optimizing network io - this happens during shuffle + - add a combiner + - order input data using keys beforehand so that there is less network required during shuffling +- optimizing processing - this is more "code based" + - if we have to create something like `new LongWritable(1)` in the map class for e.g. in word count, we can instead create it at the global class level and reference it in the map task. this way, we don't create a new object every time, thus saving up on time for creation of these objects and more importantly garbage collection time + - use string builders instead of string if strings change frequently + - there is some time spent in instantiating a jvm. a new jvm is created for each task in a job by default, e.g. imagine a chain of mappers initially when using pre processing. however, we can reuse jvm across these tasks. we should observe how garbage collection works after this optimization. `conf.set("mapreduce.job.jvm.tasks", "10")`. 10 means reuse jvm for 10 tasks, 1 is the default i.e. 1 jvm per task and setting it to -1 means use one jvm for all tasks. note - this jvm reuse can only happen in a job, not across jobs + - recall why and how n line input format can be useful + - null writable - when we are just interested in the key (e.g. find the most frequently occurring words), and not the value, instead of using a dummy value like `new Text("")`, we can instead use `NullWritable.get()`, and notice how this is using singleton pattern, thus matching the first point of this section optimizing processing +- logging - this can be useful for for e.g. pseudo distributed, in standalone i can see the logs directly in the console as well. to view the logs, go to http://localhost:8088/cluster -> tools -> local logs -> userLogs. this will have a link to all job logs. go to the last job we ran -> and now this will have logs for all containers. i was just using lombok's `@Slf4j` and could automatically see the logs properly without any extra configuration +- hadoop also shows something called counters in ui, and this can be very useful for the health of job. we can add custom counters to it. 
we simply need to do is as follows (note - we have to use an enum i think) + ```java + enum RemovedRows { + LOW_SCORES, INVALID_DATA + } + + context.getCounter(RemovedRows.LOW_SCORES).increment(1); + ``` +- relational databases - we usually deal with files in hadoop because relational databases cant cope with massive amounts of data. yet we can read from / write to (preferable because this data is usually much smaller than input) relational databases +- when reading from database, each map task (remember how in production we will have multiple slave nodes etc) will initiate a read from the database. this can overload the database with jdbc connections (db proxy is the solution here?) + +## Unit Testing + +- mrunit - unit testing out map reduce code +- ps - this did not work for me, basically mrunit was relying on mapred versions and not mapreduce? however, written the code snippet below for reference +- adding the dependency - (note - i had to add the classifier for this to work) - + ```xml + + org.apache.mrunit + mrunit + 1.1.0 + test + hadoop2 + + ``` +- `MapDriver`, `ReduceDriver`, `MapReduceDriver` - it is as simple as us specifying the class we used e.g. for mapping we use MapClass, then specify the input and the expected output, and call runTest on these drivers to perform the assertion + ```java + public class TwoTest { + + MapDriver mapDriver; + + @Before + public void setUp() throws Exception { + mapDriver = MapDriver.newMapDriver(new Two.MapClass()); + } + + @Test + public void test() throws IOException { + mapDriver.addInput(new Text("chemistry"), new Text("79")); + mapDriver.addInput(new Text("chemistry"), new Text("91")); + mapDriver.addInput(new Text("mathematics"), new Text("67")); + + mapDriver.addOutput(new Text("chemistry"), new AverageWritable(79, 1)); + mapDriver.addOutput(new Text("chemistry"), new AverageWritable(91, 1)); + mapDriver.addOutput(new Text("mathematics"), new AverageWritable(67, 1)); + + mapDriver.runTest(); + } + } + ``` + +## Secondary Sorting + +- each node can have multiple partitions (which are recall 128 mb in size) +- now, for reduce to work, values for a key need to go to the same partition +- because of the way the shuffle process works, the values for a key in the reduce process come in random order +- now, imagine we want the values for a key to be in sorted order as well to for e.g. find the maximum +- one way can be we simply find the maximum by looping over all elements (`O(n)`), since we already have all the values for that key - inefficient +- so, we do something called secondary sorting +- now, we would like to ensure that the reducer gets the iterable of values in sorted order. so, here is how we can achieve it - + - construct a key where key = (actual_key, value) in the map process + - write a custom partitioner so that the partition is determined only using the actual_key part of the key (`Partitioner#getPartition`) + - ensure sort takes into account the key as is, so both (actual_key, value) are used (`WritableComparable#compareTo` i.e. present inside our custom key class) + - ensure group takes into account only the actual_key part of the key (`WritableComparator#compare`) +- so, example of an implementation of secondary sorting - imagine we have a csv, where each row has the subject name and the marks obtained by a particular student in it. we want highest score for each subject. so, we need to sort by both subject and marks, but use only subject for partitioning and grouping. so, the key would be a tuple of (subject, marks). 
we can also have a combiner that works just like the reducer, except that it needs to input and output the same tuple of (subject, maximum marks) (maximum needs to consider data across all nodes, but maximum from every node is sufficient for evaluating this) +- custom key - also determines how to sort, which uses first subject and then score (descending). so, items for a specific key (subject) are sorted by values (marks in descending) + ```java + @Data + @AllArgsConstructor + @NoArgsConstructor + public class ExamSubjectAndScoreKey implements WritableComparable { + + private String subject; + + private Integer score; + + @Override + public int compareTo(ExamSubjectAndScoreKey o) { + int result = subject.compareTo(o.subject); + return result == 0 ? o.score - score : result; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeUTF(subject); + out.writeInt(score); + } + + @Override + public void readFields(DataInput in) throws IOException { + subject = in.readUTF(); + score = in.readInt(); + } + } + ``` +- grouping comparator, group using subject only. note - we have to add the constructor with call to super, otherwise we get a npe + ```java + public class SubjectComparator extends WritableComparator { + + public SubjectComparator() { + super(ExamSubjectAndScoreKey.class, true); + } + + @Override + public int compare(WritableComparable a, WritableComparable b) { + ExamSubjectAndScoreKey keyA = (ExamSubjectAndScoreKey) a; + ExamSubjectAndScoreKey keyB = (ExamSubjectAndScoreKey) b; + return keyA.getSubject().compareTo(keyB.getSubject()); + } + } + ``` +- partitioner, partition using subject only - + ```java + public class SubjectPartitioner extends Partitioner { + + @Override + public int getPartition(ExamSubjectAndScoreKey key, IntWritable score, int numPartitions) { + return key.getSubject().hashCode() % numPartitions; + } + } + ``` +- configure both partitioner and grouping comparator using - + ```java + job.setPartitionerClass(SubjectPartitioner.class); + job.setGroupingComparatorClass(SubjectComparator.class); + ``` +- map, combine and reduce - note how reduce and combiner are the same apart from the output format. my doubt - i thought secondary sorting only helps with reducer values being sorted i.e. how can we use `values.iterator.next()` for combiner? + ```java + public static class MapClass extends Mapper { + + @Override + protected void map(Text key, Text value, Mapper.Context context) throws IOException, InterruptedException { + int score = Integer.parseInt(value.toString()); + context.write(new ExamSubjectAndScoreKey(key.toString(), score), new IntWritable(score)); + } + } + + public static class Combine extends Reducer { + + @Override + protected void reduce(ExamSubjectAndScoreKey key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + context.write(key, values.iterator().next()); + } + } + + public static class Reduce extends Reducer { + + @Override + protected void reduce(ExamSubjectAndScoreKey key, Iterable values, Reducer.Context context) throws IOException, InterruptedException { + context.write(new Text(key.getSubject()), values.iterator().next()); + } + } + ``` + +## Joining + +- imagine doing this using what we know up till now +- e.g. we have a csv where each row represents an order for a customer. 
so, it contains customer id, date and total + + | customer id | total | + | ----------- | ------ | + | 18 | 233.28 | + | 17 | 27.35 | + | 18 | 202.23 | + +- another csv contains a row per customer, where each row has the customer id and country of origin + + | customer id | country | + | ----------- | ------- | + | 1 | France | + | 2 | Russia | + | 3 | Germany | + +- now, we would like to find totals by countries - so, we would use joins +- job 1 - map first csv into (customer id, [total, *null*]), identity reducer +- job 2 - map second csv into (customer id, [*0*, country]), identity reducer +- job 3 - the two above outputs can be combined since they have the same format, e.g. recall how we can specify not just a file but folder as well when running hadoop jobs, and the folder here would contain the outputs from both jobs above. now, we use an identity mapper, and then perform a reduce to get (country, total) for every customer. basically, in the iterable, there would be multiple values where country is null, and just one value where the country is not null but the total is 0. understand that the reducer of this job is called once for every key i.e. customer. we don't want to output one row per customer, but one row per country - so, we need yet another job's reduce to help us do some grouping +- job 4 - identity mapper, reduce can now just sum the totals, as the key is now country +- using secondary sorting - we would tag data from country csv with 1 and data from sales csv with 2. map would read from both files. now, we would perform secondary sorting logic - this way, we would have a dataset where the first row has key = customer_id, 1 for the country data, and following rows have key = customer_id, 2 for the sales data. we can group keys with multiple values under same reducer due to secondary sorting logic, this would output country, sum_of sales. so, the output of this first job is basically for each customer, there is a row, where the key is country and the value is total amount of sales for this customer. so, we can follow this up with a second job that has an identity mapper and a reducer to calculate the total +- so, this trick around secondary sorting basically helped us eliminate jobs 1 to 3 +- we can tag the two datasets in the configuration as follows + ```java + Path in1 = new Path(args[0]); + Path in2 = new Path(args[1]); + Path out = new Path(args[2]); + + Configuration configuration = new Configuration(); + configuration.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ","); + + configuration.set(in1.getName(), "1"); + configuration.set(in2.getName(), "2"); + ``` +- the mapper can be written as follows. 
**note how it extracts the tag and creates the new key using it, so that it can be used during the secondary sorting phase** -
+  ```java
+  public static class MapClass extends Mapper<Text, Text, CustomerAndTagKey, Text> {
+
+    @Override
+    protected void map(Text key, Text value, Mapper<Text, Text, CustomerAndTagKey, Text>.Context context) throws IOException, InterruptedException {
+      FileSplit fileSplit = (FileSplit) context.getInputSplit();
+      Integer tag = Integer.parseInt(context.getConfiguration().get(fileSplit.getPath().getName()));
+      Integer customerId = Integer.parseInt(key.toString());
+      context.write(new CustomerAndTagKey(customerId, tag), value);
+    }
+  }
+  ```
+- each mapper is responsible for a split of the data, and that file split's name is used to tag the different files, to help determine which table they belong to
+- the reducer can now be certain that the first row would represent the country -
+  ```java
+  public static class Reduce extends Reducer<CustomerAndTagKey, Text, Text, DoubleWritable> {
+
+    @Override
+    protected void reduce(CustomerAndTagKey key, Iterable<Text> values, Reducer<CustomerAndTagKey, Text, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
+      Iterator<Text> values$ = values.iterator();
+      String country = values$.next().toString();
+      double total = 0;
+      while (values$.hasNext()) {
+        total += Double.parseDouble(values$.next().toString());
+      }
+      context.write(new Text(country), new DoubleWritable(total));
+    }
+  }
+  ```
+- `CustomerAndTagKey#compareTo` - use both the customer id and the tag. this ensures that in the iterable received for a customer, the first record contains the country, and the remaining ones contain the totals for that customer
+  ```java
+  @Override
+  public int compareTo(CustomerAndTagKey o) {
+    return customerId.equals(o.customerId) ? tag - o.tag : customerId - o.customerId;
+  }
+  ```
+- `CustomerPartitioner#getPartition` - only use the customer id for determining the partition
+  ```java
+  @Override
+  public int getPartition(CustomerAndTagKey customerAndTagKey, Text text, int numPartitions) {
+    return customerAndTagKey.getCustomerId().hashCode() % numPartitions;
+  }
+  ```
+- `CustomerComparator#compare` - only use the customer id to group
+  ```java
+  @Override
+  public int compare(WritableComparable a, WritableComparable b) {
+    CustomerAndTagKey keyA = (CustomerAndTagKey) a;
+    CustomerAndTagKey keyB = (CustomerAndTagKey) b;
+    return keyA.getCustomerId().compareTo(keyB.getCustomerId());
+  }
+  ```
+- now, we need to chain another job for actually totaling across customers, as already discussed
+- apparently, what we discussed till now is called a reduce side join. there is another type called map side join, which can be more performant in some cases, but has limitations (same as [spark's](/posts/spark) broadcast join?) -
+  - reduce side join - tagging datasets, so the reducer gets an iterable which has a value for each row from both datasets that is a part of the join
+  - map side join - one dataset is small enough to fit in a jvm, and the join is done on the map side and not the reduce side
+- e.g.
of map side join - we have a handful of stop words to flag in some analysis for our use case, which can easily fit in a jvm diff --git a/_posts/2023-09-24-docker-and-kubernetes.md b/_posts/2023-09-24-docker-and-kubernetes.md new file mode 100644 index 0000000..9643446 --- /dev/null +++ b/_posts/2023-09-24-docker-and-kubernetes.md @@ -0,0 +1,1488 @@ +--- +title: Docker and Kubernetes +--- + +## About Docker + +- docker is a tool for managing containers +- container is a package of our code along with the dependencies and libraries to run that code +- docker follows a client server architecture + - we issue commands via cli to the docker client + - all tasks like creating containers, pulling images, etc. is done by docker daemon (dockerd) +- docker can be run natively in linux, so for macOS and windows, a virtualization layer is needed +- docker engine - dockerd, docker client +- docker desktop - docker engine, docker cli, kubernetes, docker compose, etc + +## Why use Docker + +- the same piece of code will always yield the same application i.e. doesn't rely on host environment +- having similar development, staging and production environments +- easily manage different projects running different versions of dependencies +- easily switch between versions of dependencies +- virtual machines are not as easily reproducible as containers since they have their own dedicated OS +- sharing and distributing is very convenient using Dockerfile, image, etc + +![docker vs vm](/assets/img/docker-and-kubernetes/docker-vs-vm.drawio.png) + +## Images and Containers + +- images are templates for containers, and a container is a running instance of an image +- containers are lightweight, isolated and run independent of each other +- we can use official prebuilt images, the most common source is [docker hub](https://hub.docker.com) +- note: while issuing docker commands + - container name and container id can be used interchangeably, same for image + - first few characters of the image_id are enough to reference the image if they can uniquely identify it +- `docker container run image_name` to create a container from an image +- if the image is not available locally, it is downloaded from dockerhub by docker +- `docker container ls` to list all running containers + - `docker container ls -a` to list all running as well as stopped containers +- Dockerfile is a special file name, as it is the default file docker looks for when we build an image +- Dockerfile contains the instructions for creating our own image +- example of a Dockerfile + ```Dockerfile + FROM node:14-alpine + WORKDIR /app + COPY . . + RUN npm install + EXPOSE 80 + CMD npm run start + ``` +- **all commands except the last instruction `CMD` are used to build the image, `CMD` is used to run the container** +- so basically `CMD` is used for `docker container run...` +- `EXPOSE` is only for documentation purpose +- `docker image build .` - used to build an image using the Dockerfile, `.` here is the build context + - `-t` flag to specify an image tag +- images have layers i.e. docker caches result after every instruction in the Dockerfile +- this means docker can reuse layers if possible - e.g. two different react applications use the same base image - node layer +- so, to optimize i.e. 
make building of images faster, in the Dockerfile example shown earlier, we can first install dependencies and then copy the source code, as rebuilding of image will be triggered more frequently by a change in the source code than it will be by a change in the dependencies + ```Dockerfile + FROM node:14-alpine + WORKDIR /app + COPY package.json . + RUN npm install + COPY . . + EXPOSE 80 + CMD npm run start + ``` +- `docker container start container_id` - start a stopped container +- we can reattach our terminal to the container using `docker container attach container_id` +- we can view logs using `docker container logs container_id` + - add `-f` flag for following the logs +- flags for `docker container run` - + - `-it` can be used to enter interactive mode + - `--rm` flag to delete the container when we stop it + - `--name` to specify the name of container + - `-d` to run in detached mode i.e. to not block our current terminal and run the container in foreground + - `-p` flag means publish, i.e. map host port to a container port +- `docker image ls` - lists the downloaded images +- `docker image rm image_id` - remove the image with id image_id +- `docker container stop container_id` - stop the container +- `docker container prune` to delete all stopped containers +- to get more information on images and containers, use `docker container inspect container_id` and `docker image inspect image_id` +- `docker container cp host_folder container_id:folder` to copy folder from the host to the container + - we can also reverse the order of arguments to copy folders and files from the container to the host +- we can share images, by sharing the Dockerfile or by hosting it on an image registry like docker hub +- `docker image push image_name:tag` to push images to the registry +- `docker image pull image_name:tag` to pull images from the registry +- we can also tag images using `docker image tag new_image_name old_image_name` +- a full example of running a container - `docker container run -d -p 3000:80 --name=backend --rm backend` +- `docker login` to login to docker hub +- note: i had to generate and use a personal access token in the docker hub ui and use that instead of the docker hub password in the cli +- `docker image rm -f $(docker image ls -a -q)` - deletes all locally downloaded images + - `-q` helps list only image ids + - `-a` helps list intermediate images as well + - `-f` force removal, e.g. if image is referenced by another image +- we can use a file `.dockerignore` to prevent copying files when using command `COPY` inside the Dockerfile e.g. + ``` + node_modules + Dockerfile + .git + ``` + +## Tags + +- **an image tag has two parts - the name / repository of the image and the tag** +- tag is like a version, so we can generate different versions of our image +- the default tag if not specified is latest +- why tags are important - + - rollback to previous versions in the production environment if newer versions have a bug + - newer versions of other images which are used by our images might have breaking changes in future +- suppose we always push and pull using tag latest. when we run `docker container run...`, it looks for the image locally and if it doesn't find it, it goes online to fetch it. but it will find the image with the tag latest, and docker doesn't understand that someone else has pushed a newer version online + +## Layered Architecture + +- all the docker related data like images, containers, etc. 
can be seen in /var/lib/docker +- the docker image we build contains of layers, and these layers are shared across various images +- e.g. if two images use the same base image, the layer of the base image is shared +- the image layers are read only +- when we create a container, a new layer is created on top of the existing layers of the image +- thus, all writes that we perform during runtime, log files, etc. get written onto this layer +- the persistence during the container's lifetime happens through this writable layer +- this mechanism is called copy on write, and the changes we made are lost unless we use volumes + +## Volumes + +- containers should be stateless as they can be easily created and destroyed, scaled up and down +- we can have data that we want to persist even if containers are killed +- this data shouldn't be stored inside containers, or we may lose that data +- volumes - mapping a persistent storage to docker containers. the persistent storage can be cloud storage, e.g. s3 of aws or our host directory system +- this way, every time a container tries to persist changes, they go to the persistent storage and don't get lost irrespective of how many times the container is started or stopped +- volumes can be of three types - + - anonymous volumes + - named volumes + - bind mounts +- `docker volume ls` shows all volumes +- anonymous volumes are managed by docker +- the **reference** to anonymous volumes are lost after the container shuts down +- if we use `--rm` flag while running the container, the anonymous volume is deleted as well +- we can create anonymous volume by using `VOLUME ./feedback` inside the Dockerfile +- we can also create anonymous volume by using flag `-v /a/b` during `docker container run` where /a/b is the path inside the container +- named volumes are managed by docker too +- unlike anonymous volumes, we don't lose the reference to named volumes after the container is deleted +- use flag `-v` to create named volumes, e.g. `-v feedback:/app/feedback`, where the name of the volume is feedback and the directory of the container it maps to is `/app/feedback` +- bind mounts are managed by us. it can be used for source code, so that the changes that we make to the code get reflected in the container +- in case of bind mounts, we have access to the folder which gets mapped to the container's folder +- in case of clashes, the more specific paths win e.g. if we are using bind mounts for /app of container and anonymous volumes for /app/node_modules of container, /app/node_modules relies on anonymous volumes +- using nodemon with bind mounts prevents us from rebuilding images repeatedly i.e. our changes in source code are accounted for in the running container +- we can use suffix `:ro` so that it specifies to the container that the volume is read only e.g. 
`-v $(pwd)/app:ro`, so that only hosts and not containers can edit the source code +- note: `docker volume ls` will not list bind mount volumes, since it doesn't manage them +- `docker volume rm volume_name` to remove volumes +- `docker volume prune` to remove volumes not being used by any containers +- `docker volume inspect volume_name` to get details of the volume + +## Arguments and Environment Variables + +- docker supports build time arguments and runtime environment variables +- runtime environment variables can be provided using `ENV PORT 80` inside the Dockerfile +- we can also provide it dynamically using `-e PORT=80`, which makes the earlier method a default +- for situations like api keys where security is a concern, the method suggested above is better +- we can also use build arguments, i.e. dynamic variables used when building an image +- can be done using `ARG PORT=80` in the Dockerfile +- my understanding - so basically, arg is used by all commands above cmd and env is used by cmd? + +### Example + +```Dockerfile +ARG DEFAULT_PORT=80 +ENV PORT $DEFAULT_PORT +EXPOSE $PORT +``` + +- we are giving the value of the build argument to the environment variable +- if we don't provide a port, the port used by container is `80` +- now, we can change the default port while building an image using `docker image build ... --build-arg DEFAULT_PORT=9999 ...` +- we can also receive a dynamic port using `docker container run ... -e PORT=9545 ...` +- if we don't provide a port dynamically, the port specified for building of images gets used + +## Networks + +- there are three kinds of entities with which containers can communicate - + - internet + - host + - other containers +- containers can by default talk to the internet e.g. a public api +- for containers to talk to the host, we can replace localhost by `host.docker.internal` +- e.g. for containers to talk to mongodb running on our host machine, we can use `mongodb://host.docker.internal:27017/favorites` +- for containers to talk to other containers, we can use `docker container inspect ...` to get the container's ip address (available in the key IPAddress) and then use it. e.g. with a mongodb container running, we run `docker container inspect mongodb` and then use `mongodb://the_ip_address:27017/favorites` +- this is not ideal, as this IP could change after a new container replaces the old one +- we can create a docker network, and all containers placed inside the network can reference each other directly using the container names, e.g. `mongodb://mongodb_container_name:27017/favorites` +- `docker network create network_name` to create a network +- `docker container run ... --network=network_name ...` to create a container inside a specific network +- also, we don't need `-p` for the container to which another container connects, i.e. `-p` is only needed when we want our host port to map to the container port, not when another container wants to communicate with it +- docker networks support different kinds of drivers. the default driver is bridge, which we saw above +- there can be other types of drivers and third party plugins for drivers as well +- we can use driver as "host" so that isolation between the container's network and localhost is removed +- examples of usage - `docker network create --driver bridge` or `docker container run --network host` +- we can clean up unused networks using `docker network prune` +- the bridge type of network uses network namespaces behind the scenes. 
so, on running `ip addr`, we see docker0, which is basically the virtual switch in network namespaces. each container is encapsulated inside its own network namespace. an e.g. is shown below - + ```sh + docker container run nginx + docker container inspect <> | grep SandboxKey + # the output is /var/run/docker/netns/<> + ``` + +## Docker Compose + +- docker compose helps in preventing having to run docker commands from cli repeatedly +- it has syntax in yml which is easier to read and can be shipped with our code +- services in docker compose are containers, for which we can define environment variables, network, image, etc +- version of docker compose I had to use was 3.8 based on my [docker engine version](https://docs.docker.com/compose/compose-file/) + - note - i think it comes with docker in newer version, i just have to use `docker compose` now +- all container names are one level nested under the services key +- can specify networks, volumes key for each container +- for named volumes, we should also mention them under the volumes key in the root of the file +- all the containers are a part of the default network created by docker-compose +- `docker-compose up` starts all the containers and builds the images as well + - flag `-d` can be used to start in detached mode + - add flag `--build` to force the images to be rebuilt +- `docker-compose down` - deletes all the containers and the default network that docker-compose creates + - flag `-v` also removes the volumes which were created +- use `depends_on` key to ensure the order in which containers start e.g. server `depends_on` mongodb container +- `docker-compose build` to build the images +- `docker-compose run service_name` to run a specific container in the compose file under the services key + +## Issues while Containerizing Frontend Apps + +- docker doesn't work in the web browser for e.g. when we make xhr requests + - so referring the backend application just by container name won't work as it utilizes docker networks + - so, we publish the backend on a host port and simply use localhost:that_port in frontend +- reactJS needs the terminal to be in interactive mode to ensure it continues to run + - it is like adding `-it` flag while using `docker container run...`, or setting `stdin_open: true` and `tty: true` inside of the docker compose + +## CMD and ENTRYPOINT + +- when we specify `docker container run image_name xyz`, xyz replaces what there is in CMD +- however xyz appends what is there in ENTRYPOINT +- we can replace what is there in ENTRYPOINT using `--entrypoint` +- useful tip - since a space separated command needs to be a part of different items in an array, use `sh -c`. i.e. `CMD ["a", "b", "c"]` can become `CMD ["sh", "-c", "a b c"]` + +```Dockerfile +FROM ubuntu +ENTRYPOINT [ "sleep" ] +CMD [ "10" ] +``` + +- `docker image build -t ubuntu-sleeper .` +- run `docker container run ubuntu-sleeper`, sleep is of 10 seconds +- run `docker container run ubuntu-sleeper 20`, sleep is of 20 seconds +- run `docker container run -it --entrypoint=bash ubuntu-sleeper`, run bash in interactive mode + +## Setup Containers + +how do we set up initial project e.g. how to run `npm init` when we don't have node installed locally? 
below is an example for setup using node + +Dockerfile.setup - + +```Dockerfile +FROM node:14-alpine +WORKDIR /app +``` + +docker-compose-setup.yml - + +```yaml +version: "3.8" +services: + npm: + build: + context: ./ + dockerfile: Dockerfile.setup + stdin_open: true + tty: true + volumes: + - ./:/app + entrypoint: npm +``` + +now, we can use commands to help during development like - + +- `docker-compose -f docker-compose-setup.yml run npm init` +- `docker-compose -f docker-compose-setup.yml run npm i express` + +the `npm` in the command is the service name inside docker compose, and entrypoint was given as npm in docker-compose, otherwise we would have to run `docker-compose -f docker-compose-setup.yml run npm npm init` + +## About Kubernetes + +- kubernetes is the most widely used container scheduler +- modern infrastructure is created using immutable images, and an upgrade is performed by replacing the older images with newer ones using rolling updates +- we specify how many resources to run and kubernetes maintains that number +- it ensures that the resources run within the specified memory and cpu constraints +- kubernetes is cloud-agnostic and can also be run on-prem +- it has features like service discovery, load balancing, secret and configuration management, etc + +## Minikube and Kubectl + +- minikube allows us set up a single node cluster on our local workstation +- minikube is useful for development purpose +- kubectl is the kubernetes command line tool which allows to manage a kubernetes cluster +- add alias to .bashrc - `alias kubectl="minikube kubectl --"` +- configuring autocomplete for kubectl (restart terminal after running the command) - + ```bash + echo 'source <(kubectl completion bash)' >> ~/.bashrc + ``` +- minikube can be deployed as a vm or as a container (i am trying as a container for now) +- configuring minikube - + - `minikube config set driver docker` + - `minikube config set memory 8192` + - `minikube config set cpus 4` +- view config using `minikube config view` or `cat ~/.minikube/config/config.json` +- start minikube - + ```bash + minikibe start + minikube status + ``` +- pointing docker client installed locally to minikube's docker daemon - + ```bash + docker container ls + minikube docker-env + eval $(minikube -p minikube docker-env) + docker container ls + ``` +- to ssh into minikube - `minikube ssh`. now also we can run commands like `docker container ls` etc +- to get all running components, we can use `kubectl get all --all-namespaces` +- to shut down minikube, use `minikube stop`. it preserves the state +- to start minikube again, `minikube start` +- to delete the cluster, `minikube delete` +- can format output e.g. `kubectl version --output=yaml`. output format can be json as well +- `minikube ip` to get the ip address of minikube cluster +- an issue on my laptop - minikube cannot pull docker images at times. temporary fix is to pull manually using `docker image pull` after pointing docker client to minikube's docker daemon + +## Cluster Architecture + +- the cluster has master nodes and worker nodes. note: there can be multiple masters in the cluster +- the master nodes schedule and monitor the containers assigned to it on the worker nodes +- different methods of viewing information related to the different components e.g. 
etcd + - `ps aux | grep etcd` + - `sudo cat /etc/kubernetes/manifests/etcd.yaml` + - `docker container ls | grep etcd` +- it has 7 major components as described below + +### Etcd + +- a distributed key-value store that allows for fast storage and retrieval +- it runs on the port 2379 +- etcdctl is the etcd control client which helps communicate with etcd +- it is used for storing and retrieving information about all kubernetes resources +- the etcd clusters can either be present on the master nodes or be entirely decoupled from them +- **kubeadm runs etcd as a static pod on the master nodes** +- we specify its ip address and port on the api server +- an example of using etcdctl api version 3 - + ```sh + kubectl exec etcd-minikube --namespace=kube-system -- sh -c \ + "ETCDCTL_API=3 etcdctl get / \ + --prefix --keys-only --limit=100 \ + --cacert /var/lib/minikube/certs/etcd/ca.crt \ + --cert /var/lib/minikube/certs/etcd/server.crt \ + --key /var/lib/minikube/certs/etcd/server.key" + ``` + to get the location of the certs, use `kubectl get pod etcd-minikube --namespace=kube-system --output=yaml` +- peer to peer communication in etcd clusters when there are multiple master nodes happens through 2380 +- etcd is distributed i.e. we can read from any of the instances, while all writes go to the master in the etcd cluster which syncs the data on the other replicas +- in case of inconsistencies, the quorum determines if the update is valid. it is the minimum number of nodes in the etcd cluster which should have processed the update, which is floor(n / 2) + 1. the value of fault tolerance is total instances - quorum. so, it is recommended to have an odd number of etcd instances / master nodes depending on the configuration, since fault tolerance is the same for n and n - 1 nodes where n is even + +### Api Server + +- it runs on the master node +- external clients like kubectl communicate changes to the cluster via the api server +- schedulers, controllers, kubelets, etc. monitor the api server for new resources +- they also send updates to the api server which then updates it on the etcd cluster +- so, api server is the only component that directly interacts with the etcd cluster +- the api server on the multiple master nodes can run concurrently i.e. all api servers on all the master nodes can be active at once. however, in case of controller manager and scheduler, to avoid duplication and inconsistencies, they are in the active state on the master node which is elected as the leader while they are in standby mode on the other master nodes +- in case of multiple masters, clients like kubectl interact with a load balancer, where the load balancer routes requests to the multiple api servers + +### Controllers + +- also called controller manager +- different kinds of controllers run on the master node +- for instance, the master node expects heartbeats from the worker nodes. the node controller monitors them and if the heartbeats do not reach the master nodes for a certain time period, the pods on it are evicted +- similarly, we have replication controller to maintain the number of pods of the same type +- the controller manager package installs all the different controllers. 
to view the different controllers, use - + ```sh + kubectl get pod kube-controller-manager-minikube \ + --namespace=kube-system --output=yaml | grep controllers + ``` + +### Scheduler + +- runs on the master node +- it assigns pods to a specific node +- it does this based on available resources like cpu and memory and filters out nodes which cannot run the pod +- it then based on a priority function ranks the remaining nodes +- the pod then gets scheduled on one of the remaining nodes + +### Kubelet + +- it **runs on all worker nodes** (and optionally on the master node). see how this is different from the components seen above - etcd, api server, controller manager, scheduler +- unlike the rest, kubelet does not run as a static pod, daemon set etc. it **runs via a binary installed on the vms** - i think this flow deviates since is doesn't use static pods / daemon set +- it registers the nodes with the cluster +- picks up the pods from the api server to run on the node and then runs it +- it then sends updates of the status of the pod to the api server +- so, to view information, use `ps aux | grep kubelet` +- this will show the files locations, so, for e.g., use - `cat /var/lib/kubelet/config.yaml` + +### Kube Proxy + +- it runs on all nodes, since it **runs as a daemon set** +- pods in a node can reach pods on other nodes as well because of this +- the kube proxy **assigns an ip to the service** +- to view the ip range from which services are assigned ip addresses, we can use `kubectl get pod kube-apiserver-minikube --namespace=kube-system --output=yaml | grep service-cluster-ip-range` +- it **configures ip tables**, which maps the ip address of services to endpoints +- an endpoint = the ip address of the pod + port of the pod. this port can belong to any one of the containers, set by target port field in the service definition +- if we have multiple pods sitting behind a service, an algorithm similar to round robbin is used + +### Kube Dns + +- it maps the service name to the service ip address +- so, it configures the dns server +- kubernetes uses coredns for achieving this functionality +- on running `kubectl get deployment coredns --namespace=kube-system --output=yaml`, we can see that a config map is mounted as a volume on it +- we get the contents of it using `kubectl get configmap coredns --namespace=kube-system --output=yaml`. it shows the plugins being used by coredns +- there is also a service associated with kube dns, which we can get using `kubectl get service kube-dns --namespace=kube-system --output=yaml | grep clusterIP`. the pods point to this ip, which can be confirmed by inspecting the pod using `kubectl exec any_pod_name -- cat /etc/resolv.conf` + +## Pods + +- the smallest unit in kubernetes +- represents a single running process +- a pod encapsulates one or more containers, but usually we run only one container in a pod +- sidecar pattern - helper containers can be spun alongside the application container in the same pod +- to create a pod in an imperative way using commands, use `kubectl run db --image=mongo` +- to get all running pods, use `kubectl get pods` + - to get more information, we can use `--output=wide`, `--output=yaml` or `--output=json` +- we can do a dry run and get the yaml, e.g. 
`kubectl run db --image=mongo --dry-run=client --output=yaml` +- to see the list of events that occurred, use `kubectl describe pod db` +- [yaml file to create a pod declaratively](https://gist.github.com/shameekagarwal/8ca1f31a5d76b00c20a5a8a6da3b183b) +- now, we can run `kubectl apply -f file_name.yml` +- we can specify the file name in commands instead of the resource name - `kubectl describe -f file_name.yml`, `kubectl delete -f file_name.yml`, `kubectl get -f file_name.yml` +- suppose we want to execute a command against a container. one way would be to issue commands using docker, e.g. `docker container exec container_name command`, this is just like spawning off another process in an already running container. however, we have to issue this command on a particular node of the cluster. this may not matter for minikube since everything is on our local but would matter for a production cluster. another way is to run `kubectl exec pod_name -- command`. this would by default execute the command on the first container of the pod. we can also specify the container using `--container` flag +- to view logs, use `kubectl logs pod_name`. like in `exec`, the container can be specified explicitly +- containers of the same pod run on the same node. they can talk via localhost i.e. if a container is running in a pod on port 8080, the other container can make requests to localhost:8080. they also share the same volumes +- if we stop the container using `docker container stop container_id`, the pod will restart the container +- to stop a pod, use `kubectl delete pod db` +- when a pod is deleted + - it first sends `TERM` (terminate) signal to all processes of all containers of the pod + - if it does not stop within the `gracePeriod`, `KILL` signal is sent for a forceful shutdown +- all containers part of the same pod coexist in the same node i.e. they cannot be distributed across the nodes +- all of them can also access the same volume + +### Process of Creation + +![pod creation](/assets/img/docker-and-kubernetes/pod-creation.drawio.png) + +## ReplicaSets + +- it is a type of controller i.e. it tries to maintain a specified number of pods +- this provides high fault tolerance, high availability, self-healing mechanism, etc. +- replica sets are the newer version of replication controllers, since replication controllers are deprecated +- note: when setting image using `kubectl set image...` in replica sets, i had to delete the older pods +- [yaml example](https://gist.github.com/shameekagarwal/ec1a7d3c31814c789eae2d0e1c1ae569) +- look how in yml syntax, `spec.template` is the exact same as that of the contents of a pod +- labels defined for a pod should match the labels defined for its replica set i.e. value of `spec.selector.matchLabels` should match `spec.template.metadata.labels` else kubernetes gives an error +- however, the replicaset can manage pods not defined in the `spec.template` section as well. in this case, the labels of pods should match the selector of the replica set +- `spec.replicas` defines the number of pods to run +- use `kubectl get replicasets` and `kubectl get pods` to verify +- verifying the self-healing feature - if we try to delete a pod using `kubectl delete pod pod_name`, we will see that the replica set will automatically spin up a new pod +- deleting the replica set will delete the pods it spun up as well +- `kubectl delete -f replica-set.yml --cascade=orphan`. this will delete the replica set but not the pods. 
so, in general, to prevent removal of downstream objects, use the `cascade` flag + +### Process of Creation + +only the first part has been described here, the remaining parts are similar to that of a pod + +![replica set creation](/assets/img/docker-and-kubernetes/replica-set-creation.drawio.png) + +## Services + +- pods are short-lived, so using addresses of pods for inter-pod communication is not reliable +- services can be used to expose pods, replication controllers, replication sets, etc +- the controller used here is called endpoint controller +- service can be of different types + - `NodePort` - target port on every node is exposed to the outside world. if we have multiple worker nodes, to hit a particular set of pods, we would have to use `worker_ip:node_port`. this also indicates that the services span multiple nodes without us having to configure anything + - `ClusterIP` - this is the default. exposes the port only inside and not from outside the cluster + - `LoadBalancer` - useful when deploying to cloud + - `ExternalName` - map a service to an external address like a url +- imperative command - `kubectl expose replicaset rs --name=svc --target-port=28017 --type=NodePort` +- note: [the node port cannot be specified](https://github.com/kubernetes/kubernetes/issues/25478) when using `kubectl expose` +- if we run `kubectl describe service svc`, we see that it has inherited all the labels of the replica set. recall how replica set is associated to the pods using labels, services are associated to the pods in the same way +- when describing a service, it also shows all endpoints aka pods it directs traffic to +- the three ports involved in node port are - + - node port - how to access from outside the cluster. hit `http://minikube_ip:NodePort`. if not specified, a free port is chosen at random for its value + - port - incoming traffic i.e. traffic from other pods or outside the cluster hit this port of the service + - target port - port of the pod to which the service should forward traffic. if not specified, it takes the same value as port. so, in yml, usually only the port is specified +- we can run `kubectl get endpoints` to get a list of all the endpoint objects. we can also get more information about a specific endpoint using `kubectl get endpoints endpoint_name --output=yaml` +- we can run `kubectl exec pod_name env` - here, we will get environment variables like `<>_SERVICE_HOST`, `<>_SERVICE_PORT`. this will have the ip address and port of the different services respectively +- communication - till now, we were using `<>` for communication. it can be expanded to `<>.<>`. if we don't specify the namespace-name, it defaults to the namespace in which the resource initiating the request is +- communication to services can be further expanded to `<>.<>.svc` or `<>.<>.svc.cluster.local`. this bit can be confirmed using `kubectl exec any_pod_name -- cat /etc/resolv.conf` under the search field +- by default, direct communication to pods is not enabled. if we enable it, we can use `<>.<>.pod.cluster.local`. here, the modified-pod-ip is constructed by replacing `.` with `-` i.e. 10.244.2.5 becomes 10-244-2-5. again, because of the search field in /etc/resolv.conf, we can skip cluster.local (but not `pod` since `svc` is the default). 
my doubt - at this point, i might as well use the pod's ip address directly, i mean pod's ip is directly used using modified ip anyway + +### Process of Creation + +![service creation](/assets/img/docker-and-kubernetes/service-creation.drawio.png) + +## Liveliness Probe + +- used for configuring health checks, done at a container level +- if the health check fails, it applies the restart policy which defaults to always +- the restart policy is specified at the pod level and applies to all containers +- `initialDelaySeconds` - when should the probe start +- `timeoutSeconds` - after waiting for how many seconds should the probe fail +- `periodSeconds` - after how many seconds should the probe be repeated +- `failureThreshold` - how many consecutive health checks are allowed to fail +- code example + ```yaml + name: api + image: user-service + livenessProbe: + httpGet: + path: /actuator/health + port: 8080 + initialDelaySeconds: 20 + timeoutSeconds: 5 + periodSeconds: 5 + failureThreshold: 3 + ``` + +## Readiness Probe + +- it is used to determine whether a pod is ready to serve requests +- it has the same configuration as liveliness probe +- ip addresses of unhealthy pods are removed from ip tables, so that the future requests do not make it to them + +## An Example + +- [a complete example](https://gist.github.com/shameekagarwal/1883a95d8be0a74030b77966d80196a0) of + - a database and exposing it using cluster ip + - backend service which talks to db, exposing it using node port, configuring health checks + +## Deployments + +- helps us achieve zero downtime when we deploy services +- we should not create pods or even replica sets directly +- deployments create replica sets behind the scenes +- when we make an update to for e.g. the image version, the deployment will first create a new replica set with the desired number of pods, and once that replica set has successfully scaled the pods, the deployment would mark the desired replicas of the older replica set as 0. a part of `kubectl describe deployment db` - + ``` + Type Reason Age Message + ---- ------ ---- ------- + Normal ScalingReplicaSet 12m Scaled up replica set db-5cc56bf6fb to 1 + Normal ScalingReplicaSet 4m22s Scaled up replica set db-76774bbdf to 1 + Normal ScalingReplicaSet 92s Scaled down replica set db-5cc56bf6fb to 0 + ``` +- a side note - the random characters that we see are actually the hash value of the pod template +- to create a deployment imperatively, use `kubectl create deployment nginx --image=nginx --replicas=2` + - we can also add flags `--dry-run=client --output=yaml` to generate the yaml +- deployment strategy can be rolling update (default) or recreate +- in recreate, the old pods are stopped and new ones are created in its place. this leads to some downtime. use recreate when the coexistence of two versions of the applications can cause inconsistencies e.g. db migrations +- in rolling deployments, the new replica set is scaled up and the old replica set is scaled down simultaneously gradually. they can be tweaked using `maxSurge` and `maxUnavailable` fields. at any given time, we can have a maximum of desired + `maxSurge` or a minimum of desired - `maxUnavailable` pods running. both can be absolute numbers or % and both default to 25%. since both versions of applications run in parallel, the response can be returned from either of the versions at random during deployment +- e.g. 
of rolling deployment - by using the following code, the deployment order is 3 old ➝ 3 old, 1 new ➝ 2 old, 1 new ➝ 2 old, 2 new ➝ 1 old, 2 new ➝ 1 old, 3 new ➝ 3 new + ```yaml + replicas: 3 + + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 1 + ``` +- everytime we deploy in kubernetes, a rollout takes place and a revision is created +- we can monitor the status of the update to deployment using `kubectl rollout status -f deployment.yml` +- we can view the history of updates using `kubectl rollout history -f deployment.yml` +- we can also create a rollback using `kubectl rollout undo -f deployment.yml` + - if we want to go back to a much older version and not just the previous one, we can use `kubectl rollout undo -f deployment.yml --to-revision=2` +- side note: rollbacks might not always be possible e.g. if we had database migrations. so, we may need to roll forward in some cases i.e. implement a hot fix and redeploy the new changes +- using labels - + - `kubectl get all --show-labels` - show the resources with their labels + - `kubectl get all --selector=name=db,app=demo` - filter the resources using their labels + - e.g. to count the total number of resources in dev environment, use `kubectl get all --selector=env=dev --no-headers | wc -l` +- we can set image of a deployment using `kubectl set image deployment db db=mongo:3.3`, where the first db is the deployment name and the second db is the container name, since we can have multi container pod +- to add the default change cause to the `kubectl rollout history ...` output, append commands with `--record`, e.g. `kubectl apply -f infra --record`. this flag is deprecated but i cannot find its replacement +- to scale deployments imperatively, use `kubectl scale deployment api --replicas=2` +- both in deployments and in services, any one of the labels on pod need to be present in `spec.selector` + +### Process of Creation + +- a deployment controller will watch for new deployment creation requests +- it will then create replica set definitions on api server +- after this, the process of replica set creation is continued + +## Imperative vs Declarative + +- in declarative, we just tell the desired state which kubernetes tries to achieve +- e.g. `apply` follows the declarative approach +- however, in the imperative approach, we have to give clear instructions +- all commands like `create`, `edit`, `replace`, `expose`, `run` etc. are imperative +- using declarative approach we can track configuration using version control as well for iac +- imperative approach can be used for hot fixes / experimental purpose +- when using `apply`, we can see the last yaml configuration converted to json which we had sent under `metadata.annotations` in `kubectl.kubernetes.io/last-applied-configuration`. this is used by kubernetes to keep track of changes and is only available when we use `apply` +- if for e.g. we use `edit` to edit a resource, and that resource is not allowed to be edited, we just use `wq` to exit out of vim, and then that file gets saved to /tmp. we can then use `kubectl replace --force -f <>` to replace the existing resource with our newly configured one + +## Ingress + +- it is like a layer 7 load balancer built inside the kubernetes cluster +- makes the services inside cluster accessible from outside +- we also want features like ssl termination, route requests based on domain, etc +- my understanding - recall how a service based on labels can only expose a set of pods. instead of multiple node ports / load balancers i.e. 
one for each set of pods, we have one node port / load balancer which directs traffic to the ingress service. the ingress service can then direct traffic to the different cluster ips in the cluster +- kubernetes provides the ingress resource but not the ingress controller i.e. it provides the api which can be utilized by other third party implementations +- minikube has an addon that can be enabled + ```bash + minikube addons enable ingress + minikube addons list | grep ingress + ``` +- to verify, `kubectl get all --all-namespaces` should show the `ingress-nginx-controller-*` pod running +- ingress is spun up using a deployment and a node port to expose it outside the cluster +- it also deploys configmaps to manage configuration and cluster roles to monitor kubernetes resources +- all resources are deployed in the ingress-nginx namespace +- we can also hit the endpoint http://minikube_ip/healthz to verify the working of ingress +- we can also provide a domain so that the requests are routed based on domain names +- we can also provide a catch-all entry +- [in this example](https://gist.github.com/shameekagarwal/97db31a89ba766cf2d0634c561a1b3e9), if requests come from custom-api.com, and start with request path `/api` they are routed to the api service, but all other requests are routed to the devops service +- note: to simulate that requests are coming from a specific domain on our local, we can use `curl -H "Host: custom-api.com" http://192.168.49.2/api/` +- the ingress resource provided by kubernetes has limited functionality, so to configure the ingress controller provided by third party, we use annotations +- e.g. we want traffic from ingress-service/calendar to our calendar-cluster-ip:port. so, the calendar prefix should be removed. we can do this by using the annotation below - + ```yaml + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + ``` +- so, `/calendar` in `rules[x].http.paths.path` gets replaced by the value in `rewrite-target` which is `/` here +- use `kubectl get ingress` to view the ingress resources +- my understanding - view the port of the node port service `ingress-nginx-controller` inside the `nginx-ingress` namespace. this is the port we hit when making requests to worker_node_ip + +## Volumes + +- references to files and directories made available to containers +- the file system can be anywhere, e.g. outside the host as well i.e. this could be used for aws ebs as well +- e.g. it helps us preserve data across pod restarts +- there can be several types of volumes like host path, git repo (like host path but the path is a git repository) and even cloud specific like aws elastic block store +- empty dir volume type - if a container crashes, a new container is spun up in the same pod. however, if we don't specify any volume, the container crash results in a loss of data. this can be prevented using empty dir volume type, which can survive container restarts but not pod restarts. it is usually chosen as the default by third party manifests and is expected to be replaced by a better solution like nfs +- an issue with using host volumes - it needs to be available on each node so that pods on different nodes can have access to it, and this of course is not an issue with minikube +- so, in cloud, we should ideally mount an nfs on each node, else we would have to copy this file on all nodes. for e.g., we should use aws efs. 
the syntax should be similar + +### Example 1 + +- for docker client to be able to communicate to the correct docker daemon, use the file /var/run/docker.sock +- e.g. we want to run docker commands on the host from pods. so, the container running inside the pod should have docker client installed to issue docker commands, and it should point to the docker daemon of the host +- so, we can use `hostPath` volume type +- [full yaml here](https://gist.github.com/shameekagarwal/f1686cffac86159b5259142f3044f731) +- now, we run the pod using `kubectl apply -f docker.yml` +- then, we can issue commands like `kubectl exec docker -- docker image ls` to list the images on minikube + +### Example 2 + +- recall how for bind volumes in docker, we needed to specify a path in the host. the host now is minikube, so the host path needs to be that of minikube. before running `minikube start`, if i copy files to the path in ~/.minikube/files directory on my workstation, i can see those files in the root on minikube host. we can verify this using `minikube ssh` and then by running `ls /` +- so, suppose we want to specify a configuration file for prometheus +- we can copy this configuration file to minikube and then use host path volumes to reference it +- in this example, a better solution would have been to create a custom image and use `COPY` in the docker file + +```yaml +# ... +spec: + containers: + - # ... + volumeMounts: + - mountPath: /etc/prometheus/prometheus.yml + name: prom-conf + + volumes: + - name: prom-conf + hostPath: + path: /prometheus-conf.yml + type: File +``` + +## Config Maps + +- we can have different sources of configuration like environment variables, files, env files, literals, etc + +### Default Config Map + +- it is used to make calls to the kubernetes api from containers +- `kubectl get configmaps` - kube-root-ca.crt is the config map created by default +- `kubectl describe pods pod_name` will give the mount location of this config map. note that this config map may not be mounted to the pods in kube-system though +- `kubectl exec pod_name -- ls /var/run/secrets/kubernetes.io/serviceaccount` shows that there are three files - namespace, ca.crt and token +- on reading online, i see that this can also be a secret instead of a configmap + +### Mount Volumes + +- config maps can mount the configuration as volumes to running containers +- imperative command - `kubectl create configmap prometheus-config --from-file=prometheus-conf.yml` +- `kubectl describe configmap prometheus-config` +- using in the yml file - + ```yaml + spec: + containers: + - # ... + volumeMounts: + - mountPath: /etc/prometheus + name: prometheus-config + # ... + volumes: + - name: prometheus-config + configMap: + name: prometheus-config + ``` + verify using `kubectl exec prometheus -- cat /etc/prometheus/prometheus-conf.yml` +- instead of providing a file, we can use literals, e.g. `kubectl create configmap --from-literal=foo=bar` +- in this case, if we use volume mounts, a file called foo would be created with its contents as bar + +### Environment Variables + +- e.g. 
create a file called .env + ``` + client-id=qwerty + client-secret=12345 + ``` +- `kubectl create configmap api-credentials --from-env-file=.env` +- my understanding - the difference between `--from-file` vs `--from-env-file` is in from file, kubernetes does not care about the file's content, while in from env file, it knows how to treat as different key value pairs, so that it can inject them all at once / individually as discussed below +- usage - + ```yaml + containers: + #... + envFrom: + - configMapRef: + name: api-credentials + ``` +- verify using `kubectl exec alpine -- env` +- we can also inject the variables of the config map individually - + ```yaml + containers: + #... + env: + name: CLIENT_ID + valueFrom: + configMapKeyRef: + name: api-credentials + key: client-id + ``` + +## Secrets + +- secrets are similar to config maps +- secrets can be of three types - + - docker-registry - for pulling images from private registry + - tls - for storing certificates + - generic - works like config maps, so can have sources like `--from-env-file`, `--from-file`, `--from-literal` +- creating a secret imperatively - + ```sh + kubectl create secret generic jenkins-credential \ + --from-literal=username=johndoe \ + --from-literal=password=incognito + ``` +- to retrieve the original value - + ```sh + kubectl get secret jenkins-credential --output=json + kubectl get secret jenkins-credential --output=jsonpath="{.data.password}" | base64 --decode + ``` +- to use the secrets, we put them into files /etc/secret/jenkins-user and /etc/secret/jenkins-pass - + ```yaml + spec: + containers: + - # ... + volumeMounts: + - mountPath: /etc/secrets + name: jenkins-credentials + + volumes: + - name: jenkins-credentials + secret: + secretName: jenkins-credential + defaultMode: 0444 + items: + - key: username + path: jenkins-user + - key: password + path: jenkins-pass + ``` +- we made it read only for all users using 0444 as the mode +- verify using `kubectl exec pod_name -- cat /etc/secrets/jenkins-pass` +- if creating secrets declaratively, the values should be base64 encoded first + ```yaml + # ... + data: + username: am9obmRvZQ== + ``` + using sh base64 utility - + ```sh + # to encode + echo -n johndoe | base64 + + # to decode + echo -n am9obmRvZQ== | base64 --decode + ``` +- the only difference between config maps and secrets is that secrets are stored in tmpfs (temporary file storage) thus leaving no trace on the nodes +- secrets should be combined with rbac for limited access +- **cons of using kubernetes secrets**: secrets are stored in plain text in etcd, so anyone with access to etcd can read the secrets. so, we should use solutions like hashicorp vault, integrating it with kubernetes is smooth + +### Docker Registry + +- by default, we use public docker registry +- sometimes we might need private registry +- we use `docker login` when using vanilla docker +- when using kubernetes, we can create the secret of type `docker-registry` + ```sh + kubectl create secret docker-registry registry-credential \ + --docker-server=...\ + --docker-username=...\ + --docker-password=...\ + --docker-email=... + ``` +- we can then specify the name of the secret in pod + ```yaml + spec: + imagePullSecrets: + - name: registry-credential + ``` + +## Namespaces + +- we can spin up multiple clusters to isolate the different environments. 
this can help prevent accidental changes to the production cluster +- however this has operational and resource overhead +- namespaces help us create different segments on a cluster +- namespaces are like virtual clusters +- we can scope resource limits and permissions to namespaces +- we use the "default namespace" by default +- we can run `kubectl get namespaces` to view all the available namespaces +- kube-public - the resources in this namespace are accessible to all (including unauthenticated) users +- kube-system - `kubectl get all --namespace=kube-system` shows the resources managed by kubernetes itself +- to create a namespace, use `kubectl create namespace staging` +- if we set the namespace in context, we do not have to repeatedly suffix commands by `--namespace=staging`. e.g. we can use `kubectl config set-context $(kubectl config current-context) --namespace=staging` +- when we delete a namespace, the cascading effect deletes all the resources within it as well. the command is `kubectl delete namespace staging` +- in the resource files, we can also specify the `namespace` key under `metadata` + +## RBAC + +- it allows us to control access to resources +- each request goes through three stages - authentication, authorization, and then through admission control +- authentication is done using static password files, static token files, certificates or identity services like ldap +- we can combine rbac with namespaces as well + +## Static Files + +- we create a csv file with 4 columns - password, username, user id and optionally a group +- when starting the api server, we pass the file using a flag `--basic-auth-file=user-details.csv` +- then, if sending requests using curl, we can pass credentials using `-u username:password` +- we can also have tokens instead of passwords +- the flag we pass to api server in this case is `--token-auth-file` +- we can pass the token via curl using `--header "Authorization: Bearer "` +- this method is deprecated / unavailable in newer versions + +## Certificates + +- **we need to be able to generate certificates for creating users** +- we need two files - + - the certificate which is a signed public key (files suffixed with .crt) + - a private key (files suffixed with .key) +- certificates are needed by both servers and clients. there can be multiple combinations of server and client as well. the certificates required are listed below - + - 7 client certificates + - 3 - scheduler, controller manager, kube proxy (to api server) + - api server (to kubelet) + - kubelet (to api server) + - api server (to etcd) + - user using kubectl (to api server) + - 3 server certificates - etcd, kubelet, api server + - the ca's certificate +- etcd cluster can have multiple nodes running on different servers for high availability +- so, we need to generate peer certificates so that the communication between the nodes is encrypted as well +- we can specify a config file while creating certificates using `--config` +- config can be needed for e.g. 
to provide alternative names +- [docs](https://kubernetes.io/docs/setup/best-practices/certificates/) for the certificates required by kubernetes components and their respective cas +- verify if `openssl version` works to be able to generate certificates +- generating keys and certificates - + ```sh + openssl genrsa -out johndoe.key 2048 + + openssl req -new \ + -key johndoe.key \ + -out johndoe.csr \ + -subj "/CN=johndoe/O=developers" + + openssl x509 -req \ + -in johndoe.csr \ + -CA ~/.minikube/ca.crt \ + -CAkey ~/.minikube/ca.key \ + -CAcreateserial \ + -out johndoe.crt \ + -days 365 + + # view the certificate details + openssl x509 -noout -text -in johndoe.crt + ``` +- first a private key (.key) is generated +- then a certificate signing request (.csr) is generated. here `CN` or common name is analogous to the username and `O` or organization to the group +- then, using the csr and the ca certificate, the signed public key or the signed certificate (.crt) is generated +- instead of doing the steps after generating the csr manually, we can utilize the kubernetes api +- basically a resource type of `CertificateSigningRequest` is created, with the contents of the csr mentioned **in a base64 encoded format** in the yaml file +- the admin can then run `kubectl get certificatesigningrequests` to view pending requests +- requests can be approved via `kubectl certificate approve ` +- similarly, requests can be denied via `kubectl certificate deny ` +- to view the generated signed certificate, we can use `kubectl get certificatesigningrequest --output=yaml`. it is again in the base64 encoded format, so we decode and store it in a file + +## Config + +- we can specify flags like `--key`, `--cert` and `--cacert` when making requests via curl to api server +- we can also specify flags in kubectl everytime, e.g. `kubectl get pods --client-key=... --client-certificate=... --certificate-authority=...` +- by default for all our requests using kubectl, the configuration is specified in ~/.kube/config. it also is like a kubernetes resource with `kind: Config`. so, instead of defaulting to ~/.kube/config in every command, we can specify the file using the `--kubeconfig` flag +- it has three parts - clusters, users and contexts +- clusters refer to the different kubernetes clusters that we would like to access +- the cluster requires the path to the ca server certificate and the api server address +- to get the server address, use `kubectl config view --output=jsonpath="{.clusters[0].cluster.server}"` +- the user requires the path to private key and signed certificate +- we can also provide the base64 encoded data directly instead of the path for the user / cluster +- contexts pair the clusters to users. so, they have the cluster, user and even the namespace to use by default +- the one used by default by kubectl is defined via `current-context` +- create a new cluster - + ```sh + cp ~/.minikube/ca.crt . 
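  # the ca certificate copied above is what kubectl will use to verify the api server;
  # the set-cluster command below points --certificate-authority at this local copy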
+ + kubectl config set-cluster johndoe \ + --certificate-authority ca.crt \ + --server https://192.168.49.2:8443 # cluster server address + + kubectl config get-clusters # verify that the cluster is created + ``` +- create a new user - + ```sh + kubectl config set-credentials johndoe \ + --client-certificate johndoe.crt \ + --client-key johndoe.key + ``` +- create and set the context - + ```sh + kubectl config set-context johndoe \ + --user johndoe \ + --cluster johndoe # create / edit the context + + kubectl config use-context johndoe # change the context + + kubectl config get-contexts # verify that the context is set + ``` +- we can view the entire config using `kubectl config view` or `cat ~/.kube/config` +- note: the context section can also take the namespace as an argument + +## Authorization Modes + +- node - e.g. used by worker nodes, for kubelet to interact with api server +- rbac - attaching policies to role +- abac - attaching policies to users / groups directly +- webhooks - a third party agent like open policy agent +- to view authorization modes configured on the api server, we can use - `kubectl describe pod kube-apiserver-minikube --namespace=kube-system | grep authorization-mode` + +## Roles and Bindings + +- rules, comprise of - + - verbs, e.g. get, list, create + - resources, e.g. pods + - resource names + - api groups of the resources +- roles - they are a collection of rules. a role is applied to a namespace +- cluster role - same as roles but scoped to clusters +- resources like pods are namespaced while resources like nodes are cluster scoped. to get an exhaustive list, we can use `kubectl api-resources --namespaced=true` or set the flag to false +- subjects - can be user, service accounts (used by pods to interact with kubernetes api) or groups, which are a collection of users and service accounts +- we also have role bindings and cluster role bindings +- `kubectl config get-users` - by default we have only one user minikube +- a few commands we can use include - `kubectl get roles`, `kubectl get clusterroles`, `kubectl get rolebindings`, `kubectl get clusterrolebindings` +- we can also use `kubectl describe clusterrole view` +- we already have some cluster roles and cluster role bindings created by default +- ones prefixed with `system:` should be generally avoided, so we can run `kubectl get clusterroles | grep -v system`. we get four roles, each of them has been described below +- view - can perform get, list and watch operations (verbs) on almost everything +- edit - everything that admin can do except modify roles and role bindings +- admin - everything that cluster-admin can do except modification to namespaces and resource quotas +- cluster-admin - can perform all operations. e.g. the default user minikube has this role. this can be verified by running `kubectl auth can-i "*" "*"` +- to verify if an operation can be performed, we can use for instance `kubectl auth can-i get pods` +- we can impersonate as someone else using `kubectl auth can-i get pods --as=johndoe` +- creating a role binding - + ```sh + kubectl create rolebinding johndoe \ + --clusterrole=view \ + --user=johndoe \ + --namespace=default + ``` +- verify using `kubectl describe rolebinding johndoe`. note: sometimes in kubectl outputs, the namespace field is empty when it is referring to the default namespace +- my understanding - role bindings can reference cluster roles, it just means that the permissions would be granted on the specified namespace only. this allows for role reuse. 
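- a rough declarative sketch of the same role binding created imperatively above (same names - johndoe, the view cluster role, the default namespace) -
  ```yaml
  apiVersion: rbac.authorization.k8s.io/v1

  kind: RoleBinding

  metadata:
    name: johndoe
    namespace: default

  roleRef:
    apiGroup: rbac.authorization.k8s.io
    kind: ClusterRole
    name: view

  subjects:
    - apiGroup: rbac.authorization.k8s.io
      kind: User
      name: johndoe
  ```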
the view role allows to view in any namespace, and by creating a role binding we can limit the user's usage to a namespace +- delete a role binding using `kubectl delete rolebinding johndoe` +- using role bindings, we can attach one role to multiple subjects +- declaratively creating a cluster role binding using yaml - + ```yaml + apiVersion: rbac.authorization.k8s.io/v1 + + kind: ClusterRoleBinding + + metadata: + name: johndoe-view + + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: view + + subjects: + - apiGroup: rbac.authorization.k8s.io + kind: User + name: johndoe + ``` +- note how role bindings have a single role but can have multiple subjects +- verify using `kubectl auth can-i get pods --as=johndoe --all-namespaces` +- describing the admin cluster role created by default - `kubectl describe clusterrole admin` +- e.g. yml to create a custom role - + ```yaml + apiVersion: rbac.authorization.k8s.io/v1 + + kind: ClusterRole + + metadata: + name: release-manager + + rules: + - resources: ["pods", "pods/attach", "pods/exec", "pods/log", "pods/status"] + verbs: ["*"] + apiGroups: [""] + - resources: ["deployments", "replicasets"] + verbs: ["create", "get", "list", "update", "watch"] + apiGroups: ["", "apps", "extensions"] + ``` +- note: to grant permissions for different operations on pods, specifying the resources as `pods` is not enough, as there can be other sub resources like `pods/logs` etc +- instead of users, we can also use groups in role bindings and cluster role bindings, by changing the kind to groups. the group a user belongs to was specified while generating the certificate +- my understanding - when relying on groups for role bindings, using the correct context is important when running `kubectl auth can-i...` command, since the certificates associated with the context are used to validate the request. by just using `--as`, we specify the user, but for e.g. the group comes from the value of `O` in the subject when generating signed certificates. optionally, we can specify the flags like `--client-certificate`, `--client-key` etc + +## Service Accounts + +- it is used by applications to interact with the kubernetes cluster +- if we run `kubectl get serviceaccounts`, we see the default service account is already present +- we can create a service account using `kubectl create serviceaccount app` +- recall the [default config map](#default-config-map) +- creating a service account creates a secret, which has a token behind the scenes +- so, we can use these tokens to make requests, by adding the header - `Authorization: Bearer ` +- the method above is useful when the functionality is outside the kubernetes cluster +- however, if the application is a part of our kubernetes cluster, we can simply mount the service account. i.e. use the `serviceAccountName` field in pods +- we can also manually set the `automountServiceAccountToken` to false in order to disable the automatic mounting of the default service account +- in newer versions of kubernetes, some changes have been made - service accounts now no longer automatically have a token associated with them. 
instead, the token request api is used to generate a token which then gets mounted on the pods +- we can generate a token for a service account using `kubectl create token <>`, and provide configuration parameters like expiration time + +## NodeName + +- the scheduler schedules a pod on any node +- if we run `kubectl get pod pod_name --output=yaml`, we can see the node it was scheduled on under `nodeName` +- behind the scenes, a binding object is created which binds the pod to a node +- we can manually specify the node a pod should be scheduled on using the `nodeName` property +- we can use this if we didn't have a scheduler, and this would schedule the pod on the specified node + +## Taint and Toleration + +- taint is set on nodes which prevent any random pod from being scheduled on it +- toleration is set on pods which allows them to be scheduled on a node with taint +- by default, the pods have no toleration +- use case - a worker node has resources to enable running of a certain type of pod +- it means that only pods with toleration **can be** scheduled on the node with that taint +- however, the pods with this toleration can be scheduled on other nodes as well +- this feature is used by kubernetes as well to help ensure that normal pods are not scheduled on the master and only the management pods scheduled by kubernetes itself are +- to taint nodes, use `kubectl taint node node_name key=value:taint-effect` +- similarly, to remove the taint, use `kubectl taint node node_name key=value:taint-effect-`, i.e. suffix the prior command with a `-` symbol +- taint effects can be - + - `NoSchedule` - do not schedule any new pods without the right toleration + - `PreferNoSchedule` - prefer not scheduling + - `NoExecute` - like `NoSchedule` but also evicts the existing pods on the node without the correct toleration +- to apply toleration on pods, use - + ```yaml + spec: + tolerations: + - key: key + operator: Equal + value: value + effect: NoSchedule + ``` + +## Node Selectors and Node Affinity + +- we add labels to nodes and then add selectors for them to pod definitions +- this way, the pods with the node affinity can only be run only on specific nodes +- however, pods without the node affinity can still be spun up on the nodes with labels +- to label a node, we use - `kubectl label node node_name key=value` +- we can use `kubectl get nodes --show-labels` to verify +- to apply selectors for labels on nodes, use - + ```yaml + spec: + nodeSelector: + size: large + ``` +- but, using node selectors we cannot specify complex conditions +- so, we use node affinity + ```yaml + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: size + operator: In + values: + - large + - medium + ``` +- what if the node labels are changed after the pod was already scheduled? what if there are no nodes found with the conditions matching the node affinity value? 
for these, the value can be `requiredDuringSchedulingIgnoredDuringExecution` or `preferredDuringSchedulingIgnoredDuringExecution` +- my understanding - `requiredDuringSchedulingRequiredDuringExecution` is not available by default +- some operators to use - `Equal`, `In`, `Exists` +- so, overall, to ensure pods of a particular type and only this type end up on a particular node, we need to use node selectors / node affinity and taints and tolerations in conjunction + +## Resource Management + +- we can give an indication and set limits for the resources that can be used by kubernetes components +- specified at the container level +- this helps kubernetes in scheduling +- to enable metrics server, use - `minikube addons enable metrics-server` +- can be written as for e.g. `0.5` or `500m` (500 milli cpu). 1 milli cpu is equivalent to 1 hyperthread / 1 vcpu +- memory can be written as `K` or `Ki` for kilobyte, `M` or `Mi` for megabyte and so on. we can only specify the numerical value as well, its value is in bytes e.g. `256Mi` or `268435456` +- syntax - + ```yaml + containers: + #... + resources: + limits: + memory: 100Mi + cpu: 200m + requests: + memory: 50Mi + cpu: 100m + ``` +- limits - amount of resources that containers should not cross +- if the container crosses the memory limit, it will be terminated / restarted. the pod has status `OOMKilled` (out of memory killed). the pod remains the same, the container changes +- containers are not allowed to use more than the cpu limit for an extended period, so there are no restarts / termination of the containers for crossing the cpu limits as cpu usage gets throttled automatically +- requests - amount of resources that containers are expected to use +- only when the node runs out of memory, the pod that the container exceeding the requests is a part of is evicted from the node, and it gets rescheduled +- if a container's memory request exceeds the available memory on any node (technically sum of the memory requests of all the containers of a pod), the pod stays in `Pending` state indefinitely +- if the memory usage exceeds only the requested amount (and not the limit), the pod can be evicted if another pod enters with a higher qos and needs that memory +- so, memory limit cannot be exceeded while memory request can be exceeded if the node has enough memory +- `kubectl describe nodes` gives details of the available and in use resources +- `kubectl top pods` gives cpu and memory usage details because of the metrics-server addon. to get information related to the containers in the pod as well, we can use `kubectl top pods --containers` +- similarly, we can use `kubectl top nodes` +- prometheus is a better solution than metrics-server for real world use cases +- qos - quality of service determines the priority - guaranteed > burstable > best effort +- guaranteed - resource limit = resource request. note: remember that if only limits are defined, request = limit +- burstable - at least one container has limit / request defined, unequal limits and requests, etc +- best effort - no resources are defined at all +- we can view the qos assigned by kubernetes using `kubectl describe pod pod_name | grep QoS` +- additional concept - priority classes are useful for e.g. when two pods have the same `qosClass`. 
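- a minimal sketch of such a priority class (the name and value below are assumed, not taken from anything above) -
  ```yaml
  apiVersion: scheduling.k8s.io/v1

  kind: PriorityClass

  metadata:
    name: high-priority

  value: 1000000
  globalDefault: false
  description: assigned to the pod that should win when qos classes are equal
  ```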
we can run `k get priorityClasses` and then assign one of the values using `priorityClassName` under `spec` of the pod + +### Limit Ranges + +- limit ranges help us specify the following at a namespace level ([a yaml example](https://gist.github.com/shameekagarwal/75ae269c7c98c48c57ec215c9dbba20e)) - +- `default` - default resources limit +- `defaultRequest` - the default resources request +- `max` and `min` the maximum and minimum permitted values for the requests and limits +- `maxLimitRequestRatio` - the maximum limit to request ratio (limit should ideally be higher than request?) + +### Resource Quotas + +- limits the resources that can be consumed by a namespace. so, if we have multiple namespaces to support environments like dev and prod in our clusters, we can distribute resources equally so that there is no starvation for any of the environments. [a yaml example](https://gist.github.com/shameekagarwal/8343d4bb2e0029ee00f4d57f8f4b9306) +- using resource quotas, we can limit compute (e.g. cpu and memory requests and limits), storage (e.g. persistent volume claims) and object count (e.g. number of pods, number of node ports, etc.) + +## Daemon Sets + +- ensures one pod runs on each node +- e.g. logging and monitoring pods which need to be run on every node can be created using daemon sets +- even kube-proxy is run this way. to verify, use - `kubectl get daemonsets --all-namespaces` +- it used [node name](#nodename) but in newer versions [node affinity](#node-selectors-and-node-affinity) underneath + +## Init Containers + +- defined inside a pod +- before the long-running containers start, we might want to install binaries, etc +- init containers are run to completion one at a time sequentially before the normal containers start running +- their syntax in yaml is the same as normal containers + +## Static Pods + +- a pod created by kubelet itself on the node without involving the api server / etcd is called a static pod +- the kubelet continuously monitors a directory for changes +- so, when we can create a file in it, it gets picked up by the kubelet +- if we edit the file / remove the file, the kubelet automatically changes / terminates the pod accordingly +- this does not work for deployments etc. since they require controllers +- if the node is a part of a cluster, it will notify the api server about the static pod. so, `kubectl get pods` will show the pod, since the etcd cluster etc. know about these pods. however, unlike a normal pod, the only way to modify this pod is to modify the file +- use case - since static pods do not depend on control plane components like controllers, scheduler, etc., they are used to deploy the control plane components themselves +- unless configured otherwise, the directory is `/etc/kubernetes/manifests/` +- we can verify this in minikube after running `minikube ssh` by running `sudo ls /etc/kubernetes/manifests` that it has files for etcd, scheduler, api server and controller manager +- static pods will be suffixed by `-nodename` - `kubectl get pods --all-namespaces | grep minikube` +- if we run `kubectl get pod pod_name --output=yaml`, we can confirm that the owner is a node by going to `ownerReferences.kind` which should have the value `Node` +- to get the static pod path, use - `cat /var/lib/kubelet/config.yaml | grep staticPodPath` + +## Persistent Volumes + +- persistence of state should be decoupled from pods since they can be added / removed easily +- nfs is the way to go for disk storage in cloud. 
here, aws ebs has been shown +- note: ebs volumes should only be spun up in azs where worker nodes exist, since ebs is scoped to an az +- the `spec.capacity.storage` in the persistent volume defn. should be <= the capacity of ebs +- access modes can be `ReadWriteOnce`, `ReadOnlyMany`, `ReadWriteMany` +- we can run `kubectl get storageclasses` to get the available storage classes +- e.g. if we were using kops with aws, it would automatically add the storage class of gp2 for us +- default storage class admission controller observe requests for persistent volume claims and when a claim does not specify the storage class, it gets assigned the default storage class. when we run `kubectl get storageclasses`, we see that gp2 is marked as default +- [yaml example](https://gist.github.com/shameekagarwal/03e5e9dd6c43439d654792bb8822806d) of persistent volume +- persistent volumes are used through persistent volume claims. the idea is that admins create a set of persistent volumes, and developers use them via persistent volume claims +- there is a one to one mapping i.e. one persistent volume can only be used by one persistent volume claim +- `spec.storageClassName` and `spec.accessModes` should have the same value as that of persistent volume while the value of `spec.resources.requests.storage` should be <= the value of `spec.capacity.storage` so that the persistent volume claim can get a segment of the persistent volume +- because of this, if the persistent volume has more storage than what the persistent volume claim asks for, the claim gets the extra storage as well +- if no matching persistent volume is found, the persistent volume claim remains unbound indefinitely +- [yaml example](https://gist.github.com/shameekagarwal/a2afa15e76ee80c75a2dc19bfd234a54) of persistent volume claim +- usage - + ```yaml + spec: + containers: + - # ... + volumeMounts: + - name: jenkins-home + mountPath: /var/jenkins_home + + volumes: + - name: jenkins-home + persistentVolumeClaim: + claimName: jenkins-storage + ``` +- the status of a persistent volume can be - + - `Available` when no persistent volume claim is bound to it + - `Bound` when a persistent volume claim is bound to it + - `Released` when the persistent volume claim is deleted +- the default reclaim policy of a persistent volume is `Retain`. first, the pod / deployment is deleted, and then the persistent volume claim is deleted. now, the persistent volume has status of released. but it is not available to be bound because it already has existing data from previous pods which need to be deleted first +- so, we delete the persistent volume manually, try to clean up / delete the aws ebs manually and then can create new persistent volumes for the persistent volume claims +- till now, we used the manual method of provisioning volumes, i.e. static persistent volumes +- the dynamic method requires lesser intervention +- however, in case of a conflict, kubernetes will choose the static one +- the persistent volume is created automatically in case of dynamic persistent volumes +- when we delete the deployment and then the persistent volume claim now, the persistent volume as well as the actual nfs ebs volume is deleted automatically. this is because when using dynamic persistent volumes, the reclaim policy of the persistent volume is `Delete` +- [yaml example](https://gist.github.com/shameekagarwal/b5013b4645d62d287aeb2868ae37e5c3) for persistent volume claim for dynamic persistent volume +- the storage classes have a field called volume binding mode. 
this can be set to `WaitForFirstConsumer` i.e. persistent volume will not be bound to the persistent volume claim till there is a pod for the persistent volume claim. the other value that the binding mode can take is `Immediate` + +## Commands and Arguments + +- difference between command and entrypoint in docker is described [here](#cmd-and-entrypoint) +- for e.g. in the pod definition - + - `spec.containers[*].command` is used for replacing `ENTRYPOINT` of docker file + - `spec.containers[*].args` is used for replacing `CMD` of docker file + +## Security Context + +- when we run a container, we can specify the id of the user used to run the container, capabilities, etc +- we can specify this at the pod level so that all the containers inherit it or at the container level as well +- note: capabilities can only be defined at the container level +- syntax - + ```yaml + securityContext: + runAsUser: 1000 + capabilities: + add: ["MAC_ADMIN"] + ``` +- this is more of a docker functionality which can be specified via kubernetes. basically, since containers run using namespaces, and we do not want root users inside namespaces to perform any critical operations on the host itself. so, docker adds only some capabilities, which we extend using the add capabilities mechanism +- to verify, we can use `kubectl exec -- whoami`. it should display the username or the user id + +## Patching Nodes + +- if we remove a node suddenly, the pods scheduled on it are lost +- if it was a part of a replica set, it would be rescheduled, but not if it was a normal pod +- to stop any further scheduling on the current node, run `kubectl cordon node_name` +- to stop any further scheduling and also evict existing pods, use `kubectl drain node_name` +- if the node included a pod not spun as a part of a controller, we have to add `--force`. this is because that pod would be lost forever +- so, i think drain already does what cordon does +- pods part of replica sets etc. will be rescheduled on other nodes, because that is the job of controllers +- after running the `drain`, we can start the upgrade +- to enable scheduling pods on the node again, run `kubectl uncordon node_name` +- my understanding - suppose a node goes down. so do the pods running on it. the time a controller waits to reconsider rescheduling the pod on another node is defined via `podEvictionTimeout`. this is why draining nodes is important, we don't rely on the timeout, and instead, rescheduling of pods happens gracefully +- to verify, use `kubectl describe node <> | grep Unschedulable` + +## Network Policy + +- ingress - incoming traffic, egress - outgoing traffic +- the response is not considered when classifying traffic as ingress and egress (like security groups in aws) +- kubernetes has an all allow rule by default +- we use labels to link a pod to a network policy +- whether network policies are supported also depends on the networking solution our cluster depends on +- if we don't specify the `podSelector`, traffic from all pods are allowed, same for `namespaceSelector` +- for traffic from outside the cluster, we can also use the `ipBlock` section +- [a yaml example](https://gist.github.com/shameekagarwal/4113cd73c42acab5ddfb3a882eee0391) for a database +- notice how there are multiple rules. so, it works like `ipBlock` or (`namespaceSelector` and `podSelector`) +- if you want to apply it to **all pods**, use `podSelector: {}` +- my understanding - if we want to restrict traffic between namespaces, e.g. 
ns-1 can only make requests to ns-2, we can use network policies. so, i created one with egress rule using key `kubernetes.io/metadata.name` under `namespaceSelector.matchLabels`. however, when i tried making requests to the cluster ip service in ns-2 from ns-1, the request was failing. i guess this was happening because kube dns is needed for the dns name resolution. so, i added [this](https://stackoverflow.com/a/71127697/11885333) policy as well to get it to work + +## Interface + +- for implementations to work with kubernetes, they should be compatible with the interface +- this allows for kubernetes to be extended with multiple implementations seamlessly +- e.g. cri for container runtime interface, cni for network, csi for storage, etc +- e.g. csi lays a set of rpc calls that the cri will make. the csi implementations should implement these rpcs + +## Json Path + +- using jsonpath, we get a subset of json data +- jsonpath always returns the results in the form of an array +- the root element can be represented as `$` +- we can use `[]` for indexing arrays +- we can have conditions using `?()`, e.g. to get elements greater than 40 in an array, use `$[?(@ > 40)]` +- we can use `in` and `nin` to represent in and not in, e.g. `$[?(@ nin [1, 2])]` +- use `*` to retrieve all elements in an object (`$.*`) or in an array (`$[*]`) +- a convoluted e.g. - we want to extract the laureate whose first and last name we know - + ```json + { + "prizes": [ + { + "category": "physics", + "laureates": [ + { "firstname": "Arthur", "surname": "Ashkin" } + ] + }, + { + "category": "chemistry", + "laureates": [ + { "firstname": "Frances H.", "surname": "Arnold" }, + { "firstname": "George P.", "surname": "Smith" } + ] + } + ] + } + ``` + `$.prizes[*].laureates[?(@.firstname == 'Malala' && @.surname == 'Yousafzai')]` +- to retrieve multiple elements in a list, we can use `$[0,3]` +- we can also use the syntax `$[start:end:step]` e.g. `$[1:5:2]` +- we can use negative indices to retrieve the last elements, e.g. `$[-3:]` for the last three elements +- element present at index end is not included +- step defaults to 1, start and end default to 0 +- while `$[positive]` works, `$[negative]` fails in some cases, so specify an end when using negative indices + +### Usage in Kubectl + +- we mention the json path using `--output=jsonpath="{.items}"` i.e. enclose within `{}` and remove `$` +- we can get merge multiple queries - `jsonpath="{.name}{.cpu}"` +- we can print the results in a tabular format. e.g. to get the node name and its cpu usage in a table, use - `kubectl get nodes --output=custom-columns="NODE:.metadata.name,CPU:.status.capacity.cpu` +- we can also specify a sort order using `--sort-by`, e.g. - `kubectl get pods --all-namespaces --output=custom-columns="NAME:.metadata.name" --sort-by=".metadata.name"` +- to reverse the order of sort by, `k get ... --sort-by=... | tac` + +## ArgoCD + +- advertises gitops approach + - git repository is the source of truth + - an agent ensures and keeps the cluster and repository in sync +- first part - testing and building of application code, creating a docker image out of it and pushing this image to docker hub +- second part - somehow updating the image version in kubernetes manifests to point to this new version e.g. 
jenkins can do this +- third part - running kubectl apply on the new manifests +- some challenges with the flow above - + - kubectl client, which lives outside the cluster needs to have credentials for kubernetes cluster - this is a security challenge + - kubectl cannot monitor things like health of applications +- now, first and second part stay the same, the third part is swapped with argocd +- argocd is a part of the cluster itself +- pull workflow - the argocd agent pulls the manifests and applies them to the cluster +- argocd keeps both git repository and cluster in sync - thus avoiding drifts. this way, the git repository stays as the single source of truth +- since this is all git, argocd allows easy rollback i.e. have argocd point to the earlier git revision +- one common configuration - do the sync automatically in non production environments, have someone apply the changes in production environments +- both challenges are solved - + - permissions around changes is now managed via git - approvals for changes can be done by selective people only, and the main workhorse argocd stays inside the cluster + - does monitoring of resources +- using crd (custom resource definitions) it extends kubernetes functionality, so that we can use kubernetes like manifests to provide argocd configuration +- we basically create application like below - line 8-11 is for the repository configuration, line 13 is for kubernetes cluster. we just need to run kubectl apply on the file below - + ```yaml + apiVersion: argoproj.io/v1alpha1 + kind: Application + metadata: + name: demo + namespace: argocd + spec: + project: default + source: + repoURL: https://gitlab.com/shameekagarwal/argocd-demo.git + targetRevision: HEAD + path: k8s + destination: + server: https://kubernetes.default.svc + syncPolicy: + automated: + selfHeal: true + prune: true + ``` +- note - the namespace defines where this argocd application should go, not where the resources should go +- kubernetes.default.svc is the endpoint to access the api server from inside the cluster +- head - default branch latest commit +- for private git repositories, we need to provide the private ssh key on the application manifest above +- self heal - false by default. if someone makes manual changes to the kubernetes cluster, using for e.g. kubectl commands, argocd will not try to abandon those changes and sync them with the git repository by default. use true to keep the git repository as the single source of truth +- prune - delete corresponding resource if manifest is deleted. false by default to prevent accidental deletions +- alternatives of argocd - flux, jenkins x +- deploying argocd to our minikube cluster - + ```sh + kubectl create namespace argocd + kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml + ``` +- to access the argocd api server on our local, we first need to expose it outside the cluster. then, methods to obtain password, ip and port are described below (username is admin) + ```sh + kubectl patch svc argocd-server -n argocd -p '{"spec": {"type": "LoadBalancer"}}' + minikube ip # ip + kubectl --namespace argocd get services | grep argocd-server # port + kubectl --namespace argocd get secret argocd-initial-admin-secret \ + --output=jsonpath="{.data.password}" | base64 --decode # password + ``` +- by default, argocd will poll the git repository at a fixed interval like 5 minutes. to get rid of this delay, we can use git webhooks i.e. 
git will trigger a notification to argocd on change +- in the ui, we can click on refresh so that instead of the default interval, argocd polls the repository immediately +- we can have argocd deployed to one cluster and have it monitor multiple kubernetes clusters + - imagine due to a disaster, one of the kubernetes clusters came down (not the one where argocd is running 🤣). lets assume no backups were needed (stateless workloads). if we create a new cluster and have argocd point to this new cluster instead of the old closed one, argocd will automatically deploy all outstanding workloads from the git repositories and bring it up to speed +- app project - group multiple such applications and manage them easily +- argocd health status - aggregates status from different resources that have been deployed +- application sets - factory for applications i.e. unlike application where kubernetes cluster and git repository is one to one, in application sets, kubernetes cluster and git repository is many to many +- in application sets, templates represent an application and generators are used to populate these applications using parameters. e.g. of list generator - + ```yaml + apiVersion: argoproj.io/v1alpha1 + kind: ApplicationSet + metadata: + name: demo + namespace: argocd + spec: + generators: + - list: + elements: + - cluster: asia + url: https://kubernetes.default.svc + - cluster: americas + url: https://kubernetes.default.svc + template: + metadata: + name: demo + spec: + project: default + source: + repoURL: https://gitlab.com/shameekagarwal/argocd-demo.git + targetRevision: HEAD + path: k8s + destination: + server: \{\{ url \}\} + syncPolicy: + automated: + selfHeal: true + prune: true + ``` +- there are a lot of more generators that we can use in an application set. some common generators have been described below +- git directory generator - useful for monorepo pattern i.e. one single git project has multiple applications, e.g. one folder say app has a kustomize based manifests, another folder say monitoring has helm based manifests and so on. in the application set, we only have to prodvide the git repository link. argocd will scan the git repository for different such kubernetes manifests and generate 2 applications automatically +- cluster generator - we register clusters inside argocd. our application set can then automatically generate applications for all the clusters that argocd knows off. we can use some methods to match certain clusters only +- matrix generator - combine different generators, e.g. cluster generator and git directory generator. this way, if git directory generator finds 4 kustomize applications and cluster generator finds 3 clusters, we end up deploying 4 * 3 = 12 argocd applications + +## Kustomize + +- helps use kubernetes templating using yaml easily +- inbuilt inside newer versions of kubectl +- base folder - manifests file + kustomization.yaml +- overlays folder - environment specific configuration +- each environment inside overlay gets a different directory +- to view the effective generated manifest, use `kubectl kustomize overlays/env` +- to apply, use `kubectl apply --kustomize overlays/env` +- kustomization.yaml can have things like common labels, annotations, common prefixes and suffixes, etc +- resources - what files to process +- commonLabels - add this to all labels **and selectors** - recall how this can easily remove human errors while for e.g. linking services to pods etc +- namePrefix and nameSuffix - add for e.g. 
application specific prefixes and suffixes to all manifests +- therefore, base/kustomization.yaml - + ```yaml + resources: + \- deployment.yaml + \- service.yaml + + commonLabels: + name: demo + + namePrefix: kustomize- + nameSuffix: -v1 + ``` +- patchesStrategicMerge - which files to use as patches. some properties in the patch files like name, kind, etc can be used to match its corresponding raw file in base, and its properties like replicas can override / add propeties to the raw file +- therefore, overlays/dev/replicas.yaml - + ```yaml + apiVersion: apps/v1 + kind: Deployment + metadata: + name: webapp + spec: + replicas: 2 + ``` +- configMapGenerator - we just specify the name and the file to use here. the name that we specify here should match what we use in deployment.yaml when populating volumes / environment variables using config maps +- therefore, overlays/dev/kustomization.yaml - + ```yaml + resources: + \- ../../base + + patchesStrategicMerge: + \- "replicas.yaml" + + namespace: dev + + configMapGenerator: + \- name: some-config + env: config.properties + ``` +- overlays/dev/config.properties - + ``` + CUSTOM_HEADER=Inside Prod + ``` diff --git a/_posts/2023-11-18-spring-reactive.md b/_posts/2023-11-18-spring-reactive.md new file mode 100644 index 0000000..5313298 --- /dev/null +++ b/_posts/2023-11-18-spring-reactive.md @@ -0,0 +1,1633 @@ +--- +title: Spring Reactive +--- + +## Project Reactor + +- traditional approach - **one thread per request**. issues - + - there a lot of **io tasks**, e.g. calls to database, file system access, calling other microservices, etc. these io tasks **block** the threads + - each thread **consumes resources**, so for 400 concurrent requests, we would end up consuming the resources for 400 threads +- recall how javascript works - once for e.g. web apis have successfully completed the io task, they place the callback on the (micro task / job) queue or (callback / task) queue. then, the event loop picks it from there and places it on the stack +- different paradigms - + - sync + blocking - we make an io call and wait for it to be finished + - async - we delegate the work to a separate thread, which now has to first make the io call and then wait for the call to be finished + - non blocking - we make the io call are notified automatically when the io task is over. we do not have to wait for the io call. i feel this is javascript + - async + non blocking - even the non blocking call is delegated to a separate thread i.e. the separate thread makes the io call, and then is notified automatically when the io task is over. i feel this is what we are trying to achieve using project reactor +- **observer pattern** is used. react as and when a new message is received + - `Publisher` publishes updates + - `Subscriber` subscribes for updates + - publisher has a `subscribe` method, which receives a subscriber + - a `Subscription` object establishes this relationship. this is returned from `subscribe` + - subscriber has the following callbacks which a publisher can call - + - `onNext` for new data + - `onComplete` when the publisher work is done. no more new data would be sent + - `onError` for error. 
in this case also, no more new data would be sent +- synonyms + - publisher, observable, source, upstream, producer + - subscriber, observer, sink, downstream, consumer +- i think we also move to a more **declarative style** of coding instead of the usual **imperative style** when we use the reactive programming model +- **reactive streams** - a specification just like jpa +- some implementations of reactive streams - + - akka + - rxjava + - project reactor (this is covered) +- so, i think we have four things now - asynchronous, non blocking, observer pattern and declarative style of coding +- project reactor has two different implementations of publisher - `Mono` and `Flux` +- mono can emit 0 or 1 item +- i think 0 means publisher can call `onComplete` directly before `onNext` +- flux can emit 0 to n items +- analogy - mono is `null`, `Optional`, flux is `List`, `Stream` + ```java + Mono mono$ = Mono.just(1); + + // only onNext callback is provided + mono$.subscribe(i -> System.out.println("received: " + i)); + + // providing onNext, onError, onComplete + mono$.subscribe( + (i) -> System.out.println("received: " + i), + (e) -> System.out.println("error: " + e.getMessage()), + () -> System.out.println("completed") + ); + ``` +- use - + - `Mono.just(val)` for one value + - `Mono.empty()` for no values + - `Mono.error(error)` for an exception +- `Mono.just(randomName())` - randomName is always called, even if not used +- to prevent this, we can use `Mono.fromSupplier(() -> randomName())` +- _this way of thinking has been reused when discussing using blocking spring data jpa with reactive webflux later!_ +- **lazy** - nothing happens till we subscribe, any statements inside the mono, chained map, etc would not be run till we subscribe to it +- execute synchronously - use block + ```java + String capitalizedName = Mono.just(faker.name().fullName()) + .map(name -> { + Util.sleep(5); + return name.toUpperCase(); + }) + .block(); + ``` +- from future - `Mono.fromFuture(future)` +- from runnable - `Mono.fromRunnable(runnable)`. note - since runnable does not return anything, in this case, only on complete would be called, not on next. so, we can also use fromRunnable as an alternative to fromSupplier when our method does not have any return value +- i think the main reason for `fromFuture`, `fromRunnable` etc is to help with interoperability +- just like mono, we have `just` in flux, but it accepts varargs - `Flux.just(1, 2, 3)` +- `fromIterable` in flux - + ```java + Flux.fromIterable(List.of( + faker.name().fullName(), + faker.name().fullName(), + faker.name().fullName() + )); + ``` +- java streams can be only consumed once - + ```java + Stream intStream = Stream.of(1, 2, 3); + + // 1 2 3 + intStream.forEach(System.out::println); + + // java.lang.IllegalStateException: stream has already been operated upon or closed + intStream.forEach(System.out::println); + ``` +- we know that traditionally, a flux can have multiple subscribers (discussed later in hot vs cold). but if using `fromStream`, if the same stream is used, we get the same error as described above - + ```java + Stream intStream = Stream.of(1, 2, 3); + Flux intFlux = Flux.fromStream(intStream); + + intFlux.subscribe(...) // works + intFlux.subscribe(...) // will fail with the same exception + ``` +- utility method `range`, e.g. 
10 items - + ```java + Flux names$ = Flux.range(1, 10) + .map((i) -> faker.name().fullName()); + ``` +- `log` explained - if we chain log to above example like so - + ```java + Flux names$ = Flux.range(1, 10) + .log() + .map((i) -> faker.name().fullName()) + .log(); + + names$.subscribe((t) -> System.out.println("on next: " + t)); + ``` +- output - since we have two logs, the first one of every pair is for the first log, and the second one for the second log + ``` + [ INFO] (main) | onSubscribe([Synchronous Fuseable] FluxRange.RangeSubscription) // line 3's subscription + [ INFO] (main) | onSubscribe([Fuseable] FluxMapFuseable.MapFuseableSubscriber) // line 6's subscription + [ INFO] (main) | request(unbounded) // for line 3 + [ INFO] (main) | request(unbounded) // for line 6 + [ INFO] (main) | onNext(1) // what line 3's subscriber's onNext is called with + [ INFO] (main) | onNext(Derek Aufderhar) // what line 6's subscriber's onNext is called with + on next: Derek Aufderhar + [ INFO] (main) | onNext(2) + [ INFO] (main) | onNext(Chantell Kuvalis) + on next: Chantell Kuvalis + [ INFO] (main) | onNext(3) + [ INFO] (main) | onNext(Lonnie Pollich) + on next: Lonnie Pollich + ``` +- using a custom subscriber. important note - look how we have to call `request` manually now. this was being done bts automatically for us. note - i also had to call `cancel` manually, otherwise `onComplete` was not being called + ```java + AtomicReference subscriptionAtomicRef = new AtomicReference<>(); + + Flux.range(1, 5) + .subscribeWith(new Subscriber() { + @Override + public void onSubscribe(Subscription subscription) { subscriptionAtomicRef.set(subscription); } + + @Override + public void onNext(Integer integer) { System.out.println("on next: " + integer); } + + @Override + public void onError(Throwable throwable) { System.out.println("on error: " + throwable.getMessage()); } + + @Override + public void onComplete() { System.out.println("on complete"); } + }); + + subscriptionAtomicRef.get().request(5); + Util.sleep(5); + subscriptionAtomicRef.get().cancel(); + ``` +- another note - see how we set `subscriptionAtomicRef`. 
this is because we wanted to control it from outside the subscription, not inside +- central difference between a normal list and flux - **in flux, since we have implemented the observer pattern, we will get items as they are available, while in list, the entire structure has to be available at one go** +- interval - emit an item every specified duration - + ```java + Flux names$ = Flux.interval(Duration.ofSeconds(1)) + .map(i -> Util.faker.name().fullName()); + names$.subscribe(...); + Util.sleep(5); // to block main thread + ``` +- inter conversion between flux and mono - + ```java + Mono fluxToMono = Flux.range(1, 10).next(); + Flux monoToFlux = Flux.from(Mono.just(1)); + ``` +- take operator - + ```java + Flux.range(1, 3) + .log() + .take(2) + .log() + .subscribe(t -> System.out.println("on next: " + t)); + ``` +- output - see last two lines specially - since we call take, the take calls cancel on its own subscription, and calls on complete for anything thats its downstream + ``` + [ INFO] (main) | onSubscribe([Synchronous Fuseable] FluxRange.RangeSubscription) + [ INFO] (main) onSubscribe(FluxLimitRequest.FluxLimitRequestSubscriber) + [ INFO] (main) request(unbounded) + [ INFO] (main) | request(2) + [ INFO] (main) | onNext(1) + [ INFO] (main) onNext(1) + on next: 1 + [ INFO] (main) | onNext(2) + [ INFO] (main) onNext(2) + on next: 2 + [ INFO] (main) | cancel() + [ INFO] (main) onComplete() + ``` +- `flux.create` - we can manually call `next`, `complete`, `error` on flux sink. emit a country till we encounter canada - + ```java + Flux country$ = Flux.create(fluxSink -> { + String country; + do { + country = Util.faker.country().name(); + fluxSink.next(country); + } while (!"canada".equalsIgnoreCase(country) && !fluxSink.isCancelled()); + fluxSink.complete(); + }); + ``` +- note - had we not called `fluxSink.isCancelled`, when the subscriber would have called cancel on its subscription, the flux sink would have continued emitting items. the subscriber would not have cared, since it has already called its on complete. but resources are still being consumed by the publisher itself +- so, one small confusion i had probably cleared - if we call `subscription.cancel()`, it does not guarantee that publisher would stop running - so i think **it is the job of the producer** to kep checking if subscription has been cancelled, and if it has, it should stop emitting items? +- in create, we have to check manually if the downstream is cancelled, and we have a lot of flexibility in terms of emitting items. in generate, feels like we can emit items in only "one way". the method will be automatically called for us infinitely till we use something like take, cancel the subscription, call `synchronousSink.complete()`, etc. 
it makes code more concise, and checks like is cancelled is not needed - + ```java + Flux names$ = Flux.generate(synchronousSink -> synchronousSink.next(Util.faker.name().fullName())) + .take(5); + // emit 5 items + // then cancel subscription and trigger on complete + ``` +- how to emit a country till we encounter canada using generate - + ```java + Flux country$ = Flux.generate(synchronousSink -> { + String country = Util.faker.country().name(); + synchronousSink.next(country); + if ("canada".equalsIgnoreCase(country)) synchronousSink.complete(); + }); + ``` +- so, generate vs create - + - function passed to create will be invoked only once, function passed to generate would be invoked multiple times + - create will not take into account downstream demand subscription cancellation etc, generate will +- one small issue with generate that was not in create - state. how can we implement state, that is persisted and available "across" executions? recall the lambda of create is only triggered once i.e. we wrote the loop manually, so we can have the state outside the loop and modify it as needed inside the loop. in generate, the lambda is triggered automatically for us bts, so we cannot do that. one method - outside the flux. but flux generate does provide us state capabilities as well. e.g. emit countries till canada is encountered or we reach the limit (10) + ```java + // method 1 - maintain state manually + AtomicInteger state = new AtomicInteger(1); + Flux country$ = Flux.generate((sink) -> { + String country = Util.faker.country().name(); + sink.next(country); + if ("canada".equalsIgnoreCase(country) || state.getAndIncrement() == 10) sink.complete(); + }); + + // method 2 - using flux state + Flux country$ = Flux.generate( + () -> 1, // initial state + (state, sink) -> { + String country = Util.faker.country().name(); + sink.next(country); + if ("canada".equalsIgnoreCase(country) || state == 10) sink.complete(); + return state + 1; + } + ); + ``` +- handle - it accepts a function which is called once per item, and also has the sink. so we can call complete, next, error etc whatever we want. it feels like a supercharged filter / map, which looks like generate + ```java + Flux handle$ = Flux + .generate((SynchronousSink sink) -> sink.next(Util.faker.country().name())) + .handle((item, sink) -> { + sink.next(item); + if ("canada".equalsIgnoreCase(item)) sink.complete(); + }); + ``` +- we have multiple callbacks / lifecycle hooks - + ```java + Flux numbers$ = Flux.range(1, 10) + .doOnComplete(() -> System.out.println("do on complete")) + .doFirst(() -> System.out.println("do first")) + .doOnNext((i) -> System.out.println("do on next " + i)) + .doOnSubscribe((subscription) -> System.out.println("do on subscribe")) + .doOnRequest((number) -> System.out.println("do on request " + number)) + .doOnError((throwable) -> System.out.println("do on error " + throwable.getMessage())) + .doOnTerminate(() -> System.out.println("do on terminate")) + .doFinally((signalType) -> System.out.println("do finally " + signalType)); + ``` +- limit rate - e.g. not all the data is loaded in facebook at one go. as and when we scroll down, more content is loaded. similarly with limit rate, instead of the subscription calling request(unbounded), the subscription calls request(specified_number). once 75% of the specified_number is consumed by the subscriber, more request(specified_number - consumed_items) amount of items are added. 
so, its like a buffer of maximum specified_number is maintained, which is filled back up when 75% of it is consumed. limit rate also accepts a second argument to customize the 75%, not discussed here + ```java + Flux.range(1, 100) + .log() + .limitRate(10); + ``` +- output - + ``` + [ INFO] (main) | onSubscribe([Synchronous Fuseable] FluxRange.RangeSubscription) + [ INFO] (main) | request(10) -- fill the queue with 10 items + [ INFO] (main) | onNext(1) + [ INFO] (main) | onNext(2) + ... + [ INFO] (main) | onNext(8) + [ INFO] (main) | request(8) -- 75% consumed, request enough to fill the queue with 10 items + [ INFO] (main) | onNext(9) + ``` +- handling errors - we already know that we can provide an on error callback from a subscriber. but to handle it in the pipeline itself - + - on error return - a hardcoded fallback value would be returned - + ```java + Flux.range(1, 100) + .map(i -> 10 / (3 - i)) + .onErrorReturn(-1); + // 5 -> 10 -> -1 -> onComplete (not onError) + ``` + - on error resume - a different publisher i.e. flux / mono would be resumed from the point of error - + ```java + Flux.range(1, 100) + .map(i -> 10 / (3 - i)) + .onErrorResume((e) -> Flux.range(1, 3)); + // 5 -> 10 -> 1 -> 2 -> 3 -> onComplete (not onError) + ``` + - on error continue - skip the item where the error occurred and continue processing the items. **important - in the two methods above, albeit we failed silently, the processing in the original flux was stopped as soon as the error occurred, unlike in on error continue** + ```java + Flux.range(1, 5) + .map(i -> 10 / (3 - i)) + .onErrorContinue((error, obj) -> { + // do whatever with error, obj + }); + // 5 -> 10 -> -10 -> -5 -> onComplete (not onError) + ``` +- handling empty - + - default if empty - a hardcoded value would be returned (like on error return) + ```java + Flux.range(1, 10) + .filter(i -> i > 10) + .defaultIfEmpty(-1); + // -1 + ``` + - switch if empty - a different publisher i.e. flux / mono would be returned (like on error resume) + ```java + var integers$ = Flux.range(1, 10) + .filter(i -> i > 10) + .switchIfEmpty(Flux.range(-5, 5)); + // -5 -4 -3 -2 -1 + ``` +- switch on first - first request(1) is called. the result of this gets sent to the function we pass to switch on first. based on this, our function can decide what to do. after this, request(unbounded) is called. i think it is useful when we want to make a decision based on the first element of the flux. e.g. if we find the first element has age < 20, return just one person(underage, the_age). else, return the entire list of people i.e. continue the original flux - + ```java + Flux.range(1, 10) + .map(i -> Person.generate()) + .log() + .switchOnFirst((signal, personFlux) -> + signal.isOnNext() && signal.get().getAge() < 20 ? + Mono.just(new Person("underage", signal.get().getAge())) : + personFlux + ); + ``` +- **transform** - useful when we have a set of operators that are duplicated in multiple pipelines. the function accepts `Flux` and returns `Flux`. idea is multiple pipelines can call transform with the same function, and these set of operators get applied to all of these pipelines + ```java + Flux.range(1, 10) + .transform(TransformDemo::transformer); + + // ... + private static Flux transformer(Flux integers$) { + return integers$.map(i -> Person.generate()); + } + ``` +- map - transform one element to another type. flat map - return a flux for an element. the subscriber still sees a stream of element. the flat map helps abstract away this complexity by "flattening" the flux. 
we try getting all persons, and for each person, we try getting all their orders, which itself is a flux. by using flat map, the subscriber just receives a stream of orders + ```java + Flux.range(1, 10) + .map(i -> Person.generate()) + .flatMap(FlatMapDemo::getOrders); + + private static Flux getOrders(Person person) { + return Flux.range(1, Util.faker.random().nextInt(4)) + .map(i -> Order.generate(person)); + } + ``` +- publisher emits data when a subscriber subscribes to the data. so till now, whatever we saw was a **cold publisher** - every subscriber has their own data + ```java + public class GenericSubscriber { + + public static void subscribe(Flux flux$, String subscriberName) { + String thread = Thread.currentThread().getName(); + flux$.subscribe( + (t) -> System.out.printf("[%s] %s> on next: %s%n", thread, subscriberName, t), + (throwable) -> System.out.printf("[%s] %s> on error: %s%n", thread, subscriberName, throwable.getMessage()), + () -> System.out.printf("[%s] %s> on complete%n", thread, subscriberName) + ); + } + } + + Flux movie$ = Flux.range(1, 5) + .delayElements(Duration.ofSeconds(1)) + .map((i) -> "stream " + i); + GenericSubscriber.subscribe(movie$, "mike"); + Util.sleep(2); + GenericSubscriber.subscribe(movie$, "sam"); + Util.sleep(50); + ``` +- output - + ``` + [main] mike> on next: stream 1 + [main] mike> on next: stream 2 + [main] mike> on next: stream 3 + [main] sam> on next: stream 1 + [main] mike> on next: stream 4 + [main] sam> on next: stream 2 + [main] mike> on next: stream 5 + [main] mike> on complete + [main] sam> on next: stream 3 + [main] sam> on next: stream 4 + [main] sam> on next: stream 5 + [main] sam> on complete + ``` +- **hot publisher** - one data producer for all subscribers. the only difference here is line 4. share converts a cold publisher to hot publisher + ```java + Flux movie$ = Flux.range(1, 5) + .delayElements(Duration.ofSeconds(1)) + .map((i) -> "stream " + i) + .share(); + GenericSubscriber.subscribe(movie$, "mike"); + Util.sleep(2); + GenericSubscriber.subscribe(movie$, "sam"); + Util.sleep(50); + ``` +- output - + ``` + [main] mike> on next: stream 1 + [main] mike> on next: stream 2 + [main] mike> on next: stream 3 + [main] sam> on next: stream 3 + [main] mike> on next: stream 4 + [main] sam> on next: stream 4 + [main] mike> on next: stream 5 + [main] sam> on next: stream 5 + [main] mike> on complete + [main] sam> on complete + ``` +- `share` basically is `publish().refCount(1)`. the argument passed to ref count basically tells the minimum number of subscribers required by the producer to start producing. in the above example, if we replace share by for e.g. `publish().refCount(2)`, the output would be as follows, because the producer will start producing only when the subscriber sam subscribes + ``` + [main] mike> on next: stream 1 + [main] sam> on next: stream 1 + [main] mike> on next: stream 2 + [main] sam> on next: stream 2 + [main] mike> on next: stream 3 + [main] sam> on next: stream 3 + [main] mike> on next: stream 4 + [main] sam> on next: stream 4 + [main] mike> on next: stream 5 + [main] sam> on next: stream 5 + [main] mike> on complete + [main] sam> on complete + ``` +- if we increase the sleep between mike and sam, we see sam starts receiving all the elements again! 
so, its almost like when a set of subscribers see to the end of a hot publisher, the hot publisher restarts + ```java + GenericSubscriber.subscribe(movie$, "mike"); + Util.sleep(7); + GenericSubscriber.subscribe(movie$, "sam"); + Util.sleep(30); + ``` +- output - + ``` + [main] mike> on next: stream 1 + [main] mike> on next: stream 2 + [main] mike> on next: stream 3 + [main] mike> on next: stream 4 + [main] mike> on next: stream 5 + [main] mike> on complete + [main] sam> on next: stream 1 + [main] sam> on next: stream 2 + [main] sam> on next: stream 3 + [main] sam> on next: stream 4 + [main] sam> on next: stream 5 + [main] sam> on complete + ``` +- example - + ```java + Flux flux$ = Flux.create((FluxSink fluxSink) -> { + Util.log("inside create"); + fluxSink.next(2); + }); + + flux$.subscribe((i) -> Util.log("subscribe " + i)); + flux$.subscribe((i) -> Util.log("subscribe " + i)); + ``` +- output - + ``` + main: inside create + main: subscribe 2 + main: inside create + main: subscribe 2 + ``` +- understand - all the process happens in the current thread - main +- schedulers available in reactive - + - bounded elastic - for networking / io time consuming tasks + - parallel - for cpu intensive tasks + - single - dedicated thread for tasks + - immediate - current thread +- subscribe on - + ```java + Flux flux$ = Flux.create((FluxSink fluxSink) -> { + Util.log("inside create"); + fluxSink.next(2); + }) + .subscribeOn(Schedulers.boundedElastic()); + + flux$.subscribe((i) -> Util.log("subscribe " + i)); + flux$.subscribe((i) -> Util.log("subscribe " + i)); + ``` +- output - + ``` + boundedElastic-2: inside create + boundedElastic-1: inside create + boundedElastic-2: subscribe 2 + boundedElastic-1: subscribe 2 + ``` +- point to remember - scheduler does not mean execute my current pipeline in parallel. one pipeline execution still happens in one thread, e.g. boundedElastic-2. scheduler with subscribe on means that if for e.g. we have multiple subscribers, each of them would be executed in its own thread. recall how without the subscribe on, everything was happening inside the main thread +- also note how subscribe on effects the entire pipeline from top i.e. look at the thread name of "inside create" +- publish on - + ```java + Flux flux$ = Flux.create((FluxSink fluxSink) -> { + Util.log("inside create"); + fluxSink.next(2); + }) + .publishOn(Schedulers.boundedElastic()); + + flux$.subscribe((i) -> Util.log("subscribe " + i)); + flux$.subscribe((i) -> Util.log("subscribe " + i)); + ``` +- output - + ``` + main: inside create + main: inside create + boundedElastic-1: subscribe 2 + boundedElastic-2: subscribe 2 + ``` +- so, publish on only affected the downstream (whatever operators came after it) i.e. look at the thread name of "inside create". note how this behavior is different from what we saw inside subscribe on +- rule - subscribe on affects upstream and publish on affects downstream +- so, its almost like subscribe on will go over and hand its scheduler to the actual source. 
this way, all the operators after it are affected by subscribe on (until maybe a publish on is encountered) +- publish on is relatively simpler to visualize, since it affects all operators after it +- multiple subscribe on - + ```java + Flux.range(1, 1) + .subscribeOn(Schedulers.boundedElastic()) + .doOnNext((i) -> Util.log("inside on next 1")) + .subscribeOn(Schedulers.parallel()) + .doOnNext((i) -> Util.log("inside on next 2")) + .subscribe((i) -> Util.log("inside subscribe")); + ``` +- output - look how parallel was ignored, the one closer to the actual source gets executed. my understanding - this might be because **execution happens bottom to top**. its almost like the operator subscribe to the one above it and so on. so, maybe first, the second subscribe on hands on over its scheduler, and then the first subscribe on hands on over its scheduler, thus overwriting what the second scheduler did + ``` + boundedElastic-1: inside on next 1 + boundedElastic-1: inside on next 2 + boundedElastic-1: inside subscribe + ``` +- multiple publish on - + ```java + Flux.range(1, 1) + .publishOn(Schedulers.boundedElastic()) + .doOnNext((i) -> Util.log("inside on next 1")) + .publishOn(Schedulers.parallel()) + .doOnNext((i) -> Util.log("inside on next 2")) + .subscribe((i) -> Util.log("inside subscribe")); + ``` +- output - the scheduler closest to before an operator gets used + ``` + boundedElastic-1: inside on next 1 + parallel-1: inside on next 2 + parallel-1: inside subscribe + ``` +- combination of the two - + ```java + Flux.range(1, 1) + .doOnNext((i) -> Util.log("inside on next 1")) + .publishOn(Schedulers.boundedElastic()) + .doOnNext((i) -> Util.log("inside on next 2")) + .subscribeOn(Schedulers.parallel()) + .doOnNext((i) -> Util.log("inside on next 3")) + .subscribe((i) -> {}); + ``` +- output - + ``` + parallel-1: inside on next 1 + boundedElastic-1: inside on next 2 + boundedElastic-1: inside on next 3 + ``` +- diagram -
+ ![schedulers](/assets/img/spring-reactive/schedulers.drawio.png) +- schedulers help run different instances of the same pipeline run in different threads. to process the items of a pipeline in parallel, we can use a combination of parallel and run on - + ```java + Flux.range(1, 5) + .parallel() + .runOn(Schedulers.boundedElastic()) + .doOnNext((i) -> Util.log("inside on next " + i)) + .subscribe((i) -> {}); + ``` +- output - scheduler has 4 threads probably because there are 4 cores in cpu - + ```java + boundedElastic-4: inside on next 4 + boundedElastic-3: inside on next 3 + boundedElastic-1: inside on next 1 + boundedElastic-1: inside on next 5 + boundedElastic-2: inside on next 2 + ``` +- once we do parallel() like above, we would not have access to publish on, subscribe on, etc (which logically makes sense i think, because both are very different methods of achieving parallelism). to make the parallelized flux come back together, we can chain sequential - + ```java + // compilation error + .parallel() + .runOn(Schedulers.boundedElastic()) + .subscribeOn(Schedulers.parallel()) + + // works + .parallel() + .runOn(Schedulers.boundedElastic()) + .sequential() + .subscribeOn(Schedulers.parallel()) + ``` +- so, concepts we discussed - `subscribeOn()`, `publishOn()`, `.parallel().runOn()` and with it `.sequential()` +- publisher publishes at a faster rate than the consumer can consume. this is called **back pressure** / **overflow** +- overflow strategies in project reactor (basically `onBackPressure` prefix is constant, remaining suffixes have been mentioned below) - + - drop - once the queue is full, drop the remaining items. as simple as chaining on back pressure drop. optionally, this also accepts a callback, which receives the dropped value, which we can handle accordingly - + ```java + Flux.range(1, 500) + .doOnNext((i) -> Util.sleepMillis(2)) + .onBackpressureDrop((i) -> Util.log("dropped " + i)) + .doOnNext((i) -> Util.log("produced " + i)) + .publishOn(Schedulers.boundedElastic()) + .doOnNext((i) -> Util.sleepMillis(5)) + .doOnNext((i) -> Util.log("consumed " + i)) + .subscribe((i) -> {}); + ``` + - latest - like drop, but **one latest value** keeps getting overwritten - so, just like drop but just that one latest value is present as well + - error - an error is thrown to the downstream + - buffer - the default. keep in memory. so, my understanding - the risk here is exceptions like out of memory. so, we can optionally configure a size like below. when the buffer limit is reached, `OverflowException` is thrown - + ```java + .onBackpressureBuffer(20) + ``` +- combining publishers - start with, concat, zip, merge, combine latest +- **start with** - start with the provided flux. when it gets over, start with the original flux. e.g. we generate random names (which assume is slow), so we add it to cache as well. in the second subscription, the first two names are received from the cache and are therefore quick + ```java + List cache = new ArrayList<>(); + + Flux names$ = Flux + .generate((SynchronousSink sink) -> sink.next(Util.faker.name().fullName())) + .doOnNext((i) -> Util.log("generating fresh name...")) + .doOnNext(cache::add) + .startWith(Flux.fromIterable(cache)); + + GenericSubscriber.subscribe(names$.take(2), "sam"); + GenericSubscriber.subscribe(names$.take(3), "mike"); + ``` +- output - + ``` + main: generating fresh name... + [main] sam> on next: Dylan Kertzmann + main: generating fresh name... 
+ [main] sam> on next: Cole Vandervort + [main] sam> on complete + [main] mike> on next: Dylan Kertzmann + [main] mike> on next: Cole Vandervort + main: generating fresh name... + [main] mike> on next: Kandis Douglas + [main] mike> on complete + ``` +- **concat** - like start with, but appends instead of prepending + ```java + Flux one = Flux.just("a", "b", "c"); + Flux two = Flux.just("d", "e"); + GenericSubscriber.subscribe(one.concatWith(two)); + // output - a b c d e + ``` +- we can also use an alternate syntax to combine at one go - `Flux.concat(one, two, three...)`. what if one of the flux we tried to perform concat on had an error? the subscriber would immediately halt with the error. however, we can push the error to the end i.e. after the emission from all the fluxes is over using `concatDelayError` - + ```java + Flux one = Flux.just("a", "b"); + Flux two = Flux.error(new RuntimeException("oops...")); + Flux three = Flux.just("c"); + + GenericSubscriber.subscribe(Flux.concat(one, two, three)); + // [main] > on next: a + // [main] > on next: b + // [main] > on error: oops... + + GenericSubscriber.subscribe(Flux.concatDelayError(one, two, three)); + // [main] > on next: a + // [main] > on next: b + // [main] > on next: c + // [main] > on error: oops... + ``` +- remember - both in concact and in start with, we expect a flux to be over before jumping on to the next flux +- **merge** - merge all fluxes i.e. they will all simultaneously emit to the subscriber. they can all emit at their own rates, and the subscriber will receive all items from all fluxes + ```java + Flux qatar$ = Flux + .generate((SynchronousSink sink) -> sink.next("Qatar " + Util.faker.random().nextInt(5))) + .delayElements(Duration.ofSeconds(3)); + Flux emirates$ = Flux + .generate((SynchronousSink sink) -> sink.next("Emirates " + Util.faker.random().hex())) + .delayElements(Duration.ofSeconds(1)); + Flux spiceJet$ = Flux + .generate((SynchronousSink sink) -> sink.next("Spice Jet " + Util.faker.random().nextInt(50000))) + .delayElements(Duration.ofSeconds(2)); + + Flux flights$ = Flux.merge(qatar$, emirates$, spiceJet$); + + GenericSubscriber.subscribe(flights$); + ``` +- my understanding - project reactor probably ensures delay elements is run in a background thread. recall how by default, if not using schedulers, project reactor executes everything in the main thread. when i was using `.doOnNext(() -> Util.sleep(1))` instead of `.delayElements(Duration.ofSeconds(1))`, the main thread was getting blocked, and only qatar was being able to emit. same i think appplies to `Flux.interval` i.e. sleep of it happens in a background thread +- **zip** - e.g. imagine a car building pipeline needs one car body, one engine and one set of tires. assume all the three components are fluxes of their own. it can happen that tire manufacturing is much faster than engine, but we only need one of each of the three components at a time to assemble a car. this is ensured using zip + ```java + Flux engine$ = Flux + .generate((SynchronousSink sink) -> sink.next("engine")) + .delayElements(Duration.ofSeconds(3)); + Flux body$ = Flux + .generate((SynchronousSink sink) -> sink.next("body")) + .delayElements(Duration.ofSeconds(2)); + Flux wheels$ = Flux + .generate((SynchronousSink sink) -> sink.next("wheels")) + .delayElements(Duration.ofSeconds(1)); + + Flux> cars$ = Flux.zip(engine$, body$, wheels$); + + GenericSubscriber.subscribe(cars$); + ``` +- output - can emit only once every 3 seconds (slowest flux)? 
+ ``` + [main] > on next: [engine,body,wheels] + [main] > on next: [engine,body,wheels] + [main] > on next: [engine,body,wheels] + ``` +- **combine latest** - combine the latest emitted element from all the fluxes + ```java + Flux one$ = Flux.just("a", "b", "c") + .delayElements(Duration.ofSeconds(3)); + Flux two$ = Flux.just("a", "b", "c") + .delayElements(Duration.ofSeconds(2)); + + Flux> combined = Flux.combineLatest(one$, two$, (a, b) -> List.of(a, b)); + + GenericSubscriber.subscribe(combined); + ``` +- output - + ``` + [main] > on next: [a, a] // 3rd second + [main] > on next: [a, b] // 4th second + [main] > on next: [a, c] // 6th second + [main] > on next: [b, c] // 6th second + [main] > on next: [c, c] // 9th second + ``` +- note about behavior, do not get confused - even the same instance of publisher is treated separately by separate subscribers, since default is cold publisher, not hot + ```java + Flux flux = Flux.just("a", "b", "c"); + GenericSubscriber.subscribe(flux.startWith(flux)); + + // output - + // a, b, c, a, b, c, on complete ✅ + // a, b, c, on complete ❌ + ``` +- **batching** - buffer, window, group +- buffer - collect in groups of 5 items. for the last batch, it would not just hang and wait for 5 items, but just emit the remaining items. so, point to remember - it is important to ensure our publishers to always emit a complete signal once they are done, otherwise it can cause unexplainable behavior + ```java + Flux> events$ = Flux.interval(Duration.ofMillis(300)) + .map((i) -> "event " + i) + .take(8) + .buffer(5); + + GenericSubscriber.subscribe(events$); + ``` +- output - + ``` + [main] > on next: [event 0, event 1, event 2, event 3, event 4] + [main] > on next: [event 5, event 6, event 7] + [main] > on complete + ``` +- buffer based on duration - + ```java + .buffer(Duration.ofSeconds(2)); + ``` +- best of both worlds - combination of both duration timeout and buffer size - + ```java + .bufferTimeout(5, Duration.ofSeconds(2)); + ``` +- another use case of buffer - if for e.g. i want last three items. the second parameter specifies how many items to "skip". since we specify 1, we get `[0,1,2]`, `[1,2,3]` and so on + ```java + Flux> events$ = Flux.interval(Duration.ofMillis(300)) + .map((i) -> "event " + i) + .buffer(3, 1); + + GenericSubscriber.subscribe(events$); + ``` +- output - + ``` + [main] > on next: [event 0, event 1, event 2] + [main] > on next: [event 1, event 2, event 3] + ``` +- **window** - like buffer, but it returns a flux and not a list. the advantage - same as list vs flux! if buffer size is 5, all the items for the buffer should be available in one go, since it uses a list. in window, we can get the items as and when they arrive, since a flux is used + ```java + [parallel-1] > on next: event 0 + [parallel-1] > on next: event 1 + [parallel-1] > on next: event 2 + [parallel-1] > on complete + [parallel-1] > on next: event 3 + [parallel-1] > on next: event 4 + [parallel-1] > on next: event 5 + [parallel-1] > on complete + ``` +- just like buffer, i do see option for + - passing duration to window + - using `windowTimeout` for best of both worlds i.e. duration and window size + - configure skip +- **group by** - works just like in for e.g. sql. note - do not use something with high cardinality. 
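- a rough sketch (my own) of the window version whose output was shown above - the window size and names here are just assumptions
  ```java
  Flux<Flux<String>> windows$ = Flux.interval(Duration.ofMillis(300))
      .map((i) -> "event " + i)
      .take(8)
      .window(3);

  // subscribe to each inner flux as it opens - items arrive one by one, unlike buffer's list
  windows$.subscribe((window$) -> GenericSubscriber.subscribe(window$));
  Util.sleep(5);
  ```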
again, this too is a flux of flux + ```java + Flux> flux$ = Flux.range(1, 30) + .delayElements(Duration.ofMillis(500)) + .groupBy(i -> i % 3); + + flux$.subscribe((f) -> { + Util.log("invoked for " + f.key()); + f.subscribe((a) -> Util.log(String.format("[%s]: %s", f.key(), a))); + }); + ``` +- output - + ``` + [parallel-1] invoked for 1 + [parallel-1] [1]: 1 + [parallel-2] invoked for 2 + [parallel-2] [2]: 2 + [parallel-3] invoked for 0 + [parallel-3] [0]: 3 + [parallel-4] [1]: 4 + [parallel-1] [2]: 5 + [parallel-2] [0]: 6 + [parallel-3] [1]: 7 + ``` +- **repeat** - resubscribe after complete signal. repeat 2 means repeat twice, i.e. total 3 times + ```java + Flux integers$ = Flux.range(1, 3) + .doOnComplete(() -> Util.log("do on complete (before repeat)")) + .repeat(2) + .doOnComplete(() -> Util.log("do on complete (after repeat)")); + + GenericSubscriber.subscribe(integers$); + ``` +- output - understand how the on complete of subscriber would be **only called once** + ``` + [main] > on next: 1 + [main] > on next: 2 + [main] > on next: 3 + [main] do on complete (before repeat) + [main] > on next: 1 + [main] > on next: 2 + [main] > on next: 3 + [main] do on complete (before repeat) + [main] > on next: 1 + [main] > on next: 2 + [main] > on next: 3 + [main] do on complete (before repeat) + [main] do on complete (after repeat) + [main] > on complete + ``` +- repeat can also accept a boolean supplier - probably helps with making the decision of repeating dynamically + ```java + .repeat(() -> shouldIRepeatAgain()) + // ... + private Boolean shouldIRepeatAgain() { + // ... + } + ``` +- **retry** - resubscribe after error signal + ```java + var integers$ = Flux.range(1, 5) + .map((i) -> i / (i - 2)) + .doOnError((t) -> Util.log("do on error (before retry): " + t.getMessage())) + .retry(2) + .doOnError((t) -> Util.log("do on error (after retry): " + t.getMessage())); + + GenericSubscriber.subscribe(integers$); + ``` +- output - + ``` + [main] > on next: -1 + [main] do on error (before retry): / by zero + [main] > on next: -1 + [main] do on error (before retry): / by zero + [main] > on next: -1 + [main] do on error (before retry): / by zero + [main] do on error (after retry): / by zero + [main] > on error: / by zero + ``` +- **retry spec** - retry based on the type of error that occurs. e.g. it makes sense to retry when we get a 500, not 404 - + ```java + var http$ = Flux.generate((sink) -> { + if (state$.getAndIncrement() < 3) { + sink.error(new RuntimeException("500")); + } else { + sink.error(new RuntimeException("400")); + } + }) + .doOnError((t) -> Util.log("do on error (before retry): " + t.getMessage())) + .retryWhen(Retry.from((flux) -> flux.handle((Retry.RetrySignal rs, SynchronousSink sink) -> { + if (rs.failure().getMessage().equals("500")) sink.next("anything?"); + else sink.error(rs.failure()); + }))) + .doOnError((t) -> Util.log("do on error (after retry): " + t.getMessage())); + + GenericSubscriber.subscribe(http$); + ``` +- output - + ``` + [main] do on error (before retry): 500 + [main] do on error (before retry): 500 + [main] do on error (before retry): 400 + [main] do on error (after retry): 400 + [main] > on error: 400 + ``` +- sinks - producers emit values on the sinks, and subscriber can subscribe to sinks using `asMono` + ```java + Sinks.One sink = Sinks.one(); + Mono mono = sink.asMono(); + + sink.tryEmitValue("hello"); + GenericSubscriber.subscribe(mono); + ``` +- similarly, we can also call `tryEmitError` +- we looked at the try variation above i.e. `tryEmitValue`. 
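- side note, a tiny sketch (my own) - the try variants do not throw on failure, they hand back a result which is worth actually looking at
  ```java
  Sinks.One<String> sink = Sinks.one();

  Sinks.EmitResult first = sink.tryEmitValue("hello");
  Util.log("first emit: " + first);   // OK

  Sinks.EmitResult second = sink.tryEmitValue("bonjour");
  Util.log("second emit: " + second); // FAIL_TERMINATED - a one sink only emits once
  ```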
its return type is `Sinks.EmitResult`, which we can use to see any possible exceptions that might have occurred during the emitting of value. however, we can use the version without the try - `emitValue`, in which case we need to provide a callback, which is the **failure handler**. we can also return a boolean from the failure handler. if e return a true, it means the sink will retry emitting the value again automatically for us. the callback is a failure handler, it would only be called if there is a failure when trying to emit a value + ```java + sink.emitValue("hello", (signalType, emitResult) -> { + Util.log("signal type: " + signalType); + Util.log("emit result: " + emitResult); + return false; + }); + ``` +- so, for e.g. below, the second emit would fail, because the sink is of type one, so it allows emitting only one value + ```java + Sinks.One sink = Sinks.one(); + Mono mono = sink.asMono(); + + sink.tryEmitValue("hello"); + sink.emitValue("bonjour", (signalType, emitResult) -> { + Util.log("signal type: " + signalType); + Util.log("emit result: " + emitResult); + return false; + }); + + GenericSubscriber.subscribe(mono); + ``` +- output - + ```java + [main] signal type: onNext + [main] emit result: FAIL_TERMINATED + [main] > on next: hello + [main] > on complete + ``` +- if we would have returned true, we would have had an infinite loop! - it would try and fail every time +- based on above discussions, we should not just run try emit next and assume it worked! we should read its return value or use the emit value variant which accepts a callback +- types of sinks - **multicast** - multiple subscribers allowed, **unicast** - only one subscriber allowed + - one multicast + - many unicast + - many multicast + - many multicast with replay +- remember in many, unlike in one, we need to emit complete explicitly (complete emitted implicitly in one sink when we emit next). how to complete without emitting any value in one sink then? i can see that unlike many sink, one sink has `tryEmitEmpty` +- constructing a sink of many unicast type - + ```java + Sinks.Many sink = Sinks.many() + .unicast() + .onBackpressureBuffer(); + sink.tryEmitNext("how"); + sink.tryEmitNext("are"); + sink.tryEmitNext("you"); + sink.tryEmitComplete(); + + Flux flux = sink.asFlux(); + + GenericSubscriber.subscribe(flux, "mike"); + ``` +- output - + ``` + [main] mike> on next: how + [main] mike> on next: are + [main] mike> on next: you + [main] mike> on complete + ``` +- sink of many multicast type - + ```java + Sinks.Many sink = Sinks.many() + .multicast() + .directAllOrNothing(); + + Flux flux = sink.asFlux(); + + sink.tryEmitNext("how"); + sink.tryEmitNext("are"); + GenericSubscriber.subscribe(flux, "sam"); + GenericSubscriber.subscribe(flux, "mike"); + sink.tryEmitNext("you"); + GenericSubscriber.subscribe(flux, "jake"); + sink.tryEmitNext("doing"); + ``` +- output - first subscriber gets all "pending messages" (e.g. only sam gets how and are). then, the remaining subscribers "only get the messages that come after they subscribe" (e.g. 
sam and mike both get you, while all three sam, mike and jake get doing) + ``` + [main] sam> on next: how + [main] sam> on next: are + [main] sam> on next: you + [main] mike> on next: you + [main] sam> on next: doing + [main] mike> on next: doing + [main] jake> on next: doing + ``` +- if we change the method of constructing the flux like so - + ```java + Sinks.Many sink = Sinks.many() + .replay() + .all(); + ``` +- output - + ``` + [main] sam> on next: how + [main] sam> on next: are + [main] mike> on next: how + [main] mike> on next: are + [main] sam> on next: you + [main] mike> on next: you + [main] jake> on next: how + [main] jake> on next: are + [main] jake> on next: you + [main] sam> on next: doing + [main] mike> on next: doing + [main] jake> on next: doing + ``` +- **context** - a way for downstream to send information to upstream + ```java + Mono mono = Mono.deferContextual((ctx) -> { + if (ctx.hasKey("user")) return Mono.just("welcome " + ctx.get("user")); + return Mono.error(new RuntimeException("user not provided")); + }); + + GenericSubscriber.subscribe(mono.contextWrite(Context.of("user", "sam"))); + GenericSubscriber.subscribe(mono); + ``` +- output - + ``` + [main] > on next: welcome sam + [main] > on complete + [main] > on error: user not provided + ``` +- recall how context goes from downstream to upstream. recall that is how subscription works as well. here, we demo how because of this nature, the upper context will overwrite the context below it + ```java + Mono mono = Mono.deferContextual((ctx) -> { + if (ctx.hasKey("user")) return Mono.just("welcome " + ctx.get("user")); + return Mono.error(new RuntimeException("user not provided")); + }) + .contextWrite(Context.of("user", "jake")) + .contextWrite(Context.of("user", "sam")); + + GenericSubscriber.subscribe(mono); + ``` +- output - + ``` + [main] > on next: welcome jake + [main] > on complete + ``` +- use context value to write to context - + ```java + .contextWrite(ctx -> ctx.put("user", ctx.get("user").toUpperCase())) + ``` +- important to note - context is unmodifiable. when we call ctx.put, a new context is returned. so, while above is a shorthand since amount of processing needed is small, remember that **modified context needs to be returned** for it to change the context for upstream, i.e. `ctx.put` does not modify the original ctx +- test simple demo - + ```java + Flux just = Flux.just(1, 2, 3); + StepVerifier.create(just) + .expectNext(1) + .expectNext(2) + .expectNext(3) + .expectComplete() + .verify(); + + // or + + Flux just = Flux.just(1, 2, 3); + StepVerifier.create(just) + .expectNext(1, 2, 3) + .expectComplete() + .verify(); + ``` +- asserting error - + ```java + Flux flux = Flux.create((sink) -> { + sink.next(1); + sink.next(2); + sink.error(new IllegalStateException("an overflow occurred")); + }); + + StepVerifier.create(flux) + .expectNext(1, 2) + .expectError() + .verify(); + ``` +- other specific techniques for verifying error - + - `.expectError(IllegalStateException.class)` + - `.expectErrorMessage("an overflow occurred")` +- sometimes we might have many items, we cant specify all of them in `expectNext` as we saw earlier. 
so, we can use following tricks - + - expect next count - specify count + ```java + Flux range = Flux.range(1, 50); + + StepVerifier.create(range) + .expectNextCount(48) + .expectNext(49, 50) + .expectComplete() + .verify(); + ``` + - consume while - consume till predicate is satisfied + ```java + Flux range = Flux.range(1, 50); + + StepVerifier.create(range) + .thenConsumeWhile((i) -> i <= 45) + .expectNext(46, 47, 48, 49, 50) + .expectComplete() + .verify(); + ``` +- custom assertions - + ```java + Mono book$ = Mono.just(Book.generate()); + + StepVerifier.create(book$) + .assertNext((book) -> assertNotNull(book.getAuthor())) + .expectComplete() + .verify(); + ``` +- e.g. for the flux below - + ```java + Flux flux$ = Flux.range(1, 5) + .delayElements(Duration.ofSeconds(3)) + .map((i) -> i * i); + + StepVerifier.create(flux$) + .expectNext(1, 4, 9, 16, 25) + .expectComplete() + .verify(); + ``` +- output - it actually takes 15 seconds or so for the test to execute! so, we can use **virtual time** + ```java + @Test + public void two() { + StepVerifier.withVirtualTime(this::flux) + .thenAwait(Duration.ofSeconds(30)) + .expectNext(1, 4, 9, 16, 25) + .expectComplete() + .verify(); + } + + private Flux flux() { + return Flux.range(1, 5) + .delayElements(Duration.ofSeconds(3)) + .map((i) -> i * i); + } + ``` +- the test runs immediately, with no delay. note - this did not work for me - `StepVerifier.withVirtualTime(() -> some_flux)`, but this did - `StepVerifier.withVirtualTime(method_that_returns_some_flux)` and probably this would work as well, not sure - `StepVerifier.withVirtualTime(() -> method_that_returns_some_flux())` +- verifying context - + ```java + Mono mono = Mono.deferContextual((ctx) -> { + if (ctx.hasKey("user")) return Mono.just("welcome " + ctx.get("user")); + return Mono.error(new RuntimeException("user not provided")); + }); + + // verifying error is easy + StepVerifier.create(mono) + .expectErrorMessage("user not provided") + .verify(); + + // we have to provide context to verify happy path + StepVerifierOptions options = StepVerifierOptions.create() + .withInitialContext(Context.of("user", "sam")); + StepVerifier.create(mono, options) + .expectNext("welcome sam") + .expectComplete() + .verify(); + ``` + +## Spring WebFlux + +- traditionally, with spring mvc, a single thread is used per request +- also, each thread consumes a certain amount of resources of our compute - so threads are expensive +- with webflux, io is done in a non blocking way - the thread is notified once the io request responds +- thus, the thread is utilized more efficiently in webflux +- assume a separate service is called by a spring mvc application and a spring reactive application. assume this service takes 5 seconds to respond + - by default, the spring mvc uses 200 threads. so, if we have 400 concurrent requests, 200 out of these 400 requests will have to "wait" for the threads to be free. 
however, remember that the while processing the first 200 requests, the threads are just sitting blocked for 5 seconds, waiting for the separate service to respond + - however, with spring webflux, we only have threads = number of cores i think, and the thread would not wait on the network calls +- reactive manifesto - + - lazy - only do work when required + - responsive - keep showing one post and load more when scrolled, do not block to load all posts at once + - resilient - do not fail the whole system due to one service in the system + - elastic - throughput of system scales automatically with varying demand + - message driven - loose coupling, asynchronous communication, e.g. rsocket +- spring mvc uses the traditional "servlet api" using "servlet container" (discussed [here](/posts/spring)) +- spring webflux uses "reactive http" using netty (by default), but can use undertow, servlet 3.x, etc +- netty working - it has two thread groups - thread group one has the one "master thread", which does things like handshake etc and queues the request. threads from thread group two - "worker threads" pick up something from the queue and perform the action. if the worker thread has to call another service, it would not wait for the response - the entire point behind reactive programming is not being blocked! it would pick up something else from the queue, and continue processing the original request when the response comes back. my doubt - does the response basically get added back to the queue, and does it happen that another thread from the worker thread group continues processing this request? maybe not, because access to variables (stack etc) might be lost? +- to create a project select "spring reactive web" from start.spring.io +- when using spring webmvc, its like our webserver is the publisher, and the browser, calling service, etc are like the subscribers +- reactive example - return the multiplication table - + ```java + @GetMapping("/multiplication-table/{input}") + public Flux multiplicationTable(@PathVariable Integer input) { + return reactiveMathService.calculateMultiplicationTable(input); + } + + public Flux calculateMultiplicationTable(Integer input) { + return Flux.range(1, 10) + .doOnNext((i) -> log.info("processing " + i)) + .doOnNext((i) -> Util.sleep(2)) + .map((i) -> ResponseDto.builder() + .output(i * input) + .build()); + } + ``` +- however, in above as well, 20 seconds of wait can be seen in chrome before the entire result is returned. to use streaming api, we only need to change the get mapping line - + ```java + @GetMapping(value = "/multiplication-table-streaming/{input}", produces = MediaType.TEXT_EVENT_STREAM_VALUE) + ``` +- this way, our application also becomes responsive - show as and when data loads +- so, the difference is that when not using text event stream, spring would still behind the scenes collect the entire list of objects and then send the response. however, when using text event stream, spring would send the objects one by one. i think this does not mean we should always use text event stream, because our core service logic would still be performed in a reactive way, even if the response is not streaming +- if suppose we cancel a request - in case of spring mvc, the processing would not stop and would continue, unlike for the spring reactive method above, where the we would not see any more "log statements" once the request is cancelled. 
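- if we want to actually see this, we can hook into the cancellation signal - a small sketch (my own) on top of the same service method, `doOnCancel` is just one of the `doOn...` callbacks
  ```java
  public Flux<ResponseDto> calculateMultiplicationTable(Integer input) {
      return Flux.range(1, 10)
          .doOnNext((i) -> log.info("processing " + i))
          .doOnNext((i) -> Util.sleep(2))
          .map((i) -> ResponseDto.builder().output(i * input).build())
          .doOnCancel(() -> log.info("request cancelled, stopping work"));
  }
  ```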
so, spring reactive only does work when required +- request body can be read in a non blocking way as well - + ```java + @RequestBody Mono multiplicationRequestDto + ``` +- my understanding - if we do not use mono, the request body would have to be deserialized before the controller method being called, unlike when we use mono. however, the deserialization in both cases does happen in a non blocking way, so we should be good for the most part? +- exception handling, my understanding - either we can just write `throw new ...Exception...` etc or use the reactive way, e.g. `sink.error`, both work i.e. both will work with `@ControllerAdvice` and `@ExceptionHandler`, both ways get picked up by the `server.error...` properties we had discussed [here](/posts/spring), etc + ```java + public Mono calculateProduct(Mono multiplicationRequestDto) { + return multiplicationRequestDto.handle((dto, sink) -> { + if (dto.getFirst() <= 0 || dto.getSecond() <= 0) { + // option 1 + throw new ResponseStatusException(HttpStatus.BAD_REQUEST, "input numbers should be greater than 0"); + + // option 2 - feels like the better way, but both work according to me + sink.error(new ResponseStatusException(HttpStatus.BAD_REQUEST, "input numbers should be greater than 0")); + } else { + sink.next(ResponseDto.builder() + .output(dto.getFirst() * dto.getSecond()) + .build()); + } + }); + } + ``` +- functional endpoints - while the above works, this is an alternative method as well, to for e.g. obey a more functional style of programming + ```java + @Configuration + @RequiredArgsConstructor + public class RouterConfig { + + private final ReactiveMathService reactiveMathService; + + @Bean + public RouterFunction serverResponseRouterFunction() { + + return RouterFunctions.route() + .path("/functional-reactive-math", (builder) -> builder + .GET("/square/{input}", (request) -> { + Integer input = Integer.parseInt(request.pathVariable("input")); + Mono product = reactiveMathService.calculateSquare(input); + return ServerResponse.ok().body(product, ResponseDto.class); + }) + + .GET("/multiplication-table/{input}", (request) -> { + Integer input = Integer.parseInt(request.pathVariable("input")); + Flux table = reactiveMathService.calculateMultiplicationTable(input); + return ServerResponse.ok().body(table, ResponseDto.class); + }) + + .GET("/multiplication-table-streaming/{input}", (request) -> { + Integer input = Integer.parseInt(request.pathVariable("input")); + Flux table = reactiveMathService.calculateMultiplicationTable(input); + return ServerResponse.ok().contentType(MediaType.TEXT_EVENT_STREAM).body(table, ResponseDto.class); + }) + + .POST("/product", (request) -> { + Mono body = request.bodyToMono(MultiplicationRequestDto.class); + Mono product = reactiveMathService.calculateProduct(body); + return ServerResponse.ok().body(product, ResponseDto.class); + })) + .build(); + } + } + ``` +- note how we - + - use one `RouterFunction` to symbolize one controller + - extract path variable (square) + - extract request body (product) + - use streaming response (multiplication-table) + - use `@RequestMapping` on controller class like feature (`.path()`) +- my understanding - recall how we could have `@ExceptionHandler` inside a controller to handle exceptions specific to a controller? for the functional style, we can achieve the same by chaining onError. hopefully, i never have to use this 🤣 +- the builder pattern above also allows for request predicates i.e. 
only execute if condition is satisfied - + ```java + .GET("/square/{input}", RequestPredicates.path(".*/1?"), (request) -> { // ... + ``` +- so, we get 404 if we ask for square of anything not in the range 11-19. the entire builder is executed from top to down, so we can also have different ways of handling as follows. note how the last one is like a catch all + ```java + .GET("/square/{input}", RequestPredicates.path(".*/1?"), (request) -> { // ... + .GET("/square/{input}", RequestPredicates.path(".*/2?"), (request) -> { // ... + .GET("/square/{input}", (request) -> { // ... + ``` +- spring boot has **rest template** for making network calls, but it is blocking. so, we use **web client** for spring reactive +- constructing a web client - + ```java + @Bean + public WebClient webClient() { + return WebClient.builder() + .baseUrl("http://localhost:8080") + .defaultHeaders((headers) -> headers.setBasicAuth("username", "password")) + .build(); + } + ``` +- web client test - + ```java + @Test + public void webClientTest() { + + Flux response = webClient.get() + .uri("/reactive-math/multiplication-table-streaming/{input}", 7) + .retrieve() + .bodyToFlux(ResponseDto.class); + + StepVerifier.create(response) + .expectNextCount(7) + .assertNext((dto) -> assertEquals(56, dto.getOutput())) + .assertNext((dto) -> assertEquals(63, dto.getOutput())) + .assertNext((dto) -> assertEquals(70, dto.getOutput())) + .expectComplete() + .verify(); + } + ``` +- note - the above test would work for both streaming and non streaming response type! +- similarly, post request test - + ```java + Mono request = Mono.fromSupplier(() -> new MultiplicationRequestDto(6, 7)); + Mono response = webClient.post() + .uri("/reactive-math/product") + .body(request, MultiplicationRequestDto.class) + .retrieve() + .bodyToMono(ResponseDto.class); + + StepVerifier.create(response) + .assertNext((dto) -> assertEquals(42, dto.getOutput())) + .expectComplete() + .verify(); + ``` +- note - here, the format is `body(mono(obj), class)`. it can also be `bodyValue(obj)` based on our use case +- for testing error scenarios, recall we can chain `expectError` etc when using `StepVerifier` +- till now, we were chaining `.retrieve()`. but it only gives access to body, not status code etc. for asserting on them lot, we can use `.exchange()` instead of `.retrieve()` (not discussed here) +- adding request parameter when using web client - note how instead of providing a hardcoded string inside the uri, we now use the builder + ```java + .uri(builder -> builder.path("/reactive-math/stream").query("a={a}&b={b}").build(2, 4)) + ``` +- attributes - help influence the "central configuration" of web client dynamically - when we use web client somewhere in the code, we pass it "attributes" - and these attributes change how web client make calls, which was configured in the "central configuration" - below, we are expected to send an attribute for auth type from the place we actually make the request, and then the configuration decides how to generate auth details for every request (using filter), which extracts the attribute and accordingly modifies the request + ```java + @Bean + public WebClient webClient() { + return WebClient.builder() + .baseUrl("http://localhost:8080") + .filter(this::filter) + .build(); + } + + private Mono filter(ClientRequest request, ExchangeFunction exchangeFunction) { + Optional authType$ = request.attribute("auth-type"); + ClientRequest modifiedRequest = authType$ + .map((authType) -> authType.equals("basic") ? 
addBasicAuth(request) : addTokenAuth(request)) + .orElse(request); + return exchangeFunction.exchange(modifiedRequest); + } + + private ClientRequest addBasicAuth(ClientRequest originalRequest) { + return ClientRequest.from(originalRequest) + .headers((headers) -> headers.setBasicAuth("user", "password")) + .build(); + } + + private ClientRequest addTokenAuth(ClientRequest originalRequest) { + return ClientRequest.from(originalRequest) + .headers((headers) -> headers.setBearerAuth("just-the-token")) + .build(); + } + ``` +- webclient will help with http requests, but what about database calls? they are not http, they are a custom protocol on top of tcp. so, we have different drivers for different databases +- difference in return type of blocking vs reactive of mongo driver - + + | operation | blocking | non blocking | + |------------|-------------|--------------| + | find by id | Optional | Mono | + | find all | List | Flux | + | count | Long | Mono | + | exists | Boolean | Mono | + +- a very simple crud service for mongodb - + ```java + @Service + @RequiredArgsConstructor + public class ProductService { + + private final ProductDao productDao; + + private final ProductMapper productMapper; + + public Flux findAll() { + return productDao.findAll() + .map(productMapper::map); + } + + public Mono findById(String id) { + return productDao.findById(id) + .map(productMapper::map); + } + + public Mono create(Mono productDto) { + return productDto.map(productMapper::map) + .flatMap(productDao::save) + .map(productMapper::map); + } + + public Mono update(String id, Mono update) { + return productDao.findById(id) + .flatMap((_existing) -> update) + .doOnNext((dto) -> dto.setId(id)) + .map(productMapper::map) + .flatMap(productDao::save) + .map(productMapper::map); + } + + public Mono deleteById(String id) { + return productDao.deleteById(id); + } + } + ``` +- data layer - + ```java + @Data + @AllArgsConstructor + @NoArgsConstructor + @Builder + @Document(collection = "products") + public class ProductEntity { + + @Id + private String id; + + private String description; + + private Integer price; + } + + @Repository + public interface ProductDao extends ReactiveMongoRepository { + } + ``` +- we cannot (should not) use spring data jpa / hibernate / jdbc, because they are all blocking in nature, and therefore we lose out on the performance benefits of reactive programming +- we will use r2dbc - which feels like non blocking alternative to jdbc +- we use spring data r2dbc - which i think sits on top of the r2dbc, thus simplifying development +- i think as an alternative to r2dbc / spring data r2dbc, we can also use hibernate reactive, which probably would be closer to jpa +- r2dbc does not support relationships 😢. so, we can use tricks like `on delete cascade` in our ddl to avoid errors when deleting an entity, since we can no longer specify `CascadeType.ALL` etc on the mapping + ```java + @Data + @AllArgsConstructor + @NoArgsConstructor + @Builder + @Table(name = "`user`") + public class UserEntity { + + @Id + private Integer id; + + private String name; + + private Integer balance; + } + + @Data + @AllArgsConstructor + @NoArgsConstructor + @Builder + @Table(name = "transaction") + public class TransactionEntity { + + @Id + private Integer id; + + private Integer userId; + + private Integer amount; + + private Instant transactionDate; + } + ``` +- e.g. user to transaction is one to many. we want to create a transaction and reduce the balance of the user. 
so, we first check if the user has enough balance and reduce the balance. if there was a row with an update, the boolean returned would be true, post which we can process it further + ```java + @Repository + public interface UserDao extends ReactiveCrudRepository { + + @Query("update `user` set `user`.balance = `user`.balance - :amount where `user`.id = :userId and `user`.balance >= :amount") + @Modifying + Mono updateBalance(Integer userId, Integer amount); + } + + public Mono create(Integer userId, Mono request) { + return request.flatMap((dto) -> + userDao.updateBalance(userId, dto.getAmount()) + .filter((updated) -> updated) + .flatMap((_updated) -> transactionDao.save(transactionMapper.map(dto))) + ) + .map((_entity) -> TransactionStatus.APPROVED) + .defaultIfEmpty(TransactionStatus.DECLINED); + } + ``` +- we can use jpa with webflux, i think some point to note - + - remember it is blocking, so we should additionally chain a `publishOn` / `subscribeOn` + - we should use for e.g. `Flux.fromStream(() -> repo.findAl().stream())`, if we were to use `Flux.fromIterable(repo.findAll())`, i think it would defeat the point + ```java + public Flux findAll(Integer userId) { + // orderDao is a normal jpa repository, not r2dbc + return Flux.fromStream(() -> orderDao.findAllByUserId(userId).stream()) + .subscribeOn(Schedulers.boundedElastic()) + .map(orderMapper::map); + } + ``` + - also see how we chain with `.map()` when calling `save`, not with `.flatMap`, since recall it returns a normal object unlike when using spring reactive data, which returns the object wrapped with `Mono` +- [a "context" pattern](https://stackoverflow.com/a/77592888/11885333). this is all my logic btw lol - when we have several interactions with several services, we need some properties of some objects, some properties of some other objects, etc. remember that with a functional / declarative style that comes with reactive programming, we do not have access to all objects in the method - we only can access previous chained call's return value. 
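- e.g. without such a holder, every step ends up repackaging whatever the later steps might need - a hypothetical sketch (my own, using reactor's `Tuples`, not the actual code) of what that starts to look like
  ```java
  orderRequest
      // fetch the product, but keep the original request around by pairing the two up
      .flatMap((request) -> productService.getProduct(request.getProductId())
          .map((product) -> Tuples.of(request, product)))
      // the transaction call needs fields from both, so the tuple gets unpacked...
      .flatMap((tuple) -> userService.createTransaction(tuple.getT1().getUserId(), tuple.getT2().getPrice())
          // ...and repacked again for whoever comes next
          .map((status) -> Tuples.of(tuple.getT1(), tuple.getT2(), status)));
  ```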
so, we can instead use a helper context object where we store all interactions, so that they can be easily accessed at any time + ```java + public Mono fulfill(Mono orderRequest) { + + Context ctx = new Context(); + + return orderRequest + // record request into context + .doOnNext(ctx::setOrderRequestDto) + // get product from product service + .flatMap((_v) -> productService.getProduct(ctx.getOrderRequestDto().getProductId())) + // record product into context + .doOnNext(ctx::setProductDto) + // deduct amount by calling user service - note how it involves using both original request and product + .flatMap((_v) -> userService.createTransaction(ctx.getOrderRequestDto().getUserId(), ctx.getProductDto().getPrice())) + // record status of response into context + .doOnNext(ctx::setTransactionStatus) + // build the order based on transaction status (if user had enough amount to pay for product), actual product, etc + .map((_v) -> orderMapper.map(ctx.getProductDto(), ctx.getOrderRequestDto(), ctx.getTransactionStatus())) + // record the built order entity into context + .doOnNext(ctx::setOrderEntity) + // save the order (using map, not flatMap since this is not r2dbc) + .map((_v) -> orderDao.save(ctx.getOrderEntity())) + // create response + .map((_v) -> orderMapper.map(ctx.getOrderEntity())) + // use subscribe on, since jpa repo.save is a blocking call + .subscribeOn(Schedulers.boundedElastic()); + } + + @Data + class Context { + + private OrderRequestDto orderRequestDto; + + private ProductDto productDto; + + private TransactionStatus transactionStatus; + + private OrderEntity orderEntity; + } + ``` +- a simple way to implement sse (server sent events). recall we had already discussed `TEXT_EVENT_STREAM_VALUE`. we can combine it with the concept of sinks for live updates in ui! + ```java + @Service + public class ProductService { + + // ... + + private final Sinks.Many productsSink; + + @Getter + private final Flux productsFlux; + + public ProductService(ProductDao productDao, ProductMapper productMapper) { + // ... + productsSink = Sinks.many().replay().all(); + productsFlux = productsSink.asFlux(); + } + + public Mono create(Mono productDto) { + // ... + .doOnNext(productsSink::tryEmitNext); + } + + public Mono update(String id, Mono update) { + // ... + .doOnNext(productsSink::tryEmitNext); + } + } + + // ... + @GetMapping(value = "/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE) + public Flux findAllStream() { + return productService.getProductsFlux(); + } + ``` +- `WebTestClient` - testing routers, controllers, controller advice, etc + ```java + @SpringBootTest + @AutoConfigureWebTestClient + public class OneWebTestClientTest { + + @Autowired + WebTestClient client; + + @Test + void test() { + Flux response = client.get() + .uri("/reactive-math/square/{n}", 4) + .exchange() + .expectStatus().isOk() + .expectHeader().contentType(MediaType.APPLICATION_JSON) + .returnResult(ResponseDto.class) + .getResponseBody(); + + StepVerifier.create(response) + .assertNext((dto) -> assertEquals(16, dto.getOutput())) + .expectComplete() + .verify(); + } + } + ``` +- note - feels like everything will be a flux, no concept of mono, but that should be fine? 
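- the same approach should work for the streaming endpoints from earlier too, e.g. something like this for the sse products stream - a rough sketch, where the uri and dto name are assumptions based on the earlier snippets, and it assumes at least two products have been emitted
  ```java
  Flux<ProductDto> events = client.get()
      .uri("/products/stream")
      .accept(MediaType.TEXT_EVENT_STREAM)
      .exchange()
      .expectStatus().isOk()
      .returnResult(ProductDto.class)
      .getResponseBody();

  // take(2) completes the flux, so the verification does not wait on the open stream
  StepVerifier.create(events.take(2))
      .expectNextCount(2)
      .expectComplete()
      .verify();
  ```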
+- while we extracted response as a flux above, we can use fluent assertions as well - + ```java + .expectHeader().contentType(MediaType.APPLICATION_JSON) + .expectBody() + .jsonPath("$.size()").isEqualTo(4) + .jsonPath("$[0].output").isEqualTo(4) + .jsonPath("$[1].output").isEqualTo(8) + .jsonPath("$[2].output").isEqualTo(12) + .jsonPath("$[3].output").isEqualTo(16); + ``` +- above is example of integration test, slow +- unit test is fast - we test nth layer and assume (n - 1)th layer works, and thus for e.g. mock it +- unit test controller logic using `WebClientTest` + ```java + @WebFluxTest({ReactiveMathController.class}) + public class TwoWebTestClientTest { + + @Autowired + WebTestClient client; + + @MockBean + ReactiveMathService reactiveMathService; + + @Test + void test() { + when(reactiveMathService.calculateSquare(4)) + .thenReturn(Mono.just(ResponseDto.builder().output(16).build())); + + // rest stays the same as the integration test! + } + } + ``` +- diff --git a/_posts/2023-12-04-java.md b/_posts/2023-12-04-java.md new file mode 100644 index 0000000..1f5f32e --- /dev/null +++ b/_posts/2023-12-04-java.md @@ -0,0 +1,2286 @@ +--- +title: Java +--- + +## Object Oriented Programming + +### Basics + +- a java program can have any number of classes. the classes can have any name and the java program can have any name + - however, only one public class is allowed in a java program + - the name of both the public class and the java program should be the same +- when we compile a class using `javac Durga.java`, the number of class files generated = number of classes present in that program + ```java + class A { + + public static void main(String args[]) { + System.out.println("class A"); + } + } + + class B { + + public static void main(String args[]) { + System.out.println("class B"); + } + } + + class C { + } + ``` +- output of above program -
+ ![java main output](/assets/img/java/java-main-output.png) +- three pillars of object oriented programming + - encapsulation - helps with **security** + - data hiding (visibility) + - abstraction + - inheritance - helps with **reusability** + - polymorphism - helps with **flexibility** + - compile time - + - overloading + - method hiding + - variable hiding / shadowing + - runtime - + - overriding + +### Import Statements / Package + +- two ways for using classes from libraries - + - fully qualified name of the class + - import statement at the top - preferred +- two kinds of import statements - + - **explicit import** - `import java.util.ArrayList;` - preferred + - **implicit import** - `import java.util.*;` + - note - implicit import does not include sub packages +- by default - + - all classes under `java.lang` package are available and need not be imported + - all classes under the same package as the package of the current class need not be imported +- **package** - group of related java programs +- different packages can have the same java program, e.g. `java.util.Date` and `java.sql.Date` +- universally accepted naming convention for package naming in java - reverse of domain name +- if we write the following program - + ```java + package com.learning.java; + + class Test { + public static void main(String args[]) { + System.out.println("hello world"); + } + } + ``` +- and try compiling using `javac Test.java`, it compiles. but when i tried running `java Test.java`, it failed + ``` + Error: Could not find or load main class Test + Caused by: java.lang.NoClassDefFoundError: Test (wrong name: com/learning/java/Test) + ``` +- so we should actually compile using - `javac -d . Test.java` +- this generates the entire directory structure of com/learning/java in the current working directory and places the Test.class there +- packages help with implementing **encapsulation** - the entire complexity / functionality is viewed as one unit residing inside a package + +### Class and Method Access Modifiers + +- classes have two different **access modifiers** - `public` and `<>` + - **default** classes are only accessible from only within the package + - **public** classes can be accessed from anywhere outside the package as well +- for inner class, apart from `abstract`, `final`, `public` and `<>`, we can also have `static`, `private` and `protected` modifiers +- **members** have four different **access modifiers** - + - **public** - access method from anywhere, inside or outside package + - **default** - can be accessed from inside package only, not outside the package + - **private** - can only be accessed from within the same class, not outside the class + - **protected** - can be accessed from anywhere inside package, and from subclasses if outside package as well + - small note - if accessing protected members from inside subclass but from outside package, only subclass reference can be used, not superclass (i.e. even polymorphism is not allowed) +- protected example - + - a/A.java - + ```java + package a; + + public class A { + + protected void a() { + System.out.println("from a"); + } + } + ``` + - b/B.java - + ```java + package b; + import a.A; + + class B extends A { + + public static void main(String[] args) { + A a = new A(); + a.a(); + } + } + ``` + - output -
+ ![protected caveat](/assets/img/java/protected-caveat.png) + - solution - change to - + ```java + B b = new B(); + b.a(); + ``` +- therefore, summary in tabular form - + + | visibility | public | protected | default | private | + |--------------------------------|--------|------------------------------|---------|---------| + | same class | ✅ | ✅ | ✅ | ✅ | + | subclass same package | ✅ | ✅ | ✅ | | + | non subclass same package | ✅ | ✅ | ✅ | | + | subclass different package | ✅ | ✅ (subclass reference only) | | | + | non subclass different package | ✅ | | | | + +- access modifiers help with - **data hiding** - a different class cannot "directly" access the fields of another class - it would have to use public methods +- note - think about member visibility only when class is visible first (recall default vs public) +- access modifiers also help achieve **encapsulation** - interact with data members via exposed methods, not directly + +### Abstract Classes And Interfaces + +- `abstract` modifier is applicable for both methods and classes +- abstract method is used when we do not know about the implementation of the class upfront. e.g. Vehicle class can have an abstract method `getNumberOfWheels`. syntax - + ```java + public abstract Integer getNumberOfWheels(); + ``` +- if a class contains *even one* abstract method, it would have to be declared as abstract as well +- if a class is `abstract`, instantiation is not possible for the class +- also, if for e.g. we would not like for it to be possible to instantiate a class, we can declare it as abstract even if it does not have abstract methods +- subclasses are responsible to provide the implementation of the abstract methods of super class +- we can have multiple levels of nesting for abstract classes as well - abstract class Vehicle -> abstract class Car -> class RangeRover +- **interface methods are `public` and `abstract` without us specifying anything** +- so, when overriding in subclass, "method should be public" +- when **implementing** an interface - + - either override all the methods of the interface + - or make the class itself abstract +- code example - + ```java + interface I { + + void m1(); + void m2(); + } + + abstract class A implements I { + + public void m1() { + } + } + ``` +- abstract variables are not supported - so only one kind of member i.e. method is allowed for abstract, not variable +- so why use abstract classes and interfaces - + - **mandating a structure for an implementation** - "mandates" subclasses to provide implementation, else there will be compile time error + - **acting as a specification / contract** - e.g. we write servlet api compliant code, but that same code can run on different vendors like jetty, tomcat, weblogic, resin, oracle http server, etc which are all implementations of the same servlet api. same applies for jdbc and the different sql compliant drivers as well. + - **abstraction** - client will not know / need not care about the internal implementation +- note - all variables inside an interface are public static final, so they need to be initialized then and there. no instance variables can be created for an interface + +### Inheritance + +- inheritance helps use **is a relationship** +- we use `extends` to implement this +- members of the **superclass** are **inherited** by the **subclass** +- so, subclass can use members of the superclass +- the other way around does not hold i.e. 
superclass reference cannot use members of subclass +- all classes are implicitly subclasses of `Object` +- main advantage of inheritance - superclass will contain common functionality, thus helping us avoid duplication of logic in subclasses +- types of inheritance - + - **single inheritance** - one superclass and one subclass. supported in java + - **multilevel inheritance** - one superclass has one subclass, and that subclass again acts as a superclass for yet another subclass. this too is supported in java + - **multiple inheritance** - multiple superclasses, one subclass. not supported in java for classes, but supported via interfaces + ```java + class C1 extends C2, C3 {} // compilation error + + interface I1 extends I2, I3 {} // works + class C1 implements I1, I2 {} // works + ``` + - **hierarchical inheritance** - one superclass, multiple subclasses + - **hybrid inheritance** - combination of multiple types of inheritance +- inheritance example -
+ ![inheritance](/assets/img/java/inheritance.drawio.png) +- confusion cleared - we just said every class extends object. if a class C1 extends another class C2, it is extending both C2 and Object. then isn't this multiple inheritance? why did we say java does not allow multiple inheritance? + - when we do not extend any class, we extend Object implicitly + - when we extend a different class, we do not extend Object directly. so, the root class in the chain which does not have any explicit superclass extends Object implicitly. so it is basically multi level inheritance and not multiple inheritance which helps extend this subclass extend Object indirectly +- note - `final` class cannot have a subclass + +### Polymorphism - Overloading + +- **method signature** - method name + argument types +- in java, **return type is not a part of method signature** +- when resolving method calls, method signature is what gets used +- so, it is a compile time error if we try to add two methods with same signature, even if they have different return types +- **overloading** - when a class has multiple method names with same but different argument types +- advantage - same method is being used for multiple implementations +- **static polymorphism** / **compile time polymorphism** / **early binding** - in case of overloading, the decision around which variation of method to use is made at compile time +- example - + ```java + class Overloader { + + public void printer(int x) { + System.out.println("printing an integer: " + x); + } + + public void printer(String x) { + System.out.println("printing a string: " + x); + } + } + + public class Overloading { + + public static void main(String[] args) { + Overloader overloader = new Overloader(); + overloader.printer(1); // printing an integer: 1 + overloader.printer("hello"); // printing a string: hello + } + } + ``` +- **automatic promotion** + overloading in java - if when overloading, an _exact_ match is not found for a primitive type, java promotes to the next available primitive type using the following rules - + - byte -> short -> int -> long -> float -> double + - char -> int -> ... +- so, if refer the example above - there is no overloaded method for char. so, we jump to the next type as follows - + ```java + overloader.printer('a'); // printing an integer: 97 + ``` +- if no promotion is possible, we get a compile time error - + ```java + overloader.printer(10.5); // Overloading.java:19: error: no suitable method found for printer(double) + ``` +- if there is a clash during overloading for superclass vs subclass, subclass gets priority +- e.g. `null` can be used both for `Object` and `String`. so, if a method is overloaded for both of them and we pass it `null`, it will call the `String` implementation +- if there is clash during overloading for two classes which are independent, compiler throws an unambiguous exception +- e.g. `null` can be used both for `String` and `StringBuffer`. so, if a method is overloaded for both of them and we pass it `null`, it will throw an exception + ```java + overloader.printer(null); // Overloading.java:24: error: reference to printer is ambiguous + ``` +- since method overloading is compile time, the decision is influenced by the reference, not by the instance +- e.g. 
if i do `Object x = new String("s")`, and a method is overloaded for both `String` and `Object`, the object version would be called, since the decision is made by the type of reference - if i have two variations - `m1(Object obj)` and `m1(String str)`, the `m1(Object obj)` variation would be called + +### Polymorphism - Overriding + +- superclass reference can hold subclass instance +- the other way around does not hold i.e. subclass reference can not hold superclass instance +- **overriding** - subclass redefines method of superclass +- variations - + - superclass reference pointing to superclass instance - superclass method would be called + - subclass reference pointing to subclass instance - subclass method would be called + - superclass reference pointing to subclass instance - subclass method would be called +- the third variation is what interests us - compiler only checks if superclass has that method defined +- the method is called actually called on the instance during execution +- **dynamic polymorphism** / **runtime polymorphism** / **late binding** - in case of overriding, the decision around which variation of method to use is made at runtime +- **co variant** - when overriding, we can return subclass type of what superclass returns + ```java + class Parent { + public Object m1() { + return null; + } + } + + class Child extends Parent { + public String m1() { + return "hello world"; + } + } + + class CoVariant { + public static void main(String[] args) { + Parent p = new Child(); + System.out.println("covariant response = " + p.m1()); // covariant response = hello world + } + } + ``` +- if superclass method is final, we cannot override the method and we get a compile time error +- if superclass method is non final, we can override the method, and can also make it final in the subclass +- if method is private, there is no concept of overriding, since it is treated like an internal method. so, even if we redefine the method with the same name in the subclass, the compiler would not complain +- access modifiers + overriding - when overriding, we cannot reduce the scope, but we can increase the scope + ```java + class Parent { + public String m1() { + return "from parent"; + } + } + + class Child extends Parent { + protected String m1() { + return "from child"; + } + } + ``` +- output - + ``` + attempting to assign weaker access privileges; was public + ``` +- so, conclusion for access modifiers and overriding - for `private` methods, overriding concept is not applicable, for others - + - superclass - `public`, subclass can be - `public` + - superclass - `protected`, subclass can be - `protected`, `public` + - superclass - `default`, subclass can be - `default`, `protected`, `public` +- exception - below is **of course** only applicable for checked exceptions and not unchecked exceptions. below will make sense automatically as well if we think about `Parent p = new Child(); p.m1();` + - if subclass does not throw an exception, superclass can or cannot throw an exception + - if subclass throws an exception, superclass should throw a superclass of exception as well +- superclass `public static void m1()`, subclass - `public void m1()` - compile time error +- subclass `public void m1()`, superclass - `public static void m1()` - compile time error +- subclass `public static void m1()`, superclass - `public static void m1()` - works, but it is not overriding. it is **method hiding**. 
this resolution is compile time, happens by reference, and the superclass version is called + ```java + class Parent { + public static String m1() { + return "hello from parent"; + } + } + + class Child extends Parent { + public static String m1() { + return "hello from child"; + } + } + + class MethodHiding { + public static void main(String[] args) { + Parent p = new Child(); + System.out.println("parent reference, child object responds with = " + p.m1()); + } + } + ``` +- output - + ``` + parent reference, child object responds with = hello from parent + ``` +- conclusion - **method hiding** is also example of **compile time polymorphism** / **static polymorphism** / **early binding** just like **overloading** +- **variable hiding / shadowing** - there is no concept of overriding for variable members. so, if we redefine the vairable in the child class as well, resolution happens via superclass + ```java + class Parent { String s = "parent"; } + class Child extends Parent { String s = "child"; } + + class VariableShadowing { + public static void main(String[] args) { + Parent p = new Child(); + System.out.println(p.s); // prints 'parent' + } + } + ``` +- TODO: add double dispatch? + +### Object Type Casting + +- syntax - `A b = (C) d` +- three checks - + - compile time check 1 - C and d should be somehow related. either should be superclass of other + - passes compilation - + ```java + Object o = new String("hello world"); + StringBuffer sb = (StringBuffer) o; + ``` + - fails compilation - `incompatible types: String cannot be converted to StringBuffer` + ```java + String str = new String("hello world"); + StringBuffer sb = (StringBuffer) str; + ``` + - compile time check 2 - obvious - C should be subclass of A or same as A + - passes compilation - + ```java + Object o = new String("hello world"); + StringBuffer sb = (StringBuffer) o; + ``` + - fails compilation - `incompatible types: StringBuffer cannot be converted to String` + ```java + Object o = new String("hello world"); + String s = (StringBuffer) o; + ``` + - runtime check 1 - actual instance d should be subclass of C or same as C. understand how this is different from compile time check 1 - there, we were checking if whatever reference is used for d, that should be somehow related to C. here however, we check if the actual runtime object that d holds is a subclass of C or same as C + - passes runtime - + ```java + Object o = new String("hello world"); + String s = (String) o; + ``` + - fails runtime - `ClassCastException: class java.lang.String cannot be cast to class java.lang.StringBuffer` + ```java + Object o = new String("hello world"); + StringBuffer sb = (StringBuffer) o; + ``` + +## Constructors + +- constructor helps with **initialization** +- `new` keyword helps with **instantiation** +- for constructor, method name should be same as the name of class +- only applicable modifiers for constructors are access modifiers +- use case - make the constructor `private`. now, an object for the class can only be created from inside the class. this can help us for e.g. 
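implement the **singleton pattern**. a minimal sketch of the idea (the `AppConfig` name is just for illustration) -
  ```java
  class AppConfig {

    // the single shared instance, created eagerly inside the class itself
    private static final AppConfig INSTANCE = new AppConfig();

    // private constructor - `new AppConfig()` cannot be called from outside this class
    private AppConfig() {
    }

    public static AppConfig getInstance() {
      return INSTANCE;
    }
  }
  ```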
- **if we do not add any constructor** for a class, the compiler adds the **default no args constructor** for us automatically
- note - this default no args constructor is added for abstract classes as well
- first line in our constructor should always be `super()` or `this()`
- if we do not add the super call ourselves, the compiler will automatically add `super()` for us
- note - this automatic adding of super happens for both constructors written by us and inside the default no args constructor
- convoluted example -
  - our code -
    ```java
    class Test {

      Test(int i) {
        this();
      }

      Test() {

      }
    }
    ```
  - what the compiler generates -
    ```java
    class Test {

      Test(int i) {
        this();
      }

      Test() {
        super();
      }
    }
    ```
- when we have code like below, we get a compilation error, because the `super()` that the compiler generates automatically is not enough - the superclass has only one constructor, the one we manually wrote, and it requires an argument, which the compiler is not capable of defaulting
  ```java
  class Parent {
    Parent(int i) {}
  }

  class Child extends Parent {
  }
  ```
- error -
  ```
  Test.java:5: error: constructor Parent in class Parent cannot be applied to given types;
  class Child extends Parent {
  ^
    required: int
    found: no arguments
    reason: actual and formal argument lists differ in length
  1 error
  ```
- note - super has to be the first line, otherwise we get `error: call to super must be first statement in constructor`
- note - this has to be the first line, otherwise we get `error: call to this must be first statement in constructor`
- so, conclusion - we can only use either `super()` or `this()`, and that too only in the first line of the constructor
- `super()` or `this()` can only be called inside a constructor and not inside any other method
- `this` and `super` keywords can also be used to reference instance variables
- note - `this` and `super` are always related to an instance, so they cannot be used inside `static` methods
- my doubt - how to handle variable hiding / shadowing for static variables? solution - maybe use the class name as prefix instead of super?
- constructor + overloading is a common pattern. we can then use `this()` inside them to call each other with default values for missing arguments
- a constructor can throw exceptions
- however, if the superclass constructor throws a checked exception, the subclass constructor should throw the same exception or a superclass of that exception. we cannot wrap with try catch, since super or this should be the first call

## Strings

- **string** objects are **immutable**, **string buffer** objects are **mutable**
  ```java
  class Introduction {

    public static void main(String[] args) {
      String s = new String("Durga");
      s.concat(" Software");
      System.out.println(s); // Durga

      StringBuffer sb = new StringBuffer("Durga");
      sb.append(" Software");
      System.out.println(sb); // Durga Software
    }
  }
  ```
- `==` is for reference comparison. `equals` in `Object` class works just like `==`, but sometimes subclasses can override this method, e.g.
`String` class below overrides it for content comparison, while `StringBuffer` does not + ```java + class Equality { + + public static void main(String[] args) { + + String s1 = new String("durga"); + String s2 = new String("durga"); + System.out.println(s1 == s2); // false + System.out.println(s1.equals(s2)); // true + + StringBuffer sb1 = new StringBuffer("durga"); + StringBuffer sb2 = new StringBuffer("durga"); + System.out.println(sb1 == sb2); // false + System.out.println(sb1.equals(sb2)); // false + } + } + ``` +- heap is used for storing objects. string objects can be created when we use `new String()`, `str.concat("suffix")`, etc +- **scp (string constant pool)** is used for storing string literals. java stores them in the hopes of reusing them later +- note - scp a section in the heap itself, maybe it is present in a different location when compared to where java objects are stored +- while objects in heap are eligible for **gc (garbage collection)**, objects in scp are not, because java internally maintains references to the string literals stored in scp +- deeper understanding - scp is used for storing string literals. if i do `str.concat("suffix")`, suffix would be stored in scp, not concatenated result of str and suffix. the concatenated result will however be stored in heap +- so, it almost feels like that albeit in heap, scp is more of a compile time feature, while string objects are a runtime feature +- 2 in heap - (s1, String("durga")), (s2, String("durga")) and 1 in scp - "durga". s3 and s4 point to scp itself, while s1 and s2 point to both heap and scp. note how despite having the same string 4 times, it was stored only once in scp + ```java + String s1 = new String("durga"); + String s2 = new String("durga"); + String s3 = "durga"; + String s4 = "durga"; + ``` +- 3 in heap, out of which 1st and 2nd are eligible for gc - (,String("durga")), (,String("durga software")), (s, String("durga software solutions")) and 3 in scp - "durga", " software", " solutions" - + ```java + String s = new String("durga"); + s.concat(" software"); + s = s.concat(" solutions") + ``` +- in below examples, we compare equality using `==` and not `equals`, maybe because equals should anyway do content comparison, but here we see which references point to the same object +- equality of string literals - (equals compares reference and if not same, contents, while == just compares reference, and that evaluates to true since sl1 and sl2 are both pointing to the same object inside scp) + ```java + String sl1 = "durga"; + String sl2 = "durga"; + System.out.println(sl1 == sl2); // true + ``` +- concatenation for string literals can happen at compile time as well, which is why slc1 and slc2 point to the same object stored in the scp. this is probably happening due to optimizations that are performed on instructions + ```java + String slc1 = "durga software"; + String slc2 = "durga " + "software"; + System.out.println(slc1 == slc2); // true + ``` +- here, str2 is created at runtime, so str2 points to string object in heap while str3 points to string literal in scp. str2 does not point to a corresponding object in scp in this case + ```java + String str1 = "durga"; + String str2 = str1 + " software"; + String str3 = "durga software"; + System.out.println(str2 == str3); // false + ``` +- here, both strf2 and strf3 are created at compile time hence scp itself, because final variables would be replaced at compile time. 
understand how this behavior changed when compared to the example above, just by adding the `final` keyword + ```java + final String strf1 = "durga"; + String strf2 = strf1 + " software"; + String strf3 = "durga software"; + System.out.println(strf2 == strf3); // true + ``` +- the main advantage of scp - if a string is used multiple times, its instance need not be managed / tracked separately multiple times +- basically, jvm maintains a reference to strings in scp, so that there is no garbage collection happening there +- also, strings in scp cannot be mutated - when we make changes, new objects are stored in heap / new strings are stored in scp +- string buffers do not work like strings - string buffers do not use concepts like scp etc - so it is mutable - there is no referencing to same object in scp like concepts in string buffer +- in strings, `concat` and `+` both do the same thing +- other important methods in strings - `equalsIgnoreCase()`, `charAt()`, `length()`, `isEmpty()`, `substring()`, `replace()` (replace a certain character), `indexOf()`, `lastIndexOf()`, `toLowerCase()`, `toUpperCase()`, `trim()` +- **string buffer** - string is not meant for string content that can change frequently +- strings are immutable - for every change, a new object is created +- this is why we need string buffer. all changes we make happen on the same object +- since string buffer is mutable, it has two concepts - `capacity` and `length`. `capacity` determines how many characters the string buffer can hold, while `length` gives the current number of characters the string buffer has +- when we run out of space, memory is doubled, a new object is created and all the existing characters are copied +- other important methods in string buffer - `capacity()` (get the current capacity), `setCharAt()`, `append()` (works with most primitive etc types), `insert()` (insert a string at a specific position), `delete()` (delete substring from specified positions), `reverse()` (reverse contents of string buffer), `ensureCapacity()` (increase capacity to specified capacity upfront) +- note - all methods inside string buffer are synchronized - run `javap java.lang.StringBuffer` in terminal to view the **profile** of string buffer + ``` + public synchronized int length(); + public synchronized int capacity(); + public synchronized void setCharAt(int, char); + // and so on... + ``` +- so, at a time, only one thread can operate on a `StringBuffer`, thus affecting performance of applications +- so, we can also use `StringBuilder` +- the apis of **string builder** are almost the same as string buffer - so, it is like a "non synchronized version of string buffer" - run `javap java.lang.StringBuilder` in terminal - + ``` + public void setCharAt(int, char); + public int capacity(); + public int length(); + // and so on... 
+ ``` +- so higher performance at the cost of race conditions which we might have to take care of ourselves +- side note - strings are automatically thread safe since they are immutable +- **method chaining** - because most methods in `String`, `StringBuffer`, `StringBuilder` return the same object type, we can use method chaining technique + +## Exceptions + +- Throwable + - Exception + - RuntimeException + - ArithmeticException + - NullPointerException + - etc + - IOException - used when doing file related operations etc + - InterruptedException - used in multithreading related code etc + - etc + - Error +- **unchecked exceptions** + - runtime exceptions and its subtree + - error and its subtree +- everything else is **checked exception** +- **try with resources** - cleaner code, no need to call `close` explicitly, if they use the interface `AutoCloseable` + ```java + try (BufferedReader br = new BufferedReader(new FileReader("file.txt"))) { + // ... + } + ``` +- note - what we declare / assign inside the try statement is final, and cannot be reassigned +- `Closable` extends `AutoClosable`. `Closable` throws `IOException`, while `AutoClosable` throws `Exception` which is more generic +- when we are handling exceptions, it might happen that we lose track of the original exception, and throw another exception which is not that relevant. e.g. + - reading from a resource fails due to missing file + - closing the resource fails due to null pointer because the resource was never initialized properly +- the eventual exception we get is the null pointer, but the missing file exception would have helped us more in identifying the root cause +- so, we can also use `ex.addSuppressed(Throwable t)` or `Throwable[] t = ex.getSuppressed()`. this way, we can also find the original cause behind the exception +- note - try with resources will automatically make use of suppressions for us bts +- another note - when using try with resources, the null pointer exception will be added as a suppression to the file not found exception, because understand that the main exception that happened in the try block was file not found exception, and the null pointer exception happened inside the finally block + ```java + class CustomResource implements AutoCloseable { + + public void read() { + throw new RuntimeException("could not read file"); + } + + @Override + public void close() { + throw new RuntimeException("a null pointer exception happened"); + } + } + + public class SuppressionExample { + + public static void main(String[] args) { + + try { + beforeJava7Way(); + } catch (Exception e) { + System.out.println(e.getMessage()); + for (Throwable t : e.getSuppressed()) { + System.out.println("suppress: " + t.getMessage()); + } + } + + try { + sinceJava7Way(); + } catch (Exception e) { + System.out.println(e.getMessage()); + for (Throwable t : e.getSuppressed()) { + System.out.println("suppress: " + t.getMessage()); + } + } + } + + private static void beforeJava7Way() { + CustomResource customResource = null; + try { + customResource = new CustomResource(); + customResource.read(); + } finally { + customResource.close(); + } + } + + private static void sinceJava7Way() { + try (CustomResource customResource = new CustomResource()) { + customResource.read(); + } + } + } + ``` +- output - + ``` + a null pointer exception happened + + could not read file + suppress: a null pointer exception happened + ``` +- we can also catch multiple exceptions using a single catch block + ```java + try { + + } catch 
(NullPointerException | ArrayIndexOutOfBoundsException e) {
    e.printStackTrace();
    triggerAlert(e);
  }
  ```

## Multi Threading

### Concepts

- there are two benefits of multithreading - **responsiveness** and **performance**
- _repeat - remember multithreading gives both the features above_
- **concurrency** means performing different tasks on the same core. instead of waiting for one task to entirely complete first, we perform both simultaneously in a time-shared manner. it **increases responsiveness**
- **concurrency** is also called **multi tasking**. remember - we do not even need different cores for this
- **parallelism** means performing different tasks on different cores. it **increases performance**
- **throughput** is the number of tasks completed per unit time
- **latency** is the time taken per unit task
- how are the two different -
  - for optimizing throughput, since the tasks themselves are different, they just need to be scheduled on different threads in parallel, and that automatically increases the throughput. therefore, fewer considerations exist
  - for optimizing latency, we would probably break a single task into smaller subtasks. considerations -
    - what parts of the original task can be performed in parallel and which parts have to be done sequentially
    - how to aggregate the smaller chunks of results into the final result
- in case of multithreading, components like the **heap are shared across the threads**, while components like the **stack and instruction pointer are scoped to a single thread**
  - my understanding - the only time some actual value is stored inside the stack and not the heap is for local primitive types, e.g. if we declare an `int a = 1` inside a method. at all other times, the data is stored inside the heap -
    - for non primitive types - the reference is stored in the stack and the actual instance is stored in the heap
    - for primitive types - e.g. primitive members of a class are stored in the heap, because the object itself is stored inside the heap
- a stack and instruction pointer are scoped to a thread. so, if a process has 10 threads, it would have 10 stacks, 10 instruction pointers, etc
- ideally this makes sense - remember, each thread is executing a different instruction, so each of them needs its own instruction pointer etc
- a stack is used for local variables of a method call, and is used alongside the instruction pointer. a frame is created for every method call, and it can result in a stack overflow error if we end up with too many frames
- the heap belongs to a process, and all threads can write to / read from the heap at any given time. all objects are stored in the heap till there is a reference to them, after which they get garbage collected
- all object instances - along with their members, primitive or not - are allocated on the heap
- do not get confused - **references to objects are stored in the stack, while the actual objects are allocated in the heap**
- when we execute a program, it becomes a process i.e. it gets loaded into the memory from the disk and a thread is used to execute it
- there are often way more processes being executed than cores in a cpu. 
so, using **context switching**, one thread at a time gets cpu and, gets paused and another thread is scheduled on the cpu +- context switching has overhead, and doing a **lot of it can lead to** something called **thrashing** +- however, context switching between the threads of the same process is much cheaper than context switching between the threads of different processes, since a lot of components like heaps are reused +- when the operating system has to chose between scheduling multiple tasks on a thread, and if for e.g. it schedules a computationally expensive task first, it can lead to the **starvation** of other smaller tasks +- so, to combat issues like this, there are various algorithms used by the operating system to calculate the priority of a task +- we can also programmatically provide a priority which gets used in the calculation above +- a thing that struck me - when writing applications, do not base your conclusions off the computer you are running your code on, base it off how it would work on the server +- number of threads = number of cores is the best way to start, since context switching as discussed earlier consumes resources +- number of threads = number of cores is only optimal if the threads are always performing some computation, and never in blocked state. if the threads perform some io, then some thread performing some computation can takes its place +- also, modern day computers use **hyper threading** i.e. the same physical core is divided into multiple virtual cores. this means that a core can run more than one thread in modern cpus, so our logic of number of threads = number of cores can fail here as well + +### Thread Creation + +- we create an instance of `Thread` and to it, we pass an object of a class that implements `Runnable`. its `run` method needs to be overridden. all of this can be replaced by a lambda java 8 onwards + ```java + Thread thread = new Thread(() -> System.out.println("i am inside " + Thread.currentThread().getName())); + thread.start(); + ``` +- if instead of using `Runnable`, we extend the `Thread` class, we get access to a lot of internal methods +- when we run `Thread.sleep`, we instruct the os to not schedule that thread until the timeout is over +- note misconception - invoking this method does not consume any cpu i.e. it is not like a while loop that waits for 5 seconds +- we can set a name of a thread to make it helpful when debugging, using `thread.setName()` +- we can set a priority between 1 and 10 using `thread.setPriority` +- we can use `thread.setUncaughtExceptionHandler` to catch "unchecked exceptions" that might have occurred during the execution of the thread, and thus cleanup resources +- we can shut down the application entirely from any thread using `System.exit(0)` + +### Thread Coordination + +- the application will not terminate until all threads stop +- but, we might want to interrupt a thread so that the thread can maybe understand that the application wants to terminate, and accordingly handle cleaning up of resources + ```java + Thread thread = new Thread(new Task()); + thread.start(); + thread.interrupt(); + ``` +- the interruption can be handled gracefully in two ways as described below + - if our code throws an interrupted exception, calling `interrupt` will trigger it, and then we can handle it. 
other examples where this exception happens are for calls like `thread.join()` and `object.wait()` + ```java + public class Task implements Runnable { + + @Override + public void run() { + try { + Thread.sleep(20000); + } catch (InterruptedException e) { + System.out.println("[inside catch] i was interrupted..."); + } + } + } + ``` + - else we can check the property `isInterrupted` and handle it accordingly + ```java + public class Task implements Runnable { + + @Override + public void run() { + Date date = new Date(); + while ((new Date()).getTime() - date.getTime() < 10000) { + if (Thread.currentThread().isInterrupted()) { + System.out.println("[inside loop] i was interrupted..."); + break; + } + } + } + } + ``` +- **background / daemon threads** there might be a case when what the thread does need not be handled gracefully, and it is just an overhead for us to check for e.g. the `isInterrupted` continually. so, we can set the daemon property of the thread to true. this way when the thread is interrupted, it will be terminated without us having to handle it + ```java + Thread thread = new Thread(new Task()); + thread.setDaemon(true); + thread.start(); + thread.interrupt(); + ``` +- also, unlike normal threads, where the application does not close if any thread is running, a daemon thread does not prevent the application from terminating +- if we implement `Callable` instead of `Runnable`, we can also throw an `InterruptedException` when for e.g. we see that `isInterrupted` is evaluated to true. this means the parent thread calling this thread will know that it was interrupted in an adhoc manner +- threads execute independent of each other. but what if thread b depends on the results of thread a? +- **busy wait** - one way could be we run a loop in thread b to monitor the status of thread a (assume thread a sets a boolean to true). this means thread b is also using resources, which is not ideal +- so, we can instead call `threadA.join()` from thread b, thread b goes into waiting state till thread a completes +- we should also consider calling the join with a timeout, e.g. `threadA.join(t)` +- my understanding - if for e.g. the main thread runs the below. first, we start threads t1 and t2 in parallel of the main thread. now, we block the main thread by calling `t1.join()`. the main thread will be stopped till t1 completes + ```java + t1.start(); t2.start(); + t1.join(); t2.join(); + ``` +- scenario 1 - t1 completes before t2, the main thread resumes, and again will be stopped till t2 completes +- scenario 2 - t1 completes after t2. the main thread resumes and will not wait for t2 since it has already completed + +### Thread Pooling + +- **thread pooling** - reusing threads instead of recreating them every time +- tasks are added to a **queue**, and the threads pick them up as and when they become free +- so, when tasks are cpu intensive, we should have number of threads closer to core size, and when tasks are io intensive, we should have higher number of threads, but remember that - + - too many threads can cause performance issues as well due to context switching + - threads are not trivial to create, they are resource intensive +- java provides 4 kinds of thread pools - `FixedThreadPool`, `CachedThreadPool`, `ScheduledThreadPool` and `SingleThreadedExecutor` +- **fixed thread pool executor** - polls for tasks stored in a queue. there can be many tasks, but a set number of threads which get reused. the queue should be thread safe i.e. 
blocking
  ```java
  int numberOfProcessors = Runtime.getRuntime().availableProcessors();
  ExecutorService executorService = Executors.newFixedThreadPool(numberOfProcessors);

  executorService.execute(new Runnable() {...});
  ```
- **cached thread pool executor** - it looks at its threads to see if any of them are free, and if it is able to find one, it will schedule this task on the free thread. else, it will spawn a new thread. too many threads is not too big of a problem, thanks to the keep alive timeout discussed later. however, expect out of memory errors if too many tasks are added to the executor, because threads are resource intensive
  ```java
  ExecutorService executorService = Executors.newCachedThreadPool();
  ```
- **scheduled thread pool executor** - it uses a delay queue, so that the tasks get picked up by the threads after the specified delay or schedule. this means tasks might have to be reordered, which is done by the queue itself. `schedule` can help trigger the task after a certain delay, `scheduleAtFixedRate` can help trigger it like a cron at regular intervals, while `scheduleWithFixedDelay` can help schedule the next task a fixed time period after the previous task was completed
  ```java
  ScheduledExecutorService executorService = Executors.newScheduledThreadPool(5);
  executorService.schedule(
    () -> System.out.println("hi from " + Thread.currentThread().getName()),
    5,
    TimeUnit.SECONDS
  );
  ```
- **single thread pool executor** - like a fixed thread pool executor with a pool size of one. the advantage is that, for e.g., all the tasks will be run in order of creation
- all thread pool executors create new threads if the previous thread is killed for some reason
- there are a variety of parameters that can be configured on the executors
- **core pool size** - minimum number of threads that are always kept in the pool
- **max pool size** - maximum number of threads that can be present in the thread pool. it has the value `Integer.MAX_VALUE` by default for the cached and scheduled thread pool executors, and the same value as the core pool size for the fixed and single thread pool executors
- **keep alive timeout** - the time till an idle thread is kept in the pool, after which it is removed. keep alive is only applicable to cached and scheduled thread pool executors, since in fixed and single thread pool executors, the number of threads does not change
- note that the keep alive timeout does not remove the core pool threads. this behavior can however be changed using `allowCoreThreadTimeOut`
- **queue** - the different types of executors use different queues based on their requirements. the queues also need to be thread safe
  - e.g. fixed and single thread pool executors have a fixed number of threads, so there can potentially be an unbounded number of tasks that get queued up, because of which they use a `LinkedBlockingQueue`
  - the cached thread pool spawns a number of threads equal to the number of tasks, so it uses a `SynchronousQueue`, which only needs to hold one task
  - the scheduled thread pool uses a `DelayedWorkQueue` so that the tasks are returned from the queue only if the condition of the cron etc. is met
- **rejection handler** - assume all threads are occupied and the queue is full. in this case, the thread pool will reject the task that it gets. how it rejects the task is determined using the rejection policy. 
the different rejection policies are - + - **abort** - submitting the new task throws `RejectedExecutionException`, which is a runtime exception + - **discard** - silently discard the incoming task + - **discard oldest** - discard the oldest task from the queue to add this new task to the queue + - **caller runs** - requests the caller thread itself to run this task +- till now, to obtain an instance of `ExecutorService`, we were using static methods on `Executors`. we can also use `new ThreadPoolExecutor()` and then pass our own core pool size, queue, etc. configuration parameters as the constructor arguments +- we need to shut down the executor in a clean way. we can initiate it using `executorService.shutdown()`. this will throw the `RejectedExecutionException` for any new tasks that are submitted to it, but at the same time will complete both all the currently executing tasks and queued up tasks +- if we run `shutdownNow`, it will return `List` for the queued up tasks and clear the queue, but complete all the currently executing tasks +- `awaitTermination(timeout)` will terminate the tasks if they are not completed by the specified time +- we also have helper methods like `isShutdown()` and `isTerminated()` +- if a task wants to return a value, we use `Callable` instead of `Runnable` +- however, the `execute` method on `ExecutorService` only works if we implement `Runnable` interface. if we implement `Callable` interface, we have to use `submit` +- the return value of `Callable` is wrapped around a `Future`. `future.get()` is a blocking call i.e. the thread calling it will not move ahead until the future resolves. so, we can also use `future.get(timeout)` + ```java + ExecutorService executorService = Executors.newFixedThreadPool(1); + + Future result = executorService.submit(() -> { + Thread.sleep(4000); + return (new Random()).nextInt(); + }); + + Thread.sleep(3000); + // this simulates that we were able to perform 3 seconds worth of operations + // in the main thread while the task thread was performing its blocking stuff + + System.out.println("result = " + result.get()); + ``` +- we can cancel the task using `future.cancel(false)`. this means that the thread pool will remove the task from the queue. the false means that if a thread is already running the task, it will not do anything. had we passed true, it would have tried to interrupt the task +- we also have helper methods like `future.isDone()` and `future.isCancelled()` +- suppose we have a list of items, and for each item, we want to perform a series of processing + ```java + Future package$ = executorService.submit(() -> pack(order)); + Future delivery$ = executorService.submit(() -> deliver(package$.get())); + Future email$ = executorService.submit(() -> sendEmail(delivery$.get())); + ``` + notice how the calling thread is blocked by all `get` of future. instead, we could use - + ```java + CompletableFuture.supplyAsync(() -> pack(order)) + .thenApply((package) -> deliver(package)) + .thenApply((delivery) -> sendEmail(delivery)) + // ... + ``` +- in the above case, we have specified a series of steps to run one after another and since we do not care about the results in our main thread, the assigning of tasks to threads is managed by java itself. the main thread is not paused by the get calls. 
notice how we also do not need to specify any executor +- if we use `thenApplyAsync` instead of `thenApply`, a different thread can be used to execute the next operation instead of the previous one +- internally, `CompletableFuture` uses fork join pool, but we can specify a custom executor as well, e.g. `thenApplyAsync(fn, executor)` + +### Race Condition + +- **race condition** - happens where **resource is shared** across multiple threads + ```java + public class SharedResourceProblem { + + public static void main(String[] args) throws Exception { + + Integer count = 10000000; + Counter counter = new Counter(); + + Thread a = new Thread(() -> { + for (int i = 0; i < count; i++) { + counter.increment(); + } + }); + + Thread b = new Thread(() -> { + for (int i = 0; i < count; i++) { + counter.decrement(); + } + }); + + a.start(); b.start(); + a.join(); b.join(); + + System.out.println("shared resource value = " + counter.getCount()); + // shared resource value = 15 + } + } + + class Counter { + + private int count = 0; + + public void increment() { + count += 1; + } + + public void decrement() { + count -= 1; + } + + public int getCount() { + return count; + } + } + ``` +- the `resource += 1` and `resource -= 1` operations are not atomic, it comprises of three individual operations - + - getting the original value + - incrementing it by one + - setting the new value +- so, it might happen that 2 operations of the increment were performed, then all three operations of decrement were performed and finally, the last operation of increment was performed, thus resulting in the "lost update" kind of problem +- solutions - identifying critical sections and use locks, make operations atomic, etc + +### Synchronized + +- we can wrap our code blocks with a **critical section**, which makes them atomic. this way, only one thread can access that block of code at a time, and any other thread trying to access it during this will be suspended till the critical section is freed +- say we use `synchronized` on multiple methods of a class +- once a thread invokes one of the synchronized method of this class, no other thread can invoke any other synchronized method of this class. this is because **using synchronized on a method is applied on the instance (object) of the method** +- the object referred to above is called a **monitor**. only one thread can acquire a monitor at a time +- method one - prefix method signature with synchronized (refer the counter example earlier. the shared resource print would now print 0) + ```java + public synchronized void increment() { + // ... + } + ``` +- another method is to use synchronized blocks + ```java + synchronized (object) { + // ... + } + ``` +- using blocks, the code is much more flexible since we can have different critical sections locked on different monitors +- if using synchronized on methods, two different methods of the same class cannot be executed in parallel - the monitor there is the instance itself +- however, when using synchronized blocks, we can do as follows inside different methods of the same class - + ```java + Object lock1 = new Object(); + Object lock2 = new Object(); + + // ... + + synchronized(lock1) { + // ... + } + + synchronized(lock2) { + // ... + } + ``` +- note - reduce critical section size for better performance + +### Atomic Operations + +- so, **assignment to references and primitive values in java are atomic** + - `this.name = name` inside for e.g. 
a constructor is atomic + - `int a = 8` is atomic +- however, an **exception** in this is assignment to longs and doubles. since it is 64 bit, it happens in 2 operations - one assignment for the lower 32 bit and another one for the upper 32 bit +- the solution is to declare them with **volatile**, e.g. `volatile double a = 1.2` +- using volatile makes operations on longs and doubles atomic +- also, java has a lot of atomic classes under `java.util.concurrent.atomic` as well +- remember - when we use volatile, we make assignment atomic, not operations like `a++` atomic +- my doubt probably cleared - then what is the use for e.g. `AtomicReference`, if assignment to reference is already an atomic operation? we can do as follows (a metric example discussed later) - + ```java + AtomicReference state$ = new AtomicReference<>(); + state$.set(initialValue); + + State currentState = state$.get(); + State newSate = computeNewState(); + Boolean isUpdateSuccess = state$.compareAndSet(currentState, newState); + ``` + +### Data Race + +- remember - race condition and data race are two different problems +- **data race** - when the order of operations on variables do not match the sequential code we write. this happens mostly because there are optimizations like prefetching, vectorization, rearranging of instructions, etc + ```java + class Pair { + + private int a = 0; + private int b = 0; + + public void increment() { + a++; + b++; + } + + public void check() { + if (b > a) { + System.out.println("well that doesn't seem right..."); + } + } + } + ``` + calling the class - + ```java + Pair pair = new Pair(); + + Thread t1 = new Thread(() -> { while (true) pair.increment(); }); + Thread t2 = new Thread(() -> { while (true) pair.check(); }); + + t1.start(); t2.start(); + t1.join(); t2.join(); + ``` +- our expectation is that since b is read before a and a is incremented before b, there is no way even with a race condition that b can be bigger than a. however, due to data race, we do hit the print statement +- data race is also where we can use `volatile`. **volatile guarantees the order of instructions being executed** + ```java + private volatile int a = 0; + private volatile int b = 0; + ``` +- this is called the **visibility problem** +- basically, the two threads have their own **local cache**, but also have a **shared cache**. they write the value to the local cache, but this does not + - either update the shared cache + - or the second thread's local cache does not refresh its value from the shared cache +- **however, when we use volatile, it refreshes / synchronizes both the shared cache and the local cache of all threads** +- basically, code before access to a volatile variable gets executed before it, and code after the access to a volatile variable after. this is called the happens before relationship +- while we could have just used synchronized for both the methods above, realize the advantage of using volatile over synchronized. with synchronization, we lose out on the multithreading, since our functions would have been invoked one at a time. in this case, the two methods are still being invoked concurrently +- if we have n cores, for each core we have a register. then we have an associated l1 cache on top of each register. l2 cache can be shared across multiple cores, and finally we have only one l3 cache and ram
+ ![multithreading](/assets/img/java/multithreading.drawio.png) +- **java memory model** - it is an enforcement that jvm implementations have to follow so that java programs have similar behavior everywhere, and the different optimizations of instructions, cache, etc. do not affect the functioning of the program + +### Locking Strategies and Deadlocks + +- **coarse-grained locking** - meaning we use one lock for everything, just like having synchronized on all methods, not performant. its counterpart is **fine-grained locking** +- coarse grained locking example - make all methods of the class synchronized +- cons with fine-grained locking - we can run into deadlocks more often +- conditions for a deadlock - + - **mutual exclusion** - only one thread can hold the resource at a time + - **hold and wait** - the thread acquires the resource and is waiting for another resource to be freed up + - **non-preemptive** - the resource is released only when the thread is done using it and another thread cannot acquire it forcefully + - **circular wait** - a cyclic dependency is formed where threads wait for resources acquired by each other +- one way to prevent deadlocks is to acquire locks in our code in the same order. this need not be considered when releasing the locks +- another way can be to use techniques like `tryLock`, `lockInterruptibly`, etc (discussed later) +- reentrant lock - instead of having a synchronized block, we use this reentrant lock + ```java + Lock lock = new ReentrantLock(); + ``` +- unlike synchronized where the block signals the start and end of the critical section, locking and unlocking happens explicitly in case of reentrant locks +- to avoid deadlocks caused by for e.g. the method throwing exceptions, we should use it in the following way - + ```java + lock.lock(); + try { + // critical section + } finally { + lock.unlock(); + } + ``` +- it provides a lot of methods for more advanced use cases like `getOwner`, `getQueuedThreads`, `isHeldByCurrentThread`, `isLocked`, etc +- the name `Reentrant` comes from the fact that the lock can be acquired by the thread multiple times, which means it would have to free it multiple times as well, e.g. think about recursive calls. we can get the number of times it was acquired using `getHoldCount` +- another benefit of using reentrant locks is **fairness** - e.g. what if a thread repeatedly acquires the lock, leading to the starving of other threads? we can prevent this by instantiating it using `new ReentrantLock(true)` +- note that introducing fairness also has some overhead associated with it, thus impacting performance +- if we do not set to true, what we get is a **barge in lock** i.e. suppose there are three threads waiting for the lock in a queue. when the thread originally with the lock releases it, if a new thread not in the queue comes up to acquire the lock, it gets the lock and the threads in the queue continue to stay there. however, if we had set the fairness to true, the thread with the longest waiting time gets it first +- if the lock is not available, the thread of course goes into the suspended state till it is able to acquire the lock +- we can use `lockInterruptibly` - this way, another thread can for e.g. call `this_thread.interrupt()`, and an interrupted exception is thrown. this "unblocks" the thread to help it proceed further. 
had we just used lock, the wait would have been indefinite + ```java + try { + lock.lockInterruptibly(); + } catch (InterruptedException e) { + // cleanup and exit + } + ``` +- similar to above, we also have the `tryLock` method, which returns a boolean that indicates whether a lock was successfully acquired. it also accepts timeout as a parameter, what that does is self-explanatory +- this can help, for e.g. in realtime applications to provide feedback continuously without pausing the application entirely + ```java + while (true) { + if (lock.tryLock()) { + try { + // critical section + } finally { + lock.unlock(); + } + } else { + // some logic + } + // some logic + } + ``` +- so, we saw how reentrant lock, which while works like synchronized keyword, has additional capabilities like telling current owner and locking using different strategies like `lockInterruptibly` and `tryLock` +- when locking till now, we used mutual exclusion to its fullest. but, we can be a bit more flexible when the shared resource is just being read from and not written to +- multiple readers can access a resource concurrently but multiple writers or one writer with multiple readers cannot +- this is why we have `ReentrantReadWriteLock` + ```java + ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + Lock readLock = lock.readLock(); + Lock writeLock = lock.writeLock(); + ``` +- fairness in `ReentrantReadWriteLock` works the same way as `ReentrantLock`, except that if the thread waiting for the longest time was a reader, all reader threads in the queue are freed up to read +- of course, base decisions off of type of workloads - if workload is read intensive, read write lock is better, otherwise we might be better off using the normal reentrant lock itself + +### Inter Thread Communication + +- **semaphore** - it helps restrict number of users to a resource +- remember - locks only allow one user per resource, but semaphores allow multiple users to acquire a resource +- so, we can call a lock a semaphore with one resource + ```java + Semaphore semaphore = new Semaphore(number_of_permits); + ``` +- when we call `semaphore.acquire()` to acquire a **permit**, and the number of permits reduces by one. if no permits are available at the moment, the thread is blocked till a resource in the semaphore is released +- similarly, we have `semaphore.release()` +- optionally, i think both `acquire` and `release` accept n, the number as an argument which can help acquire / release more than one permit +- another major difference from locks - there is **no notion of owning thread** in semaphores unlike in locks - e.g. a semaphore acquired by thread a can be released by thread b. so, thread a can acquire it again without having ever released it +- semaphores are a great choice for producer consumer problems. producer consumer problem using semaphores - + - we need a lock so that multiple threads cannot touch the queue at one go + - we start with the full semaphore being empty and the empty semaphore being full, since there are no items initially + - look how we use semaphore's philosophy to our advantage - consumer threads acquire full semaphore while producer threads release it + - my understanding of why we need two semaphores - e.g. if we only had full semaphore - producer releases it and consumer acquires it - how would we have "stopped" the producer from producing when the rate of production > rate of consumption? 
its almost like that the two semaphores help with **back pressure** as well + ```java + Integer CAPACITY = 50; + Semaphore empty = new Semaphore(CAPACITY); + Semaphore full = new Semaphore(0); + Queue queue = new ArrayDeque<>(CAPACITY); + Lock lock = new ReentrantLock(); + ``` +- producer code - + ```java + while (true) { + empty.acquire(); + Item item = produce(); + lock.lock(); + queue.add(item); + lock.unlock(); + full.release(); + } + ``` +- consumer code - + ```java + while (true) { + full.acquire(); + lock.lock(); + Item item = queue.poll(); + lock.unlock(); + consume(item); + empty.release(); + } + ``` +- some different inter thread communication techniques we saw till now - + - calling `interrupt` from one thread on another thread. this is then further used in techniques like `lockInterruptibly` + - calling `join` for a thread to wait for another thread to complete its job + - using `acquire` and `release` on semaphore +- **conditions** - semaphores are a special type of condition, that try checking if number of permits > 0 +- flow - + - one thread **checks a condition**, and goes to sleep if the condition is not met + - a second thread can "mutate the state" and **signal** the first thread to check its condition again + - if the condition is met, the thread proceeds, else it can go back to sleep +- note - conditions come with a lock, so that the "state" being modified can be wrapped with a critical section +- condition - conditions are always associated with a lock +- note - when we call `await` on the condition, it also releases the lock before going to sleep, so that the second thread described in the flow above can acquire the lock to mutate the state +- when another thread runs `signal` on the condition, the threads waiting for the condition wake up +- placing the condition inside the while loop helps so that even if signalled, it will again start waiting if the condition is not met yet +- also, even though the thread which was waiting gets signaled to wake up, it also needs to be able to acquire the lock again, i.e. the other threads modifying state need to release the lock + - first thread - + ```java + ReentrantLock lock = new ReentrantLock(); + Condition condition = lock.newCondition(); + + lock.lock(); + try { + while (condition x is not met) { + condition.await(); + } + } finally { + lock.unlock(); + } + ``` + - second thread - + ```java + lock.lock(); + try { + // modify variables used in condition x... + condition.signal(); + // despite signalling, thread one does not wake up, we need to unlock the lock first + } finally { + lock.unlock(); + } + ``` +- conditions also have advanced methods like - + - `await(timeout)` - just like locks have timeouts to prevent indefinite waiting + - `signalAll` - using `signal`, only one of all the threads waiting on the condition wake up, `signalAll` wakes all of them up +- the class Object, and therefore all objects have methods `wait`, `notify` and `notifyAll` +- therefore, without using any special classes - + - simulate **conditions** using `wait`, `notify` and `notifyAll` + - simulate **locks** using `synchronized` +- note - recall how when using conditions we were wrapping it via locks. we need to do the same thing here i.e. wrap using synchronized block in order to be able to call notify + - first thread - + ```java + synchronized (this) { + while (condition x is not met) { + wait(); + } + } + ``` + - second thread. 
my understanding - but needs to happen on same object and inside different method - + ```java + synchronized(this) { + // modify variables used in condition x... + notify(); + } + ``` +- when we call `wait` on an object, the thread it was called on continues to be in waiting state until another thread calls `notify` on that object +- `notify` will wake up any random thread that was sleeping, and to wake up all threads we can use `notifyAll` +- if we think about it, the `lock.lock()` and `lock.unlock()` are the starting and ending of `synchronize` blocks respectively, `condition.await()` is like `wait()` and `condition.signal()` like `notify()` +- introducing locks can make our code more error-prone, more subject to deadlocks etc. however, it makes the code more flexible, e.g. unlike synchronized blocks which have to exist within a single method, locks can be acquired and freed from different methods +- using locks result in issues like deadlocks if coded improperly +- our main objective is to execute instructions as a single hardware operation +- we can achieve this by using Atomic classes provided by java + ```java + AtomicInteger count = new AtomicInteger(initialValue); + count.incrementAndGet(); + ``` +- recall how we had discussed that `a = a + 1` actually consisted of three atomic operations, which has all been condensed down into one using these java helper classes +- so, recall the counter example in shared resource earlier, and how we had solved it using synchronized. we can now get rid of the `synchronized` and implement it as follows - + ```java + public void increment() { + count.incrementAndGet(); + } + ``` +- the disadvantage of using these classes is of course that only each operation by itself is atomic, a series of such calls together is not atomic, so it may be good only for simpler use cases +- a lot of operations use `compareAndSet` underneath, and we have access to it to. it sets the value to the new value if the current value matches the expected value. otherwise, the old value is retained. it also returns a boolean which is true if the current value matches the expected value + ```java + count.compareAndSet(expectedValue, newValue); + ``` +- `AtomicReference` can be used for any object type to get and set values in a thread safe i.e. atomic way, and we can use methods like compareAndSet on it +- e.g. notice how below, the synchronized keyword is not used for addSample, but we still have a thread safe implementation by using `compareAndSet`. note how and why we use a loop - if the old value stays the same before and after calculating the new value, then update using the new value, else recalculate using the new value using the "new old value" + ```java + class Metric { + + int count = 0; + + int sum = 0; + } + + class MetricAtomic { + + AtomicReference metric$ = new AtomicReference<>(new Metric()); + + public void addSample(int sample) { + Metric currentMetric; + Metric newMetric; + do { + currentMetric = metric$.get(); + newMetric = new Metric(); + newMetric.count = currentMetric.count + 1; + newMetric.sum = currentMetric.sum + sample; + } while (!metric$.compareAndSet(currentMetric, newMetric)); + } + } + ``` +- we often have a lot of tasks but not so many threads. some objects are not thread safe i.e. cannot be used by multiple threads. however, they can be used by multiple tasks being executed on the same thread. 
coding this ourselves can be tough, which is why we have `ThreadLocal`, which basically returns a new instance for every thread, and reuses that instance when a thread asks for that instance again + ```java + public static ThreadLocal car = ThreadLocal.withInitial(() -> new Car()); + ``` +- spring uses the concept of this via `ContextHolder`s in for instance, `RequestContextHolder`, `TransactionContextHolder`, `SecurityContextHolder`, etc. my understanding - since spring follows one thread per-request model, this way, any of the services, classes, etc. that need access to information can get it easily. it is like setting and sharing state for a request + +### High Performance IO + +- what is **blocking io** - when cpu is idle, e.g. when reading from database etc +- such **io bound tasks** block the thread till they return the result +- io bound tasks are very common in web applications etc +- how it works internally -
  ![io bound](/assets/img/java/io-bound-architecture.drawio.png)
  - the controllers like network cards return the response to the dma (direct memory access) controller
  - the dma writes it to the memory
  - the dma notifies the cpu that the response is available
  - the cpu can now access the memory for the variables
- so, during this entire duration, the thread that was processing the request that involved the io task (and thus reaching out to the controller) was sitting idle and thus **was blocked**
- this is why number of threads = number of cores does not give us the best performance when we have more io bound tasks instead of cpu intensive tasks
- this is why we have a "thread per request model" in spring mvc, which i believe caps at 200 threads to prevent out of memory errors etc
- it has caveats like -
  - creating and managing threads is expensive - recall how each thread has its own stack etc
  - the number of context switches increases, which too is an expensive operation - recall **thrashing**
  - assume that there are two kinds of calls a web server supports - one that makes a call to an external service and one that calls the database. assume the external service has a performance bug, which makes the first kind of call very slow. this way, if we had for e.g. 150 requests of the first kind and 150 of the second kind (assume 200 is the default thread pool size in embedded tomcat), the 150 instances of the second call would start to be affected because of the 150 slow instances of the first call
- so, the newer model used by for e.g. spring web flux is **asynchronous** and **non blocking**
- the thread is no longer blocked waiting for the response - a callback is provided which is called once the request is resolved (a small sketch of this style is below)
- so now, we can go back to the **thread per core** model - which is much more optimal
- there can be problems like **callback hell** etc, which are solved by using libraries like project reactor for a reactive style of programming, which is more declarative to write
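- as a rough illustration of this callback style (my own sketch, the slow external call is just simulated with a sleep) - the calling thread registers a callback and moves on instead of blocking -
  ```java
  import java.util.concurrent.CompletableFuture;

  class NonBlockingSketch {

    public static void main(String[] args) {
      CompletableFuture<String> response = CompletableFuture.supplyAsync(NonBlockingSketch::slowExternalCall);

      // register a callback instead of blocking - the main thread is free immediately
      response.thenAccept(body -> System.out.println("callback received: " + body));

      System.out.println("main thread is free to do other work...");
      response.join(); // only so this demo does not exit before the callback runs
    }

    private static String slowExternalCall() {
      try {
        Thread.sleep(1000); // pretend this is a slow http / database call
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
      return "response from external service";
    }
  }
  ```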
### Virtual Threads

- till now, the `Thread` class we saw was actually a wrapper around an actual os thread
- these are also called **platform threads** - since they map one to one with os threads
- **virtual threads** - they are not directly related to os threads. they are managed by the jvm itself
- this makes them far less resource intensive
- the jvm manages a pool of platform threads, and schedules the virtual threads on these platform threads one by one
- once a virtual thread is **mounted** on a platform thread, that platform thread is called a **carrier thread**
- if a virtual thread cannot progress, it is **unmounted** from the platform thread and the platform thread picks up a new virtual thread
- this way, the number of platform threads stays small and is influenced by the number of cores
- just like in reactive programming, the heavy context switching overhead goes away - frequent context switching of platform (and hence os) threads is replaced by the much cheaper mounting and unmounting of virtual threads
- creation techniques -
  ```java
  Runnable runnable = () -> System.out.println("from thread: " + Thread.currentThread());

  new Thread(runnable).start(); // platform thread (implicit)
  // from thread: Thread[#19,Thread-0,5,main]

  Thread.ofPlatform().unstarted(runnable).start(); // platform thread (explicit)
  // from thread: Thread[#20,Thread-1,5,main]

  Thread.ofVirtual().unstarted(runnable).start(); // virtual thread
  // from thread: VirtualThread[#21]/runnable@ForkJoinPool-1-worker-1
  ```
- note - virtual threads are only useful when we have blocking io calls, not when we have cpu intensive operations
- this is because unlike the usual model where our thread had to sit idle for the blocking call, the platform thread never stops here and is always working - it is the virtual thread that sits idle - and hence we optimize our cpu usage because we are using our platform threads optimally
- so, developers still write the usual blocking code, which simplifies coding, as compared to say reactive programming
- underneath, the blocking calls have been refactored for us to make use of virtual threads so that the platform threads are not left sitting idle
- e.g. the cached thread pool's replacement is the **new virtual thread per task executor** - we do not have to create pools of a fixed size - we use a thread per task model and all the complexity is managed by the jvm for us bts
- when we use normal threads for a blocking call, e.g. via jpa, the blocked thread cannot be used for anything else. what we can do is use context switching to utilize the cpu better. however, this model meant we needed a lot of platform threads, and managing them, context switching between them, etc. has a lot of overhead, which is why embedded tomcat for instance had a cap of about 200 threads. now with virtual threads, no such cap is needed, so we can use the cached thread pool executor equivalent, but here there would never be out of memory issues like with a cached thread pool executor, since virtual threads are very lightweight
- some notes -
  - virtual threads are always daemon, and making them non daemon will throw an exception
  - virtual threads do not have a concept of priority
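- a minimal sketch of that executor (my own example, requires java 21) - submit a large number of blocking tasks without worrying about pool sizing -
  ```java
  import java.util.concurrent.ExecutorService;
  import java.util.concurrent.Executors;

  class VirtualThreadExecutorSketch {

    public static void main(String[] args) {
      // one new virtual thread per submitted task - no fixed pool size to tune
      try (ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor()) {
        for (int i = 0; i < 10_000; i++) {
          int id = i;
          executor.submit(() -> {
            Thread.sleep(1000); // blocking call - only the virtual thread waits, the carrier thread is freed
            System.out.println("task " + id + " ran on " + Thread.currentThread());
            return id;
          });
        }
      } // close() waits for the submitted tasks to finish
    }
  }
  ```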
## Generics

- what is generics -
  - helps extend java's type system - types now start acting like parameters that we as clients can provide
  - allows a type or method to operate on objects of various types, thus enabling **reusability**. e.g. without generics, we would use overloading, which causes a lot of duplication of logic -
    ```java
    class OverloadingProblem {

      public static Double add(Double a, Double b) {
        return a + b;
      }

      public static Integer add(Integer a, Integer b) {
        return a + b;
      }

      public static void main(String[] args) {
        System.out.println(add(1, 5));
        System.out.println(add(1.2, 5.3));
      }
    }
    ```
  - while providing **compile time safety** - e.g. without generics, we would use type casting, which compiles fine but blows up with a runtime check (exception), and catching errors at compile time > catching them at runtime -
    ```java
    class TypeCastingProblem {

      private static Object item = null;

      public static void setItem(Object item) {
        TypeCastingProblem.item = item;
      }

      public static Object getItem() {
        return item;
      }

      public static void main(String[] args) {
        setItem(1.4);
        Integer item = (Integer) getItem();
      }
    }
    ```
    output -
    ![generics typecasting](/assets/img/java/generics-typecasting.png)
- we use the **diamond operator** for generics
  ```java
  class Pair<K, V> {

    private K key;
    private V value;

    public Pair(K key, V value) {
      this.key = key;
      this.value = value;
    }

    @Override
    public String toString() {
      return "{ " + key + ": " + value + " }";
    }
  }

  class GenericExample {

    public static void main(String args[]) {
      Pair<String, Integer> score = new Pair<>("maths", 85);
      System.out.println(score);
    }
  }
  ```
- **generic method** - my understanding - this is useful when the class itself is not generic / maybe the method and class generics do not mean the same thing, so we can for e.g. use `T` for the class and `V` for the method
  ```java
  class GenericMethod {

    public static <T> void printer(T arg) {
      System.out.println("value is: " + arg);
    }

    public static void main(String args[]) {
      printer(1);
      printer("hello");
    }
  }
  ```
- while the return type above is void, we could have for e.g. returned `T` etc as well
- **bounded generic types** - bound the types that are allowed to be used, to get access to the additional functionality that is present in the types used in these bounds, e.g. only allow `Number` and its subclasses to be used for a generic class containing mathematical utilities
- we use the `extends` keyword to achieve bounded generic types, and the target type should be a subclass of the interface / class mentioned in this clause
  ```java
  public static <T extends Comparable<T>> T calculateMin(T a, T b) {
    return (a.compareTo(b) < 0) ? a : b;
  }
  ```
- e.g. [`copy`](https://docs.oracle.com/javase/8/docs/api/java/util/Collections.html#copy-java.util.List-java.util.List-) is implemented as follows - my understanding - this is to help make use of dynamic polymorphism + bounded types. note - just because we can do `superclass_reference = subclass_reference`, does not mean we can do `List<Superclass> = List<Subclass>`
  ```java
  public static <T> void copy(List<? super T> dest, List<? extends T> src)
  ```
- we can also specify multiple bounds using `&`
- **type inference** - determine the types automatically.
some examples - + - java can automatically guess "the most specific type" that both `String` and `ArrayList` can work with - `Serializable` + ```java + class TypeInference { + + public static T getFirst(T a, T b) { + return a; + } + + public static void main(String[] args) { + Serializable result = getFirst("hello world", new ArrayList()); + } + } + ``` + - we use `List list = new ArrayList<>();` and not `new ArrayList()` + - we use `list.add("name")` and not `list.add("name")` +- note - just because `Number` and `Integer` are related via inheritance, it does not mean `List` and `List` are somehow related as well +- this is the motivation behind **wildcards** + ```java + import java.util.List; + + class Wildcards { + + private static void print(List list) { + list.forEach(System.out::println); + } + + public static void main(String[] args) { + List list = List.of(1, 2, 3); + print(list); + } + } + + // error: incompatible types: List cannot be converted to List + + // solution - notice the use of ? + // private static void print(List list) { ... + ``` +- **upper bounded wildcards** - when we use `?` and `extends`, e.g. allow all lists where the type of element is a subclass of the class specified in the generic method signature +- drawback - e.g. while we can print all elements of the list easily, we cannot add an element to the list - e.g. the list is actually of integer, and we might be trying to add a double to the list. since java cannot identify this problem, it gives a compile time error +- e.g. this works perfectly + ```java + private static void printList(List numbers) { + numbers.forEach(System.out::println); + } + + public static void main(String[] args) { + printList(List.of(1, 2, 3)); + printList(List.of(1.1, 2.2, 3.3)); + } + ``` +- however, if we add the below to the printList method - + ```java + private static void printList(List numbers) { + numbers.forEach(System.out::println); + numbers.add(7); + } + ``` +- we get the error below - + ``` + BoundedWildCardsExtends.java:7: error: incompatible types: int cannot be converted to CAP#1 + numbers.add(7); + ^ + where CAP#1 is a fresh type-variable: + CAP#1 extends Number from capture of ? extends Number + ``` +- **lower bounded wildcards** - when we use `?` and `super`, e.g. allow all lists where the type of element is a superclass of the class specified in the generic method signature +- so now, since java knows that the list passed to has elements of supertype of specified type, we can now add elements to the list of that type (dynamic polymorphism) +- drawback - we cannot read from the list - we have to treat the element as type `Object` + ```java + public static void addToList(List list) { + list.add(1.4); + } + + public static void main(String[] args) { + List list = new ArrayList<>(); + list.add(1); list.add("shameek"); + addToList(list); + System.out.println(list); + } + ``` +- use case of wildcards + bounded types - copy elements from one list to another - + ```java + public void copy(List source, List destination) { + source.forEach(destination::add); + } + ``` +- so, we should - + - use "lower bounded wildcards" when we want to perform some kind of mutation + - use "upper bounded wildcards" when we want to read values + - use "type parameters" when we want to do both reading and writing +- one difference between "type parameters" and "wildcards" is that type parameters allow for multiple bounds unlike wildcards, e.g. following is valid - `` +- rule of thumb? - use wildcards when possible, when not possible (e.g. 
we want to influence return type based on arguments), then use type parameters +- **type erasure** - java replaces all generic types we define with either Object, or the bound if a bound is specified +- as a part ofo this, java might introducing **type casting** etc as well +- e.g. the code below - + ```java + List list = new ArrayList<>(); + list.add(1); + Integer ele = list.get(0); + + class Store { T item; } + ``` +- is converted to this code due to type erasure - + ```java + List list = new ArrayList(); + list.add(1); + Integer ele = (Integer) list.get(0); + + class Store { Serializable item; } + ``` + +## Collections + +### List + +- `ArrayList` allows for control over **ordering** of elements +- all items are identified by an **index** +- items are located right next to each other in ram, thus making **random access via index o(1)** +- searching for items based on value is however o(n) +- adding items at the end is o(1) +- adding items at random positions is o(n), since it requires shifting of items by one position +- same logic is applicable for removal of items - o(1) for removing items from the end and o(n) for removing items from arbitrary positions +- size of array lists in java can change **dynamically** - once the amount of memory allocated gets over, a list with memory equal to double the size of the current list is provisioned, and all the items from the current list are copied over to the new list +- however, this ability to resize dynamically comes at a price - it takes o(n) time for this resize + copying over of items to the new location to happen +- however, when instantiating, we can provide the **initial capacity**, so that this resizing does not have to happen often +- disadvantage of array lists - when removing / adding items at random positions, a lot of **shifting** is needed to maintain the **contiguous** nature +- this problem is not there when using `LinkedList` +- since in linked lists, there is only a pointer to the next element that needs to be maintained +- disadvantage - linked lists don't allow random access with given index at o(1) time +- note - linked list in java is optimized - + - implemented as doubly linked list which allows it traversal in both directions + - maintains pointers to both head and tail - e.g. we can do use both `addFirst` and `addLast` at o(1) time +- linked list vs array list performance for adding elements at the beginning - + ```java + import java.util.List; + import java.util.ArrayList; + import java.util.LinkedList; + + class ListPerformance { + + public static void main(String args[]) { + perform("linked list", new LinkedList<>()); + perform("array list", new ArrayList<>()); + } + + private static void perform(String type, List list) { + long start = System.currentTimeMillis(); + for (int i = 0; i < 500000; i++) { + list.add(0, i); + } + long end = System.currentTimeMillis(); + System.out.println("time taken by " + type + ": " + (end - start) + "ms"); + } + } + ``` +- output - + ``` + time taken by linked list: 75ms + time taken by array list: 43375ms + ``` +- note - while we compared linked list to array lists above, as discussed later, if removing or adding to one of the ends, the most performant option we have is array deque, not stacks, not linked lists +- **vector** - **synchronized** implementation of **array list** i.e. all operations like add etc will do acquiring and releasing of lock +- generally, doing this using our own locks might be better, since we get more flexibility, e.g. 
batch multiple operations under one acquiring + releasing of lock +- **stack** - **lifo** structure (last in first out) +- important operations include `push`, `pop` and `peek` +- note - stacks use vectors underneath, so they are inherently synchronized + ```java + Stack stack = new Stack<>(); + stack.push("jane"); + stack.push("jackson"); + System.out.println(stack); // [jane, jackson] + System.out.println(stack.pop()); // jackson + ``` +- to avoid using synchronized version, we can use **array dequeue** instead + +### Queues + +- **queues** - **fifo** structure (first in first out) +- important operations include `add` (enqueue), `remove` (dequeue) and `peek` (retrieve but not remove last element) +- queues are abstract like stack as well - it is implemented using linked lists + ```java + Queue queue = new LinkedList<>(); + queue.add("jane"); + queue.add("jackson"); + System.out.println(queue); // [jane, jackson] + System.out.println(queue.remove()); // jane + ``` +- **priority queue** - objects being stored inside a priority queue should extend the `Comparable` interface +- this helps retrieve items form the structure in the order of their priority +- **dequeue** - double ended queue - o(1) for operating from either side of the collection. it is implemented by array dequeue and just like normal queues, we can implement it using linked lists instead as well +- note - java calls it deque and not dequeue + ```java + Deque dequeOne = new LinkedList<>(); + Deque dequeTwo = new ArrayDeque<>(); + ``` +- my doubt about performance - based on the fact that array dequeue might be using an array underneath, doing the typical "capacity resizing" that we discussed, would we have an even more performant solution if we were to use linked list? be it for implementing stacks or queues, logically performance of linked lists > array dequeues (dynamic capacity resizing issue) > stacks (synchronization issue) +- based on [this answer](https://stackoverflow.com/a/32625029/11885333), apparently not, because the main overhead that comes with linked lists is the extra creation of that node, garbage collection of that node, etc +- so, it is probably safe to conclude that in java, when we are looking for stack or queue implementation, we should use array dequeues almost always (over the stack since it is synchronized, and linked lists since it has memory overhead?) +- also, do not use array lists blindly - if we just have to remove and add elements to either ends, and do not need random access, array dequeues might be better than array lists (inserting at beginning of array list is o(n) and inserting at beginning of array deque is o(1)) + +### Maps + +- key value pairs +- also called **associative arrays** +- with maps, we ensure times of o(1) for **adding**, **removing** and **lookup** +- maps are **unordered** / do not support **sorting** +- the idea is that since **keys** in a map are **unique**, we transform the keys into an index between 0 to length - 1 of array using a **hash function**. then, accessing elements via the given key becomes o(1) - we just need to translate the key into an index using the hash function, and random access of elements in an array is an o(1) operation +- the hash function should be able to handle the type of key - e.g. 
if the key is an integer, using modulo operator with the length of array is enough, if the key is a string then ascii value of characters can be used and so on +- **collision** in hash tables - the hash function we used result in the same value for multiple keys +- **overwrite** - replace current value with new incoming value +- **chaining** - each **bucket** in the hash table can store a linked list. worst case scenario - all keys evaluate to the same value, so the entire map is just a single big linked list stored in one bucket, thus resulting in an o(n) complexity instead of o(1) +- **open addressing** - + - **linear probing** - try finding the next available empty slot - k + 1, k + 2, k + 3, ... disadvantage - **clusters** are formed i.e. elements with same hash are clustered together + - **quadratic probing** - try finding the next available empty sot using a quadratic polynomial - k + 1, k + 4, k + 9, k + 16, ... + - **rehashing** - perform another hashing on the key till an empty slot is found - h(h(h....(x))) +- so actually, worst case in hash tables for all operations - insertions, deletions and lookups are o(n) +- **load factor** - n / m, where n = number of items in the hash table and m = size of the array. if it is close to 1, the probability of collision will increase +- so, we can also do **dynamic resizing** of hash tables. disadvantage - this resizing is an o(n) operation +- in java, for `HashMap`, when the load factor becomes around 0.75, the dynamic resizing happens +- however, hash maps cannot be used in multithreaded scenarios, since they are not synchronized +- some important methods available in maps - `keySet()`, `entrySet()`, `values()` +- auto generated hash code example - look at how a prime number is used to generate a function with less collision chances + ```java + class Person { + + private Integer age; + + private String name; + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((age == null) ? 0 : age.hashCode()); + result = prime * result + ((name == null) ? 0 : name.hashCode()); + return result; + } + } + ``` +- note - the `equals` needs to be overridden as well. it might happen that due to chaining discussed earlier, multiple items end up in the same bucket of hash table. at that point, java might need to be able to differentiate between two different elements of the same hash +- so basically, java uses both chaining and dynamic resizing based on load factor by the looks of it +- `LinkedHashMaps` vs `HashMaps` - **linked hash maps** use a doubly linked lists underneath to track the "order of insertion", so the keys are basically ordered according to insertion time + ```java + Map hashMap = new HashMap<>(); + hashMap.put("aaa", 1); hashMap.put("bbb", 2); hashMap.put("ccc", 3); + System.out.println(hashMap); // {aaa=1, ccc=3, bbb=2} + + Map linkedHashMap = new LinkedHashMap<>(); + linkedHashMap.put("aaa", 1); linkedHashMap.put("bbb", 2); linkedHashMap.put("ccc", 3); + System.out.println(linkedHashMap); // {aaa=1, bbb=2, ccc=3} + ``` +- balanced bst (binary search trees) - **red black trees** and **avl trees** +- tree rotations are used to maintain this structure +- **tree maps** use red black trees unlike in hash maps, where an array like structure is used +- so, the keys are stored in sorted order in tree maps. 
notice how it is automatically fr us below - + ```java + Map treeMap = new TreeMap<>(); + treeMap.put("ccc", 3); treeMap.put("bbb", 2); treeMap.put("aaa", 1); + System.out.println(treeMap); // {aaa=1, bbb=2, ccc=3} + ``` +- because it uses trees, operations have a guaranteed complexity of o(log n) in tree maps, whereas operations have mostly o(1) but sometimes o(n) complexity in case of hash maps +- my understanding - since a bst is being used, concept of collision, load factor, etc do not exist in tree maps unlike in hash maps +- so, for huge workloads, while we might have to consider tuning the load factor in case of hash set, we do not have to think about it in case of a tree set +- note - in newer versions, hash maps does not use linked lists (chaining) for each bucket, it uses red black trees for each bucket. this further optimizes the hash maps now +- because of the very nature - using a red black tree per bucket, using an array to store the multiple keys, etc - memory required by hash maps > tree maps +- but remember, reducing time > reducing memory with cloud etc + +### Sets + +- they allow **no duplicates** +- **hash sets** and hash maps work in the same way - a one dimensional array is used to store the elements by performing a hash on the element +- some important functions - `add`, `remove`, `retainAll` (calling `set2.retainAll(set1)` will retain all the elements in the set2 present in set1, and remove other elements from set2) +- so, operations are mostly are o(1) but can be o(log n) in worst case / o(n) when dynamic resizing is needed +- again, **linked hash sets** are same as hash maps, the insertion order would be maintained, which is maintained with the help of an additional doubly linked list +- finally, **tree set** are same as tree maps - maintain elements in a sorted order using a red black tree underneath, thus making operations o(log n) in general +- tree sets come with their own additional methods - e.g. `subset(a, b)` will give us a new set with all values of the set present between a and b, `first` for getting the first element, etc + +### Sorting + +- sort - notice "reverse order" below - + ```java + List list = new ArrayList<>(); + list.add(3); list.add(2); list.add(1); list.add(4); list.add(5); + System.out.println(list); // [3, 2, 1, 4, 5] + + Collections.sort(list); + System.out.println(list); // [1, 2, 3, 4, 5] + + Collections.sort(list, Collections.reverseOrder()); + System.out.println(list); // [5, 4, 3, 2, 1] + ``` +- we can implement `Comparable` on our custom classes to be able to sort them directly - + ```java + class Person implements Comparable { + + String name; + + Integer age; + + Person(String name, Integer age) { + this.name = name; + this.age = age; + } + + public int compareTo(Person person) { + Integer nameDiff = name.compareTo(person.name); + Integer ageDiff = age.compareTo(person.age); + return ageDiff != 0 ? 
ageDiff : nameDiff; + } + + public String toString() { + return "Person(name=" + name + ", age=" + age + ")"; + } + } + + class CustomSortComparable { + + public static void main(String[] args) { + + List people = new ArrayList<>(); + people.add(new Person("ayan", 25)); + people.add(new Person("ruth", 5)); + people.add(new Person("jack", 25)); + people.add(new Person("jane", 25)); + people.add(new Person("mike", 20)); + System.out.println(people); + // [Person(name=ayan, age=25), Person(name=ruth, age=5), Person(name=jack, age=25), Person(name=jane, age=25), Person(name=mike, age=20)] + + Collections.sort(people); + System.out.println(people); + // [Person(name=ruth, age=5), Person(name=mike, age=20), Person(name=ayan, age=25), Person(name=jack, age=25), Person(name=jane, age=25)] + } + } + ``` +- `Comparator` use cases - + - we want to sort using multiple techniques. compareTo can only have one implementation, therefore lacks flexibility + - we want to sort a class not in our control i.e. we cannot change the class to make it implement `Comparable` + - also helps achieve separation of concerns + + ```java + class PersonAgeComparator implements Comparator { + + @Override + public int compare(Person person1, Person person2) { + return person2.age.compareTo(person1.age); + } + } + + Collections.sort(people, new PersonAgeComparator()); + System.out.println(people); + // [Person(name=ayan, age=25), Person(name=jack, age=25), Person(name=jane, age=25), Person(name=mike, age=20), Person(name=ruth, age=5)] + + Collections.sort(people, new PersonAgeComparator().reversed()); + System.out.println(people); + // [Person(name=ruth, age=5), Person(name=mike, age=20), Person(name=ayan, age=25), Person(name=jack, age=25), Person(name=jane, age=25)] + ``` +- using lambdas - for a more functional style, we can use the following syntax as well 🤯 - + ```java + Collections.sort( + people, + Comparator.comparing(Person::getAge).reversed().thenComparing(Person::getName) + ); + System.out.println(people); + // [Person(name=ayan, age=25), Person(name=jack, age=25), Person(name=jane, age=25), Person(name=mike, age=20), Person(name=ruth, age=5)] + ``` + +### Miscellaneous + +- some methods, refer docs for more - + ```java + List list = new ArrayList<>(); + list.add(5); list.add(1); list.add(2); list.add(4); list.add(3); + + System.out.println("original list = " + list); // original list = [5, 1, 2, 4, 3] + + Collections.shuffle(list); + System.out.println("shuffled list = " + list); // shuffled list = [3, 1, 5, 4, 2] + + Collections.reverse(list); + System.out.println("reversed list = " + list); // reversed list = [2, 4, 5, 1, 3] + + System.out.println("min = " + Collections.min(list) + ", max = " + Collections.max(list)); // min = 1, max = 5 + ``` +- since collections are pass by reference, make collections unmodifiable so that clients cannot mutate our collections + ```java + List unmodifiableList = Collections.unmodifiableList(list); + unmodifiableList.add(-1); + // Exception in thread "main" java.lang.UnsupportedOperationException + // at java.base/java.util.Collections$UnmodifiableCollection.add(Collections.java:1091) + // at MiscellaneousMethods.main(MiscellaneousMethods.java:20) + ``` +- if we want to obtain a synchronized version of the normal collections we can use `List synchronizedList = Collections.synchronizedList(normalList)` +- drawback - coarse grained locking is used, all methods use `synchronized` keyword now +- so, better solution is to use concurrent collections that java provides, e.g. 
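  `ConcurrentHashMap`
- as a small sketch of this (my own example) - `ConcurrentHashMap` gives us atomic per-key operations without locking the entire map -
  ```java
  import java.util.Map;
  import java.util.concurrent.ConcurrentHashMap;

  class ConcurrentMapSketch {

    public static void main(String[] args) throws InterruptedException {
      Map<String, Integer> counts = new ConcurrentHashMap<>();

      Runnable task = () -> {
        for (int i = 0; i < 1000; i++) {
          counts.merge("hits", 1, Integer::sum); // atomic update, no external synchronization needed
        }
      };

      Thread t1 = new Thread(task);
      Thread t2 = new Thread(task);
      t1.start(); t2.start();
      t1.join(); t2.join();

      System.out.println(counts); // {hits=2000}
    }
  }
  ```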
## Maven

- maven is a **build tool** for java
- other alternatives are gradle, ant, etc
- **build** - process of building source code into **artifacts** that can be run
- maven has various **plugins** -
  - **jar plugin** to create jars
  - **compiler plugin** to help compile code
  - **surefire plugin** to execute tests
- a plugin has various **goals**. goals represent a unit of work
- to examine a plugin, we can use the following command -
  ```
  mvn help:describe -Dplugin=org.apache.maven.plugins:maven-compiler-plugin
  ```
- **maven coordinates** -
  - **group id** - company / department name. domain name in reverse order is the convention
  - **artifact id** - project name
  - **version** -
  - **packaging** - there are two types of packaging - **jar** (mostly used nowadays and the default) and **war** (web application archive)
  - **classifier** - e.g. we want to build for different versions of java but use the same pom. so, we can use classifiers like `jdk8` and `jdk11`. these then get appended to the version, so people can import the right dependency
- out of these, the **gav** (group id, artifact id and version) helps us uniquely identify the project
- to use such libraries in our projects, we use **repositories**
- there are two kinds of repositories - the **local repository** and **remote repositories**
- basically, maven downloads dependencies from remote repositories and puts them into our local repository
- then, our projects running locally can use the dependencies downloaded into this local repository
- the default location for the local repository is ~/.m2/repository
- the default url for the remote repository is https://repo1.maven.org/maven2/ (called **maven central**)
- we can configure remote repositories via settings.xml - so that we can use our own remote repository - use case - companies maintain their own remote repository, which is a mirror of maven central

### Plugin Management

- a **lifecycle** has **phases**
- a phase has multiple goals attached to it
- if a phase does not have any goals attached to it, it would not be executed
- e.g. the clean lifecycle has three phases - pre-clean, clean and post-clean
- only the clean phase of the clean lifecycle is attached to a goal
- it is attached to the clean goal of the maven-clean-plugin 🤯
- when we say `mvn clean`, we are actually instructing maven to run the clean phase
- when we run a phase, all the phases before it in the lifecycle are executed as well - in this case pre-clean would be executed first (if it has some goals attached to it, which it does not by default) and then the clean phase itself
- we just discussed that we typically invoke `mvn <phase>`, which runs all the goals of all the phases of that lifecycle up to and including the specified phase.
however, we can also invoke a particular goal using the following syntax variations - + - `mvn plugin_group_id:plugin_artifact_id:plugin_version:goal` + - `mvn plugin_group_id:plugin_artifact_id:goal` + - `mvn plugin_prefix:goal` + - `mvn plugin_prefix:goal@execution_id` - while executions help us tie goals to phases, we can also invoke these executions directly + + ```sh + mvn org.apache.maven.plugins:maven-clean-plugin:2.5:clean + mvn org.apache.maven.plugins:maven-clean-plugin:clean + mvn clean:clean + ``` +- there are two kinds of plugins - + - **reporting plugins** - run during site generation + - **build plugin** - run to help build the project +- below - we try to tie the run goal of maven-antrun-plugin to pre-clean and post-clean phases - + ```xml + + org.apache.maven.plugins + maven-antrun-plugin + 3.0.0 + + + 1 + pre-clean + + run + + + + Learning Maven: pre-clean + + + + + + 2 + post-clean + + run + + + + Learning Maven: post-clean + + + + + + + + Learning Maven: standalone invoking + + + + ``` +- so, now when we run post-clean phase, all three phases - pre-clean, clean and post-clean would be run +- configuring a plugin + - a plugin can have multiple execution blocks. each execution block specifies - + - what goal to run + - what phase to tie this goal to + - configuration for the goal + - a configuration element can be specified in the root as well. earlier point was us basically specifying multiple execution blocks, which helped us tie goals to phases. this point here is about specifying configuration in the root block of the plugin. this can be useful when we invoke the plugin:goal directly + - dependencies - if a plugin has dependencies, we can for e.g. specify the version of that dependency using this block + - inherited - by default, the plugin configuration is inherited by the children. we can disable this behavior by setting inherited to false +- id should be unique across all executions for a plugin (not across plugins) +- apart from clean, the two other lifecycles are default and site +- the goals that are triggered for the default lifecycle are dependent on the packaging type (recall packaging type can be one of jar or pom, it is a part of maven coordinates). for jar, this is the table - + + | phase | plugin:goal | + |------------------------|-------------------------| + | process-resources | resources:resources | + | compile | compiler:compile | + | process-test-resources | resources:testResources | + | test-compile | compiler:testCompile | + | test | surefire:test | + | package | jar:jar | + | install | install:install | + | deploy | deploy:deploy | + +- when we specify dependencies in dependency management of parent, child projects can get these dependencies if they want to, but don't get the dependency unless added explicitly. **plugin management** works in the same way - inherit all the configuration related to the plugin specified in the plugin management section of the parent, but do not get it by default unless the plugin is added explicitly +- extra - executing scripts using exec maven plugin! - + ```xml + + exec-maven-plugin + 3.1.1 + org.codehaus.mojo + + + Renaming build artifacts + package + + exec + + + bash + handleResultJars.sh + + + + + ``` + +### Inheritance and Aggregation + +- `` helps determine the xsd (scm schema definition) version to use i.e. 
what elements are allowed in the pom file, how they should be configured, etc +- multiple levels of **inheritance** is supported in pom +- all pom (directly or indirectly) inherit from the [**super pom**](https://maven.apache.org/ref/3.6.3/maven-model-builder/super-pom.html) +- this inheritance helps us extract out common functionality around plugins, plugin configuration, dependencies, etc to a parent pom from which all other projects can inherit +- we can print the effective pom like so - `mvn help:effective-pom` +- my understanding - the parent might be managed separately - + - parent would be downloaded from the remote repository into the local repository, post which it can be used + - for development purpose - build the parent, which will install it in the local repository, and then build the child +- the parent might be managed in the same project, in which we can provide the `relativePath`. understand that this way, we do not have to build the parent project separately like above - +- also, packaging type in parent can be specified to be `pom` instead of relying on the default value i.e. `jar` +- till now, we discussed inheritance. we can also use **aggregation** in maven +- use case - when we run a phase e.g. `mvn clean`, `mvn install`, etc., it gets run for all the child projects as well +- not only that - in aggregate projects, if the child projects depend on each other, maven can determine the right order to build them in for us automatically +- we can also use the same pom for both aggregation and inheritance +- notes about versions - + - version property of parent gets inherited by the children as well + - for specifying the version of parent in the child, we use `${revision}` + - for specifying interdependencies between children, we use `${project.version}` +- based on everything above, a simple multi module setup - + - parent - + ```xml + + 4.0.0 + + org.apache.maven.ci + ci-parent + ${revision} + + + 1.0.0-SNAPSHOT + + + + child1 + child2 + + + ``` + - child - + ```xml + + 4.0.0 + + + org.apache.maven.ci + ci-parent + ${revision} + ../pom.xml + + + org.apache.maven.ci + ci-child + + + + org.apache.maven.ci + child2 + ${project.version} + + + + ``` + +### Dependency Management + +- we can specify a range of versions using `[3.8, 4.0)` (`[` for inclusive, `(` for exclusive) +- version format - `<>.<>.<>-<>` +- the **qualifier** `SNAPSHOT` is used for unstable projects, which can change frequently +- this way, if we depend on a project with snapshot in its version, we get access to the latest code always +- my understanding - if for e.g. we do not use snapshot - + - if the local repository already has an existing copy, maven would not bother refetching it from the remote repository to refresh the local copy + - probably sometimes the remote repository also would not allow pushing artifacts again against the same version +- bts, this SNAPSHOT is converted to timestamp automatically for us - so, `x.y.z-SNAPSHOT` basically becomes `x.y.z-timestamp`, and thus this way, maven always tries pulling the latest version for us +- maven is able to handle **transitive dependencies** for us - if our project depends on jar a which in turn depends on jar b, maven is able to download jar a and then jar b automatically for us when building the project +- **classpath** - location of classes and packages that our project is dependent on +- the different **dependency scopes** - + - **compile** - include dependency in all classpaths. 
the default if `scope` is not specified explicitly + - **test** - only required for compiling and executing tests, not required when executing therefore need not be included when packaging artifacts + - **runtime** - include dependency when project executes or tests are being run, but do not include them when compiling. e.g. jdbc driver like mysql connector. use case - we as developers will not mistakenly depend on these libraries + - **provided** - dependencies provided by the environment. e.g. we are developing a web application, we would need to depend on the servlet api to compile, but we would not want to include this in the war file, since it would be provided to us by the [servlet container](/posts/spring/#rest) + - **system** - like provided, but the path when compiling is specified manually + ```xml + + io.datajek + some-dependency + 1.0 + system + ${project.basedir}/libs/dep-1.0.jar + + ``` + - **import** - when a dependency is of type **pom** and has the scope of import, it should be replaced by its dependencies in its `dependencyManagement` section +- **dependency mediation** - choosing what version of dependency to use +- default behavior - + - e.g. our project depends on A and B. A depends on D which again depends on E (version x). B directly depends on E (version y). our project would use E (version y), because if we imagine dependencies like a tree, E (version y) is the closest to root + - e.g. our project depends on B and A (B comes first in pom.xml). B depends on E (version x), while A depends on E (version y). our project would use E (version x), because B comes first +- so one technique based on above - if we would like to use version x of E invariably - place version x of dependency E as *early as possible* and *directly* inside the pom. this way, we end up using the verison x of E always +- when adding a dependency, if we use the `exclusion` tag along with it, the dependencies specified in the exclusion tag are excluded from the dependency tree - + ```xml + + io.datajek + project9-projectb + 1 + + + com.google.code.gson + gson + + + + ``` +- this means that we should either expect gson to come as a transitive dependency from another project, or include gson manually inside our pom as another dependency, etc +- lets say our project name is xyz, and we mark a dependency in our pom as **optional** +- it excludes this dependency from being added as a transitive dependency in any project that has xyz itself as a dependency +- **dependency management** section - this way, all the projects in for e.g. a team can specify the versions of dependencies that work well with each other in one place, and all of it gets inherited by all other child projects +- example - + - if a parent has the following section - + ```xml + + + + com.google.code.gson + gson + 2.8.6 + + + + ``` + - the child can skip the version of gson when adding it as a dependency + ```xml + + + com.google.code.gson + gson + + + ``` +- another use case of dependency management section 🤯 - helps with transitive dependencies as well - e.g. if our project has a dependency A, which depends on C (version x), and has a dependency B, which again depends on C (version y). if we add the dependency C (version z) in the dependency management section, version z of dependency is the one that maven uses! + - note - we could also have included dependency C (version z) directly in the dependency section to force maven to use version z (default behavior - closest to the root wins). 
however, if another project added this project as a dependency, even if it was not using dependency C (version z) directly, it would still have it being added to its classpath. this problem would not have happened in the first place if we had added dependency C (version z) in the dependency management section as described earlier + +### Build Portability + +- **build portability** - having consistent ways to build cross environments, machines, teams, etc +- **variables** - variables defined in parent are inherited by children +- however, children can override these variables +- project can be accessed using `project`, e.g. `${project.version}`, `${project.build.sourceDirectory}`, etc. the root element of our pom is `project`, so that is where these variables come from. another very useful one i found - `${project.parent.basedir}` if for e.g. a child project wants to access something from the parent directory +- whatever we define in the properties section can be accessed using the name of the property directly, e.g. `${MyString}` +- java system properties (what we access using `java.lang.System.getProperties()`) can be accessed using `java`, e.g. `${java.home}` +- environment variables can be accessed using `env`, e.g. `${env.PATH}` +- variables in settings.xml can be accessed using `settings`, e.g. `${settings.offline}` +- **profiles** - alternative configuration for overriding default values +- we can specify profiles either in the project specific pom.xml, or in settings.xml, which itself can be machine / project specific + ```xml + + 4.0.0 + + io.datajek + project14 + 1 + Project14 + + + + test + + + + org.codehaus.mojo + exec-maven-plugin + + + my-special-exec + + /Project14/testScript.sh + + + + + + + + + + prod + + + + org.codehaus.mojo + exec-maven-plugin + + + my-special-exec + + /Project14/prodScript.sh + + + + + + + + + + + + + + + org.codehaus.mojo + exec-maven-plugin + 3.0.0 + + + my-special-exec + clean + + exec + + + + + + + + + + ``` +- the most basic way in which we can specify which profile to use - `mvn clean -Ptest` +- another way ofo enabling a certain profile - inside the `profile` section we saw above, we can have an activation section like below - + ```xml + + + testProp + DataJek + + + ``` +- this means that the profile expects the system property testProp to be of value DataJek - `mvn clean -DtestProp=DataJek` +- **archetypes** - a project templating toolkit so that a new project can be created easily with all the for e.g. firm specific standards established in the project from the get go + +## Working of Java + +- java tries releasing changes every 6 months +- not all versions of java are lts (long term support) - java will provide patches and security updates for a long time only for versions marked lts - effort towards maintaining non lts versions would be much less +- open jdk is the underlying java source code, but it is not responsible for building and distributing installable binaries +- there are various vendors sitting on top of it, like adopt openjdk, oracle jdk, and then some providers like amazon have their own jdk distributions which work better with their own aws products + +## File Operations + +- there are two packages in java - java.io and java.nio + - io stands for input output, while nio stands new input output + - this means working with nio related api is much easier. e.g. 
when we use nio's `Paths.get`, it internally takes care of using the right file separator (/ for linux, \ for windows) + - io is blocking in nature, nio is non blocking in nature + - so, for operations related to huge workloads, prefer java.io since huge workloads work better with dedicated threads, while for smaller workloads, prefer java.nio for better performance using the non blocking nature + - nio uses buffers (which allows us to move backward and forward) while io uses streams (which does not allow any such movement) +- `Paths.get` - implemented using varargs + ```java + Path path = Paths.get(".", "test.txt"); + System.out.println(path.toAbsolutePath()); + + // /home/shameek/Desktop/library/java/new_features/./test.txt + ``` +- `normalize`- get rid of redundant bits in the path + ```java + Path normalizedPath = path.normalize(); + System.out.println(normalizedPath); + System.out.println(normalizedPath.toAbsolutePath()); + + // test.txt + // /home/shameek/Desktop/library/java/new_features/test.txt + ``` +- `resolve` - build a new path using an existing path + ```java + Path rootDir = Paths.get("."); + Path file = rootDir.resolve("test.txt"); + System.out.println(file.toAbsolutePath()); + + // /home/shameek/Desktop/library/java/new_features/./test.txt + ``` +- reading all the content at once - + ```java + byte[] bytes = Files.readAllBytes(file); + String content = new String(bytes); + System.out.println(content); + ``` +- reading the lines one by one - + ```java + List lines = Files.readAllLines(file); + for (String line : lines) { + System.out.println("line: " + line); + } + ``` +- creating directories and files - + ```java + Path directoryPath = Paths.get("new_directory"); + Files.createDirectory(directoryPath); + + Path filePath = directoryPath.resolve("new_file.txt"); + Files.createFile(filePath); + ``` +- writing to a file - + ```java + Files.write(filePath, "\nappend me to the file".getBytes(), StandardOpenOption.APPEND); + ``` +- we can use `WatchService` to monitor a directory for events like creation / deletion of files, modification of files, etc + +## Miscellaneous Newer Features + +- `Objects` class (do not confuse with `Object` class) - + - `requireNonNull(T obj)` + - `requireNonNull(T obj, String message)` +- strings are allowed in switch statements, but strings cannot be null, else null pointer exception would be thrown + ```java + private static Boolean isWeekend(String day) { + switch (day) { + case "Saturday": + case "Sunday": + return true; + default: + return false; + } + } + ``` diff --git a/_posts/2023-12-27-spark.md b/_posts/2023-12-27-spark.md new file mode 100644 index 0000000..c44f29f --- /dev/null +++ b/_posts/2023-12-27-spark.md @@ -0,0 +1,1216 @@ +--- +title: Spark +--- + +## Introduction + +- spark - developed at uc berkley as an improvement of hadoop +- it borrowed concepts from hadoop but now works independent of it +- unlike hive etc, spark doesn't convert into the map reduce equivalent +- it is much more performant than hadoop +- it offered much more flexibility when compared to hadoop - + - unlike hadoop which relies on yarn as a cluster manager, it can work with different cluster managers like mesos and kubernetes. cluster manager, resource manager, container orchestrator, etc all mean the same thing + - unlike hadoop which relies on hdfs for storage, it can work with cloud storage options as well +- eventually, it became its own thing as it has much more features +- why spark is popular - we write for e.g. 
sql and all the complexities around distributed processing is abstracted away from us. also, it is a unified platform - all capabilities - including batch processing, stream processing and ml are in one platform +- databricks - founded by original developers of spark. for e.g. it makes deploying spark applications much easier, has an optimized runtime for spark, etc + +## Driver, Executor and Deployment Modes + +- we submit our spark application to spark +- spark creates a master (called **driver** in spark) and slaves (called **executor** in spark) +- the driver will just assign work to the executors, while the executors perform all the heavy tasks +- when we used `local[*]` in the java program, we basically used the **local cluster manager**. this is why we never had to build a jar and submit it to a spark cluster. `*` probably means spark will decide how many threads to use, but we can specify a number as well. a single jvm is used in this case. this is a useful alternative when testing out things in local. both the driver and executors are inside one jvm +- apart from local, the real cluster managers supported by spark include yarn, mesos and kubernetes +- now, there are two **deployment modes** for running spark in an actual cluster - + - **client mode** - the driver will be run on the client side. the client itself will spawn executors in the spark cluster. this is what happens when we use interactive clients like spark-shell. so, the driver dies automatically when the interactive shell is closed + - **cluster mode** - both the driver and the executors will run on the cluster. this is what happens when we submit built jars to a spark cluster +- i think deployment modes are not applicable when using the local cluster manager, since there is no actual cluster over there in the first place, since both driver and executors were inside the same jvm + +## Getting Started + +- download spark from [here](https://spark.apache.org/downloads.html) +- `tar -xvzf spark-3.5.0-bin-hadoop3.tgz` +- pom.xml - the junit configuration was needed because otherwise i was getting "cannot access class sun.nio.ch.DirectBuffer". 
i am using java 8 and latest versions of spark and junit possible + ```xml + + + 4.0.0 + + com.example + spark-batch + 1.0-SNAPSHOT + + + 1.8 + 1.8 + 2.13 + 3.5.0 + 5.10.1 + 3.2.1 + + + + + org.apache.spark + spark-core_${scala.version} + ${spark.version} + + + + org.apache.spark + spark-sql_${scala.version} + ${spark.version} + + + + org.junit.jupiter + junit-jupiter + test + + + + + + + org.junit + junit-bom + ${junit.version} + pom + import + + + + + + + + maven-surefire-plugin + ${surefire.version} + + + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED + + + + + + + ``` +- app - notice how we derive master and file name from args, so that we can use the same spark code for running in both cases - when we use locally installed hadoop in pseudo distributed mode and when we use the local cluster manager + ```java + public class Main { + + private static final Logger log = Logger.getLogger(Main.class.getName()); + + public static void main(String[] args) { + String master = args[0]; + SparkSession spark = SparkSession.builder() + .master(master) + .appName("TablesDemo") + .getOrCreate(); + + log.info("reading file..."); + String fileName = args[1]; + Dataset surveyDf = read(spark, fileName); + + log.info("performing transformations..."); + Dataset countByCountryDf = countByCountry(surveyDf); + + log.info("final stats = " + countByCountryDf.collectAsList()); + + // try (Scanner sc = new Scanner(System.in)) { + // log.info("waiting for user acknowledgement"); + // sc.nextLine(); + // } + } + + protected static Dataset countByCountry(Dataset surveyDf) { + return surveyDf.filter(col("age").lt(40)) + .select("Age", "Country", "state", "Gender") + .groupBy("Country") + .count(); + } + + protected static Dataset read(SparkSession spark, String fileName) { + return spark.read() + .format("csv") + .option("header", true) + .option("inferSchema", true) + .option("path", fileName) + .load(); + } + } + ``` +- we also pause the program for user input when using local cluster manager so that we can view the spark ui - the spark ui would only be visible while the job is running. the spark ui is visible at http://localhost:4040/jobs/ +- we can override defaults at cluster level from ~/spark-3.5.0-bin-hadoop3/conf/. 
this includes files like log4j2.properties.template (for logging), spark-defaults.conf.template (for configuring what we specify via `SparkConf`), spark-env.sh.template (for properties like java home) etc +- writing tests - note how because we had broken our application down into separate chunks using different methods, we were able to unit test our application easily - refactor the transformations into separate methods, which input and output `Dataset`, then simply call this method in the unit test and call `collectAsList` on the output to view it as a list and assert on it + ```java + @TestInstance(TestInstance.Lifecycle.PER_CLASS) + public class MainTest { + + SparkSession spark; + + @BeforeAll + void setup() { + System.out.println("setting up spark..."); + spark = SparkSession.builder() + .master("local[*]") + .appName("Main") + .getOrCreate(); + } + + @AfterAll + void cleanup() { + System.out.println("cleaning up spark..."); + spark.close(); + } + + @Test + void whenCsvIsRead_thenDatasetIsReadSuccessfully() { + Dataset input = Main + .read(spark, "src/main/resources/sample.csv"); + + assertEquals(9, input.count()); + } + + @Test + void whenCountByCountryIsCalled_thenResultIsOk() { + Dataset input = Main + .read(spark, "src/main/resources/sample.csv"); + Dataset output = Main.countByCountry(input); + Map countMap = output.collectAsList().stream() + .collect(Collectors.toMap((a) -> a.getString(0), (a) -> a.getLong(1))); + + assertEquals(4, countMap.get("United States")); + assertEquals(2, countMap.get("Canada")); + assertEquals(1, countMap.get("United Kingdom")); + } + } + ``` +- understand that we could not have directly performed assertions on the dataframe, a dataframe is just partitions of data sitting in different executors. so, we first call `collectAsList()` to get all the data into the driver, and then we can easily perform assertions +- we could also have generated mock data as below, instead of reading from csv in tests like we did above. 
both methods have their own pros and cons imho - generating mock data repeatedly has a lot of code, while reading using a csv means slower test - by mocking data we can generate data specific for each test, while using a csv does help with cleaner code + ```java + @Test + void whenCountByCountryIsCalled_thenResultIsOk_unit() { + StructType schema = new StructType(new StructField[] { + DataTypes.createStructField("Age", DataTypes.IntegerType, true), + DataTypes.createStructField("Gender", DataTypes.StringType, true), + DataTypes.createStructField("Country", DataTypes.StringType, true), + DataTypes.createStructField("state", DataTypes.StringType, true) }); + + List rows = Arrays.asList(new Row[] { + RowFactory.create(37, "Female", "United States", "IL"), + RowFactory.create(44, "M", "United States", "In"), + RowFactory.create(32, "Male", "Canada", "NA") }); + + Dataset input = spark.createDataFrame(rows, schema); + Dataset output = Main.countByCountry(input); + Map countMap = output.collectAsList().stream() + .collect(Collectors.toMap((a) -> a.getString(0), (a) -> a.getLong(1))); + + assertEquals(1, countMap.get("United States")); + assertEquals(1, countMap.get("Canada")); + } + ``` + +### Using Spark Local Cluster Manager + +- launch.json - equivalent of run configurations in intellij + ```json + { + "version": "0.2.0", + "configurations": [ + { + "type": "java", + "name": "Main [Local]", + "request": "launch", + "mainClass": "com.example.spark_batch.Main", + "projectName": "spark-batch", + "args": ["local[*]", "src/main/resources/sample.csv"] + } + ] + } + ``` + +### Using Spark Submit + Hadoop + +- setup hadoop in pseudo distributed mode using [this link](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html) +- namenode format - `~/hadoop-3.3.6/bin/hdfs namenode -format` +- start all components using `~/hadoop-3.3.6/sbin/start-all.sh` +- create initial hdfs directories - `~/hadoop-3.3.6/bin/hdfs dfs -mkdir -p /user/$USER` +- copy the necessary files - + - `~/hadoop-3.3.6/bin/hdfs dfs -put src/main/resources/sample.csv` + - `~/hadoop-3.3.6/bin/hdfs dfs -ls` +- build the jar. note - i am able to work without generating fat jars / use shade plugin for now, but based on the use case, that might be necessary - + ```sh + mvn clean install + ``` +- submitting jobs to spark - note the arguments, we need to specify them explicitly since default value of master is local and default value of deploy-mode is client + ```shell + ~/spark-3.5.0-bin-hadoop3/bin/spark-submit \ + --verbose \ + --class com.example.spark_batch.Main \ + --master yarn \ + --deploy-mode cluster \ + target/spark-batch-1.0-SNAPSHOT.jar \ + sample.csv yarn + ``` +- other important options which we could have sent spark-submit include + - executor-memory, driver-memory - ram + - executor-cores, driver-cores - cpu cores + - num-executors - number of executors + +### Using Spark Shell + +- using interactive clients - `./bin/spark-shell` +- on starting spark shell, we can access the ui at http://localhost:4040/jobs/ +- e.g. we can run the following commands - + ```scala + val df = spark.read.csv("full_path_to_sample.csv") + df.show() + ``` +- note how we did not have to create a spark session manually here, unlike when writing spark applications + +## Dataframe + Hadoop + +- **dataframe** - distributed table with a well defined schema i.e. each column has a specific data type +- working with dataframe is like working with tables in sql +- data stored in for e.g. 
hdfs is broken into smaller **splits**. these splits are of size 128mb by default +- the dataframe too will basically be composed of smaller chunks called **partitions**, where each partition might represent the hdfs split +- my understanding - the fact that spark does everything using memory instead of using files like map reduce is what makes spark more performant when compared to hadoop as well +- above, we talked about storage / data, now we talk about compute +- we submit the job to spark +- spark will then with the help of yarn's resource manager create the application master in one of the worker nodes. recall how containers are used hadoop 2.x onwards, so this application master would be created inside a yarn / hadoop container +- now, the spark driver will run inside this application master container +- then, the spark driver will talk to yarn's resource manager and create more worker containers +- then, spark will spawn executors inside these worker containers +- now, each executor will be responsible for some partition(s) of data, which it loads in its memory +- while doing all this, spark will take care of **rack awareness** i.e. assigning executors the partitions in such a way that there is minimum network transfer between hdfs and executor / container + +## Spark CPU and Memory + +- recall we said that we can dictate how many cores an executor can have when submitting the spark job +- e.g. we say executors should have 4 cores. nowadays, each core can itself be split into virtual cores as well +- this virtual core is called a **slot** in spark +- so, if we have 2 virtual cores per core, and if we assigned our executor 4 cores, it essentially has 8 slots +- a task uses one slot +- so, number of tasks that can run in a cluster = (number of slots in the executor) * (number of executors) +- now, assume that spark computes that it needs 30 tasks but we only have 20 slots in our cluster +- spark is intelligent enough to schedule the 20 tasks and queue the remaining 10 tasks +- my understanding - we can control the "slots" (yes, slots not cores) via `spark.driver.cores` / `spark.executor.cores`, and i think these are same as passing `--driver-cores` / `--executor-cores` to spark-submit +- till now, we talked about cpu, and now we talk about memory +- note for pyspark - + - the driver will have a python component running alongside it. so, if for e.g. using yarn, inside the application master container, there will be a python process and the actual jvm driver + - if we use functionality / libraries of python not available in pyspark, then the executors too will have a python component running alongside them. so, if for e.g. using yarn, inside the container, there will be a python process and the actual jvm executor +- when setting memory limits, we have two variations for both executor and driver - + - `spark.driver.memory`, `spark.driver.memoryOverhead` - my assumption - `spark.driver.memory` is same as passing `--driver-memory` to spark-submit + - `spark.executor.memory`, `spark.executor.memoryOverhead` - my assumption - `spark.executor.memory` is same as passing `--executor-memory` to spark-submit +- the memory variant is for the actual jvm driver / jvm executor, while the memory overhead variant is for non jvm processes (like the one needed when using pyspark) +- so, e.g. we set `spark.executor.memory` to 1gb and `spark.executor.memoryOverhead` to 0.1 + - spark driver would ask yarn for containers having memory 1.1gb. 
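    (as a rough illustration using the same assumed numbers, such a request could come from spark-submit flags like `--executor-memory 1g --conf spark.executor.memoryOverhead=100m`)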
note - this value should of course be lesser than a worker node's physical memory, otherwise we will get exceptions + - so, out of the 1.1gb for a container, 1gb would be allocated for executor jvm process, while the remaining 100mb would be allocated for non jvm processes, like the sidecar needed for pyspark +- there is more to how memory is broken down in spark, it is too much for my brain for now 😛 + +## Job Scheduling + +- there are two sides to this + - job scheduling across different applications - **dynamic resource allocation** + - job scheduling inside the same application - **spark schedulers** +- just covering from theoretical perspective, how to configure this can be found [here](https://spark.apache.org/docs/latest/job-scheduling.html) + +### Dynamic Resource Allocation + +- e.g. we have a spark job that uses up all the resources in our cluster +- now, we submit another small job +- but, this job cannot run since all the resources have already been used up +- a small job has to wait for the large job to complete +- so, spark has two strategies - static allocation and dynamic allocation +- **static allocation** - the default. the driver will ask for all the resources for its executors upfront. it will hold on to them for the entire duration till the entire job is over +- when we asked for some executors via the `num-executors` option, it meant that the spark driver would hold on to these resources for the entire duration of the job +- however, the number of executors the stages actually use can change dynamically +- remember - number of executors used in a stage depends on the number of tasks a stage has +- e.g. if a stage has 20 tasks, and we have executors with 5 slots (and sufficient memory), we will actually be using 20 / 5 = 4 executors +- but clearly, the number of executors actually needed by spark can change across stages +- so, we can instead use **dynamic resource allocation** - where instead of us manually specifying the number of executors to use, it is determined dynamically for every stage +- by default, static allocation is used, but we should consider using dynamic allocation if we are using a shared cluster for multiple jobs + +### Spark Schedulers + +- if our spark application has multiple jobs - + ```java + df1.join(df2).count() + df3.join(df4).count() + ``` +- by default, spark driver would execute this code synchronously. so, first all jobs for the first line would finish and then the all jobs for the second line would start and finish +- however, what if we use multithreading? - e.g. something like this - + ```java + Thread t1 = new Thread(() -> df1.join(df2).count()); + Thread t2 = new Thread(() -> df3.join(df4).count()); + t1.start(); t2.start(); + t1.join(); t2.join(); + ``` +- this means that the jobs for both would be triggered in parallel +- this is what we might actually want as well - what is the point of stalling the second job for the first job? +- however, when kicking off the jobs in parallel like this, they will content with each other for resources +- solution - by default, spark uses the fifo scheduler, but we can ask it to use the fair scheduler as well +- **fifo scheduler** - the first job gets priority. it gets all the resources for itself. then the second job gets the leftover, and would be stalled if not enough resources are available +- **fair scheduler** - assign resources to tasks in a round robin fashion. 
all issues like **starvation** (short job waiting for a long running job) etc are prevented + +## Transformations and Actions + +- spark dataframes are **immutable** +- we tell spark driver the **transformations** we would like to do +- these transformations are simple sql statements - e.g. filter where age > 40, projection of columns, grouping, etc +- each transformation then results in a new dataframe +- transformations can be further categorized into **narrow transformation** and **wide transformation** +- narrow transformation - each partition of data can be processed independently. a transformation on one partition is independent of a transformation on another partition. e.g. filtering +- wide transformation - partitions need to be **repartitioned**. e.g. in group by, all rows belonging to the same group need to be brought into the same partition. this process of repartitioning of data for a wide transformation is called a **shuffle** +- **execution plan** - we write the transformations one by one using a builder pattern. but spark might not execute the operations in the same way - it will construct an execution plan, which is an optimized version of our transformations - e.g. if we filter then use project, it would move the projection before the filtering +- **lazy evaluation** - spark will not execute the transformations immediately - it will build the execution plan described above and wait for us to call an **action**. actions include `read`, `write`, `collect`, `show`. the moment we call an action, the execution plan is triggered, and we see a **job** +- `collect` will basically collect all the data in the driver. so, be mindful of out of memory exceptions when performing this operation + +## Jobs, Stages and Tasks + +- our entire spark application is broken down into **jobs** +- a job is triggered only once an **action** is encountered (recall lazy evaluation) +- jobs are further broken down into **stages** +- stages are further broken down into **tasks** +- so, tasks are the unit of work +- a task basically executes on one **slot** of executor and is responsible for a partition of data +- a task is a bunch of narrow transformations +- all the tasks of a single stage operate in **parallel** +- each wide transformation results in a new stage, due to the repartitioning that is needed +- before the tasks of a next stage start, all tasks of the previous stage should complete, because that was the entire point behind wide transformation - it depends on all the previous stage's partitions and not just one +- when going from one stage to another, since data is being **shuffled** / **repartitioned**, data is temporarily written to a buffer which spark calls **exchange** +- so, the idea probably is to wait for all tasks of a stage to complete and then with the help of exchange, get the right partition of data to the right executor and finally kick off the tasks of the new stage +- this process of copying data from the **write exchange** to the **read exchange** is called **shuffle / sort** + +![job stages tasks](/assets/img/spark/job-stages-tasks.drawio.png) + +## Debugging Spark + +- debugging spark is not easy - all the code we write is first converted into an execution plan and is lazily evaluated +- so, when we place debug pointers in our code, we are just stepping through the driver thread (which is not even doing anything). 
we are not stepping through the executor thread actually performing those transformations +- we can however use **lambda accepting transformations** like `map`, `flatMap`, `forEach`, etc +- when we place debug pointer inside these lambdas, we will be able to see the executor thread performing them +- logs are the best way of debugging a production spark application, which is running in a distributed environment +- first step is to log using the log4j2 libraries that come as a transient dependency from spark libraries, which i did in the code snippet shown earlier +- second step would be to provide the appropriate log configuration like - + - log4j2.properties file to use. this can be cluster wide or application specific, depends on use case + - configure the file and console appenders, specifying file names for the file appenders + - actually locating the log files for the driver vs the executors in the cluster manager + +## Spark Structured APIs + +- a bit of history - + - spark first came up with rdd, which was a better alternative to map reduce + - then spark came up with dataframe api, which was easier to work with + - however, we could not use the regular lambda transformations like `map`, `filter`, etc which rdd had + - so, the idea would be that we would convert between rdd to dataframe to use these, e.g. dataframe has `toJavaRDD()` + - however, on going from dataframe to rdd, we would lose out on the optimizer + - so, spark then came up with dataset api - + - we can use for e.g. java pojos which would give us compile time safety + - it supported the regular lambda transformations + - dataframe is now `Dataset`. row is a generic object, so it does not have the compile time safety unlike if we use pojos + - note - apparently, in java, spark does not have the concept of `DataFrame`, so we should instead use `Dataset` anyway +- rdd stands for **resilient distributed dataset** + - resilient because if for e.g. there is a failure in one of the executors, spark knows how to load the partitions the failed executor was responsible for into a new executor + - distributed because spark partitions the data into smaller chunks and processes them in parallel +- spark calls its dataset api as **structured apis** +- structured apis basically use rdd underneath +- spark asks us to use structured apis where possible, since there is a **catalyst optimizer** (also called **tungsten**) sitting in between structured apis and rdd, so we lose out on the optimizations when using rdd directly +- use rdd only for specific use cases like custom partitioning +- using dataframe (i.e. dataset of row) vs dataset (i.e. dataset of a specific pojo) - + - dataset will have compile time safety - using `.filter((person) -> person.age > 40)` has compile time safety unlike `.where(col("age").gt(40))` + - dataset is less optimal when compared to dataframe - serialization is an important step in distributed computing. dataset serialization will use java serializers, while dataframe serialization will be able to use tungsten underneath, which is more performant +- there is also something called spark sql - we can write a string which will exactly look like sql. this long string would be an alternative to chaining methods in spark dataframe api - + ```java + Dataset inputDf = // read.... 
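  // createOrReplaceTempView registers the dataframe as a temporary view,
  // so the sql string below can refer to it as the "survey" table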
+ inputDf.createOrReplaceTempView("survey"); + Dataset countDf = spark.sql("select Country, count(*) from survey " + + "where age > 40 " + + "group by Country"); + ``` +- this sql works just like dataframe, so there is no performance impact there +- based on everything above, i will probably use dataframe all the way. i would also use the java apis and not sql, since it has a bit better compile time safety / auto complete as compared to writing sql strings +- **spark engine** - this sits on top of the chosen cluster manager. recall how unlike yarn is a part of hadoop, spark does not come with a cluster manager, and supports yarn, mesos, kubernetes. spark engine acts as an interface between spark and the chosen cluster manager + +![spark ecosystem](/assets/img/spark/spark-ecosystem.drawio.png) + +## Execution Plan / Catalyst Optimizer Working + +- the catalyst optimizer works internally in following steps +- or we can say that spark executes the **execution plan** in the following steps - +- generate an ast (abstract syntax tree). any errors in our field names, sql function usage, etc would be caught here +- now we will have a **logical plan** +- perform optimization on our logical plan. the optimization here includes techniques like - + - **predicate pushdown** - push filtering operations earlier to reduce the amount of data transfer + - **partition pruning** - when writing to internal sources, we can specify partitioning scheme, and then there will be a different directory for each partition. this has been discussed in [data sources](#data-sources). predicate pushdown can go up to as early as only reading some partitions of data when loading the data into dataframes + - **projection pruning** - push projection operations earlier to reduce the amount of data transfer +- generate a bunch of **physical plans**, and associate a cost with each of them. e.g. one plan uses shuffle join, another uses broadcast join +- finally, a **cost model** evaluates the most optimal physical plan +- **wholestage code generation** - generate the bytecode to run on each executor + +![execution plan](/assets/img/spark/execution-plan.jpg) + +## Data Sources + +- data **sources** in spark can be external or internal +- **external** - external to spark. some notable ones include + - jdbc data sources - oracle, ms sql, postgres, mysql + - no sql - cassandra, mongo db + - cloud data warehouses - snowflake, redshift + - streaming sources - kinesis, kafka +- **internal** - this can be either **hdfs** or **cloud based storage** e.g. s3 (preferred) +- for internal source, there are several **file formats** which we have to consider. again, spark supports various file formats like parquet, json, csv, avro, etc +- there are two ways to access external sources - + - ingest using external tools to write data from external sources to internal sources. data goes unmodified from different sources into internal sources. then spark reads from these internal sources directly. useful when using spark for batch processing + - make spark directly read from these different external sources +- batch processing prefers first option because for e.g. our db capacity was provisioned with otlp workloads in mind, and might not be optimized for spark based big data workloads. 
thus, it helps decouple the two from performance, security, etc perspective +- stream processing prefers second option +- so basically, while we established the architecture that data from external sources -> some tools -> internal sources -> spark, we can however, directly do data from external sources -> spark
+ ![spark architecture](/assets/img/spark/spark-architecture.drawio.png) +- finally, sinks work in the same way in spark - they can be internal or external +- we use `DataFrameReader` to read from internal / external sources, which we obtain via `spark.read()` +- we specify the type using `format` +- we provide configuration using `option` +- we can also provide a mode, which determines the behavior when spark encounters a **malformed record**. it can be - + - **permissive** (default) - make all columns null and place the record in a new column + - **drop malformed** - ignore the malformed records + - **fail fast** - terminate the program +- schema - + - for file formats like csv, either it can be defined explicitly using `schema` (preferred), or it can infer the schema automatically (prone to errors) + - for file formats like avro / parquet, the schema is a part of the file format itself and therefore spark derives its schema from the file format itself +- so basically, while we can use `schema` for defining the schema explicitly, remember this is applicable only for formats like csv and json, so best case would be to avoid these file formats altogether and try and use parquet / avro formats where possible +- spark has its own data types, and they map to different types specific to the language we use, e.g. we can see how spark types map to java types [here](https://spark.apache.org/docs/3.5.0/sql-ref-datatypes.html#supported-data-types)
+ ![spark to java types](/assets/img/spark/spark-to-java-types.png) +- the last boolean flag specifies whether the field is nullable or not + ```java + StructType schema = new StructType(new StructField[] { + DataTypes.createStructField("FL_DATE", DataTypes.DateType, true), + DataTypes.createStructField("OP_CARRIER", DataTypes.StringType, true), + DataTypes.createStructField("ORIGIN", DataTypes.StringType, true), + DataTypes.createStructField("DEST", DataTypes.StringType, true), + DataTypes.createStructField("CANCELLED", DataTypes.IntegerType, true), + DataTypes.createStructField("DISTANCE", DataTypes.IntegerType, true) }); + + Dataset flightDf = spark.read() + .format("csv") + .option("header", "true") + .option("path", "src/main/resources/flight*.csv") + .option("dateFormat", "M/d/y") + .option("mode", "FAILFAST") + .schema(schema) + .load(); + + flightDf.printSchema(); + flightDf.show(); + ``` +- note how we specified the date format in configuration as well - for column specific configuration, maybe we can use `to_date` etc to convert from string to date type +- writing data - the default format used by spark is parquet if not specified +- the mode can be - + - **append** - append to the existing data + - **overwrite** + - **error if exists** + - **ignore** - write if location is empty, ignore otherwise +- so, when i use the code below - + ```java + flightDf.write() + .format("avro") + .mode("overwrite") + .option("path", "src/main/resources/output/sinks_demo") + .save(); + ``` +- i get the following output -
  ![simple output](/assets/img/spark/simple-output.png)
- note - `df.write()` has return type of `DataFrameWriter` (recall `spark.read()` had return type of `DataFrameReader`)
- note - for spark to avro, i had to add the following dependencies, since avro related dependencies are bundled separately from spark
  ```xml
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-avro_${scala.version}</artifactId>
    <version>${spark.version}</version>
  </dependency>

  <!-- the version property goes under <properties>, e.g. -->
  <!-- <jackson-databind.version>2.15.3</jackson-databind.version> -->
  <dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>${jackson-databind.version}</version>
  </dependency>
  ```
- **partition by** - the code is as follows - `df.write().partitionBy("OP_CARRIER", "ORIGIN")`
- notice how it is chained to `DataFrameWriter` and not `Dataset` unlike [`repartition`](#repartition-and-coalesce) - so, my understanding - partition by is used when we want to write the output and make it optimized for future jobs that might read this output, while repartition can help with optimizing the current job itself
- the columns we partition on are not visible in the output files, because they are essentially part of the directory names!
- note how directories for origin are nested inside directories for carrier
+ ![partition by output](/assets/img/spark/partition-by-output.png) +- we can also chain `maxRecordsPerFile` to the `DataFrameWriter`, just like we chained `partitionBy`. it is useful when there are some partitions that become too big for spark to process. e.g. in the above example, if for carrier nw and origin den, the number of flights were too many, by using this option, this directory too will contain multiple files +- why use **bucketing** - since partitioning results in a unique directory for each value, partitioning by a column having too many unique values might not be a good idea, since it would result in too many directories (partitions) with too less data. so, we can instead use **bucketing** for columns having too many unique values +- how it works - we specify the number of buckets and the column to bucket using. then, spark will do hash(column_value) % number_of_buckets to get the bucket in which the row should be stored +- **sorting** - sorting can further improve the performance - e.g. if we had to perform joins, and the data is already sorted on the columns used for join, we can skip the sort phase in the shuffle join (described later) +- so, just like `partitionBy`, i chained the following to `DataFrameWriter` - + ```java + df.write() + .bucketBy(2, "OP_CARRIER", "ORIGIN") + .sortBy("OP_CARRIER", "ORIGIN") + .mode("overwrite") + .option("path", "src/main/resources/output/sinks_demo/") + .save(); + ``` +- however, i got the following exception on running the above - `'save' does not support bucketBy and sortBy right now` + +### Spark + Hive + +- so, we have to use `saveAsTable`. my understanding - till now, we were simply storing data as normal files, and they were accessible like a regular directory structure, but for bucketing and sorting, we need to bring in "database support" of spark +- my understanding - whatever we discuss in this part has been borrowed from hive +- since spark too has concepts of database and tables, there are two things spark needs to store - + - the **actual data** - this is what we have seen till now, when for e.g. we saw files being stored inside folders (i.e. partitions) + - the **metadata** - the table name, database name, etc. this is stored in something called **metastore**. by default, an in memory implementation is used i.e. the duration of this metastore is the same as the spark session +- there are two kinds of tables in spark - + - **managed tables** - spark will manage both the metadata and the actual data. by default, the actual data is stored inside `spark.sql.warehouse.dir`. when we for e.g. drop a table, both the metadata and the actual data get deleted + - **unmanaged tables** - also called **external tables**. spark will only manage the metadata. when creating a table, we specify the location of the actual data. useful when for e.g. the actual data already exists somewhere and is not managed by us. when we for e.g. drop a table, only the metadata is deleted, and the actual data is untouched +- managed tables are preferred - we can do optimizations like bucketing and sorting. with unmanaged tables, we have to rely on the existing data structure. we need unmanaged tables when we need to perform spark operations on already existing data +- my thought - one technique might be to port data from unmanaged to managed tables for better performance / more flexibility? 
this should ideally again be something spark can do -
  - read from unmanaged tables
  - perform some transformations like sorting and bucketing
  - finally write to managed tables
- we need to add the spark + hive dependency
  ```xml
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_${scala.version}</artifactId>
    <version>${spark.version}</version>
  </dependency>
  ```
- then, chain the hive support in the spark session builder -
  ```java
  SparkSession spark = SparkSession.builder()
      .master("local[*]")
      .appName("TablesDemo")
      .enableHiveSupport()
      .getOrCreate();
  ```
- now, we first create a database (otherwise the default database would be used) -
  ```java
  spark.sql("create database if not exists tables_demo");
  spark.catalog().setCurrentDatabase("tables_demo");
  spark.catalog().listDatabases().show();
  ```
- output of list databases -
  ![list databases](/assets/img/spark/list-databases.png)
- finally, we write a dataframe as follows. note - notice how we do not provide the path parameter, since this is managed table territory, therefore `spark.sql.warehouse.dir` will be used, and we call `saveAsTable("<db_name>.<table_name>")` instead of `save()`
  ```java
  flightsDf.write()
      .bucketBy(2, "OP_CARRIER", "ORIGIN")
      .sortBy("OP_CARRIER", "ORIGIN")
      .mode("overwrite")
      .saveAsTable("tables_demo.bucketed_by");
  ```
- output - notice how two new directories - for metadata (metastore_db) and for the actual data (spark-warehouse) - are created. data is stored inside `<db_name>.db/<table_name>`
+ ![bucket by](/assets/img/spark/bucket-by-output.png) + +## Transformations + +- for transformations, we can either use spark functions like we do in sql, or we can use lambda accepting transformations like `groupByKey` +- for specifying columns in transformations, either use column_name directly as a string, or use `df.col("column_name")`. note - we cannot use both methods in the same transformation +- **udf** or **user defined functions** - register custom functions to use inside spark - + ```java + UserDefinedFunction parse_gender = udf((String gender) -> { + Pattern malePattern = Pattern.compile("^m$|^male$|^m.n$", Pattern.CASE_INSENSITIVE); + Pattern femalePattern = Pattern.compile("^f$|^female$|^wom.n$", Pattern.CASE_INSENSITIVE); + return malePattern.matcher(gender).find() ? "male" + : (femalePattern.matcher(gender).find() ? "female" : "unknown"); + }, DataTypes.StringType); + spark.udf().register("parse_gender", parse_gender); + + Dataset gendersDf = surveyDf + .select("Gender") + .withColumn("gender_cleaned", expr("parse_gender(Gender)")); + + gendersDf.show(); + ``` +- a function for adding a unique identifier to each record - `monotonically_increasing_id`. this number would be unique across all partitions but remember that it would not necessarily be continuous +- usual sql constructs like renaming using `alias`, changing data type using `cast`, etc are available +- `explode` - e.g. our record contains an array field. this field will ensure our result contains a record for each element of the array. e.g. our input has 2 elements in the array for the first record, and 3 elements in the array for the second record. the output will have 5 elements + +### Example + +- input - + + | day | month | year | + |-----|-------|------| + | 28 | 1 | 2002 | + | 23 | 5 | 81 | + | 12 | 12 | 6 | + | 7 | 8 | 63 | + | 23 | 5 | 81 | + +- transformation - + ```java + Dataset cleanedDobDf = dobDf.withColumn("year_parsed", + when(col("year").leq(23), col("year").plus(2000)) + .when(col("year").leq(99), col("year").plus(1900)) + .otherwise(col("year"))) + .withColumn("date", concat_ws("/", col("day"), col("month"), col("year_parsed"))) + .withColumn("parsed_date", to_date(col("date"), "d/M/yyyy")); + ``` +- output - + + | day | month | year | year_parsed | date | parsed_date | + |-----|-------|------|-------------|------------|-------------| + | 28 | 1 | 2002 | 2002 | 28/1/2002 | 2002-01-28 | + | 23 | 5 | 81 | 1981 | 23/5/1981 | 1981-05-23 | + | 12 | 12 | 6 | 2006 | 12/12/2006 | 2006-12-12 | + | 7 | 8 | 63 | 1963 | 7/8/1963 | 1963-08-07 | + | 23 | 5 | 81 | 1981 | 23/5/1981 | 1981-05-23 | + +## Aggregations + +- **simple aggregations** - note the different aggregations carefully, e.g. 
difference between `count("*")` vs `count("Description")` + ```java + Dataset aggDf = inputDf.select( + count("*").alias("total_count"), + count("Description").alias("non_null_description_count"), + countDistinct("InvoiceNo").alias("unique_invoices"), + sum("Quantity").alias("total_quantity"), + avg("UnitPrice").alias("avg_unit_price")); + ``` +- **grouping aggregations** - we can also perform groupings using `groupBy` + ```java + Dataset aggByCountryAndInvoiceDf = inputDf + .groupBy("Country", "InvoiceNo") + .agg(count("Quantity").alias("total_quantity"), + round(sum(col("UnitPrice").multiply(col("Quantity"))), 2).alias("invoice_value")); + aggByCountryAndInvoiceDf.show(); + ``` +- note - when we chained `groupBy`, it returns a `RelationalGroupedDataset`, and when we again chained `agg` to it, it was converted back to our usual `Dataset` +- **window aggregations** - e.g. we need the running total by week for every country. three things to keep in mind for windowing aggregations - + - identify the **partitioning columns** - e.g. here, restart the running total for every country + - identify the **ordering of columns** - e.g. here, ensure that the data is ordered by the week number, week 3's running total = week 1's sale + week 2's sale + week 3's sale, and this is only possible when we order by week + - identify the **window bounds** - e.g. here, it starts at the first record and ends at the current record, like described in the week 3 example above +- example - note - for the bounds, we also have something called `unboundedFollowing`, but for our use case, `unboundedPreceding` and `currentRow` was enough + ```java + WindowSpec windowSpec = Window.partitionBy("country") + .orderBy("week_number") + .rowsBetween(Window.unboundedPreceding(), Window.currentRow()); + + Dataset outputDf = inputDf + .withColumn("running_total", sum("invoice_value").over(windowSpec)); + outputDf.show(); + ``` +- output would be automatically sorted by country and week number based on the window we specified, and it would have the running total column added to it, which automatically resets for every country + +## Joins + +- bringing "left" and "right" dataframe together +- we combine them using the **join expression** and the **join type** + ```java + Dataset orderWithProductsDf = orderDf.join(productDf, + orderDf.col("prod_id").equalTo(productDf.col("prod_id")), + "inner"); + orderWithProductsDf.show(); + ``` +- order schema - (order_id, prod_id, unit_price, qty) +- product schema - (prod_id, prod_name, list_price, qty) +- therefore, the joined table's schema - (order_id, prod_id, unit_price, qty, prod_id, prod_name, list_price, qty) +- note how the joined table's schema contains two columns for quantity + - one is from the product - it probably indicates in stock + - one is from the order - it indicates quantity of product ordered +- assume we wanted to select only some columns (only order's quantity, not product's quantity) - + ```java + Dataset orderWithProductsDf = orderDf.join( ... ) + .select("order_id", "prod_name", "unit_price", "qty"); + ``` +- we get the following exception - `[AMBIGUOUS_REFERENCE] Reference 'qty' is ambiguous, could be: ['qty', 'qty'].` +- how it works - we pass column names, internally spark converts to it the right identifier. 
when we pass qty, it probably finds two identifiers and hence gets confused +- some solutions - + - rename columns before joining (`withColumnRenamed`) + ```java + productDf = productDf.withColumnRenamed("qty", "product_qty"); + ``` + - drop one of the ambiguous columns - + ```java + Dataset orderWithProductsDf = orderDf.join( ... ) + .drop(productDf.col("qty")) + .select("order_id", "prod_name", "unit_price", "qty"); + ``` + - specify explicitly which dataframe's quantity to use - notice the end of the select clause + ```java + Dataset orderWithProductsDf = orderDf.join( ... ) + .select(col("order_id"), col("prod_name"), col("unit_price"), orderDf.col("qty")); + ``` +- outer joins - e.g. when we use an outer join like left, we might receive nulls for some columns. to get rid of the nulls, we can use `coalesce`, which will set the final value to the first non null value from the list it receives. e.g. below, we do a left join to get all the orders. prod_name comes from the product dataframe. if one of the products are missing, prod_name would be null. so, we tell spark to use prod_id of order dataset if prod_name of product dataset is missing + ```java + Dataset orderWithProductsDf = orderDf.join(productDf, + orderDf.col("prod_id").equalTo(productDf.col("prod_id")), + "left") + .withColumn("prod_name", coalesce(col("prod_name"), orderDf.col("prod_id"))) + .select(col("order_id"), col("prod_name"), col("unit_price"), orderDf.col("qty")); + ``` +- one small trick for clean code - note how we write each of the chaining all in one place. we can for e.g. extract some parts to variables like join conditions, some complex transformations, etc +- another technique might be to extract parts of logic to functions that accept and return dataframes. this also helps unit test these bits of logic +- there are two kinds of joining techniques used by spark - **shuffle join** and **broadcast join** + +### Shuffle Joins + +- imagine when we have two datasets with 3 partitions, and we have three executors +- so first a **shuffle** + **sort** happens to ensure that the same keys from both datasets belong to the same executor +- now, we can simply perform a **merge** to join the two datasets +- refer diagram [here](#jobs-stages-and-tasks) to recall breakdown of exchange and shuffle + sort +- small note - i am defaulting to thinking of shuffle joins as **shuffle sort merge joins**. there is another variation - **shuffle hash joins**, which is less optimal when compared to shuffle sort merge joins, so i am ignoring shuffle hash joins for now + +![shuffle join working](/assets/img/spark/shuffle-join-working.drawio.png) + +### Optimizing Shuffle Joins + +- reduce data being joined - because this shuffle and sort process is of course the bottleneck and can cause out of memory like issues, we should consider techniques like filtering (basically reducing) the amount of data we are joining, performing aggregations before joining, etc. basically, code intelligently +- maximize parallelism - the maximum parallelism possible when performing a join is the minimum of the three parameters below, so try maximizing the three parameters below - + - maximum number of executors our cluster allows + - `spark.sql.shuffle.partitions` - this determines the number of partitions after a shuffle i.e. after a wide transformation happens. the default value of this is 200. so, in case of join, this will give the number of partitions of the joined dataset? 
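    (for illustration, this can be tuned via `--conf spark.sql.shuffle.partitions=300` on spark-submit, or via `spark.conf().set("spark.sql.shuffle.partitions", "300")` in code)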
+ - number of unique keys in the datasets involved in the join - handling **key skews** / **hot partitions** - discussed later +- **bucketed joins** - if the datasets are already bucketed and sorted using the keys involved in the join, we will not have to rely on the shuffling and sorting done by spark at all! the idea is to partition, sort and store the datasets in bucketed fashion before the join starts. we then load the datasets and perform the joins, and there would be no shuffle involved in the joins + +### Broadcast Joins + +- shuffle joins are used when we join two large datasets +- however, we can use broadcast joins when either (or maybe both) the datasets are small +- assume the smaller dataset can be stored inside one partition, while the larger dataset has 200 partitions +- if using the shuffle join technique, first all of the 200 + 1 partitions will be sent for shuffle and sort +- this means there is network transfer involved to first send the data to exchanges and then load it back in a sorted manner into the executors +- however, in the broadcast join technique, the partitions of the larger dataset can stay where they were, and the smaller dataset can be copied over to all the executors having the larger dataset partitions +- this way, we avoid having to move (shuffle) the larger dataset's partition over the network +- essentially, we are *broadcasting* the smaller dataset to all the executors having the larger dataset +- the driver and executor memory should be > than the size of the smaller dataset, so that it can fit inside the memory +- notice how below the larger dataset stays as it is unlike earlier where the larger dataset was sorted
+ ![broadcast join working](/assets/img/spark/broadcast-join-working.drawio.png) +- the threshold which decides when to use a broadcast join is `spark.sql.autoBroadcastJoinThreshold`, which is 10mb by default +- note - unlike shuffle join, broadcast join is a hash join always. recall in shuffle joins, we have the concept of both hash joins (rarely used) and sort merge joins +- we can also provide spark the hint to use broadcast join like so, if we are not happy with the defaults - + ```java + import static org.apache.spark.sql.functions.broadcast; + + Dataset joinedDf = flightsDf1.join(broadcast(flightsDf2), ... + ``` +- note - to confirm all this, go to http://localhost:4040/ -> Sql / Dataframe tab -> select the sql query +- we can tell from this if an exchange was involved or we skipped it by using bucketed joins, if shuffle join was used or broadcast join was used, etc + +## Spark AQE + +- **aqe** - **adaptive query execution** +- it includes optimizations discussed below +- set `spark.sql.adaptive.enabled` to true for this (it should be enabled by default in new versions), and rest of the optimizations discussed in this sections will be automatically enabled +- crazy granular configurations can be seen [here](https://spark.apache.org/docs/latest/sql-performance-tuning.html), use documentation for specific configuration, just learning things from theoretical perspective for now + +### Dynamically Deciding Shuffle Partitions + +- earlier, after a wide transformation, for e.g. group by, the number of output partitions from a stage would be = `spark.sql.shuffle.partitions` (default 200), but lets say we set it to 10 +- what if i only have e.g. 5 groups after a group by statement? +- spark would still create a total of 10 partitions, therefore 10 tasks in the subsequent stage +- now, our spark job would eat up the resources for 5 empty tasks as well +- remember that for a wide transformation, spark stalls all the tasks of its previous stage, so the empty tasks are just sitting idle +- this optimization by aqe resolves this issue +- spark will look at the number of unique groups, and then dynamically adjust the number of output partitions +- now, assume one of the partitions was relatively larger +- spark used one task for one partition of data +- spark would complete all the tasks except this one quickly +- again, remember that for a wide transformation, spark stalls all the tasks of its previous stage, so the tasks that get over quickly are just sitting idle +- this optimization by aqe resolves this issue as well +- spark would now also look at the number of records in each group +- spark can merge some partitions to be handled by one task +- so, since one task = one slot, that task would process multiple partitions of data one by one serially +- e.g. this way, our job ended up using only 4 slots optimally - this is better than for e.g. 
using 5 slots, out of which 4 would get over pretty quickly, since the 5th slot now can be allocated to some other job +- remember how this is different from [dynamic resource allocation](#dynamic-resource-allocation) - dynamic resource allocation changes the executors dynamically, while dynamically deciding shuffle partitions changes the number of output partitions and what partition goes to what executor dynamically +- so, recap - two optimizations - + - determine the number of shuffle partitions dynamically + - dynamically coalesce the smaller shuffle partitions + +![aqe shuffle partitions](/assets/img/spark/aqe-shuffle-partitions.drawio.png) + +### Dynamically Switching Join Strategies + +- we already know that broadcast joins are more optimal than the regular shuffle joins +- however, assume one of the tables have a lot of complex transformations before being involved in the join +- spark may not be able to decide whether or not to use broadcast join, and would default to using shuffle join +- however, with aqe enabled, spark can **after shuffling** decide to go for a broadcast join +- the optimization here is that while we are involved in the shuffle process (therefore the network transfer) of the shuffle join, we still get rid of the sort and merge process, which is more expensive than a simple broadcast join + +### Dynamically Optimizing Skew Joins + +- if for e.g. we are joining two tables, and we have a **hot / skewed partition** +- before aqe, number of partitions / tasks = number of unique keys involved in the joins +- after aqe, spark is intelligent enough to break the hot partition into smaller chunks +- now, these smaller chunks can be processed in parallel in different tasks +- thus, we will not have an overly sized task (and thus out of memory exceptions) anymore +- note how we had 3 tasks without aqe, but now have 4 tasks with aqe +- note how the partition of the smaller dataset is copied + +![aqe skew joins](/assets/img/spark/aqe-skew-joins.drawio.png) + +## Dynamic Partition Pruning + +- it is enabled by default +- first, recall [partition pruning](#execution-plan--catalyst-optimizer-working) +- it is usually used for efficiency gains in a star schema design, so thats the lingo used in this section +- e.g. we have sql like below - + ```sql + select * + from fact join dimension + on fact.dimension_id = dimension.id + where dimension.some_attribute = 'xyz' + ``` +- my understanding of constraints needed for dynamic partition pruning - + - should be a broadcast join (dimension table would be broadcasted) + - fact table should be partitioned using dimension_id + - and of course, dynamic partition pruning should be enabled +- now, how this join would work is - + - the dimension table would be filtered using some_attribute = 'xyz' + - the filtered dimension table would be broadcast everywhere + - spark would be intelligent enough to only load the partitions of the fact table where dimension_id is present in the ids of the filtered dimension table + +## Caching + +- two methods for caching - chain `cache()` or `persist()` on the dataframe +- `cache` will cache using the default storage and memory, and does not allow configuration +- `persist` allows for more configuration around storage + - use disk or memory or a combination of both + - when storing in disk, data would of course be serialized, but when storing in memory, we can either store it in deserialized format or serialized format. serialized format advantage - would be compact therefore acquire less space. 
serialized format disadvantage - it would need to be serialized before storing / deserialized after reading from memory, hence it would use cpu + - use replication +- the default for persist / cache is memory + disk, with deserialization for memory and no replication +- both cache and persist are lazy like transformations - they are only triggered once there is an action + ```java + Dataset cachedDf = df.cache(); + cachedDf.count(); + ``` +- spark need not cache all partitions, it would only cache the partitions based on the actions we use, e.g. if we use `take(10)`, it would just cache the first partition, since the first partition should be self sufficient in providing with 10 records. however, if for e.g. we used an action like `count()`, it would have to cache all partitions +- however, spark will always either cache the entire partition or nothing, it will never cache a portion of the partition +- when to cache - when we use the same dataframe in multiple actions +- to evict from cache, chain `unpersist()` on the dataframe + +## Repartition and Coalesce + +- **repartition** - the code is as follows - `partitionedDf = df.repartition(3)` +- when we try to write this repartitioned dataframe, the output looks like follows - + ![repartition output](/assets/img/spark/repartition-output.png) +- note - above, we saw `repartition(number)`, but we can also use `repartition(columns...)` or `repartition(number, columns...)` +- when we do not specify a number to repartition and just column names, the number of partitions created = `spark.sql.shuffle.partitions` +- so basically, the number of partitions in repartition = either specified by us in the function call, or set via `spark.sql.shuffle.partitions`, and the column used for this partitioning can be specified by us as well +- when to use repartition - to improve performance, but we should be absolutely sure, since repartition would cause a shuffle +- when we are reducing number of partitions, do not use repartition, use `coalesce` +- `coalesce` will only collapse the partitions on the same worker node, thus avoiding a shuffle sort +- so my guess - if for e.g. we call `coalesce(10)`, but the data was on 11 worker nodes, total number of partitions finally would be 11? + +## Hints + +- we can add hints related to partitioning and joins +- hints - no guarantee that they would be used +- in the dataframe api, we can either use spark sql functions, or use `dataframe.hint()` +- join hint example using both techniques - + ```java + df1.join(broadcast(df2)) + df1.join(df2.hint("broadcast")) + ``` +- partitioning hint example - `df.hint("coalesce", 4)` +- note - i don't think there is any difference between chaining coalesce directly vs using it as a hint +- when writing the same using sql, there is a special [comment syntax](https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-hints.html#examples) we use + +## Shared Variables + +- these were both primarily used in rdd apis, but can have a niche use case in dataframe world as well + +### Broadcast Variables + +- **broadcast variables** use case - e.g. our udf uses some static reference data +- the reference data is for e.g. 5-10 mb, i.e. too big to store in plain code +- so, we can for e.g. store it in a file, and broadcast it to all the nodes +- this way, this variable can then be used inside the udf +- my understanding - maybe we can use closure as well i.e. we store the data in like a variable outside the udf, and then access it in the udf +- disadvantage of using closure - if for e.g. 
we have 1000 tasks running on 30 nodes, there would be 1000 deserializations. in case of broadcast variables however, there would only be 30 deserializations +- example - + ```java + SparkSession spark = // ... + Broadcast broadcastVar = spark.sparkContext().broadcast(new int[] {1, 2, 3}); + broadcastVar.value(); // can be used inside a udf. it returns [1, 2, 3] + ``` +- note - a better technique could also have been to somehow load this reference data as a dataframe if possible + +### Accumulators + +- **accumulators** are like a global variable that we can update +- e.g. from our udf, we would like to update a variable based on some condition +- so, these variables can be updated on a per row basis +- these variables basically live in the driver, and the executors internally communicate with the driver to update this variable +- example - + ```java + SparkSession spark = // ... + LongAccumulator accum = spark.sparkContext.longAccumulator(); + numberDf.foreach((x) -> accum.add(1)); + accum.value(); // should print the count of rows + ``` +- note - there is no shuffle etc involved in this process of realizing the final value of the accumulator - it is being mutated inside the driver by the executor communicating the changes to the driver +- so, these accumulators can either be updated from transformations like udf, or actions like forEach like we saw +- however, understand - if we use accumulators from within for e.g. udf, the value of accumulator can go bad - e.g. if a task fails, the executor will retry it - the accumulator cannot discard the partial changes made to it via the failed task, since there are too many concurrent modifications happening on it already via other tasks +- however, this does not happen when using an accumulator from inside actions like `forEach` + +## Spark Speculation + +- can be enabled via `spark.speculation`, false by default +- example we have 10 tasks, and all of them complete under 2 seconds, but one of them takes 10 seconds +- spark will automatically identify the slow running tasks and run a duplicate copy of this task +- this way, whichever one of the two finishes faster is used by spark, and the other task is killed +- useful when for e.g. the original task was running slow due to a fault in the worker node that it was running on, which was causing it to be slow +- running speculative tasks does have overhead in terms of resources +- e.g. if there are data skews or out of memory issues in our application, spark would still run copies of this task (which too will run slow or maybe fail) without realizing that the root cause is actually the data / faulty configuration itself + +## Streaming Introduction + +- earlier convention was batch processing - data first comes and sits in the lake +- then, there would be jobs that can be run for e.g. daily to perform the processing +- however, with time, jobs started demanding for smaller and quicker batches +- the idea is not to schedule the jobs in smaller intervals +- instead, we start viewing data as a stream that is in motion and not at rest +- spark streaming is an extension of the dataframe apis +- spark uses **micro batches** for achieving stream processing +- spark automatically takes care of lot of challenges like start and end time of batches, intermediate state management, etc +- initially, spark used **dstreams** - built on top of rdd +- now, sparks offers **structured streaming apis** - built on top of dataframe apis i.e. 
supports sql +- additionally, **event time semantics** are supported by structured streaming apis as well, which were not available in the d stream apis +- word count example using netcat - notice how for reading data, `read()` changed to `readStream()`, but otherwise, everything else stays the same. `readStream()` returns a `DataStreamReader` (recall read used to return `DataFrameReader`) + ```java + SparkSession spark = SparkSession.builder() + .master("local[*]") + .appName("Streaming Demo") + .getOrCreate(); + + Dataset lines = spark.readStream() + .format("socket") + .option("host", "localhost") + .option("port", "9999") + .load(); + ``` +- data from the socket comes in a column `value`. we want to split each line into its constituent words, and create a separate row for each word + ```java + Dataset wordCount = lines.select(explode(split(col("value"), " ")).alias("word")) + .groupBy("word") + .count(); + ``` +- finally, we try writing it to the console. again, `write()` changes to `writeStream()`. writeStream returns a `DataStreamWriter` (recall write used to return a `DataFrameWriter`) + ```java + StreamingQuery streamingQuery = wordCount.writeStream() + .format("console") + .option("checkpointLocation", "checkpoint") + .outputMode("complete") + .start(); + streamingQuery.awaitTermination(); + ``` +- note - we used `streamingQuery.awaitTermination()` above to simulate running an application indefinitely, and we got streamingQuery from the result of writing to a streaming sink +- note - sinks terminate when application is stopped / due to some error condition +- however, what if were writing to multiple sinks? + - we can use `spark.streams().awaitAnyTermination()`, when any of the streaming sinks terminate + - remember to have multiple checkpoint locations - do not use the same checkpoint location for multiple streaming sinks +- start the netcat utility using `nc -lk 9999`, and run the app to see the streaming output in the console +- working - first, spark creates an optimized logical plan, just like it did in case of dataframes +- now, it would create a job that reads from the source, processes it and finally writes it to the sink +- underneath, spark runs a background thread +- based on our trigger configuration, a new spark job is created. so, a spark job will not be created at every interval, it would only be created based on our trigger configuration, and all this is taken care of us by a background thread + ![spark streaming jobs](/assets/img/spark/spark-streaming-jobs.png) +- **trigger** determines how often to trigger the micro batch +- the default is **unspecified**. trigger a micro batch immediately, but stall this current micro batch until there is some input in the source +- trigger can also be based on for e.g. **time interval** - if the previous micro batch exceeds the time limit, the new batch starts after the previous batch finishes. however, if the previous micro batch finishes before the specified time limit, the new batch would wait till the mark reaches the time. for this, just chain the below to the `writeStream()` + ```java + .trigger(Trigger.ProcessingTime("1 minute")) + ``` +- finally, trigger can also be **continuous** - this is an experimental feature, where the performance is even faster than the current micro batch approach +- some popular streaming sources / sinks - netcat (already seen above), file and kafka +- the file source is capable of monitoring the path for new files. it can also use archival i.e. 
move the processed files to a different directory / delete the processed files altogether +- so, only sinks available are kafka, file and console for streaming requirements. how to for e.g. use jdbc? we can use `forEachBatch`, which is maybe called for every micro batch? - + ```java + outputDf.writeStream().foreachBatch((df, batchId) -> { + df.write() + .format("xyz") + // ... + .save(); + }); + ``` +- output modes - + - **append** - like insert only. used when previous outputs are not affected + - **update** - like upsert i.e. either new records are added or old records are updated + - **complete** - overwrite the complete result every time +- update vs complete example - + - input -
+ ![streaming input](/assets/img/spark/streaming-input.png) + - complete -
+ ![streaming output complete](/assets/img/spark/streaming-output-complete.png) + - update -(look at batch 2 in particular)
  ![streaming output update](/assets/img/spark/streaming-output-update.png)
- append does not make sense with aggregations like count, so it would throw an error like this - `Append output mode not supported when there are streaming aggregations on streaming DataFrames/DataSets without watermark;`. the why - append means immutable - the other two output modes, complete and update, have some way of reflecting updates made to previous groups, but append cannot allow updating of existing groups, only creation of new groups. roughly how aggregations work in spark streaming - spark receives a record, decides which group the record belongs to, and updates that group. this updating is not allowed in append mode, hence append mode does not support aggregations
- a spark streaming application is like a web server i.e. it keeps running, unlike batch jobs submitted to spark
- even a streaming application will stop at some point, due to reasons like a failure, maintenance, etc
- so, we need to be able to handle this stopping and restarting gracefully
- **gracefully** = **exactly once processing**
- exactly once processing basically means we should neither read an input record twice, nor miss an input record
- this is what the **checkpoint location** helps achieve
- the checkpoint location maintains things like -
  - the input boundaries of the last micro batch
  - state information (e.g. running total of the word count)
- we just saw how checkpointing helps spark achieve exactly once processing. however, exactly once processing also depends on sources and sinks - e.g. the source should be replayable i.e. allow reading of old messages. using kafka / files as streaming sources allows for this. similarly, sinks should be idempotent i.e. they should recognize duplicates instead of adding duplicates to the data
- what if our application has a bug? we fix the spark code and rerun spark-submit. now, can we rely on checkpointing to continue the job from where it left off after the job was stopped and restarted?
  - yes, if our fix was something like filtering out malformed records
  - no, if our fix changed the aggregation strategy etc, since that may mess up the checkpoint state altogether

## Streaming Using Kafka

- add the following dependency -
  ```xml
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql-kafka-0-10_${scala.version}</artifactId>
    <version>${spark.version}</version>
  </dependency>
  ```
- use the following to establish a connection -
  ```java
  Dataset<Row> kafkaSourceDf = spark.readStream()
      .format("kafka")
      .option("kafka.bootstrap.servers", "localhost:9092")
      .option("subscribe", "invoices")
      .load();
  ```
- when we try printing the schema - `kafkaSourceDf.printSchema();`, we get the following -
  ```
  |-- key: binary (nullable = true)
  |-- value: binary (nullable = true)
  |-- topic: string (nullable = true)
  |-- partition: integer (nullable = true)
  |-- offset: long (nullable = true)
  |-- timestamp: timestamp (nullable = true)
  |-- timestampType: integer (nullable = true)
  ```
- the value is in binary format.
here is how to extract all fields into dataframe friendly format + - assume we create the schema of the payload somewhere + - then, we can cast the value field to a string + - then, call from_json on it, which also needs the schema + - this means all our data would be available as a struct type under the attribute value + - finally, based on [this](https://stackoverflow.com/a/54433013/11885333), i chained a `.select`, so that i do not have to access fields using value.attribute, but just using attribute - + + ```java + Dataset flattenedDf = kafkaSourceDf + .select(from_json(col("value").cast("string"), schema).alias("value")) + .select("value.*") + ``` +- [this doc](https://kafka.apache.org/quickstart) is great for debugging when writing kafka related code - creating topics, publishing to topics using kafka-producer, consuming from kafka-consumer, etc +- now, when we try `flattenedDf.printSchema();`, we get the right schema which we can use in our transformations +- to understand - how does kafka + spark actually work i.e. does spark rely on offset committing logic of kafka, or does spark itself maintain the offset inside the checkpoint directory +- writing to kafka - while reading from kafka, we deserialized the value attribute. while writing to kafka, we need to convert our dataframe into two fields of key and value + - combine all fields into a struct + - convert this field to json + - rename this condensed field to value + - pick any other attribute to act as key + + ```java + .select( + to_json(struct("*")).alias("value"), + col("InvoiceNumber").alias("key")); + ``` + +## Streaming Transformations + +- **stateless transformations** - do not need to maintain state across micro batches. e.g. filter, map, flatMap, explode, etc +- **stateful transformations** - need to maintain state across micro batches. e.g. for computing totals etc as we process new records, the state needs to be stored as a part of the checkpoint. e.g. grouping, aggregations +- now, stateless transformations do not support complete output mode. think why - + - if our streaming transformations are only stateless, 10 input records would contain 10 output records + - this means we will have to include input records as a part of the output every time + - this means all records need to be stored in the state, which is not efficient for spark +- so, as a side effect - we can run into out of memory issues when using spark streaming due to excessive state. spark stores all this state inside memory for efficiency +- it also stores it in the checkpoint location so that for e.g. when the application dies / is stopped due to some reason, it can resume from where it left off +- so, we have two concepts - **time bound state** and **unbounded state** +- **time bound state** - e.g. we calculate a weekly running total. spark knows that it can get rid of records older than a week,since they do not contribute to the total. this is also called **managed state**, since spark can manage this state +- **unbounded state** - there is no time bounds we can specify for the state. therefore, we ourselves need to specify some kind of cleanup logic for the state, so that our application does not encounter out of memory issues. 
this is also called **unmanaged state**, since the cleanup logic is on us to implement + +## Window Aggregations + +- this is the time bound state / managed state that we talked about above +- **trigger time** - determines when a micro batch starts and ends +- **event time** - the actual time when the event occurred +- important - the bounds of the **window** we specify has nothing to do with the trigger time +- the window we specify uses the event time to decide which window the record should be a part of +- spark also handles **late events** - e.g. we get an event for 10.00-10.15 when we have already performed processing for 10.15-10.30 and 10.30-10.45 +- e.g. we create a window of 15 minutes - + - this basically means a new column called window of type struct would be added to our dataset, with two fields - start and end + - spark will automatically decide for us which of these groups a record belongs to, based on the column name we specify. this column acts as the event time - e.g. created time in this example + - since this is basically inside a group, we can specify more columns to group on. e.g. we specify type column in the group by clause. then, we get windows for each of the type separately + - finally, we perform an aggregation - all records where type is buy, have their amount attribute added to total buy, all records where type is sell, have their amount added to total sell + - so basically think about whats in state of spark - for all groups i.e. windows, spark is storing the computed aggregate and updating it as and when new records arrive + - confusion, note - remember how this window is so much more different than the windowing aggregation we saw earlier - there, there was no grouping or aggregation involved - based on our specification, we were automatically able to add a new column for running total + + ```java + Dataset outputDf = stockSourceDf + .groupBy(window(col("CreatedTime"), "15 minute")) + .agg( + sum(when(col("Type").equalTo("BUY"), col("Amount")).otherwise(lit("0"))).alias("TotalBuy"), + sum(when(col("Type").equalTo("SELL"), col("Amount")).otherwise(lit("0"))).alias("TotalSell")); + ``` + +- remember - spark had to maintain old windows inside its state as well, to help it with late events +- **watermark** - helps expire old window state, so that out of memory etc exceptions are not caused. remember how this is the biggest advantage of using managed state +- so, we need to decide how late can an event be, post which - + - we can simply ignore the event + - we can clean up the state for that window +- for this, we simply need to chain the `withWatermark`. note - + - chain it before the group by clause + - column name used for windowing and column name specified inside watermark should be the same + + ```java + .withWatermark("CreatedTime", "30 minutes") + .groupBy(window(col("CreatedTime"), "15 minute")) + ``` +- how should the cleanup happen? - all windows with end_time < (max_event_time - watermark) can be ejected from state (note - max_event_time i think means event with maximum time in the micro batch). e.g. say our watermark is 30 minutes, and we receive a record with event time = 10.48. all windows with end time before 10.48 - 30 = 10.18 would be ejected from the spark state. 
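  to see where the output mode and checkpoint location fit in, a hedged sketch of the write side - assuming `outputDf` is the watermarked windowed aggregation built above; the sink, checkpoint path and trigger interval are assumptions -
  ```java
  // import org.apache.spark.sql.streaming.Trigger;
  outputDf.writeStream()
      .format("console")                             // kafka / file would work the same way
      .outputMode("update")                          // append also becomes possible once a watermark is set, discussed below
      .option("checkpointLocation", "chk-point-dir") // assumed path - needed for graceful restarts
      .trigger(Trigger.ProcessingTime("1 minute"))   // assumed trigger interval
      .start();
  ```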
this is the managed state / automatic cleanup that we were talking about in time bound state +- watermark and complete output mode do not make sense together - spark cannot cleanup state if it has to output all the records for every micro batch +- recall how we had talked about append mode not working when we have group by etc in our streaming jobs, because append cannot update groups. however, think about watermarks - when the max_event_time - watermark moves, all windows with ends below this line can be closed. hence, when we introduce watermarks and windows with aggregations, spark supports append mode. all windows which have been declared closed by spark are output after the micro batch gets over +- summary of the difference between output modes when using watermark + windowing - + - complete - output all windows, ignore watermark concept + - update - output all windows which were updated by the micro batch, eject all windows from state which are declared stale by spark via watermark concept + - append - eject all windows from state and only output windows which have been declared stale by spark via watermark concept, do not output all windows that were updated like update output mode +- **tumbling windows** vs **sliding windows** - + - tumbling windows do not overlap, while sliding windows can have an overlap + - my understanding - in tumbling windows, window duration = sliding interval, whereas in sliding windows, both are unequal + - in tumbling windows, an event can be a part of only one window. in sliding windows, an event can be a part of multiple windows, e.g. 10.18 can be a part of 10.10-10.20 and 10.15-10.25 + - so, the only difference in syntax is we now pass two parameters - window duration and sliding window size + + ```java + .groupBy(window(col("CreatedTime"), "15 minute", "5 minute")) + ``` + +## Streaming Joins + +### Streaming to Static + +- commonly used for stream enrichment +- stateless - spark does not have to maintain any state - this is because every time we get an event, we can simply compute the rows it produces as a result of the join and output these results, since they would not change / the event would not be needed for computing future joins anymore +- for each micro batch, spark is smart enough to refresh the static dataframe i.e. imagine when the application is already running, we insert new data into the static dataframe underlying source, e.g. jdbc. spark will reload the static dataframe with the new data when a new event comes in for the streaming dataframe +- inner join is supported +- left outer join is possible when the streaming dataframe is on the left. why - assume right outer join was allowed. spark would have to predict for the static dataframe's record whether or not a row is present in the streaming dataframe. this cannot be concluded, since streams grow infinitely. this is why right (and full) outer joins are not supported + +### Streaming to Streaming + +- stateful - we need to maintain both sides of data forever in the state, unlike when joining streaming dataframe to static dataframe. remember how this is stateful, but streaming to static can be stateless +- we can solve this problem using 🥁 `withWatermark`. 
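  a minimal sketch of such a stream-stream join - the dataframe names, columns and the one hour time bound are assumptions for illustration -
  ```java
  // import org.apache.spark.sql.Dataset;
  // import org.apache.spark.sql.Row;
  // import static org.apache.spark.sql.functions.expr;
  Dataset<Row> impressions = impressionsDf.withWatermark("impressionTime", "2 hours");
  Dataset<Row> clicks = clicksDf.withWatermark("clickTime", "3 hours");

  // inner join with a time range condition, so spark knows when old state can be dropped
  Dataset<Row> joined = impressions.join(clicks,
      expr("clickAdId = impressionAdId"
          + " and clickTime >= impressionTime"
          + " and clickTime <= impressionTime + interval 1 hour"));
  ```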
specify a watermark on both streams being joined, so that spark can remove events that are stale +- inner join is supported +- left outer join is possible but with some limitations, TODO +- TODO: spark interview question of memory diff --git a/_posts/2024-01-18-elasticsearch.md b/_posts/2024-01-18-elasticsearch.md new file mode 100644 index 0000000..dd1dd96 --- /dev/null +++ b/_posts/2024-01-18-elasticsearch.md @@ -0,0 +1,1367 @@ +--- +title: Elasticsearch +--- + +## Introduction + +- elasticsearch is open source +- we interact with elasticsearch using rest api and json, making it easy to work with +- elasticsearch is written in java and uses apache lucene underneath +- row in rdbms - **documents** in elasticsearch +- columns in rdbms - **fields** in elasticsearch +- table in rdbms - **index** in elasticsearch +- **index templates** - apply settings and mappings to indices that match a pattern + +## Use Cases + +- used for implementing search functionality, by addressing common problems like + - filtering search results - e.g. filter products based on category, price range, brand, etc + - sort results based on relevance - e.g. most reviewed, similarity with search parameters, etc +- we can aggregate the data stored in elasticsearch while querying. so, using elasticsearch data for analytics and not at all for searching is a perfectly valid use case +- apm or application performance management - e.g. analyze logs, monitor system metrics, etc +- machine learning - + - forecast future values - e.g. predict sales + - anomaly detection - e.g. alert when number of visitors on our website suddenly drops + +## Elastic Stack + +- elasticsearch - the heart of the elastic stack which stores the data +- kibana - + - serves as a web interface for configuration etc + - visualize the data stored in elasticsearch by creating dashboards in kibana + - note - kibana stores its data in elasticsearch. this means a new kibana instance pointing to our existing elasticsearch instance will automatically load all the configuration, dashboards, etc +- logstash - traditionally for processing logs and sending to elasticsearch. now, it has evolved into a more general purpose data processing tool, to perform etl +- x pack - add additional features like - + - authentication and authorization to elasticsearch and kibana + - monitoring - monitor performance of components of elasticsearch, logstash, kibana, etc and set up alerting based on issues related to these components + - machine learning + - graph - e.g. suggest relevant songs. popular != relevant. e.g. if 10 users use google, it is just because google is a very commonly used search engine, but if 10 users use stack overflow, it indicates something common between them. it helps us look for "uncommonly common" features + - sql - we typically use elasticsearch's query dsl to query elasticsearch, but we can also use sql, which gets translated to the query dsl bts. this can help people used to sql to get started with using elasticsearch +- beats - light weight agents installed on servers which then ship data to elasticsearch / logstash. e.g. file beats for sending log files, metric beats for system level metrics like memory and cpu usage, etc + +## Setup + +- download elasticsearch from [here](https://www.elastic.co/downloads/elasticsearch) +- download kibana from [here](https://www.elastic.co/downloads/kibana) +- run `./bin/elasticsearch` to run elasticsearch. 
it will display the following - + - enrollment token - helps kibana communicate with elasticsearch securely + - password - `pU-z6IdUirqzzUsFVlWh` for me +- run `./bin/kibana` to run kibana. we need to do the following - + - it would display the kibana url with a code as query parameter. open it + - enter the enrollment token displayed in the elasticsearch console + - authenticate using username as `elastic` and password as what is displayed in the elasticsearch console +- to interact with elasticsearch + - in kibana using dev tools - + - `get _cluster/health` to view cluster's health + - `get _cat/nodes?v` to view all nodes + - `get _cat/indices?v` to view all indices + - using curl - + ```sh + curl --cacert elasticsearch-8.12.0/config/certs/http_ca.crt \ + -u elastic:pU-z6IdUirqzzUsFVlWh \ + https://localhost:9200/ + ``` + +## Architecture + +- **node** - an instance of elasticsearch +- each node belongs to a **cluster** +- we can have different clusters based on use cases, e.g. one cluster for search, a different cluster for apm, etc + +## Sharding + +- elasticsearch uses **sharding** to help it scale +- sharding - splitting an index into smaller chunks +- this way, we are not limited by the storage capacity of 1 node +- sharding is done at index level for flexibility, because some indices can be very large, while others very small +- because of sharding, we can scale the cluster horizontally instead of having to do it vertically +- underneath, each shard is independent, like a fully functionally index. actually, each shard is a lucene index underneath +- sharding also helps parallelize the elasticsearch queries we issue, since the query can be broken down and run on each shard in parallel +- so two advantages - scale storage and improve throughput +- for elasticsearch < 7.0.0 - + - default number of shards was 5 - thus leading to **over sharding** when there were many small indices in the cluster + - changing number of shards after creating an index was not possible - to increase the number of shards, people would create a new index with the correct number of shards and move over the documents manually +- for newer versions of elasticsearch - + - default number of shards is 1 + - we can increase / decrease the number of shards using the **split** / **shrink** api, and elasticsearch does the heavy lifting for us bts + +## Replication + +- the underlying nodes / hardware / storage in a cluster can easily fail +- introducing **replication** for fault tolerance in elasticsearch is very easy +- replication is also configured at index level +- copies of shards are created, called **replica shards** +- the shard that has been replicated is called the **primary shard** +- all of the shards together are called a **replication group** +- primary and replica shards are never stored on the same node, because that defeats the purpose +- so, if our cluster has only one node, no replica shards are added even if we set replication +- replicas can also serve as read replicas +- this means if we have three shards (one primary and two replicas), there can be three search requests that can be served in parallel +- so two advantages (just like sharding) - serve as standby and improve throughput +- default replication is 1 +- use `get _cat/shards?v` to view all shards. 
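  the output looks roughly like this (columns abridged, values made up) -
  ```
  index    shard prirep state      docs store ip        node
  products 0     p      STARTED     512 260kb 127.0.0.1 node-1
  products 0     r      UNASSIGNED
  ```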
it gives us which index it belongs to, its type (primary or replica), which node it is stored on, etc + +## Snapshots + +- helps take backups +- we can take snapshots of specific indices or of the entire cluster +- it helps us restore the state to a specific point in time + +## Node Roles + +- **master** - + - the master node in a cluster performs cluster wide actions like creating and deleting indices + - if there are several nodes with this role, one of them are elected as the master + - larger clusters should have "dedicated masters" so that they do not perform high io tasks like serving search requests +- **data** - + - enables it to store shards + - thus, it can perform query / modification of data on the shards that it is responsible for +- **ml** - + - lets a node run machine learning jobs + - `xpack.ml.enabled` needs to be enabled as well on the node +- **coordination** - + - node can be responsible for distributing the queries and then aggregating the data results + - can be accomplished by disabling all other roles on the node, there is no direct role available in elasticsearch for this +- **voting only** - + - can participate in the election of a new master, but not be elected as the master itself + +## Simple CRUD + +- deleting an index - `delete pages` +- by default when we call `put index_name`, we get two shards by default - one primary and one replica shard +- this is why my cluster running locally goes into yellow health after creating an index - since i was running one elasticsearch node and one of the replicas shards are still unassigned +- specify settings when creating an index - + ``` + put products + { + "settings": { + "number_of_shards": 2, + "number_of_replicas": 0 + } + } + ``` +- we can index a document like below. it would return us the auto generated id for it + ``` + post products/_doc + { + "name": "Coffee Maker", + "price": 64, + "in_stock": 10 + } + ``` +- for a custom id, the endpoint above could have been like so - + ``` + post products/_doc/100 + ``` +- retrieving a product if we have the id - + ``` + get products/_doc/100 + ``` +- now, if we for e.g. run the below "for the same id" again, the older document is "replaced" with this new document + ``` + post products/_doc/100 + { + "name": "Red Shoes" + } + ``` +- note - we looked at two variations - `post <>/_doc` for automatic ids, and `post <>/_doc/<>` for custom ids and create or update (basically replace). there are many more variations, not bothering right now +- elasticsearch documents are immutable - when we call post using the same id again, elasticsearch will basically create a new document and re index this document, effectively replacing the older document +- **scripted updates** - update using code, instead of us first retrieving the value, deciding the new value and then updating it. this approach for e.g. reduces network calls made to elasticsearch. we can do things like set the operation to delete if the in stock value becomes 0 etc. 
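  for reference, a rough sketch of such a scripted update (the script itself is just illustrative) -
  ```
  post products/_update/100
  {
    "script": {
      "source": """
        if (ctx._source.in_stock <= 1) {
          ctx.op = 'delete';
        } else {
          ctx._source.in_stock -= 1;
        }
      """
    }
  }
  ```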
skipping this for now, as i would likely use an orm + +## Routing + +- **routing** - helps resolve the shard for a document +- basically - shard = hash(_routing) % number_of_primary_shards +- by default, _routing = id +- so, when we try performing crud operations using the id, this is how the shard resolution happens inside elasticsearch +- underneath, issues like skewed shards etc are prevented by elasticsearch automatically +- this is why changing the number of shards for an index on the fly is difficult - the shard of the existing documents might change as the number of shards change. for us as developers however, using the shrink and split api is much easier now in newer versions of elasticsearch + +## Working of Reads + +- the request reaches the coordinating node +- by the formula discussed in routing, it determines which primary shard is responsible for this document +- then, it directs the read request to the best replica shard in the replication of group of the primary shard +- the replica is chosen using a strategy called **ars** or **adaptive replica selection**, which is deemed best for performance +- finally, the response reaches the client back from the coordinating node + +## Working of Writes + +- the request reaches the coordinating node +- by the formula discussed in routing, it determines which primary shard is responsible for this document +- now, the request is sent to the primary shard, unlike in reading document where the request was sent to any replica using ars +- the primary shard validates the document - e.g. throw an error if a string value is being specified for a numeric value +- then it indexes the document +- now, it sends requests to its replica shards in parallel +- finally, the write is complete and the response is sent back to the client via the coordinating node + +## Conflicts During Writes + +- what if primary shard goes down after receiving a write? a replica shard would be promoted but what if the write was already committed to some other replica shards and not the newly appointed primary shard? +- what if a replica shard goes down during a write? +- many such failure scenarios can happen in distributed systems like this +- all these problems are handled by using primary term, sequence number and checkpoint in elasticsearch +- **primary term** - how many times the primary shard has changed +- **sequence number** - a counter that is incremented for each write operation. i think it is index specific +- **global checkpoint** - the minimum sequence number all the shards in the replication group have been aligned up to +- **local checkpoint** - the sequence number the current shard is at +- my understanding - the values of primary term and sequence number are also assigned to the documents to help with optimistic concurrency control +- **optimistic concurrency control** - what if an older version of document overwrites a newer version i.e. when writes happen concurrently? this situation is common, given the distributed nature of elasticsearch. e.g. two visitors on our e-commerce app try decreasing the in stock attribute by one simultaneously +- in newer versions, we are supposed to send the primary term and sequence numbers discussed earlier in order to implement optimistic concurrency control - + ``` + post products/_update/100?if_primary_term=1&if_seq_no=9 + // ... + ``` +- note, my understanding - apart from primary term and sequence numbers, when we retrieve documents, they also return a version. 
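  e.g. a document retrieval response carries all three (a trimmed, made up response) -
  ```json
  {
    "_index": "products",
    "_id": "100",
    "_version": 2,
    "_seq_no": 9,
    "_primary_term": 1,
    "found": true,
    "_source": { "name": "Red Shoes" }
  }
  ```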
it is just like we would expect a version column to work, i.e. increments by one. it was used for implementing optimistic concurrency control in older versions, but the newer and preferred method is to use the primary term and sequence numbers instead that is descried above + +## Bulk Operations + +- these are much more efficient than sending out individual requests +- we can use the `_update_by_query` and `_delete_by_query` variants, where we specify the match clause +- the operations i think work like `update ... where ...` and `delete ... where ...` +- we can also use the bulk api to perform multiple kinds of operations on an index all at once +- this format that we use is also called nd json +- example of using bulk api inside kibana - + ``` + post products/_bulk + { "index": { "_id": 200 } } + { "name": "Espresso Machine", "price": 150, "in_stock": 4 } + { "create": { "_id": 201 } } + { "name": "Espresso Machine", "price": 150, "in_stock": 4 } + { "update": { "_id": 202 } } + { "doc": { "name": "Espresso Machine", "price": 150, "in_stock": 4 } } + { "delete": { "_id": 100 } } + ``` +- one line specifies the action (index, create, update, delete), the second line specifies the document contents (except in delete) +- my understanding - index vs create vs update - index works for both create and update, update fails when no document exists and create fails when document already exists +- we can also specify the primary term and sequence numbers inside the action line for optimistic concurrency control +- using curl to upload data using bulk api, where a file has all the data in the form of nd json - + ``` + curl -H "Content-Type: application/x-ndjson" \ + -XPOST \ + --cacert ~/elasticsearch-8.12.0/config/certs/http_ca.crt \ + -u elastic:pU-z6IdUirqzzUsFVlWh \ + https://localhost:9200/products/_bulk \ + --data-binary "@products-bulk.json" + ``` + +## Working of Bulk Operations + +- first, the query reaches the coordinating node as usual +- a snapshot of the entire index is taken +- then, the query to search for the documents and the bulk request to update them is sent to all the nodes +- if a failure occurs during this update, the failures are sent back to the client and "not rolled back" +- understand how this might be different from e.g. rdbms systems where there are features like transactions which help rollback +- idea is instead of rolling back, elasticsearch sends the failures to the client so that the client can handle it accordingly +- why was the snapshot taken - this helps elasticsearch implement optimistic concurrency control internally - it is not unlikely that since bulk request is huge, and during the processing of this bulk request, some document gets updated in the intermediary. so, elasticsearch uses this snapshot to compare the primary term and sequence number of the document it updates + +## Analysis + +- values are **analyzed** when indexing documents, to help with searching them +- different data types in elasticsearch will use different data structures e.g. numeric and geo spatial data might be stored inside bkd trees. however, most of them are fairly straightforward like in for e.g. rdbms, unlike text data types, what elasticsearch is known for. so, that is the focus here +- **analyzer** consists of three building blocks - character filters, a tokenizer and token filters +- **character filters** - + - add / remove / transform characters + - an analyzer can have multiple character filters, and they would be run one after another + - e.g. 
the html_strip character filter will filter out the html entities + - input - `I'm REALLY liking beer` + - output - I'm REALLY liking beer +- **tokenizer** - + - split into tokens + - an analyzer contains exactly one tokenizer + - some characters e.g. punctuations can be removed as a part of this + - important - the offset for each token is recorded as well. useful e.g. for [match phrase queries](#full-text-queries) + - e.g. the standard analyzer splits based on special characters + - input - "I'm REALLY liking beer" + - output - ["I'm", "REALLY", "liking", "beer"] +- **token filters** - + - add / remove / modify tokens + - an analyzer can have multiple token filters, and they would be run one after another + - e.g. lowercase filter to make all tokens lowercase + - input - ["I'm", "REALLY", "liking", "beer"] + - output - ["i'm", "really", "liking", "beer"] +- **standard analyzer** - the default. it uses no character filters, the standard tokenizer and finally the lowercase token filter +- there is an easy to use api, where we specify the analyzer / its components, and elasticsearch returns us the analyzed result + ``` + post _analyze + { + "text": "2 guys walk into a bar, but the third... DUCKS! :-)" + } + + post _analyze + { + "text": "2 guys walk into a bar, but the third... DUCKS! :-)", + "analyzer": "standard" + } + + post _analyze + { + "text": "2 guys walk into a bar, but the third... DUCKS! :-)", + "char_filter": [], + "tokenizer": "standard", + "filter": ["lowercase"] + } + ``` +- output is as follows - + ```json + { + "tokens": [ + { "token": "2", "start_offset": 0, "end_offset": 1, "type": "", "position": 0 }, + { "token": "guys", "start_offset": 2, "end_offset": 6, "type": "", "position": 1 }, + { "token": "walk", "start_offset": 7, "end_offset": 11, "type": "", "position": 2 }, + { "token": "into", "start_offset": 12, "end_offset": 16, "type": "", "position": 3 }, + { "token": "a", "start_offset": 19, "end_offset": 20, "type": "", "position": 4 }, + { "token": "bar", "start_offset": 21, "end_offset": 24, "type": "", "position": 5 }, + { "token": "but", "start_offset": 26, "end_offset": 29, "type": "", "position": 6 }, + { "token": "the", "start_offset": 30, "end_offset": 33, "type": "", "position": 7 }, + { "token": "third", "start_offset": 34, "end_offset": 39, "type": "", "position": 8 }, + { "token": "ducks", "start_offset": 43, "end_offset": 48, "type": "", "position": 9 } + ] + } + ``` +- we saw above how elasticsearch constructs tokens using a three step process +- **inverted index** - a mapping between the tokens and what documents contain these tokens +- e.g. finding which documents contain a specific term is as simple as looking up the term in the inverted index +- inverted indices are scoped to a "field of an index" +- **keyword** data type - used for exact matching. e.g. status field +- for full text searches, use the **text** data type instead +- internally, keyword uses the **keyword analyzer**, which is no op i.e. does not do anything +- in the inverted index that is created for keyword data type, the key is the entire string, the values are the documents having it +- in the inverted index that is created for text data type, the keys are the tokens, the values are the documents having it, along with offsets etc to help with for e.g. 
`match_phrase` query +- elasticsearch comes in with a lot of built in character filters, tokenizer and token filters, and we can mix and match them +- elasticsearch also comes in with a lot of [built in analyzers](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-analyzers.html) which we can use. they are configurable as well, e.g. we can add the stop word to the standard analyzer +- two common token filters - + - **stemming** - reducing words to their root form. e.g. if the word in the description is "loved", and the client searches for "loves", they should still be able to search for the word. stemming helps reduce the word to its "root form" + - **stop words** - common words in a language that are filtered out when a field is analyzed. e.g. articles +- note - what we search for is analyzed in the same way as the attribute! e.g. if the word drinking in the document is stemmed to drink, the word drinks in the query is also stemmed to drink +- below is an example of creating a custom analyzer inside an index. notice the four sections inside analysis - character filter, tokenizer, filter (token filter is called filter) and finally analyzer - + ``` + put analyzer_test + { + "settings": { + "analysis": { + "char_filter": { }, + "tokenizer": { }, + "filter": { + "danish_stop": { + "type": "stop", + "stopwords": "_danish_" + } + }, + "analyzer": { + "my_custom_analyzer": { + "type": "custom", + "char_filter": ["html_strip"], + "tokenizer": "standard", + "filter": [ + "lowercase", + "danish_stop", + "asciifolding" + ] + } + } + } + } + } + ``` + +## Mapping + +- **mapping** defines the structure of documents +- like a schema in rdbms +- two approaches - + - **explicit mapping** - we specify the fields and their data types ourselves + - **dynamic mapping** - the field mapping is automatically created for us when elasticsearch encounters a new field +- [data types](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html) available in elasticsearch +- creating an explicit mapping - + ``` + put reviews + { + "mappings": { + "properties": { + "rating": { "type": "float" }, + "content": { "type": "text" }, + "product_id": { "type": "integer" }, + "author": { + "properties": { + "first_name": { "type": "text" }, + "last_name": { "type": "text" }, + "email": { "type": "keyword" } + } + } + } + }, + "settings": { + "number_of_shards": 2, + "number_of_replicas": 0 + } + } + ``` +- retrieving the mapping for an index - + ``` + get reviews/_mapping + ``` +- when relying on dynamic mapping e.g. for strings, first, using [type coercion](#type-coercion), it would try converting it to a number / date. if that fails, the default behavior is to use [multi field mappings](#multi-field-mappings), so that text is used for attribute, and keyword is used for attribute.keyword. e.g. - + ``` + put recipes/_doc/1 + { + "ingredients": ["potato", "tomato"] + } + + get recipes/_mapping + ``` +- output - + ``` + { + "recipes": { + "mappings": { + "properties": { + "ingredients": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + } + } + ``` +- this default behavior might not be ideal for us since it consumes a lot of disk space, e.g. 
for ingredients, we would rarely perform text searches while for description of recipes, we would rarely perform aggregations, sorting, etc +- we can ask elasticsearch to disable dynamic mapping using - + - `"dynamic": "strict"` - would not allow unknown fields when indexing a document + - `"dynamic": "false"` - would allow additional fields but not analyze them. they would just be stored and be a part of the _source in the response + + ``` + put people + { + "mappings": { + "dynamic": "strict", + "properties": { + "first_name": { "type": "text" } + } + } + } + ``` + +### Changing the Mapping + +- changing the mapping might not be easy. e.g. assume we want to go from numeric to keyword data type. this is not easy for elasticsearch, since it would have to re index all the existing documents, since the underlying structure itself changes from a bkd tree to an inverted index (keyword data type uses keyword analyzer) +- so, we can use the re index api, which copies over our documents from the source to the destination index. while doing this, we specify the script, which can do some conversions for us. the syntax / working is similar to scripted updates which we mentioned earlier + ``` + post _reindex + { + "source": { "index": "reviews" }, + "dest": { "index": "reviews_new" }, + "script": { + "source": """ + if (ctx._source.product_id != null) { + ctx._source.product_id = ctx._source.product_id.toString(); + } + """ + } + } + ``` + +## Object vs Nested Data Types + +- when we use **object** data type, internally, elasticsearch flattens it using **dot notation** +- e.g. assume we had a document like below i.e. we set the field type of reviews to be an object + ```json + { + "product": { + "manufacturer": { + "name": "xyz" + }, + "reviews": [ + { "author": "abc", "rating": "4.7" }, + { "author": "def", "rating": "3" } + ] + } + } + ``` +- how elasticsearch kind of views them internally - + ```json + { + "product.manufacturer.name": "xyz", + "product.reviews.author": ["abc", "def"], + "product.reviews.rating": ["4.7", "3"] + } + ``` +- based on above, there is a downside of using the type object - if we search for a review left by abc and with rating 3, the current document we showed above would be returned - even though abc left 4.7. this is because after the flattening done by elasticsearch internally, the correlation between the fields of an object was lost +- therefore, due to the shortcomings above, we can use the **nested** data type. this means that all the fields of that structure would be correlated +- nested data type works in a fundamentally different way compared to object data type - internally, a new document is created for each of the review - so, if we were to index a document with 10 reviews, internally 11 documents would be indexed by elasticsearch. there is no flattening inside the same document like in object data type +- assume we had an array of objects. we can create mapping for nested type as follows - + ``` + // ... + "reviews": { + "type": "nested", + "properties": { + "author": { "type": "text" }, + "rating": { "type": "float" } + } + } + ``` +- for object data type, we just need to omit the `"type": "nested"` line +- having two many nested objects in the array can slow down queries etc, but this might be an indicator of a bad design in the first place as well. 
there are limits on the maximum number of fields allowed inside a nested document, maximum number of nested objects allowed in the array, etc as a safeguard + +## Arrays in Elasticsearch + +- there is no concept of arrays in elasticsearch - any field can contain 0 or more values in elasticsearch by default + ``` + post products/_doc/100 + { + "tags": ["electronics"] + } + + post products/_doc/100 + { + "tags": "smart phone" + } + + get products/_doc/100 + ``` +- in case of text fields, values of array type are simply "concatenated" one after another + ``` + post _analyze + { + "text": ["hello", "world"] + } + ``` +- output - make note of the offset + ```json + { + "tokens": [ + { "token": "hello", "start_offset": 0, "end_offset": 5, "type": "", "position": 0 }, + { "token": "world", "start_offset": 6, "end_offset": 11, "type": "", "position": 1 } + ] + } + ``` +- empty array / skipping the field mean the same thing +- i don't think this is the same as explicitly providing null however + +## Date Data Type + +- internally, elasticsearch stores dates as milliseconds since epoch, by converting it into the utc timezone +- if we do not specify the format, we can specify it in iso-8601 format (the one that looks like `2024-01-21T04:25:21.139Z`) or a number, that is the milliseconds since epoch +- however, when creating the explicit mapping, we can also specify the format using the java date format + ``` + "purchased_at": { + "type": "date", + "format": "dd/M/yy" + } + ``` + +## Type Coercion + +- **type coercion** - if we provide `"price": "7.4"` instead of `"price": 7.4`, elasticsearch is smart enough to convert it to the float type instead of treating it as keyword, string, etc +- in the example below + - we first create a document (the index is created automatically), and ensure that the dynamic mapping has type number for price. if we started with string itself, then of course the dynamic mapping would create it using text + keyword type + - then, second and third calls go through due to type coercion and how [arrays](#arrays-in-elasticsearch) in elasticsearch work, while the fourth call fails because it cannot be coerced + + ``` + post coercion_test/_doc/100 + { + "price": 7.4 + } + + // the mapping shows price is of type float + get coercion_test/_mapping + + // does not throw an error + // even though we provide a string + post coercion_test/_doc/101 + { + "price": "7.4" + } + + // does not throw an error + // even though we provide an array containing strings and numbers + post coercion_test/_doc/101 + { + "price": ["7.4", 8.9] + } + + // will throw an error + post coercion_test/_doc/102 + { + "price": "7.4m" + } + ``` +- when retrieving the document, we see "7.4" and not 7.4! maybe while elasticsearch does analyze the fields, it will ultimately just return us what we provided it with in the first place +- notice how this is a recurring theme, we saw it in [mapping](#mapping) when using `"dynamic": false` as well - _source is the exact same as the input by user, but bts, other processes like coercion, analyzing based on data type, etc are carried out +- to avoid all the hassle with type coercion, we can just disable it as well when creating the index + ``` + put sales + { + "settings": { + "index.mapping.coerce": false + } + // ... + } + ``` + +## Multi Field Mappings + +- e.g. 
assume we want a field to have both type keyword and text +- problem statement + - aggregations etc can not be run on text data type, but can be run on keyword data type + - searching etc can not be run on keyword data type, but can be run on text data type +- e.g. we have a recipes index, and we would like to use ingredients for searching (use case for text data type) and for aggregations like popular ingredients (use case for keyword data type) +- so, elasticsearch allows us to specify multiple data types for a field +- e.g. below, text related data structures would be created for ingredients, while keyword related data structures would be created for ingredients.agg. so, when querying elasticsearch, we would use the same convention as well i.e. use ingredients when we want to use the text based queries but use ingredients.agg for keyword based queries + ``` + put recipes + { + "mappings": { + "properties": { + "ingredients": { + "type": "text", + "fields": { + "agg": { + "type": "keyword" + } + } + } + } + } + } + ``` +- recall how when relying on dynamic mapping, this is the default i.e. using attribute for text data type and attribute.keyword for keyword data type +- other use cases might be for e.g. to optimize a field using different analyzers for different use cases + +## Elastic Common Schema + +- **ecs** or **elastic common schema** +- how common fields should be mapped +- e.g. doesn't matter the source of event - redis, kafka, nginx, etc, the "event timestamp" should be mapped via `@timestamp` field +- it is more useful for standard events like from web servers, databases, etc, not for custom use cases like using a product index + +## Term Level Queries + +- **term level queries** - term level queries are not analyzed, it is not like a part of it should match, the entire thing should match +- it does not make sense to use term level queries with text data type. it is typically used for all other data types like keyword, numbers, etc. this is because term level queries are not analyzed, while text data type is analyzed. it just does not make sense to do so, even if we get some hits +- e.g. of term level query. recall how [dynamic mapping](#mapping) created [multi field mapping](#multi-field-mappings) for both text and keyword. so, since we want to use the keyword variant, we use the below (term level queries are not meant for text data types) - + ``` + get products/_search + { + "query": { + "term": { + "tags.keyword": "Vegetable" + } + } + } + ``` +- specifying multiple terms to match based on - + ``` + get products/_search + { + "query": { + "terms": { + "tags.keyword": ["Vegetable", "Meat"] + } + } + } + ``` +- we retrieved document by id using + ``` + get products/_doc/100 + ``` +- retrieve documents by multiple ids + ``` + get products/_search + { + "query": { + "ids": { + "values": ["100", "200"] + } + } + } + ``` +- **range searches** - useful for fields of type dates, numbers, etc + ``` + get products/_search + { + "query": { + "range": { + "in_stock": { + "gte": 1, + "lte": 6 + } + } + } + } + ``` + +### Flexibility in Term Level Queries + +- while term level queries are not analyzed, they do allow for some flexibility described in this section +- still, do not forget the rule of thumb - term level queries are not analyzed, and therefore are not meant to be used for text data types +- **case insensitive** - will match documents having a tag vegetable / Vegetable. 
notice how the structure changes a little bit, `tags.keyword` is not a string now like earlier, but an object, with the value specified under `value` + ``` + get products/_search + { + "query": { + "term": { + "tags.keyword": { + "value": "vegetable", + "case_insensitive": true + } + } + } + } + ``` +- **prefix** - begins with. will match documents having name both "Pasta" and "Pastry", but not "Red Pasta" + ``` + get products/_search + { + "query": { + "prefix": { + "name.keyword": { + "value": "Past" + } + } + } + } + ``` +- **wildcard** - can use `?` / `*`. `past?` will match "paste", `past*` will match "pasta" and "pastry" however, do not do `*past`. while it will work, it might be very slow if index is huge + ``` + get products/_search + { + "query": { + "wildcard": { + "name.keyword": { + "value": "Past*" + } + } + } + } + ``` +- **regexp** - allows for regular expressions, useful when use case is more complex than what wildcard can do. remember, i get slightly confused in other places as well - `past*` is wildcard, `past.*` is regular expression. just like in wildcards, only try using it for prefix matching + ``` + get products/_search + { + "query": { + "regexp": { + "tags.keyword": { + "value": "Bee(f|r)" + } + } + } + } + ``` +- below the value, for all types like regexp, wildcard, prefix, etc, we can additionally also specify `case_insensitive` + +### Exists Term Level Query + +- search for all documents where a tag exists + ``` + get products/_search + { + "query": { + "exists": { + "field": "tags.keyword" + } + } + } + ``` +- what basically happens in exists query - it looks for all documents that are present in the inverted index +- there can be many reasons why a document would not be present in an inverted index, some common ones are - + - we specify null + - we specify an [empty array](#arrays-in-elasticsearch) - recall this is the same as omitting the field + - if for e.g. we use the `ignore_above` parameter, and the value was too long and was thus not indexed - recall this is usually keyword not text, so the entire string would be used for the inverted index and not the tokens, in which case it might have stayed below the character limit + +## Full Text Queries + +- term level queries are used for exact searches on structured data +- **full text queries** are used for searching through unstructured data +- the query is analyzed - if the field is analyzed, the same analyzer is used, else the standard analyzer is used +- analyzing both the query and the actual query using the same analyzer is key - otherwise, finding the document in the inverted index would not be possible +- like term level queries should be used for any data type but text +- full text queries should be used for only text data types +- querying all documents - + ``` + get products/_search + { + "query": { + "match_all": {} + } + } + ``` +- e.g. search for a particular field - note how `case_insensitive` is not needed like in term level queries, since the standard analyzer already contains the lowercase filter + ``` + get products/_search + { + "query": { + "match": { + "name": "PAsTa" + } + } + } + ``` +- if we specify multiple words, e.g. below, we get all products having either pasta **or** chicken in their name + ``` + get products/_search + { + "query": { + "match": { + "name": "pasta chicken" + } + } + } + ``` +- this is because the default operator is or. we can however change it to and as below. 
notice how the structure changes a little bit, `name` is not a string now like earlier, but an object, with the value specified under `query` + ``` + get products/_search + { + "query": { + "match": { + "name": { + "query": "pasta chicken", + "operator": "and" + } + } + } + } + ``` +- **multi match** - match using multiple fields i.e. either name or tags should have vegetable + ``` + get products/_search + { + "query": { + "multi_match": { + "query": "vegetable", + "fields": ["name", "tags"] + } + } + } + ``` + +### Controlling Scores in Full Text Queries + +- **relevance scoring** - typically in term level queries, the score is just 1, so this concept is not present there. this is not true in full text queries though. e.g. in the or variant of pasta chicken example discussed above, the recipes having both pasta and chicken come before recipes having either of them. this is because recipes containing both are deemed more relevant by elasticsearch +- **relevance boost** - e.g. boost the score of recipes having vegetable in its name. notice how everything is almost the same except the caret symbol + ``` + get products/_search + { + "query": { + "multi_match": { + "query": "vegetable", + "fields": ["name^2", "tags"] + } + } + } + ``` +- by default, the score and therefore the sorting happens using the "best matching field". e.g. assume a recipe has vegetable both in its name and its tag. if the name above leads to a score of 12.3 and tags lead to a score of 3, "the final score is not 15.3, but 12.3". we can change this behavior by specifying for e.g. **tie breaker**. so, its like the default value of tie breaker is 0. if we specify for e.g. 0.3, the final score = field_with_highest_score + (0.3 * (sum_of_scores_of_other_fields)). so, all other fields will contribute 0.3 of its score, while the field with the highest score will contribute its entire value + ``` + get products/_search + { + "query": { + "multi_match": { + "query": "vegetable", + "fields": ["name^2", "tags"], + "tie_breaker": 0.3 + } + } + } + ``` + +### Full Text Queries - Match Phrase + +- **match phrase** - a phrase is a sequence of one or more words. till now, the examples we saw did not consider the order of the words, e.g. if we search for "chicken pasta", "pasta xyz chicken" and "chicken pasta" should have the same score. using match phrase, words should appear in the "correct order" and "one after another". e.g. if we search for "complete guide", "a complete and useful guide" would not match. this why [offsets](#analysis) was stored as a part of analysis in the first place. again since it is a full text query, the field would be analyzed using the same analyzer used for field, and all recipes having juice and mango in its name one after another would match + ``` + get products/_search + { + "query": { + "match_phrase": { + "name": "juice (mango)" + } + } + } + ``` +- but, what if we want to allow for e.g. "spicy tomato sauce" to match "spicy sauce"? +- so, we can add the **slop** parameter to the query as follows - + ``` + get proximity/_search + { + "query": { + "match_phrase": { + "title": { + "query": "spicy sauce", + "slop": 1 + } + } + } + } + ``` +- when we say slop = 1, it basically means that the term can moved around once. we can move sauce +- lets say slop is 2. this means we are allowed two moves. 
in this case, "spicy sauce" will also match "sauce spicy" + + | **original** | spicy | sauce | | + | **slop 1** | | spicy, sauce | | + | **slop 2** | | sauce | spicy | + +- **edit distance** is another synonym for this concept +- e.g. of building a naive search - use a [bool query](#compound-queries---bool) + - `must` can use `match` with spicy sauce - recall how by default, or operator would be used + - `should` can use `match_phrase` with spicy sauce, and have some slop as well, to help boost documents with spicy sauce close by + +## Compound Queries - Bool + +- we can combine **leaf queries** to form complex **compound queries** +- we can have multiple nested compound queries +- **must** - must be present +- **must not** - must not be present +- **should** - their presence is not mandatory, but they help boost the relevance scores +- e.g. look for alcohol, not wine, and we are particularly interested in beer. note - while we do not provide multiple queries, each of must, must not and should is an array, thus allowing for multiple term level / full text queries + ``` + get products/_search + { + "query": { + "bool": { + "must": [ + { + "term": { + "tags.keyword": "Alcohol" + } + } + ], + "must_not": [ + { + "term": { + "tags.keyword": "Wine" + } + } + ], + "should": [ + { + "multi_match": { + "query": "beer", + "fields": ["name", "description"] + } + } + ] + } + } + } + ``` +- a special note - if we do not have must / must not clauses and only the should clause, one of all the queries inside should "should" match (no pun intended). maybe because if this was not the case, technically all documents of the index would be a part of the result set, and would just have different relevance scores. so for e.g. if we wanted to model scenarios like "or", we can just specify them inside the should query. at least one of the conditions inside or (i.e. inside should) would match, and documents specifying more number of conditions in the or clause would be ranked higher +- recall how we said should only affects the scoring if either must or must not is present. we can change this behavior by providing **minimum should match** clause +- **filter** - will just filter documents. unlike must, it would not contribute to the relevance score. e.g. if we are just looking for all products of type alcohol, we do not need it to contribute to the relevance score +- **filter execution context** - filter execution context does not contribute to the relevance score. thus, it is more optimal. additionally, queries inside the filter execution context can be cached for higher performance. e.g. must not and filter +- **query execution context** - contributes to the relevance score. thus, slower and cannot be cached. e.g. must and should + +## Compound Queries - Boosting + +- **boosting** - e.g. we want the functionality of should, but it should reduce the relevance score +- what we specify inside **positive** has to match (like must of bool) +- the scores of documents that match what we specify inside **negative** is reduced (opposite of should in bool) +- "the factor" by which we want the score to be reduced can be specified via **negative boost** +- e.g. "i want" juice, but i "do not like" not apple - + ``` + get products/_search + { + "query": { + "boosting": { + "positive": { + "match": { + "name": "juice" + } + }, + "negative": { + "match": { + "name": "apple" + } + }, + "negative_boost": 0.2 + } + } + } + ``` +- e.g. i like pasta, but not bacon. so, both are optional, unlike above where juice was mandatory. 
so, we need to combine both boosting (for its negative) and bool (for its should). additionally, notice how we use match_all inside must of bool (if only should is present, it would become mandatory) + ``` + get products/_search + { + "query": { + "boosting": { + "positive": { + "bool": { + "must": [ + { + "match_all": {} + } + ], + "should": [ + { + "term": { + "tags.keyword": "Pasta" + } + } + ] + } + }, + "negative": { + "term": { + "tags.keyword": "Bacon" + } + }, + "negative_boost": 0.2 + } + } + } + ``` + +## Compound Queries - Disjunction Max + +- **disjunction max** - we can specify multiple queries +- the query with highest relevance score is the one that is used ultimately +- we can however, use a **tie breaker** for the other matches +- recall how the working of this is exactly like [multi match](#full-text-queries). there, we specify multiple fields, here we specify multiple queries +- in fact a multi match query is converted into a dis max query. multi match query - + ``` + get products/_search + { + "query": { + "multi_match": { + "query": "vegetable", + "fields": ["name", "description"], + "tie_breaker": 0.7 + } + } + } + ``` +- equivalent dis max query - + ``` + get products/_search + { + "query": { + "dis_max": { + "queries": [ + { "match": { "name": "vegetable" } }, + { "match": { "description": "vegetable" } } + ], + "tie_breaker": 0.7 + } + } + } + ``` + +## Nested Queries + +- if we have nested objects, dot notation works just fine +- recall how we should use nested type and not object type if we want correlation between the different fields for an array of objects +- how to search through an array of nested type - + ``` + get recipes/_search + { + "query": { + "nested": { + "path": "ingredients", + "query": { + "bool": { + "must": [ + { + "match": { + "ingredients.name": "parmesan" + } + }, + { + "range": { + "ingredients.amount": { + "gte": 100 + } + } + } + ] + } + } + } + } + } + ``` +- how the score of the matching child documents effect the score of the parent document is determined via **score mode**. it is average by default (average of the scores of all the matching child documents), but it can be changed to min, max, sum. we just need to add `"score_mode": "max"` to the `nested` object for this +- if we add the **inner hits** parameter, we get all the nested documents that matched, with what score etc. understand that by default, we will get only one score which is for the parent document. this parameter helps us dig deeper into what nested document matched, with what score, etc. we just need to add `"inner_hits": {}` to the `nested` object for this + +## Controlling Query Results + +- specify format using ?format. can be for e.g. yaml +- use ?pretty if using curl so that the json response is properly formatted. response when using kibana is anyways always formatted, this is more when using for e.g. shell + ``` + curl --cacert ~/elasticsearch-8.12.0/config/certs/http_ca.crt \ + -u elastic:7Nb_iz3DKsvOgWirudWq \ + -XGET "https://localhost:9200/_cat/nodes?format=json&pretty" + ``` +- we can specify `_source` key to decide what attributes the result should return. by default, the entire document is returned. use case - we only need the ids and want to fetch the original data from another source. it is like projection in sql. set it to false for just the ids, or specify the attributes to include / exclude +- control the number of results returned using the `size` parameter. 
the default is 10 - + ``` + get products/_search + { + "size": 100, + "query": { + "match_all": {} + } + } + ``` +- to implement pagination, we can implement the offset using `from` - + ``` + get products/_search + { + "size": 1, + "from": 2, + "query": { + "match_all": {} + } + } + ``` +- implementing pagination + - total_pages = ceil(total_hits / page_size) + - `size` is page size + - `from` is page_size * (current_page - 1) +- sorting results - default is sorting by score. also note that sorting by name would throw an exception like - `Text fields are not optimized ...`, so use name.keyword. recall that [default dynamic mapping](#mapping) would generate [multi field mapping](#multi-field-mappings) for strings, with both text and keyword variant + ``` + get products/_search + { + "size": 10, + "from": 2, + "sort": [ + { "price": "desc" }, + { "name.keyword": "asc" } + ], + "query": { + "match_all": {} + } + } + ``` +- assume field is multi valued (elasticsearch does not care if a field is an [array](#arrays-in-elasticsearch)). we can then inside the sort array, structure the object like so - + ``` + { + "price": { + "order": "desc", + "mode": "avg" + } + } + ``` + +## Metric Aggregations + +- **metric aggregations** - calculate metric like sum, average, etc on the field we specify. e.g. - + ``` + get orders/_search + { + "size": 0, + "aggs": { + "total_sales": { + "sum": { + "field": "total_amount" + } + } + } + } + ``` +- `total_sales` is the name of the aggregation, inside which we specify the type of aggregation e.g. `sum`, and inside `sum`, we provide the name of the field to perform the aggregation on +- we set the size to 0 because retrieving the documents is of no use to us. i the case above, just all documents would be fetched / based on size specified +- similarly, we can use `avg`, `min`, `max`, `cardinality` (for distinct values) +- note - we can also specify `query` etc to filter out the documents on which we want to perform the aggregation +- so, to get the number of documents on which our aggregation was performed, we can use `value_count` +- we can use `stats` for a summary that includes aggregations like min, max, etc + +## Bucket Aggregations + +- **bucket aggregations** - we calculate aggregations for buckets of documents. documents fall into a bucket, and aggregations are not calculated for a specific field like in metric aggregation +- my brain cannot remember this syntax, so just understand and remember the idea for reference, but refer docs for the actual syntax +- there are many more bucket aggregations like [range](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-range-aggregation.html) for custom ranges of numbers / dates, [histogram](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-histogram-aggregation.html) to automate this bucket creation, etc, refer documentation based on use case + +### Term in Bucket Aggregation + +- [term](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html) - based on the field we specify, it will dynamically create the buckets for us. e.g. 
log level for logs, status for order like below, etc + ``` + get orders/_search + { + "size": 0, + "aggs": { + "status_agg": { + "terms": { + "field": "status" + } + } + } + } + ``` +- this helps us get different buckets for each order status, where each bucket contains the number of documents present in it +- additionally, to get the documents which have for e.g. the status field set to null / do not have the status field, we can add the following inside `terms` above - + ``` + "missing": "N/A", // + "min_doc_count": 0 // + ``` +- `missing` helps set name of bucket with documents containing missing status field to "N/A" +- why set `min_doc_count` - the bucket would not be returned if no faulty documents are present. setting it to 0 helps ensure even buckets with 0 documents are returned +- note - bucket aggregations are not always accurate. when our query reaches the coordinating node, it asks each shard for the top 10 documents. now, the count of status pending can be in top 10 of the first shard, but not necessarily in the second shard. so, all of the pending orders might not be present in the bucket once the coordinating node aggregates the result from both first and second shard. solution - increase the size parameter so that the default of 10 is not used. issue - it will effect performance + +### Nested in Bucket Aggregations + +- unlike metric aggregations, bucket aggregations allow for nesting +- in fact, we can nest a metric aggregation inside a bucket aggregation as well +- e.g. below, we will have stats like min, max, etc for each bucket. we create bucket using term discussed above + ``` + get orders/_search + { + "size": 0, + "aggs": { + "status_agg": { + "terms": { + "field": "status" + }, + "aggs": { + "status_stats": { + "stats": { + "field": "total_amount" + } + } + } + } + } + } + ``` + +### Filter in Bucket Aggregations + +- [filter](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filter-aggregation.html) - e.g. i want the avg price of all sales, and i also the average price for sales of t-shirt + ``` + get /sales/_search?size=0&filter_path=aggregations + { + "aggs": { + "avg_price": { "avg": { "field": "price" } }, + "t_shirts": { + "filter": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + } + } + ``` +- response will contain both the average price of t-shirt's sales and average price of all sales +- remember - if we for e.g. wanted just the average sales of t-shirts, we would run the below i.e. a query will filter the documents then the aggs would only run on the filtered documents + ``` + get /sales/_search?size=0&filter_path=aggregations + { + "query": { "term": { "type": "t-shirt" } }, + "aggs": { + "avg_price": { "avg": { "field": "price" } } + } + } + ``` + +### Filters in Bucket Aggregations + +- [filters](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filters-aggregation.html) - helps us perform aggregations on custom buckets +- e.g. 
max length of log for errors and warnings + ``` + get logs/_search + { + "size": 0, + "aggs": { + "messages": { + "filters": { + "filters": { + "errors": { "match": { "body": "error" }}, + "warnings": { "match": { "body": "warning" }} + } + }, + "aggs": { + "max_length": { + "max": { + "field": "message_length" + } + } + } + } + } + } + ``` + +## Kibana + +- open source ui to visualize elasticsearch data +- it also stores its data inside elasticsearch itself, thus helping us avoid issues around backups, easily scale kibana horizontally, etc +- dashboards are dynamic as well with interactivity +- **data views** - + - was called **index patterns** in the past + - we specify an index pattern here, and all indexes matching this pattern will be queried by kibana + - e.g. for logs, it is typical to have one index per month to help with scaling, as having all the data inside one index might not scale well + - optionally, we can also set the timestamp field when configuring a data view which helps filter the data in dashboards by time +- kibana has different apps like apm, maps, dashboard, etc +- **kql** or **kibana query language** - quickly put together some querying to filter documents instead of the verbose elasticsearch's query dsl. kql is internally converted to the equivalent elasticsearch query dsl. some tips - + - simply type some words to search for them in all fields - `products fischer` + - search for the exact phrase by surrounding using double quotes - `"products fischer"` + - search for documents with specific values for a field using operators - `http.response.status_code : 400` + - `:` is for equals, we can also use `>`, `<=`, etc + - we can have multiple conditions and combine them using boolean operators like `and` and `or` + - invert condition using `not` + - make groups using parentheses `()` to for e.g. avoid relying on the default precedence + - we can use wildcards for values - `url.path : /brands*` +- kibana -> discover for browsing through index data as is. it is meant for adhoc analysis of data + 1. data view - select the data view to use + 2. kql - enter kql + 3. time - enter time range, absolute or relative. recall the timestamp field we set when creating the data view. this is a common feature in different apps across kibana + 4. histogram - based on our time range, elasticsearch will automatically create the histogram. e.g. since my time range was of three days, it generated buckets of an hour, and shows number of documents inside each bucket + 5. fields - by default all fields are displayed. e.g. to only see values of certain fields in 6., we can select the fields here + 6. messages - the actual documents (log messages in this case) + + ![discover](/assets/img/elasticsearch/discover.drawio.png) + +- note about time - throughout the ui, time is in our timezone, but when queries are sent from kibana, they are sent in utc format + - why is it good - e.g. if i want to see the logs in the last 5 hours. i will simply see the values in my timezone / query using my timezone, without having to care about the conversion myself + - when it can be a problem - i want to see the logs for 1st of january. ist of january can mean different times in different timezones. 
so, i might want to adjust the times in my query / change the timezone itself in kibana +- to create visualizations - from the sidebar, go to analytics -> visualize library -> create visualization + - my understanding - the kql and time filters at the top are available at the top - the kql we enter is a part of the visualization, but the timestamp we select is not +- we configure from the right pane +- the left pane shows us the actual values +- in the right pane, there are two sections + - metrics - for the actual metric - average of total in this case + - buckets - for the parameter to bucket based on - product category in this case +- metric visualization example -
+ ![metrics](/assets/img/elasticsearch/metrics.png) +- when doing nested aggregations, a typical example can be - + - bucket using date histogram and timestamp field + - create the sub aggregation using [term](#term-in-bucket-aggregation) and e.g. status code field +- in visualizations like line chart etc, we can select the bucket type to be **split series** or **split chart**. split series will show for e.g. in the same chart, whereas split chart will create separate charts in the same visualization +- the order of aggregations might matter sometimes - e.g. if i want the date histogram of the top 5 most accessed url paths throughout - + - if i first bucket by date and then by url path, kibana would show the top 5 urls for every interval + - however, if i reverse this order of bucketing, i get the right output +- example of line chart. notice the configuration on the right, with the right ordering explained above
+ ![line chart](/assets/img/elasticsearch/line-chart.png) +- note - bar, area and line chart are configured in the same way, they are just different to look at +- recall [filters](#filters-in-bucket-aggregations) in bucket aggregations. we use kql to specify custom buckets
+ ![filters](/assets/img/elasticsearch/filters.png) +- note - for above use case, we could have used range as well, but terms might be useful for more custom bucketing use cases +- when using date histogram, the interval can be set to **auto** - kibana decides the best interval automatically, e.g. a day if the range is a few months, or an hour if it is a day, etc +- **data table** - e.g. we want total sales for all salesmen + - we should use terms aggregation (since the buckets are dynamic). the field to use would be salesmen's id + - just like in bar chart etc, data table would have **split rows** and **split table** + - we add the metric to be sum of total + - now, just the salesmen's id and total might not be good to look at - we might want to add the salesmen's name. so, we use a "no op" metric like "top hits". this way, the top document of the bucket is used, and we use the field as salesman's name. in our case, that hardly matters because all orders in the same bucket have the same salesman + - we can order the buckets using one of the metrics - when configuring the bucket using salesmen's id, we configure it to order using the metric we added - sum of total (refer 3rd point) + - we can configure the data table to for e.g. display the total at the end + + ![data table](/assets/img/elasticsearch/data-table.png) + +- **heat maps** - the basic one which we might probably use is matrix based, but other use cases include - + - actual maps - e.g. which region has the most activity on our website + - on websites - e.g. like eye tracker - which areas of our website draw the most attention +- e.g. we would like to see the peak hours on the different pages of our website + - we already have an "hour of day" field to use histogram aggregation on. this way, we get it for each hour + - now, we use terms aggregation for the "url path" field + - each cell shows how many visits were there for a particular hour and url path. clearly, activity is more around 8am to 8pm + + ![heat map](/assets/img/elasticsearch/heat-map.png) + +- **tag clouds** - bubbles would be larger for the tags with higher values. e.g. bucket using cities, and measure sum of order totals
+ ![tag clouds](/assets/img/elasticsearch/tag-clouds.png) +- **dashboards** - orchestration of visualizations. they typically show all the visualizations from the same data view +- when we edit the visualizations for a dashboard, we can either modify the visualization itself, or override the settings of the visualization at the dashboard level, thus leaving the visualization as is +- **interactivity** - when we click on the chart, it automatically adds **filters** (they are like ksql i.e. present at the top) and update other visualizations using these filters as well. similarly, if we select some areas (like rectangles) on the charts, it will automatically set the timestamp filters on the dashboard and update other visualizations using this time range as well. e.g. refer the filter below on the left and the time range on the right, which were added based on our interactivity with the visualizations + ![interactivity](/assets/img/elasticsearch/interactivity.png) diff --git a/_posts/2024-03-02-low-level-design.md b/_posts/2024-03-02-low-level-design.md new file mode 100644 index 0000000..96cd5b5 --- /dev/null +++ b/_posts/2024-03-02-low-level-design.md @@ -0,0 +1,1398 @@ +--- +title: Low Level Design +mermaid: true +--- + +## SOLID Principles + +### Single Responsibility Principle + +- "a class should have only one reason to change" +- it should not handle multiple concerns +- this increases "cohesion" - only related code belongs together +- it improves readability +- it also makes writing focused tests easier + +### Open Closed Principle + +- "open for extension" - extend the functionality without touching existing code +- this is done using principles like composition, inheritance, etc +- "closed for modification" - do not add extra functionality to existing code, since it is already tested +- e.g. instead of bundling everything inside one class, have a generic `Writer` interface, and have different concrete implementations like `DBWriter`. for new functionality, we add a new writer `FileWriter` instead of touching the existing code + +### Liskov Substitution Principle + +- "sub classes should be able to substitute base classes" +- subclass should not reduce the feature set offered by base class, only increase it +- e.g. below violates liskov substitution - + +```java +class Vehicle { + + void startEngine() {} +} + +class Bicycle extends Vehicle { + + void startEngine() { + throw new RuntimeException("no engine present..."); + } +} +``` + +- solution - break into different interfaces - + +```java +class Vehicle {} + +class MotorVehicle { + + void startEngine() {} +} + +class Bicycle extends Vehicle {} +``` + +### Interface Segregation Principle + +- "clients should not be forced to depend on interfaces they do not use" +- this prevents "fat" interfaces +- example can be same as liskov above + +### Dependency Inversion Principle + +- "depend on abstractions, not concrete implementations" +- "decoupling" - modules will not have to change with change in underlying implementations +- "abstractions should not depend on details, but details should depend on abstractions" +- can be achieved through techniques like "dependency injection" - dependencies are provided to the class from outside instead of the class itself instantiating them +- thus implementations can also be swapped easily, e.g. 
- + +```java +class Computer { + + private final Keyboard keyboard; + private final Mouse mouse; + + Computer(Keyboard keyboard, Mouse mouse) { + this.keyboard = keyboard; + this.mouse = mouse; + } +} + +class BluetoothKeyboard implements Keyboard {} +class WiredKeyboard implements Keyboard {} + +class BluetoothMouse implements Mouse {} +class WiredMouse implements Mouse {} +``` + +## Object Oriented Analysis and Design using UML + +- procedural programming was about organizing code into blocks to help manipulate data +- oop organizes the code and wraps the data and functionality inside an object +- object oriented analysis - + - we identify the objects in a system + - we establish the relationship between them + - finally, we make the design that can be converted to executable code in our object oriented language +- uml or unified modelling language helps model the object oriented analysis +- it helps communicate design decisions easily by breaking down a complex system into smaller, understandable pieces + +### Use Case Diagrams + +- "use case" - set of actions the system can perform +- "actors" - external users of the system +- gives a high level functional behavior of the system +- models the relationship between actors and use cases, as well as between the different use cases +- "system boundary" - limit the scope of the system +- "include" - invocation of one use case by another use case (like invoking a method) +- "extend" - works like the base use case it extends with additional steps +- extend can also be used for conditional use cases. e.g. pay fine only on late returns, not all returns + +![use case](/assets/img/low-level-design/use-case.drawio.png) + +### Class Diagram + +- helps show how different entities relate to each other +- map directly to object oriented language +- the representation of class has three sections - class name, properties and methods +- "visibility" - we can put this ahead of the attributes / methods. `+` for public, `-` for private and `#` for protected and `~` for default +- "associations" - if two classes communicate with each other, there needs to be a link between them +- associations can be bidirectional (both classes are aware of each other) or unidirectional (only one class is aware) +- "multiplicity" - how many instances of the class participate in the relationship +- "inheritance" is also called an "is a" relationship. denoted by open arrows (the head is not filled) +- for abstract class, use italics +- composition / aggregation are also called a "has a" relationship +- "aggregation" - lifecycle of the child class is independent of the parent class. denoted by open arrows with diamonds at end +- "composition" - lifecycle of the child class is dependent on the parent class i.e. the child cannot exist independent of the parent. denoted by closed arrows with diamonds at end +- "generalization" - combining similar classes into a single class +- basic e.g. 
- + - inheritance between customer / admin and user + - composition (with multiplicity) between orders and customers + +```mermaid +classDiagram + +User <|-- Admin +User <|-- Customer +Order "*" *-- "1" Customer + +class User { + -name +} + +class Order { + -customerId + -creationDate + -shippingDate + +place() +} + +class Admin { + +updateCatalog() +} + +class Customer { + +register() + +login() +} +``` + +### Sequence Diagrams + +- sequence of interactions in terms of messages +- the vertical dimension represents the chronological order of the messages +- the horizontal dimension shows the messages that are sent +- used for "dynamic modelling" i.e. how objects interact with each other + +```mermaid +sequenceDiagram + +participant Customer +participant ATM +participant Account +participant Screen + +Customer->>ATM: Balance Inquiry +ATM->>Account: Get Balance +Account->>ATM: Balance +ATM->>Screen: Display Balance +Screen->>Customer: Show Message +``` + +### Activity Diagrams + +- flow of control from one activity to another +- "activity" - an operation that results in a change of state +- used for "functional modelling" i.e. how inputs map to outputs + +![activity](/assets/img/low-level-design/activity.drawio.png) + +## Design Patterns Introduction + +- problems that occur frequently have well defined solutions +- three broad categories - creational, structural, behavioral +- creational - how objects are constructed from classes +- structural - composition of classes i.e. how classes are constructed +- behavioral - interaction of classes and objects with one another and the delegation of responsibility + +## Creational Patterns + +### Builder Pattern + +- separate the representation of object from its construction process +- e.g. helps prevent "telescoping constructors" - + ```java + Aircraft(Engine engine); + Aircraft(Engine engine, Cockpit cockpit); + Aircraft(Engine engine, Cockpit cockpit, Bathroom bathroom); + ``` +- "product" - what we want to create - aircraft here +- we have a "builder" interface +- implementations of this builder are called "concrete builders" +- the builder has empty / default implementations +- this way, the builder methods can be selectively overridden depending on variant +- "director" - has the "algorithm" to help create products using builders +- sometimes, the director can be skipped - the client invokes the methods on builder directly +- pretty similar to [abstract factory](#abstract-factory-pattern) + +
+code example +
+
+abstract class AircraftBuilder {
+
+  void buildCockpit() {}
+  void buildEngine() {}
+  void buildBathroom() {}
+  Aircraft getResult() {}
+}
+                                          // no bathrooms in f16
+class BoeingBuilder                       class F16Builder
+    extends AircraftBuilder {                 extends AircraftBuilder {
+
+  @Override void buildCockpit() {}          @Override void buildCockpit() {}
+  @Override void buildEngine() {}           @Override void buildEngine() {}
+  @Override void buildBathroom() {}         @Override Aircraft getResult() {}
+  @Override Aircraft getResult() {}       }
+}
+
+class Director {
+
+  AircraftBuilder aircraftBuilder;
+
+  Aircraft construct(boolean isPassenger) {
+    aircraftBuilder.buildCockpit();
+    aircraftBuilder.buildEngine();
+    if (isPassenger) {
+      aircraftBuilder.buildBathroom();
+    }
+    return aircraftBuilder.getResult();
+  }
+}
+
+
+
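+- a possible client side usage of the above - a sketch only, assuming everything is in one package and the director's builder field is set directly (no constructor was shown for `Director`) -
+
+```java
+AircraftBuilder builder = new BoeingBuilder();
+
+Director director = new Director();
+director.aircraftBuilder = builder;
+
+// passenger aircraft, so the bathroom step is included
+Aircraft boeing = director.construct(true);
+```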
+ +### Singleton Pattern + +- create only one instance of a class +- e.g. thread pool, registries, etc +- we make the constructor "private" so that other classes cannot instantiate it +- some methods have been discussed below + +
+not thread safe +
+
+class AirForceOne {
+
+  private static AirForceOne instance;
+
+  private AirForceOne() { }
+
+  public static AirForceOne getInstance() {
+
+    if (instance == null) {
+      instance = new AirForceOne();
+    }
+
+    return instance;
+  }
+}
+
+
+
+ +
+synchronized - makes code slow as every invocation acquires a lock +
+
+public static synchronized AirForceOne getInstance() {
+  // ...
+}
+
+
+
+ +
+static initialization - if instantiation is expensive, it can cost us performance if object is never used +
+
+private static AirForceOne instance = new AirForceOne();
+
+
+
+ +
+"double checked locking" - solves all problems, but not generally recommended +
+
+class AirForceOne {
+
+  // IMP - notice the use of volatile
+  private volatile static AirForceOne instance;
+
+  private AirForceOne() { }
+
+  public static AirForceOne getInstance() {
+
+    if (instance == null) {
+      synchronized(AirForceOne.class) {
+        if (instance == null) {
+          instance = new AirForceOne();
+        }
+      }
+    }
+
+    return instance;
+  }
+}
+
+
+
+ +### Prototype Pattern + +- create new objects by copying existing objects +- "prototype" - the seed object from which other objects get created +- sometimes, cloning can be more performant than creating entirely new instances +- another advantage - instead of too many subclasses, vary behavior by changing fields - two separate classes for boeing and f16 are not required below +- use case - "dynamic loading" - e.g. we do not have access to constructors. the runtime environment registers prototypes with the "prototype manager", so that whenever an object is requested, a copy is returned by this prototype manager +- "shallow" vs "deep" copy - nested fields would be shared in shallow copy unlike in deep + +
+code example +
+
+class F16 implements Aircraft {
+
+  void setEngine(Engine engine) { }
+
+  Aircraft clone() { /* deep copy */ }
+}
+
+Aircraft f16A = aircraft.clone();    Aircraft f16B = aircraft.clone();
+f16A.setEngine(f16AEngine);          f16B.setEngine(f16BEngine);
+
+
+
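+- a minimal sketch of the "prototype manager" / registry mentioned above - the `PrototypeRegistry` name and its methods are illustrative, and it assumes the `Aircraft` type exposes the `clone` method used above -
+
+```java
+import java.util.HashMap;
+import java.util.Map;
+
+class PrototypeRegistry {
+
+  private final Map<String, Aircraft> prototypes = new HashMap<>();
+
+  void addPrototype(String key, Aircraft prototype) {
+    prototypes.put(key, prototype);
+  }
+
+  // always hand out a copy, never the registered instance itself
+  Aircraft get(String key) {
+    return prototypes.get(key).clone();
+  }
+}
+
+// e.g. the runtime environment registers a seed object once,
+// and clients only ever ask the registry for copies -
+// registry.addPrototype("f16", new F16());
+// Aircraft f16A = registry.get("f16");
+```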
+ +### Factory Method Pattern + +- delegate the actual instantiation to subclasses +- the factory method may or may not provide a default implementation +- the subclass will override this implementation +- downside - compare with [prototype pattern](#prototype-pattern) - it results in too many subclasses + +
+code example +
+
+class F16 {
+
+  protected Aircraft makeF16() {
+    cockpit = new Cockpit();
+  }
+}
+
+class F16A extends F16 {            class F16B extends F16 {
+
+  @Override                           @Override
+  public Aircraft makeF16() {         public Aircraft makeF16() {
+    super.makeF16();                    super.makeF16();
+    engine = new F16AEngine();          engine = new F16BEngine();
+  }                                   }
+}                                   }
+
+F16 f16A = new F16A(); f16A.makeF16();
+F16 f16B = new F16B(); f16B.makeF16();
+
+
+
+ +### Abstract Factory Pattern + +- creating families of related objects without specifying their concrete classes +- we have "abstract factory" returning "abstract products" +- "concrete factories" override these abstract factory methods and return "concrete products" +- now, only the right concrete factory needs to be passed to the aircraft to construct it +- in [factory method pattern](#factory-method-pattern), we were using inheritance to create a single product +- here, we create a family of products using composition +- concrete factories can be [singleton](#singleton-pattern) + +
+code example +
+
+class Aircraft {
+
+  void makeAircraft(AircraftFactory aircraftFactory) {
+    engine = aircraftFactory.makeEngine();
+    cockpit = aircraftFactory.makeCockpit();
+  }
+}
+
+interface AircraftFactory {
+
+  Engine makeEngine();
+  Cockpit makeCockpit();
+}
+
+class BoeingAircraftFactory implements AircraftFactory {
+
+  @Override Engine makeEngine() { return new BoeingEngine(); }
+  @Override Cockpit makeCockpit() { return new BoeingCockpit(); }
+}
+
+class F16AircraftFactory implements AircraftFactory {
+
+  @Override Engine makeEngine() { return new F16Engine(); }
+  @Override Cockpit makeCockpit() { return new F16Cockpit(); }
+}
+
+
+
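+- on the client side, only the concrete factory that gets passed in changes - a small sketch using the classes above -
+
+```java
+Aircraft f16 = new Aircraft();
+f16.makeAircraft(new F16AircraftFactory());
+
+Aircraft boeing = new Aircraft();
+boeing.makeAircraft(new BoeingAircraftFactory());
+```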
+ +## Structural Patterns + +### Adapter Pattern + +- allows incompatible classes to work together by converting the interface of one class into another +- e.g. our aircraft business now needs to accommodate hot air balloons +- "adaptee" is the incompatible class - hot air balloon +- "target" is the interface the client (i.e. our code) understands - aircraft +- "adapter" is the class sitting in between, which is composed using adaptee and implements the target +- usually done after a system is designed to accommodate to fit additional requirements +- this entire process discussed above is called "object adapter" +- we can also use the "class adapter" pattern - where the adapter extends both the adaptee and the target +- disadvantage - multiple inheritance is not supported by java + +
+code example +
+
+interface Aircraft {
+
+  void takeOff();
+}
+
+class Adapter implements Aircraft {
+
+  HotAirBalloon hotAirBalloon;
+
+  Adapter(HotAirBalloon hotAirBalloon) {
+    this.hotAirBalloon = hotAirBalloon;
+  }
+
+  @Override
+  void takeOff() {
+    hotAirBalloon.inflateAndFly();
+  }
+}
+
+// now, client can use adapter like any other `Aircraft`
+
+
+
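+- a possible usage sketch, assuming `HotAirBalloon` exposes the `inflateAndFly` method used above -
+
+```java
+Aircraft aircraft = new Adapter(new HotAirBalloon());
+aircraft.takeOff(); // internally delegates to hotAirBalloon.inflateAndFly()
+```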
+ +### Bridge Pattern + +- helps separate abstraction and implementation into two different class hierarchies +- e.g. we have two shapes - circle and square +- now, we want to introduce two colors - blue and red +- we will end up with four classes - blue circle, blue square, red circle, red square +- this can grow exponentially +- another problem - changes to color and shape effect each other - they are not decoupled +- so, we split into two separate hierarchies - shape and color +- so, we have "abstraction" and "refined abstraction" (shapes) +- then, we have "implementation" and "concrete implementation" (colors) +- so, instead of inheritance, we use composition +- we compose the refined abstractions using the concrete implementations + +
+code example +
+
+class Shape {
+
+  private Color color;
+
+  Shape(Color color) {
+    this.color = color;
+  }
+}
+
+class Circle extends Shape {        class Square extends Shape {
+
+  Circle(Color color) {       Square(Color color) {
+    super(color);               super(color);
+  }                           }
+}                           }
+
+interface Color {}
+class Red implements Color {}
+class Blue implements Color {}
+
+
+
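+- with circle / square extending shape and being composed with a color, the two hierarchies can now vary independently - a small sketch -
+
+```java
+Shape redCircle = new Circle(new Red());
+Shape blueSquare = new Square(new Blue());
+
+// adding a new color (or a new shape) means adding one class,
+// not one class per (shape, color) combination
+```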
+ +### Composite Pattern + +- helps compose our model in a tree like structure and work with them +- e.g. an air force can have several levels of nested air forces, and ultimately the last level of air force would be composed of planes +- "composite" - helps model the trees / subtrees +- "leaves" - the last level in these trees +- "component" - both the leaf and composite are coded to this common interface +- now, the client can simply call `getPersonnel` and treat the composite / leaf as the same +- it uses [internal iterator](#iterator-pattern) - the iterator is not exposed, and is handled by the composite itself + +
+code example +
+
+interface Alliance {
+
+  int getPersonnel();
+}
+
+class AirForce implements Alliance {
+
+  private Alliance[] alliances;
+
+  @Override
+  int getPersonnel() {
+
+    int personnel = 0;
+
+    for (Alliance alliance : alliances) {
+      personnel += alliance.getPersonnel();
+    }
+
+    return personnel;
+  }
+}
+
+interface Aircraft { }
+
+class F16 implements Aircraft, Alliance {
+
+  @Override
+  int getPersonnel() {
+    return 2;
+  }
+}
+
+class Boeing implements Aircraft, Alliance {
+
+  @Override
+  int getPersonnel() {
+    return 10;
+  }
+}
+
+
+
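+- a possible usage sketch - it assumes `AirForce` receives its alliances through a constructor, which was not shown above -
+
+```java
+Alliance squadron = new AirForce(new Alliance[] { new F16(), new F16() });
+Alliance airForce = new AirForce(new Alliance[] { squadron, new Boeing() });
+
+airForce.getPersonnel(); // (2 + 2) + 10 = 14 - the tree is walked recursively
+```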
+ +### Decorator Pattern + +- extend the behavior of an object dynamically +- the decorator basically adds to the existing functionality, by for e.g. taking some action before / after invoking the method on the wrapped component +- alternative to creating more subclasses +- e.g. below, the luxury and bulletproof variants could have been subclasses of boeing as well +- but then we could not wrap a different aircraft with different decorators +- "component" - the common interface to which the component and decorator is coded +- "concrete component" - what we wrap +- "decorator" - an interface for different decorators. this will also extend the component +- "concrete decorator" - the actual implementation of decorators. they wrap the concrete components +- we can wrap using multiple decorators as well +- e.g. below, we can make an aircraft bulletproof and luxurious, which affects its weight but its flying method stays the same +- the advantage is that the client code is agnostic of all this - it still codes to component +- notice how the decorator is composed using the component + +
+code example +
+
+interface Aircraft {
+  
+  void fly();
+  
+  int getWeight();
+}
+
+class Boeing implements Aircraft {    class F16 implements Aircraft {
+
+  @Override                             @Override
+  public void fly() {                   public void fly() {
+    System.out.println("flying");         System.out.println("soaring");
+  }                                     }
+
+  @Override                             @Override
+  public int getWeight() {              public int getWeight() {
+    return baseWeight;                    return baseWeight;
+  }                                     }
+}                                     }
+
+abstract class Decorator implements Aircraft { }
+
+class BulletProofDecorator extends Decorator {
+
+  Aircraft aircraft;
+
+  @Override
+  public void fly() {
+    aircraft.fly();
+  }
+
+  @Override
+  public int getWeight() {
+    return aircraft.getWeight() + 13;
+  }
+}
+
+class LuxuriousDecorator extends Decorator {
+
+  Aircraft aircraft;
+
+  @Override
+  public void fly() {
+    aircraft.fly();
+  }
+
+  @Override
+  public int getWeight() {
+    return aircraft.getWeight() + 27;
+  }
+}
+
+Aircraft boeing = new Boeing();
+Aircraft ceoPlane = new BulletProofDecorator(new LuxuriousDecorator(boeing));
+ceoPlane.getWeight(); // cumulated weight
+
+
+
+ +### Facade Pattern + +- a single uber interface to a subsystem to make working with it easier +- the client will now interface with the "facade" and not worry about the complexities of the subsystem +- changes to the subsystem will now affect the facade and not the client + +
+code example +
+
+class AutopilotFacade {
+
+  private BoeingAltitudeMonitor altitudeMonitor;
+  private BoeingEngineController engineController;
+  private BoeingNavigationSystem navigationSystem;
+
+  AutopilotFacade(BoeingAltitudeMonitor altitudeMonitor,
+      BoeingEngineController engineController, 
+      BoeingNavigationSystem navigationSystem) {
+    this.altitudeMonitor = altitudeMonitor;
+    this.engineController = engineController;
+    this.navigationSystem = navigationSystem;
+  }
+
+  void autopilotOn() {
+    altitudeMonitor.autoMonitor();
+    engineController.setEngineSpeed(700);
+    navigationSystem.setDirectionBasedOnSpeed(engineController.getEngineSpeed());
+  }
+
+  void autopilotOff() {
+    altitudeMonitor.turnOff();
+    engineController.turnOff();
+    navigationSystem.turnOff();
+  }
+}
+
+
+
+ +### Flyweight + +- sharing state among objects for efficiency +- e.g. if we use a global radar to track air crafts, we will end up with too many air craft objects for the same air craft at different coordinates +- "intrinsic state" - independent of the context of object. e.g. top speed of the air craft +- "extrinsic state" - dependent of the context of object. e.g. coordinates of the air craft +- so, to prevent creation of too many objects, we store intrinsic state inside the object, while extrinsic state outside it +- this way, we automatically end up with less objects, since we only need new objects when the intrinsic state changes, and not every time the extrinsic state changes +- "flyweight" - the object has become light since it only stores intrinsic state now +- "flyweight factory" - used to create the flyweight objects, because we do not want the client to create them directly +- "context" - used to store the extrinsic state + +
+code example +
+
+class F16 implements IAircraft {
+
+  private final int topSpeed = 800;
+
+  int getTimeToDestination(int curX, int curY, int destX, int destY) {
+    int distance = ...;
+    return distance / topSpeed;
+  }
+}
+
+
+
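+- a possible sketch of the "flyweight factory" mentioned above - the names here are illustrative. it caches one object per intrinsic state (the aircraft type), while extrinsic state like coordinates keeps being passed in from outside -
+
+```java
+import java.util.HashMap;
+import java.util.Map;
+
+class AircraftFlyweightFactory {
+
+  private final Map<String, IAircraft> cache = new HashMap<>();
+
+  IAircraft getAircraft(String type) {
+    // repeated requests for the same type reuse the same flyweight
+    return cache.computeIfAbsent(type, t -> {
+      if ("f16".equals(t)) {
+        return new F16();
+      }
+      throw new IllegalArgumentException("unknown aircraft type: " + t);
+    });
+  }
+}
+```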
+ +### Proxy Pattern + +- calls to the "real subject" are hidden behind a "proxy" +- this way, the real subject is shielded from the client +- both implement the "subject" interface so that the client code does not change +- e.g. client will call methods like turn left and turn right on remote control +- the remote control will call these methods on the drone +- both of them implement an interface called `IDrone` +- "remote proxy" - when the real subject is located on a remote server, the calls made by the client actually reaches a proxy first +- the proxy sits on the same jvm, and the proxy then makes the request over the network to the real subject on the remote server +- "virtual proxy" - delays the object creation when it is expensive +- e.g. we see loading overlays or wire frames with same height and width while expensive pictures are loading +- "protection proxy" - acts as an authorization layer in between + +## Behavioral Patterns + +### Chain of Responsibility Pattern + +- decoupling the sender of a request from its receiver +- passing it along a chain of handlers till one of the handlers handle it or the request falls off the chain and remains unhandled +- use this pattern when a request can be handled by multiple objects and it is not known in advance which one will end up handling it +- we have a "handler" which all "concrete handlers" implement +- notice how all handlers maintain a reference to their successor + +
+code example +
+
+class ErrorCodes {
+
+  static final int LOW_FUEL = 1;
+  static final int HIGH_ALTITUDE = 2;
+}
+
+class Handler {
+
+  Handler next;
+
+  Handler(Handler next) {
+    this.next = next;
+  }
+
+  void handleRequest(int errorCode) {
+    if (next != null) {
+        next.handleRequest(errorCode);
+    }
+  }
+}
+
+class LowFuelHandler extends Handler {          class HighAltitudeHandler extends Handler {
+
+  LowFuelHandler(Handler next) {                  HighAltitudeHandler(Handler next) {
+    super(next);                                    super(next);
+  }                                               }
+
+  void handleRequest(int errorCode) {             void handleRequest(int errorCode) {
+    if (errorCode == ErrorCodes.LOW_FUEL) {         if (errorCode == ErrorCodes.HIGH_ALTITUDE) {
+      // ...                                          // ...
+    } else {                                        } else {
+      super.handleRequest(errorCode);                 super.handleRequest(errorCode);
+    }                                               }
+  }                                               }
+}                                               }
+
+
+
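+- wiring up the chain - the client only knows about the first handler, and each error code travels down the chain until some handler picks it up -
+
+```java
+Handler chain = new LowFuelHandler(new HighAltitudeHandler(null));
+
+chain.handleRequest(ErrorCodes.HIGH_ALTITUDE); // passed along by low fuel, handled by high altitude
+chain.handleRequest(3);                        // falls off the end of the chain, stays unhandled
+```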
+ +### Observer Pattern + +- "observers" subscribe to "subjects" for state changes +- so, we have "observer" and "concrete observers", "subject" and "concrete subjects" +- "push model" - the subject will push the new state into the observer when calling its update method +- "pull model" - the subject will call the observer's update method using itself i.e. `this` +- then, the observer can call the getter method on the subject which can expose individual bits of state + +
+code example +
+
+interface ISubject {
+
+  void addObserver(IObserver observer);
+
+  void removeObserver(IObserver observer);
+
+  void notifyObservers(Object newState);
+}
+
+interface IObserver {
+
+  void update(Object newState);
+}
+
+public class ControlTower implements ISubject {
+
+  List<IObserver> observers = new ArrayList<>();
+
+  @Override
+  public void addObserver(IObserver observer) {
+    observers.add(observer);
+  }
+
+  @Override
+  public void removeObserver(IObserver observer) {
+    observers.remove(observer);
+  }
+
+  // assume some poller calls this every 5 seconds
+  // with the current weather conditions etc
+  @Override
+  public void notifyObservers(Object newState) {
+    for (IObserver observer : observers) {
+      observer.update(newState);
+    }
+  }
+}
+
+class F16 implements IObserver {
+
+  ISubject subject;
+
+  public F16(ISubject subject) {
+    this.subject = subject;
+    subject.addObserver(this);
+  }
+
+  public void land() {
+    subject.removeObserver(this);
+  }
+
+  @Override
+  public void update(Object newState) {
+    // take appropriate action based on weather etc
+  }
+}
+
+
+
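+- the example above is the push model. a minimal sketch of the pull model described earlier - the subject hands over a reference to itself, and the observer pulls only the state it cares about. the interface and getter names here are illustrative -
+
+```java
+interface IWeatherSubject {
+
+  Object getCurrentWeather();
+}
+
+interface IPullObserver {
+
+  // the subject passes itself instead of the new state
+  void update(IWeatherSubject subject);
+}
+
+class F16PullObserver implements IPullObserver {
+
+  @Override
+  public void update(IWeatherSubject subject) {
+    Object weather = subject.getCurrentWeather();
+    // react to the weather here
+  }
+}
+```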
+ +### Interpreter Pattern + +- a grammar defines if some code is syntactically correct or not +- "context free grammar" - has the following components - + - start symbol + - set of terminal symbols + - set of non terminal symbols + - set of production rules +- we keep expanding the non terminal symbols till we reach the terminal symbols +- any final expression we can derive is called a "sentence" +- the sentence is said to be in the "language of grammar" we defined +- e.g. we have three operations in a flight simulation software - glide, barrel roll, splits +- we cannot perform barrel rolls and splits one after another +- we need to start and end with glide +- the production rules will look like as follows - + ``` + -> + -> glide + -> barrel roll + -> splits + ``` +- ast (abstract syntax tree) - can be used to represent the sentences in our grammar +- in this ast, the internal nodes are non terminal symbols, while leaf nodes are terminal symbols +- an ast example -
+ ![interpreter ast](/assets/img/low-level-design/interpreter-ast.drawio.png) +- "abstract expression" - the interface +- the abstract expression can be a "terminal expression" or a "non terminal expression" +- the non terminal expression will hold a reference to the other abstract expressions based on the production rules +- how we interpret an expression depends on the "context" + +
+code example +
+
+interface AbstractExpression {
+
+  void interpret(Context context);
+}
+
+class Context {}
+
+class Flight implements AbstractExpression {
+
+  private AbstractExpression flightOne;
+  private AbstractExpression showOff;
+  private AbstractExpression flightTwo;
+
+  @Override
+  public void interpret(Context context) {
+  }
+}
+
+class ShowOff implements AbstractExpression {
+
+  private AbstractExpression barrelRoll;
+  private AbstractExpression splits;
+
+  @Override
+  public void interpret(Context context) {
+  }
+}
+
+class Glide implements AbstractExpression {
+
+  @Override
+  public void interpret(Context context) {
+  }
+}
+
+class BarrelRoll implements AbstractExpression { 
+
+  @Override
+  public void interpret(Context context) {
+  }
+}
+
+class Splits implements AbstractExpression {
+
+  @Override
+  public void interpret(Context context) {
+  }
+}
+
+
+
+
+### Command Pattern
+
+- represent an action or a request as an object
+- this can then be passed to other objects as parameters
+- these requests can then be queued for later execution
+- think of it like "callbacks"
+- e.g. when we press a button, it does not need to know what to do
+- it only needs to know the object that knows what to do
+- "receiver" - the object that knows what to do - `MissileLauncher` in this case
+- "command" and "concrete command" - the command is composed of the receiver. it is the abstraction layer - `Command` and `FireMissileCommand` in this case
+- "invoker" - invokes the command - it is unaware of the underlying implementation of the command - `AircraftPanel` in this case
+- "macro command" - set up a series of command objects in another command object. all these command objects will be invoked when invoking this macro command. this is a combination of [composite pattern](#composite-pattern) + command pattern
+
+
+code example +
+
+interface Command {
+
+  void execute();
+}
+
+class FireMissileCommand implements Command {
+
+  MissileLauncher missileLauncher;
+
+  @Override
+  void execute() {
+    missileLauncher.fire();
+  }
+}
+
+class AircraftPanel {
+
+  Command[] commands = new Command[10];
+
+  void setCommand(int i, Command command) {
+    commands[i] = command;
+  }
+
+  void fire() {
+    commands[3].execute();
+  }
+}
+
+
+
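+- wiring the receiver, command and invoker together - a sketch only, it sets the command's receiver field directly since no constructor was shown above, and assumes `MissileLauncher` has a no-arg constructor -
+
+```java
+MissileLauncher launcher = new MissileLauncher();
+
+FireMissileCommand fireMissile = new FireMissileCommand();
+fireMissile.missileLauncher = launcher;
+
+AircraftPanel panel = new AircraftPanel();
+panel.setCommand(3, fireMissile);
+
+panel.fire(); // the panel has no idea a missile launcher is involved
+```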
+
+### Iterator Pattern
+
+- traverse the elements of an aggregate without exposing the internal implementation
+- so, we have "iterator" and "concrete iterator", "aggregate" and "concrete aggregate"
+- "external iterator" - the client requests for the next element and performs the operation
+- "internal iterator" - the client hands over the operation to perform to the iterator
+- this way, the iterator is never exposed to the client
+- e.g. [composite pattern](#composite-pattern) typically uses internal iterators
+- below, we have multiple aggregates, each having their own iterator but everything is hidden behind one iterator
+
+
+code example +
+
+public interface Iterator {
+
+  IAircraft next();
+
+  boolean hasNext();
+}
+
+public class AirForceIterator implements Iterator {
+
+  List<IAircraft> jets;
+  IAircraft[] helis;
+  
+  int jetsPosition = 0;
+  int helisPosition = 0;
+
+  public AirForceIterator(AirForce airForce) {
+    jets = airForce.getJets();
+    helis = airForce.getHelis();
+  }
+
+  @Override
+  public IAircraft next() {
+
+    if (helisPosition < helis.length) {
+      return helis[helisPosition++];
+    }
+
+    if (jetsPosition < jets.size()) {
+      return jets.get(jetsPosition++);
+    }
+
+    throw new RuntimeException("No more elements");
+  }
+
+  @Override
+  public boolean hasNext() {
+
+    return helis.length > helisPosition ||
+      jets.size() > jetsPosition;
+  }
+}
+
+
+
+
+### Mediator Pattern
+
+- encourage loose coupling between interacting objects
+- by encapsulating interactions in a "mediator" object
+- the interacting objects are called "colleagues" and "concrete colleagues"
+- use when interactions between the colleagues become very complex
+- the colleagues are involved in many to many interactions, but with the mediator, it becomes one to many from mediator to colleagues
+- we can often combine the mediator pattern with the [observer pattern](#observer-pattern) as well
+- e.g. a runway needs to be free for an air craft to land
+- instead of all air crafts looking at each other if the runway is being used, we can use a control tower that manages all of this for us
+
+
+code example +
+
+class Aircraft {
+    
+  ControlTower controlTower;
+
+  void startLanding() {
+    controlTower.queueForLanding(this);
+  }
+
+  void land() {
+    System.out.println("pull out wheels");
+  }
+}
+
+class ControlTower {
+
+  Queue<Aircraft> aircraftQueue;
+
+  void queueForLanding(Aircraft aircraft) {
+    aircraftQueue.enqueue(aircraft);
+  }
+
+  @Schedule("2 minutes")
+  void allowLanding() {
+    if (!aircraftQueue.isEmpty()) {
+      aircraftQueue.dequeue().land();
+    }
+  }
+}
+
+
+
+ +### Memento Pattern + +- capture the internal state of an object without exposing its internal structure +- so that the object can be restored to this state later +- "originator" - the object whose state is captured +- "memento" - the snapshot / the state which was captured +- "caretaker" - the object that holds the memento +- by making the memento a static class inside originator, we ensure that only the originator can access the state - since `getState` is private, outside classes like for e.g. the caretaker cannot call `getState` + +
+code example +
+
+class State { }
+
+class Originator {
+
+  static class Memento {
+
+    private State state;
+
+    Memento(State state) {
+      this.state = state;
+    }
+
+    private State getState() {
+      return state;
+    }
+  }
+  
+  private State state;
+
+  public Memento save() {
+    return new Memento(state);
+  }
+
+  public void restore(Memento memento) {
+    this.state = memento.getState();
+  }
+}
+
+class Caretaker {
+
+  private Stack<Memento> history;
+  private Originator originator;
+
+  void takeSnapshot() {
+    Memento memento = originator.save();
+    history.push(memento);
+  }
+
+  void undo() {
+    Memento memento = history.pop();
+    originator.restore(memento);
+  }
+}
+
+
+
+ +### State Pattern - TODO + +- alter behavior of the object as its state changes +- so that it appears to change its class +- TODO: remaining + +### Template Method Pattern + +- subclasses define parts of the algorithm without modifying the overall structure of the algorithm +- "template method" - the common part stays in the base class +- "hook method" - the variable part is overridden by the subclasses +- the base class can provide default implementations for these hook methods if needed +- the template method can be made final +- e.g. pre flight checks can be the template method, which checks + - fuel levels + - air pressure + - if the door is locked +- all these can be hooks i.e. specific to the aircraft +- helps avoid "dependency rot" - where dependencies at various levels depend on each other horizontally and vertically +- [factory method pattern](#factory-method-pattern) is a special form of the template method pattern + +### Strategy Pattern + +- make algorithms belonging to the same family easily interchangeable +- "strategy" - the common interface +- "concrete strategy" - the actual implementation of the different algorithms +- "context" - uses the strategy +- the context is composed using the strategy +- context can use a default strategy as well to lessen the burden on client + +
+code example +
+
+interface ISort {
+
+  void sort(int[] input);
+}
+
+class BubbleSort implements ISort {   class MergeSort implements ISort {
+
+  @Override                             @Override
+  void sort(int[] input) {              void sort(int[] input) {
+  }                                     }
+}                                     }
+
+class Context {
+
+  private ISort howDoISort;
+
+  public Context(ISort howDoISort) {
+    this.howDoISort = howDoISort;
+  }
+
+  void sort(int[] numbers) {
+      howDoISort.sort(numbers);
+  }
+}
+
+
+
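+- swapping the algorithm is now just a matter of passing a different strategy to the context - a small sketch -
+
+```java
+int[] numbers = { 5, 1, 4, 2 };
+
+new Context(new BubbleSort()).sort(numbers); // fine for tiny inputs
+new Context(new MergeSort()).sort(numbers);  // different algorithm, same context code
+```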
+ +### Visitor Pattern - TODO + +- define operations for elements of an object without changing the class of this object +- e.g. assume we want to monitor several metrics like fuel, altitude, etc on all the air crafts +- option - introduce all these methods on each of the concrete aircraft classes +- issue - we are bloating our aircraft class +- solution - we use the visitor pattern +- note how the visitor pattern will have a separate method for each of the concrete class +- so, we have "element" and "concrete element", "visitor" and "concrete visitor" +- the concrete element will call its corresponding method on the visitor +- if concrete elements increase, we will have to modify all visitors +- so, use the visitor pattern when the element hierarchy is stable but we keep adding new functionality to visitors + +
+code example +
+
+interface Aircraft {
+
+  void accept(AircraftVisitor visitor);
+}
+
+class Boeing implements Aircraft {          class F16 implements Aircraft {
+
+  void accept(AircraftVisitor visitor) {      void accept(AircraftVisitor visitor) {
+    visitor.visitBoeing(this);                  visitor.visitF16(this);
+  }                                           }
+}                                           }
+
+interface AircraftVisitor {
+
+  void visitBoeing(Boeing boeing);
+
+  void visitF16(F16 f16);
+}
+
+class FuelVisitor implements AircraftVisitor {   class DoorVisitor implements AircraftVisitor {
+
+  void visitBoeing(Boeing boeing) {}               void visitBoeing(Boeing boeing) {}
+  
+  void visitF16(F16 f16) {}                        void visitF16(F16 f16) {}
+}                                                }
+
+
+
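+- a possible usage sketch - each aircraft calls back the visit method meant for its own type (double dispatch), so adding a new monitoring concern is just another visitor -
+
+```java
+Aircraft[] fleet = { new Boeing(), new F16() };
+AircraftVisitor fuelVisitor = new FuelVisitor();
+
+for (Aircraft aircraft : fleet) {
+  aircraft.accept(fuelVisitor);
+}
+```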
+ +## Amazon + +warehouse - + +- product_instance(bar_code) +- product(name, price, product_instance[], amount) + - composition for products + - this allows us to select multiple products +- warehouse(product_instance[], address) + - aggregation for product +- warehouse selection - use [strategy pattern](#strategy-pattern), since there can be different algorithms + - closest warehouse to the customer + - warehouse that can fullfil all items in the order, so we do not have to source from multiple warehouses + - warehouse that will induce the least shipping charge + - so, the algorithm will accept list of warehouses and the order, and spit out the warehouse(s) to source the items from + +actors - + +- guest(cart) + - composition for cart + - can browse products + - can add products to cart + - however, the guest cannot checkout until logged in / registered as a customer +- customer extends guest(account) +- admin(account) +- account(name, email, phone, password) + +cart and coupons - + +- item(product, count, price) +- cart(item[]) +- coupon_decorator extends item() + - n% off on all individual items - individual_coupon_decorator extends coupon_decorator(offer_percentage) + - d% off on nth item of type t - n_items_coupon_decorator extends coupon_decorator(offer_percentage, no_of_items) - it can access "count" field of item and compare it with its no_of_items field +- use [composite pattern](#composite-pattern) for calculating total price for items in cart after applying coupons. calling get price on item will return the decorated price + +order - + +- order(customer, item[], payment, order_log[], current_order_status) + - we can use optimistic concurrency control to handle concurrent users +- order_log(order_status, date) +- shipment(shipment_log[], current_shipment_status) +- shipment_log(shipment_status, date) +- payment(payment_status, total_amount, payment_type) + +## BookMyShow + +actors - same logic as [amazon](#amazon) + +static - + +- city +- movie(name, duration, city[]) + - aggregation - a movie can only play in selected cities + - city to movie is many to many +- cinema(name, address, city, hall[]) + - composition - hall +- hall(number, seat[]) + - composition - seat +- seat(seat_type, row, col) + - seat_type - economy, business + +dynamic - + +- show(movie, start_time, hall, show_seat[]) + - movie - aggregation + - composition - show_seat +- show_seat extends seat(price, is_booked) + - note the inheritance - show_seat "is a" seat +- booking(show, show_seat[], payment) + - side note - for flights, this list might not suffice - we might go with map{seat, passenger} instead +- payment(payment_status, total_amount, payment_type) + +handling concurrency - + +- what if two users try booking the same seat + - pessimistic approach - lock when data is read. now, user 2 would not be able to read and therefore book the same seat + - optimistic approach - allows multiple users to perform the read. once the first user is able to successfully update the data, the version changes to v2. now, the second user on trying to update sees that the version has changed, and therefore the update for the second user fails +- so, optimistic might be better - multiple users might be trying to book but only a few of them actually end up booking. if we try locking for all the users every time, the screen would be stuck for other users most of the time +- we can use redis - locks in redis have the concept of time, so that we can block the seats for a certain duration, e.g. 
1 minute, till the payment is completed by the user. if the user exceeds this timeout, they will have to re initiate the booking process from scratch +- also, we can talk about the different isolation levels in a transaction when handing updates + +## Parking Lot + +- account(name, email, password, phone) +- customer(account, ticket[], vehicle[]) +- parking_attendant(account) +- vehicle(number, vehicle_type) + - vehicle_type - bike, cycle, sedan, suv, truck +- parking_spot(parking_spot_type, is_empty) + - parking_spot_type - compact, large, two_wheeler, electric (has charging capability, and might have extra charges for that) +- parking_floor(all_parking_spots: map{parking_spot_type, parking_spot[]}, display_board) +- display_board(free_spots: map{parking_spot_type, no_of_free_spots}) + - can use [observer pattern](#observer-pattern) - as there are vehicles added to / removed from parking floor, update the numbers here +- parking_strategy - uses the strategy pattern - finding the next available parking spot - + - find the nearest parking spot to the entrance - useful if there are multiple entrances + - if no more compact spaces are available, start allocating from empty spots reserved for electric vehicle + - will receive the vehicle type, and return the parking spot of this type if available +- pricing_strategy - calculate the price. will receive just the ticket, and output the final price + - hourly rates may be different i.e. hourly rate decreases as the total time the vehicle is parked increases - 2$ for first hour, 1$ for 2nd hour, 0.5$ for remaining hours + - can have a capped price if the vehicle is to be parked for a whole day / multiple days +- ticket(entry_time, exit_time, payment, parking_spot, vehicle, ticket_status) + - price can change based on type of parking spot +- payment(payment_status, total_amount, payment_type) + +## ATM + +- we use [state design pattern](#state-pattern---todo) for modelling the different states of the atm - state(insert_card(), enter_pin(), select_operation(), perform_operation()) + - idle_state extends state(card) + - enter_pin extends state(pin) + - display_operations extends state(operation_type) + - perform_operation extends state(operation, perform() - call operation.perform()) + - idle +- operation(operation_type, perform) + - withdraw_cash extends operation(amount) + - deposit_cash extends operation(amount) + - deposit_cheque extends operation(cheque) + - print_statement extends operation() +- [chain of responsibility design pattern](#chain-of-responsibility-pattern) for deciding cash withdrawal when deciding denominations to use. denomination_processor(next, amount) + - 2k_processor(500_processor, 2000) + - 500_processor(100_processor, 100) +- atm(dispenser, screen, keypad, current_state) + - we basically call atm.current_state = atm.current_state.transition_action(). 
the transition_action automatically returns the right next state +- customer(account[]) +- account(account_type, card) + - account_type - checking, savings +- card(number, cvv, expiry) diff --git a/_posts/2024-03-09-high-level-design.md b/_posts/2024-03-09-high-level-design.md new file mode 100644 index 0000000..a1e6b67 --- /dev/null +++ b/_posts/2024-03-09-high-level-design.md @@ -0,0 +1,1504 @@ +--- +title: High Level Design +--- + +## Software Architecture + +- what is software architecture - + - high level design - hide implementations and express in terms of abstractions + - of the different components + - and how they interact with each other + - to fulfil requirements (what it should do) and constraints (what it should not do) +- software development lifecycle - we can repeat this process again and again + - design + - implementation + - testing + - deployment +- software architecture is the output of the first step / input to the second step +- decisions at the bigger level cannot be changed easily, cost a lot of wasted effort, etc so we need to make good decisions + +## System Requirements + +- the scope of the problem / the number of ways to solve a problem increases as the abstraction increases from designing a method -> class -> module -> application +- the ambiguous problem needs to be converted to a technical problem +- we might need to ask clarifying questions to the client +- different types of requirements - + - features of the system + - quality attributes + - system constraints + +### Features of the System + +- express the actual "functional requirements" of the system +- e.g. hitchhiking service - allow users to share rides +- identify all the "actors" and "use cases" +- expand each "use case" through a "flow of events" - we can use a [sequence diagram](/posts/low-level-design/#sequence-diagrams) for this + +![features of the system](/assets/img/high-level-design/features-of-the-system.svg) + +### Quality Attributes + +- to address the "non functional requirements" of the system +- how well a system should perform in a [particular dimension](#important-quality-attributes) +- [important quality attributes](#important-quality-attributes) include [performance](#performance), [scalability](#scalability), [availability](#availability), [fault tolerance](#fault-tolerance), etc +- have a direct impact on the technical decisions of the system unlike [features of the system](#features-of-the-system) +- e.g. show products when searched for under 100ms, system should be available 99.9% of the time, etc +- they have to "measurable" and "testable" +- need to make "tradeoffs" - there is no one architecture that can address all problems +- sometimes, clients might make "infeasible" requirements - 100% availability, unlimited storage, etc. we should call them out + +### System Constraints + +- limitations and boundaries of a system +- three types of constraints - technical, business and regulatory +- "technical constraints" - e.g. lockin to a particular database, cloud vendor, software license, etc +- "business constraints" - time and budget limitations +- "regulatory constraints" - e.g. 
location specific +- we should avoid tight coupling, else we would have constraints specific to hardware etc + +## Important Quality Attributes + +### Performance + +- "response time" - time between client sending a request and receiving a response +- response time = "processing time" + "waiting time" +- processing time - time spent in performing the actual business logic +- waiting time - time spent in transit, waiting queues, etc +- waiting time is also called "latency", while response time is also called "end to end latency" +- response time is critical when a request is in the path of a user interaction - users do not like to wait +- "throughput" - can be + - either "number of tasks performed per unit of time" + - or "amount of data processed per unit time" i.e. bits per second etc +- throughput can be useful when for e.g. analyzing a constant stream of logs from several sources +- consideration 1 (response time) - e.g. we as developers think our processing time is 10 ms so response time is 10ms, but assume our server can only process one request at a time +- if we get two concurrent requests, the waiting time for the second request will be 10ms, thus increasing its response time to 20ms +- so, response time is affected by waiting time as well +- consideration 2 (response time) - response times for some requests in our system will be very bad, while all others would be relatively better +- these relatively slow response times are called "tail latency" +- so, instead of metrics like median or average, the most effective way to measure response times is a "percentile distribution chart", instead of just using median or average +- in this chart, the "xth percentile" is the value below which x% of the values can be found +- refer the part around 100th percentile in the percentile distribution graph below for tail latency +- so, we would set [slo](#sla-slo-and-sli) like so - 95th percentile of requests should have 30ms response times + +![percentile distribution response time](/assets/img/high-level-design/percentile-distribution-response-time.svg) + +- consideration 3 (both response time and throughput) - effect of load - the point where the response time starts increasing / throughput starts decreasing due to increase in load is called the "degradation point" + +![degradation point](/assets/img/high-level-design/degradation-point.svg) + +### Scalability + +- the load on our system never stays the same - seasonal traffic e.g. 
during holidays +- "scalability" - systems capability to handle growing amount of load +- scalability are of three types +- "vertical scalability" - adding more resources / upgrading existing resources on a single machine +- advantage - no code changes are needed, migration is straightforward +- disadvantage - + - there is a limit to which we can scale vertically + - does not provide [high availability](#availability) or [fault tolerance](#fault-tolerance) +- "horizontal scalability" - adding more instances on different machines +- advantage - + - no limit to scalability + - more importantly - provides [high availability](#availability) or [fault tolerance](#fault-tolerance) +- disadvantage - + - code changes might be required + - overhead around coordination is introduced +- "team / organization scalability" - as we add more engineers, productivity decreases after a certain point +- we can split codebase into separate modules or better, architecture into separate services to decrease conflicts + +### Availability + +- "availability" - fraction of time our system is operational +- so, availability = uptime / (uptime + downtime) +- mtbf - "mean time between failures" and mttr - "mean time to recovery" (both are self explanatory) +- so, we can also say availability = mtbf / (mtbf + mttr) +- so, one way to ensure high availability is to reduce mttr i.e. detect and resolve issues in near 0 time +- 99.9% means ~9h of downtime in a year + +### Fault Tolerance + +- there can be "human errors" (e.g. faulty config), "software errors" (out of memory exceptions) or "hardware failures" (infrastructure issues / outage) +- failures are inevitable +- "fault tolerance" - helps keep system operational (i.e. [available](#availability)) despite failure of multiple components +- fault tolerance tactics - prevention, detection / isolation and recovery +- "failure prevention" - eliminate single points of failures. use "replication" and "redundancy" for this. two strategies - + - "active active architecture" - requests can go to any replica. so, all of them have to be kept in sync. so, if one of them goes down, the remaining one will still continue to operate. advantage - helps balance load, since it is like [horizontal scalability](#scalability). disadvantage - keeping all replicas in sync is non trivial + - "active passive architecture" - one primary replica takes all the requests, while the passive replicas take periodic snapshots of the active replica. disadvantage - we cannot [scale](#scalability) our system horizontally, since we are still restricted to the one active replica. 
advantage - this leader follower pattern is much easier to implement +- "failure detection / isolation" - if we have a faulty replica, our system should be able to detect it and isolate it +- this is done by a monitoring service using - + - health checks - monitor service polling the servers periodically + - heartbeats - the servers sending heartbeats to the monitoring service periodically +- monitoring service can be more complex - declare a host to be failed based on its error rate, if its response time has suddenly increased, etc +- "failure recovery" - some strategies - + - stop sending traffic to the faulty instance + - attempt to restart the host + - "rollback" - + - rollback service to a stable version + - rollback databases when it reaches an inconsistent state to a previous consistent state + +### SLA, SLO and SLI + +- sla - "service level agreement" - agreement between the service provider and client +- if we fail to deliver these sla, we have to provide refunds, license extensions, etc to clients +- slo - "service level objective" - goals we set for our systems +- each slo can represent one of the [quality attributes](#important-quality-attributes) +- an sla is basically a collection of slo +- even if we do not have an sla, we should have slo so that our users know what they can expect from us +- sli - "service level indicator" - quantitative measure of the different [quality attributes](#important-quality-attributes) +- achieved using monitoring services +- we can compare what we see in sli to what we define in slo +- this is why we said [quality attributes](#quality-attributes) should be measurable and testable - otherwise, we would not have been able to measure our slo using sli +- general guide - based on what clients ask, we should define slo and then find out matching sli +- another technique - define loser external slo but stricter internal slo + +## API Design + +- api - "application programming interface" +- the interface is a "contract" between our systems and the client +- our system becomes a "black box" - the client need not know the internal implementation of this api, they just have to interact with this api +- once we define the apis, our clients can start integrating with it without us actually having built its implementation entirely +- it is called remotely over the network +- apis can be public, private / internal and partner +- "public api" - exposed to general public and any developer can call them. might require registration from users first +- "private api" - used by other systems inside the organization, but not exposed outside the organization +- "partner api" - to organizations having a business relationship with us +- two types of apis we discuss - rpc and rest api + +### Good Practices for API Design + +- "encapsulation" - clients should not have to care about implementation +- we can change the implementation without the client changing anything on its end +- "ease of use" - descriptive actions and resources, keeping it consistent +- "idempotent operations" - no effect if the operation is performed > once. 
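- a small illustration of the difference (hypothetical code, not from any real api) -

  ```
  # retrying a set-style operation is harmless, retrying an increment is not
  class Account:
      def __init__(self):
          self.address = None
          self.balance = 0

      def set_address(self, address):  # idempotent
          self.address = address

      def add_balance(self, amount):   # not idempotent
          self.balance += amount

  account = Account()
  for _ in range(2):  # simulate a client retrying after a lost response
      account.set_address("221b baker street")
      account.add_balance(100)

  print(account.address)  # same as after a single call
  print(account.balance)  # 200 - the retry changed the state again
  ```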
updating the address is idempotent, increasing balance by 100 is not +- assume there is an error due to some reason - the request is lost / response to the message is lost (but the request was processed) +- now, the client does not know which one happened +- so, even if it retries the operation, it should not have any consequences +- "pagination" for large responses - the client can provide the offset and the number of items to retrieve +- "asynchronous operations" - some operations are very big, and we cannot provide any reasonable response immediately +- instead of the client having to wait for something like this, we can use asynchronous operations +- the immediate response includes an identifier which the client can use to track the progress of the operation +- "versioning" - allows us to make non backward compatible changes to the api + +### RPC + +- rpc - "remote procedure calls" +- ability of a client to execute a subroutine on a remote server +- "location transparency" - calling an rpc looks like calling a local method +- applications written in different programming languages can also talk using rpc +- idl - "interface description language" - we define the api and data types in this language +- then, the rpc framework we use generates 2 separate implementations - "server stub" and "client stub" +- they include the corresponding classes for the api and data types we define in the interface description language +- rpc will also take care of marshalling / unmarshalling the request / response for us automatically +- it might include propagation of exception etc as well +- rpc helps the clients focus on performing an action on the server systems, and not worry about the network communication +- drawbacks - + - remote methods are a lot slower and unreliable. the client execution will thus be blocked. so, we should try writing asynchronous versions for the remote methods + - it is also not useful when we want the features like cookies, headers etc +- popular frameworks - + - grpc by google - high performance rpc. uses "http/2" for transport and "protocol buffers" as the interface description language + - apache thrift - by facebook + - java rmi (remote method invocation) - unlike above two, specific to java - helps one jvm invoke a method on another jvm + +### Rest API + +- rest - "representational state transfer" +- it is not a standard or protocol, but an architectural style +- advantage - helps maintain [quality attributes](#important-quality-attributes) like [performance](#performance), [availability](#availability), [scalability](#scalability) and [fault tolerance](#fault-tolerance) +- an api that obeys the rest architectural style is called a "restful api" +- the only actions a client can take in an rpc api is defined inside the interface definition language - so, it is somewhat static +- in rest, we can use hateoas - "hypermedia as the engine of application state" - the response contains "hypermedia links" around the operations that the client can perform +- rest should be "stateless" - no session information should be maintained by the server +- this way, each request is served in isolation +- advantage of statelessness - multiple requests by a single client can be processed by different horizontally scaled instances +- "cacheable" - the server can declare a response to be as cacheable or non cacheable. 
if a response is cacheable, the extra round trip to the server is avoided - the response is returned from the cache directly and our server is never even called +- this reduces response time and the load on server is reduced +- "resources" - resources are organized in a hierarchy in using the uri "uniform resource locator" +- a resource can be a "simple resource" or a "collection resource" +- resources can also have "sub resources" +- use nouns only for resources +- "resource identifiers" - should be unique +- for modelling actions, we use the http verbs +- so, unlike rpc, the only actions supported are crud - creating (POST), reading (GET), updating (PUT) and deleting (DELETE) a resource +- GET method is considered "safe" - does not change the state of the system +- GET, PUT and DELETE methods are considered "idempotent" - applying them multiple times will result in the same state change as applying them once +- GET requests are also considered cacheable by default +- the client can send the additional data using json (or xml) +- creating a rest api for a movie streaming service +- identify the resources - movies, users, reviews, actors +- map to uris - + - /users, /users/{user_id} + - /movies, /movies/{movie_id} + - /actors, /actors/{actor_id} + - /movies/{movie_id}/reviews, /movies/{movie_id}/reviews/{review_id} + +## Large Scale Systems Building Blocks + +### Load Balancers + +- if we run our application on multiple instances due to [horizontal scaling](#scalability), the client applications will have to know the ip addresses of all these instances in advance +- this results in tight coupling of clients to our systems, and makes it hard for us to make any changes +- advantages of load balancers - + - acts as a layer of abstraction between clients and our instances, so it looks like one server to the client + - distributes the load from clients among our horizontally scaled instances equally + - "autoscaling policies" - easily add / remove instances to the fleet based on requests per second, network bandwidth, etc, and all of this is hidden behind a single load balancer + - "fault tolerance" - load balancers can be configured with "health checks" to avoid sending traffic to unhealthy instances + - [rolling release](#rolling-deployment-pattern) - we can perform maintenance tasks easily by pulling down hosts one by one, and the load balancer would not direct traffic to these hosts +- types of load balancers - dns, hardware, software and global server +- "dns load balancer" - dns maps human friendly urls to ip addresses +- "dns record" is the response by "dns servers" when asked for ip addresses for a url +- can return multiple ip addresses in this record, ordered differently every time (maybe using round robin) +- the clients typically pick the first address from this list, and we achieve load balancing this way +- disadvantages + - dns servers do not perform health checks, so can return ips of faulty servers + - the dns record can be cached at client, which means they can call the faulty instance till the ttl - "time to live" expires + - exposes the ip addresses of our instances directly, thus exposing implementation details +- "hardware load balancers" and "software load balancers" address all the above problems with dns load balancers +- hardware load balancers run on hardware optimized for load balancers +- software load balancers can run on any general purpose machine +- all the communication is done through the load balancer, thus making our systems much more secure - in dns load 
balancing, it was happening directly once the client got the ip addresses +- they can monitor the health of our instances and only route traffic to the healthy instances +- they also allow for more advanced setup like take the instance type into account - some instances in our fleet might be more powerful than others, use more powerful techniques like current requests per second when load balancing the traffic, etc +- disadvantages + - typically, hardware and software load balancers are located close to the instances. so, if we run our load on multiple geographical locations called data centers, one group of the instances will have the load balancer located far away + - also, load balancers do not solve the "dns resolution" problem on their own - load balancers are again just an ip address, and we need to map it to a more human friendly url + +![hw and sw lb disadvantage](/assets/img/high-level-design/hw-and-sw-lb-disadvantage.svg) + +- "global server load balancer" - more intelligent than the typical dns load balancer +- it can redirect clients to the data center geographically closer to them, the location that will send a faster response time (this can be different from just using the geographical location due to number of hops), etc +- there is a load balancer deployed at each of the data center +- also, gslb can handle outages in one data center by not routing traffic to this faulty data center + +![gslb](/assets/img/high-level-design/gslb.svg) + +- open source software load balancers - haproxy, nginx +- cloud load balancers - aws elb, which has various types as well +- global server load balancer - route53 +- load balancers are also called "dispatchers" +- if using [microservices](#microservices-architecture), we can have a dispatcher for each micro service, and each microservice can be individually scaled +- below, we use load balancers both for communication from outside and internal clients + +![load balancing microservices](/assets/img/high-level-design/load-balancing-microservices.png) + +### Message Brokers + +- also called mom - "message oriented middleware" +- "synchronous communication" - both sides - client and server need to be healthy and maintain an active connection either with each other or via the load balancer - this is good when the server takes a short time to process and respond +- "message broker" - a queue data structure to store messages between senders and receivers +- message brokers helps with "asynchronous architecture" +- it entirely decouples senders from receivers - the sender does not wait for a confirmation from the receiver - it just puts the message onto the broker. this adds a lot of [fault tolerance](#fault-tolerance) - receivers can be down and still receive the events when they come back up. they also prevent messages from being lost. in synchronous systems, it can happen that the request / response is lost, and the client will never know which one it was, and it might retry, which would lead into further issues if the request is not idempotent +- e.g. the users see a success screen immediately after placing an order, while they get an email later if the order is placed successfully. 
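- a toy sketch of this fire and forget flow, with an in-memory queue standing in for the broker and made up service names -

  ```
  import queue, threading, time

  broker = queue.Queue()

  def place_order(order_id):
      broker.put({"order_id": order_id})  # publish and return immediately
      return {"status": "accepted"}       # user sees the success screen now

  def notification_worker():
      while True:
          message = broker.get()
          time.sleep(0.1)                 # simulate slow downstream work
          print("email sent for order", message["order_id"])
          broker.task_done()

  threading.Thread(target=notification_worker, daemon=True).start()
  print(place_order(42))                  # returns before the email goes out
  broker.join()                           # only so the demo waits for the email
  ```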
this placing of an order involves a chain of services like order service, payment service, notification service, etc, and the client gets an immediate response with all this being handled behind the scenes +- message brokers are generally not exposed to outside world unlike load balancers +- it acts like a buffer when there is an increase in the load - assume we use synchronous communication - if there is a sudden spike, we will we will receive a lot of requests concurrently, which can result in our system crashing, dropping requests, etc. this is solved using asynchronous communication +- it can help with [load balancing](#load-balancing-pattern) - multiple instances can listen for an event and the message broker will send it to one of them +- it can also perform transformations on these messages, thus helping with [streaming analytics](#big-data) +- open source message brokers - rabbitmq, kafka +- cloud message brokers - sqs + +### API Gateway + +- we break our services into smaller services due to the [organization scalability](#scalability) +- the client will also need to now know about the different services - one service for fetching videos, another for fetching comments and so on +- api gateway helps with "api composition" - we compose all the different apis in all our services into one single api that the clients can interact with +- now, each service will need its own authentication and authorization. api gateway helps eliminate the duplication of auth logic - api gateway supports not only authentication but authorization as well +- we can have different apis for mobile vs desktop applications, and the client would be abstracted away from all this - [backends for frontends pattern](#backends-for-frontends-pattern) using user agent header +- api gateways can perform "ssl termination" - the traffic is encrypted between clients and api gateway, but decrypted between api gateway and servers +- api gateway can also implement "rate limiting" to prevent dos "denial of service" attacks +- without an api gateway, the client will make a call to fetch the home page, another call to fetch the video and finally another call for all the comments of this video. using "request routing", api gateway makes all the calls itself and sends the aggregated response to the client. 
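- a simplified sketch of this composition, with the internal services stubbed out by made up functions -

  ```
  from concurrent.futures import ThreadPoolExecutor

  # stand-ins for the internal services the gateway would call
  def fetch_home_page(user_id): return {"layout": "default"}
  def fetch_video(video_id):    return {"id": video_id, "title": "system design 101"}
  def fetch_comments(video_id): return [{"user": "alice", "text": "nice video"}]

  def gateway_video_page(user_id, video_id):
      # one request from the client, three internal calls made by the gateway
      with ThreadPoolExecutor() as pool:
          home     = pool.submit(fetch_home_page, user_id)
          video    = pool.submit(fetch_video, video_id)
          comments = pool.submit(fetch_comments, video_id)
          return {"home": home.result(), "video": video.result(), "comments": comments.result()}

  print(gateway_video_page(user_id=7, video_id=42))
  ```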
this helps improve the performance a lot, since we are saved from these multiple requests going over the internet +- "static content and response caching" - caching to reduce response time for client +- it supports monitoring as well to not route traffic to unhealthy instances +- it can perform "protocol translation" - the api gateway exposes a rest api, while the underlying services use soap and xml, grpc + protocol buffers, etc +- considerations - api gateway can become a single point of failure - deploy multiple api gateways sitting behind a global server load balancer +- do not put business logic into api gateways +- open source api gateway - netflix zuul +- cloud api gateway - amazon api gateway + +### Note - Load Balancers vs API Gateway + +- [load balancers](#load-balancers) are only for balancing load among identical "servers" +- api gateway is the "public facing interface" that routes traffic to "services" and not "servers" +- so, a common pattern is that an api gateway routes traffic to load balancers, which can then route traffic to the individual servers +- apart from that - feature sets of both are different - + - load balancer is more around the [different routing algorithms](#load-balancing-pattern), performing health checks, etc + - api gateway is more around api composition, auth, request routing, protocol translation, throttling, caching, ssl termination, etc +- so, a load balancer might be enough for internal, individual services, while we might need an api gateway for public facing services + +![load balancer vs api gateway](/assets/img/high-level-design/load-balancer-vs-api-gateway.png) + +### CDN + +- cdn - "content delivery network" +- even with hosting on multiple data centers, there is significant latency between end user and server location +- first the 3 way handshake happens, then maybe the html is served and eventually all static assets like images are served +- this involves multiple network round trips and hops from the client to the server +- users do not wait for long for websites to load - they typically abandon it +- we can get the static content like htm, css, js, images and videos closer to our end users +- cdn is a "globally distributed network of servers" +- the servers are called "edge servers" +- the location the cdn servers are present at are called pop - "points of presence" +- page loads are faster now +- cdn also protects us against ddos attacks +- cdn also uses technologies that are more optimized, like using storage optimized for delivering static content, compressing using algorithms like gzip, minification of files, etc +- there are two strategies we can use - pull and push +- "pull strategy" - we tell cdn which content it should cache, and how often this should be "invalidated", which is configured by using a "ttl" property +- the first time, the cdn has to make the request to our servers to cache it +- however, subsequent requests are served by the edge servers of the cdn directly +- after the expiry, the cdn will send our servers a new request to check if the asset has changed, and accordingly refresh the cached asset +- disadvantages + - servers need to be available (first time or when ttl is reached) in order to serve the response + - first request after ttl is reached is slow +- "push strategy" - we publish the content to the cdn directly when the new version of the asset is available +- so, we typically do not set a ttl / set a very long ttl in this +- advantage - using the push strategy, the dependency on our servers to stay 
available is removed +- disadvantage - not desirable for frequently changing content, since it would require frequent invalidations and pushes from our end +- examples - cloudflare, amazon cloudfront + +## Data Storage + +### Relational Databases + +- refer [relational databases](/posts/relational-databases/) - tables, rows, columns, primary and foreign keys, etc +- advantages - + - perform flexible and complex queries using for e.g. joins + - remove data duplication by storing data efficiently + - intuitive for humans + - provides guarantees around [acid transactions](/posts/spring/#jpa) +- disadvantages - + - rigid structure enforced by schema, which requires planning ahead of time + - hard to maintain and scale due to guarantees around acid transactions - it can only be scaled vertically, not horizontally + - slower reads + +### Non Relational Databases + +- nosql databases - non relational databases +- solve drawbacks of [relational databases](#relational-databases) +- advantages - + - remove rigidity around schema - different records can have different sets of attributes + - eliminate the need for an orm - store data in a more "programming language friendly" and not "human friendly" way, by supporting structures like lists, maps, etc + - support much faster queries + - scale much more than relational databases, which is useful for big data like use cases - it can be scaled horizontally as well + - it follows base - + - basically available - never rejects the reads or writes + - safe state - can change data without user interaction - e.g. when performing reconciliation when there is deviation between replicas + - eventually consistent - we might get stale data +- disadvantages - + - does not support complex querying - operations like joins become hard + - acid transactions are not supported +- several types of non relational databases +- key value store - the value can be anything and it is opaque to the database - we cannot typically query on the value, only on the key. one use case - counters touched by multiple services. e.g. redis, amazon dynamodb +- document store - collections of documents, where documents have relatively more structure compared to a key value store - we can query on value. values are like an object. e.g. cassandra, mongodb +- graph database - an extension of a document store. helps establish relationship between records easily. use case - recommendation engine, social networks, etc. e.g. 
neo4j, amazon neptune +- we can also use nosql databases as a layer of cache in front of sql databases + +### Choosing the Right Database + +- redis - + - use cases - cache database calls, cache external service calls, etc + - these are key value stores +- s3 - + - used for assets like videos, images, etc + - typically backed by cdn solutions as well +- elasticsearch - + - built on top of apache lucene + - search using different fields of the entities + - supports fuzzy searching to help with typos - we can also configure the edit distance based on use case + - they are not meant to serve as primary sources of data - they should only serve searches +- influxdb - + - it is a time series database + - used for tracking application metrics like cpu utilization, throughput, etc + - it typically supports / is optimized for append only operations - it should not be used for frequently changing data + - read queries are performed in bulk - we query for the last few minutes or hours of data and perform aggregations on them +- cassandra - + - can handle massive amounts of reads and writes + - follows a no master / leaderless strategy + - the entire design of [key value store](/posts/high-level-design-case-studies/#key-value-store) comes in here + - so, horizontally scaling is as simple as adding more nodes + - these key value stores can make queries based on partition key easily - however, they cannot perform any complex searching +- hadoop - + - used for data warehousing to perform analytics + - we can dump all of the data in a large database and support querying on this data + - used for offline reporting +- mysql - + - if we have structured information and we need acid transactions + - we want strong consistency + - use cases - inventory management, payment related, etc +- mongodb - + - this is a document db + - lot of attributes, non rigid schema + - variety of queries - optimized for json like structures +- cassandra - + - this is a columnar db + - used for ever increasing data + - types of queries supported are mostly partition key based + +### Improve Quality Attributes of Databases + +- three techniques - indexing, replication, partitioning +- "indexing" - speed up retrievals by locating them in sub linear time +- without indexing, retrievals would require a full table scan +- this is a [performance](#performance) bottleneck +- underneath, it uses data structures like + - hash maps - e.g. find all people from a particular city. city can be the key, while the value can be a list of row indices containing that city + - balanced b trees - e.g. find all people in a particular age range +- composite indexes - formed using a set of columns +- while the advantage is that reads speed up, disadvantages are + - more storage space is required + - writes become slower +- "replication" - already discussed in [fault tolerance](#fault-tolerance) for compute, same logic +- disadvantage - not trivial to maintain, more common in non relational databases than in relational databases +- "partitioning / sharding" - in replication, we copy the same data in all replicas. 
in partitioning / sharding, we split the data in different replicas +- now, we are not limited by the storage capability of one machine +- additionally with more storage, queries can now be performed in parallel on the different partitions, thus increasing the speed +- disadvantage + - route the query to the right partition + - avoid hot partitions, etc + - more common in non relational databases than in relational databases +- partitioning can be done for compute as well - e.g. traffic from paid customers go to more powerful machines unlike traffic from free customers + +### Brewer's CAP Theorem + +- in case of a network partition, a distributed database has to chose one of consistency and availability +- e.g. below, a user updates the value to 6 in a replica +- another user queries another replica. the replica then via intercommunication realized that the value has changed, and sends the updated 6 value to the user + +![cap theorem introduction](/assets/img/high-level-design/cap-theorem-introduction.svg) + +- "network partition" - e.g. due to some network issues, one replica is isolated from others +- now, the replica that is isolated has two options - + - favoring availability - return its local value, which may be outdated + - favoring consistency - return an error, asking to try again later +- note - this only happened when there is a network partition, otherwise, all three were guaranteed +- definitions below in cap theorem are a little bit different then what we saw for e.g. [here](#important-quality-attributes) +- "consistency" - read request receives either the most recent write or an error. this helps guarantee all the clients see the same value at the same time, regardless of the database instance they communicate with +- "availability" - every request receives a non error response, which can be outdated +- "partition tolerance" - system continues to operate despite an arbitrary amount of messages being lost over the network +- so, cap theorem states that we can only have two of the three things +- so, we already saw cp and ap, what about ca? +- we can have ca if we have no replicas - only a centralized database + +### Unstructured Data + +- unstructured data - does not follow any "structure" +- e.g. audio / video files etc - they are just a blob "binary large object" +- while both [relational](#relational-databases) and [non relational](#non-relational-databases) databases allow for storing of blobs, they are meant for structured, and not unstructured data. e.g. they impose size limits etc +- some use cases of unstructured data - + - users upload files like videos and images, which we need to process (e.g. transcode, compress, etc) + - relational / non relational database snapshots - these snapshots are unstructured data + - web hosting - static content + - huge datasets used for machine learning, e.g. readings from sensors +- two solutions for unstructured data - dfs and object storage +- dfs - "distributed file system" +- features / advantages - + - internally, can have features like replication, auto healing, etc + - looks like a familiar tree like structure (files within folders) to us + - works like file system - mounting on hosts etc + - we can modify files like we typically do when working locally, e.g. append logs to log files +- disadvantage + - cannot work with web for static content directly - will require a wrapper on top + - has limits on the number of files we can store i.e. 
the storage space has limits +- "object / blob storage" - scalable storage, but unlike dfs has no limits on how many objects can be stored +- stored in containers called buckets +- also, object storage allows "bigger" files compared to dfs - which makes them ideal for storing database snapshots +- they expose a rest / http api unlike dfs, which can be easily referenced by our static html pages +- they support "object versioning" - for a file system, another wrapper would be needed +- files are stored in a "flat structure" - not a tree like structure like in file systems +- the object has a name and a value associated with it, which is the actual content +- typically, object storage is broken into several classes, which offer different throughput and latency +- object storage uses replication too +- disadvantage - + - files cannot be opened and modified like we can when using dfs - we need to for e.g. create and upload an entirely new version + - cannot be mounted like file systems +- we can also run object storage services on our own storage, if cloud is not an option +- e.g. openio is such a solution +- s3 (simple storage service) is aws's object storage + +## Big Data + +- datasets are either very large in size or come at a very high rate for our system to be able to process +- the output of big data processing can be visualizations, data that can be queried, predictive analysis, etc +- "batch processing" - + - we store the data on distributed file system + - we then run jobs on it based on a schedule + - every time the job runs, it can either pick up the new data that was added to the system since the last time it ran, or it can process the entire dataset from scratch + - after processing, it can write the computed view to a database +- advantages of batch processing + - easy to implement + - more efficient than processing each event individually + - e.g. we push some faulty code. if our dfs still has all the original data, we can push the fixed code and run the job on the entire dataset again + - finally, we have visibility into historic data as well +- drawbacks - not realtime +- e.g. we would like logs and metrics to be analyzed realtime so that we can identify and debug production issues quicker +- so, we use "stream processing" + - the events come on a [message broker](#message-brokers) + - so it reacts realtime, not based on a schedule + - after processing, it can write the computed view to a database +- advantage - react immediately +- disadvantage - complex analysis cannot be done - fusing data from different times is very difficult / not possible - our computations can only use recent data +- going back to the same e.g. of observability systems, we would need historic data as well in anomaly detection +- so, we can use the "lambda architecture" - balance between batch processing, and stream processing +- it has three layers +- "batch layer" - follows the batch processing architecture. it takes all the data into account and typically overwrites its old output +- "speed layer" - follows the stream processing architecture. 
it helps fill the gap caused by events which came in since the last event that was operated on by the batch job +- "serving layer" - joins the outputs of the batch layer and speed layer and combines them into one + +![lambda architecture](/assets/img/high-level-design/lambda-architecture.svg) + +## Cloud Computing + +- cloud is mostly based on iaas - "infrastructure as a service" +- gives us access to virtually infinite compute, storage and networking +- we only pay for what we use / what we reserve, thus saving costs +- we can improve our scalability and reliability by deploying our software to "multiple regions" and "multiple zones" +- disadvantage of cloud computing - we do not have access to the infrastructure + +## Scalability Patterns + +### Load Balancing Pattern + +- synchronous communication - can be implemented using [load balancers](#load-balancers) +- asynchronous communication - can also be implemented via [message brokers](#message-brokers) +- note - load balancing != load balancer, so do not get confused +- note - message brokers are not exposed outside, so they cannot be used via client directly unlike [load balancers](#load-balancers) +- when using cloud, both load balancers and message brokers are built with redundancy and replication in mind to increase [fault tolerance](#fault-tolerance) +- there are various "routing algorithms" used for load balancing. we discuss three of them below - round robbin, sticky session and least connections +- "round robbin" + - the simplest / most common / default algorithm + - routes each request sequentially to the "next" worker instance + - disadvantage - only works when application is stateless - each request by a client can be handled in isolation by any one of the target servers. it will not work when an "active session" is maintained between a client and a server +- "sticky session / session affinity" - + - use cases - + - auth information of a client is stored in the session so that the client does not have to reauthenticate repeatedly + - client is uploading a very large file in parts. the different parts need to go to the same server for this to work + - requests from the same client are always sent to the same server + - this can be achieved using a cookie / by inspecting client's ip address + - disadvantage - this only works for smaller sessions - otherwise, the same server might end up with too many longstanding connections +- "least connections" - + - route the request to the server with least number of open connections + - so, it solves the problem we saw with sticky sessions + - use case - like sql, ldap, etc +- "auto scaling + load balancing" - most instances run a background process called "agent". it collects metrics around cpu consumption, network traffic, memory consumption, etc. based on these metrics, we can automatically "scale in" (decrease) / "scale out" (increase) the number of our instances. 
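- going back to the routing algorithms above for a moment, a bare bones sketch of round robin and least connections (server addresses and connection counts are made up) -

  ```
  import itertools

  servers = ["10.0.0.1", "10.0.0.2", "10.0.0.3"]

  # round robin - just cycle through the fleet
  round_robin = itertools.cycle(servers)
  print([next(round_robin) for _ in range(5)])

  # least connections - pick the server with the fewest open connections
  open_connections = {"10.0.0.1": 7, "10.0.0.2": 2, "10.0.0.3": 5}
  print(min(open_connections, key=open_connections.get))
  ```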
we can tie this to [load balancer](#load-balancers) as well, thus the load balancer would always be aware of the available ip addresses + +### Pipes and Filters Pattern + +- data flows from the "source" to "sinks" +- it encounters multiple "filters" along the way, which does only one thing, and is unaware of one another +- source examples - service that receives requests from users, readings from sensors, etc +- sink examples - databases, distributed file systems +- the pipes in between are typically message brokers + +![pipes and filters](/assets/img/high-level-design/pipes-and-filters.png) + +- if we put all the processing logic in one application, it will end up being a monolith +- we saw the disadvantages of a monolith [here](#multi-tier-architecture) +- by using different filters + - the throughput will increase, as well as different filters can perform different tasks + - each filter can be individually horizontally scaled + - we can use different technology for each filter based on the use case +- till now, we saw a "sequence of filters" that run on some data +- we can also have multiple such sequence of filters all running in "parallel" +- an example of all filters needed for a video streaming platform - + - split into chunks, so that the video can be downloaded in chunks instead of downloading it all at once + - select a frame from each chunk to act as thumbnails, which helps when we try to seek + - resize each chunk to different resolutions, which helps with "adaptive streaming" i.e. decide the quality of the video based on the client's bandwidth + - in parallel to all the filters above, another sequence of filters can convert audio into captions based on nlp etc +- filters should be "stateless" +- this pattern is not ideal if we want to run all the filters as a part of a transaction - performing a distributed transaction is very difficult + +### Scatter and Gather Pattern + +- the client sends a request to the "dispatcher" +- the dispatcher sends the request to the "workers" and gathers the result +- unlike [load balancing](#load-balancing-pattern) where the request is only forwarded to one instance, the request in this case is send to all workers +- each worker is independent of the other, and thus they can all operate in parallel +- throughout this pattern, the client is unaware of all this + +![scatter gather](/assets/img/high-level-design/scatter-gather.png) + +- the workers can be + - completely different services - for add recommendations, we request multiple services and then chose the best add for the user and show it to them + - same service with access to different data - e.g. one worker processes files 1 to 100, a second worker processes files 101 to 200 and so on. i think this is what is used in databases with sharding +- if one of the workers do not respond, we can aggregate the partial results from the other remaining workers +- we can also use a message broker in between the dispatcher and workers for decoupling. 
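- a small sketch of the fan out and aggregation (the workers are stubbed out, and one of them is made to fail to show partial results) -

  ```
  from concurrent.futures import ThreadPoolExecutor

  def worker_a(query): return ["result-a1", "result-a2"]
  def worker_b(query): raise TimeoutError("worker b did not respond")
  def worker_c(query): return ["result-c1"]

  def dispatcher(query, workers=(worker_a, worker_b, worker_c)):
      results = []
      with ThreadPoolExecutor() as pool:
          futures = [pool.submit(worker, query) for worker in workers]
          for future in futures:
              try:
                  results.extend(future.result(timeout=1))
              except Exception:
                  pass  # aggregate partial results from the healthy workers
      return results

  print(dispatcher("cheap flights"))  # still returns results from workers a and c
  ```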
if it is not possible to return the result instantaneously, the dispatcher can instead send an id which the client can monitor + +### Execution Orchestrator Pattern + +- imagine we break a monolith into [microservices](#microservices-architecture) +- an extra "orchestration service" is used, which does not perform any business logic itself +- it performs complex flows by calling different services in the right order +- this is in a way like [scatter and gather](#scatter-and-gather-pattern), but here we have a sequence of operations - not one operation sent down to all the workers. again, unlike in scatter and gather where all operations could be performed in parallel, we may or may not be able do that here, as result from one service might be used as a request to another +- the orchestration service maintains all the intermediate state till it is able to construct and return the final result + - what if the orchestration service fails midway / or after performing the entire flow but just before sending the response? + - the orchestration service can store all its intermediate state inside a db, so that if the client re initiates the request, another orchestration service can pick up from where the faulty orchestration service left +- the orchestration service also has logic around handling exceptions and retries - e.g. [saga pattern](#saga-pattern) +- for high availability, we can also deploy the orchestration service in a horizontally scaled manner and have it sit behind a load balancer +- orchestration service != [api gateway](#api-gateway) - api gateways are meant to be dumb, while the orchestration service fully understands the context of a request +- best practice - the orchestration service is not meant for business logic - only for orchestration. the business logic is performed only by the various services sitting behind it + +![execution orchestrator pattern](/assets/img/high-level-design/execution-orchestrator-pattern.png) + +### Choreography Pattern + +- drawback of [execution orchestrator pattern](#execution-orchestrator-pattern) - changes in any of the services involves a change in the orchestration service +- this is called a "distributed monolith" - the orchestration service in the above example has become a distributed monolith because for e.g. 
multiple teams working on their own services might have to now change the orchestration service code together, again impacting [organization scalability](#scalability) +- instead, the orchestration service is replaced by a message broker +- a message is put onto the message broker, and the services can subscribe to this message as needed +- they can then also put more messages into the queue as a result of which other services can subscribe to them again +- this continues till the flow is complete +- since all this communication is asynchronous, all services are decoupled from each other +- even if one of the services is down, the flow can still continue and the relevant parts will still complete +- disadvantage - tracing the entire flow can become very difficult in case of issues, since we do not have a central orchestrator which was aware of all the steps during the entire flow + +![choreography pattern](/assets/img/high-level-design/choreography-pattern.png) + +## Patterns for Data Intensive Applications + +### Map Reduce Pattern + +- simplified processing pattern +- by google around 2004 +- we need to distribute the processing and huge datasets into several machines +- issues include - + - distributing the data + - parallelizing the workload + - scheduling execution on the different workers + - aggregating results + - recovering from failures +- solution - we model all problems using the map reduce model + - we pass the input data through map function, which outputs key value pairs + - then, the reducer receives all the values for a key, on which it can then perform some computation +- underneath, the map reduce framework takes care of all the issues we listed above - refer [this](/posts/hadoop/#theory) for the entire working of map reduce. e.g. [heartbeats mechanism](/posts/hadoop/#hadoop-2x) might be used to ensure the worker is running. 
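- to make the map / reduce shape above concrete, a toy single process word count (the real framework would distribute these phases across many workers) -

  ```
  from collections import defaultdict

  def map_phase(document):
      return [(word, 1) for word in document.split()]  # emit key value pairs

  def reduce_phase(key, values):
      return key, sum(values)                          # receives all values for one key

  documents = ["to be or not to be", "to design is to decide"]
  grouped = defaultdict(list)
  for document in documents:
      for key, value in map_phase(document):           # map
          grouped[key].append(value)                   # shuffle - group by key

  print(dict(reduce_phase(key, values) for key, values in grouped.items()))
  ```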
if this fails, the task would be rescheduled on another worker +- if the master itself fails + - the process can be restarted from scratch again + - the master can take frequent snapshots, so when a new master is spun up, it can restore from where the faulty master left off + - a backup master can run alongside the primary master, which stays in sync +- map reduce is great for cloud because - + - we easily get access to a lot of compute and storage + - map reduce is batch processing - so we can run on demand and pay as we go, and not pay for extra compute + +### Saga Pattern + +- in [microservices](#microservices-architecture), we discussed how we should use [one database per service](#database-per-microservice) +- with one database per microservice, we lose out on the [acid transactions](/posts/spring/#jpa) +- so, saga pattern helps us manage consistency across microservices using distributed transactions +- if there is a failure in any of the microservice, a rollback is performed on the other microservices by applying an operation which has the "opposite effect" of the original operation +- saga pattern can be implemented using - + - [execution orchestration pattern](#execution-orchestrator-pattern) - the execution orchestrator decides whether to proceed with the transaction or rollback the transaction on the previous service with a "compensating operation" + - [choreography pattern](#choreography-pattern) - each service can either trigger the event for the next service if successful, or trigger the "compensating event" for the previous service if unsuccessful + +![saga pattern](/assets/img/high-level-design/saga-pattern.png) + +### Transactional Outbox Pattern + +- helps implement reliability in an event driven architecture +- e.g. a service needs to update something in its database and send a message to a message broker + - updating database and sending a message to a message broker is not an atomic operation + - so, if we for e.g. perform the database operation first, it might happen that the database is updated but the message is never sent to the message broker + - if we send the message first, the database might never be updated, but the message would have already been sent to downstream services +- extension of above - we can argue that with [at least once semantics](#message-delivery-semantics), we can always ensure that the message gets sent. issue - + - we successfully update the database and commit the transaction + - we fire the message + - our server goes down at this point - otherwise libraries of kafka etc are "intelligent" enough to resend the message if ack from broker is not received + - the message too gets dropped midway and does not reach the message broker +- to solve this, we use the "transactional outbox pattern" +- step 1 - it instead "as part of the same transaction" updates the actual data and inserts a new event ino an "outbox table". either both the update and the insertion of this new event will succeed, or both will fail, since both of these are a part of the same transaction +- step 2 - another service called "message relay service" polls this outbox table and puts any new entries in this table onto the message broker +- step 3 - it then either deletes the event or marks it as sent + +![transactional outbox pattern](/assets/img/high-level-design/transactional-outbox-pattern.png) + +- issue 1 - "duplicate events" - just before step 3, the message relay service crashes. it then comes back up and again performs steps 2 and 3. 
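- a condensed sketch of these steps (sqlite stands in for the service's database, a plain list stands in for the broker) -

  ```
  import json, sqlite3

  db = sqlite3.connect(":memory:")
  db.execute("create table users (id integer primary key, name text)")
  db.execute("create table outbox (id integer primary key, payload text, sent integer default 0)")

  # step 1 - update the data and insert the event in the same transaction
  with db:
      db.execute("insert into users values (1, 'alice')")
      db.execute("insert into outbox (id, payload) values (1, ?)",
                 (json.dumps({"type": "user_registered", "user_id": 1}),))

  # steps 2 and 3 - the message relay service polls, publishes, then marks as sent
  broker = []
  for event_id, payload in db.execute("select id, payload from outbox where sent = 0").fetchall():
      broker.append(json.loads(payload))
      # if the relay crashes right here, the event gets published again on the next poll
      db.execute("update outbox set sent = 1 where id = ?", (event_id,))

  print(broker)
  ```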
this situation is called [at least once delivery semantics](#message-delivery-semantics) +- solutions - + - the service logic is designed to be idempotent + - assume that the outbox table adds a unique id for every event, which the message relay service adds to the event it puts on the message broker as well. the consumer keeps track of the ids it has already consumed, and this way, it knows that when there is a duplicate event, it needs to discard it +- issue 2 - the database does not support transactions, e.g. non relational databases. step 1 of our solution relied on the fact that insertion into the outbox table and update to the regular tables can all be done under one transaction +- solution - instead add an outbox parameter to the object, which contains the list of events to be sent + ``` + { + "name: "...", + "outbox": [ + { ... } + ] + } + ``` +- now, the message relay service can poll all objects with this outbox parameter and after adding the messages onto the queue, it can remove the outbox parameter from these objects +- issue 3 - ensure ordering of events. e.g. user registers and then cancels, but we receive the cancel request first (which is dropped since no user is found), and then the registration is processed - which means the cancellation process was ignored altogether. so, ordering of events might be important based on use case +- for this, use a sequence id when storing events in the outbox table. this way, the message relay service will always put the messages onto the broker after sorting them using this sequence id + +### Materialized View Pattern + +- complex queries that involve different tables or maybe even different databases can be very slow - e.g. when we split our stack into microservices, the data is stored in different databases +- these complex queries also consume compute resources, thus increasing cost +- "materialized view" - a read only table is created with the data of the result +- consideration - additional storage cost +- two strategies to update - + - whenever the base tables get updated + - based on a schedule +- two ways to update - + - some databases support materialized views out of the box. most of such databases are efficient - they only take into account the modifications in the base tables, and do not recompute the entire materialized views from scratch + - we can programmatically compute this materialized view ourselves and store it in an optimized e.g. in memory database +- refer [cqrs + materialized view pattern](#cqrs-pattern) + +### CQRS Pattern + +- cqrs - "command and query responsibility segregation" +- divide service into two different services - + - "command service" - mutation of data - inserts, updates and deletes + - "query service" - reads data and returns to the caller +- these services have their own databases as well - the command database can be optimized for writes - e.g. using an sql database, while the query database can be optimized for reads - e.g. using a nosql database +- cqrs is useful when we have both frequent reads and frequent writes +- "synchronization" - to keep the command and query database in sync, we can either use a message broker, or a function as a service +- using a message broker (in red) - + - an event is published via a message broker by the command service which the query service can consume + - now, the command service could have put the event into message broker directly. 
but, to prevent loss of messages, we can use the [transactional outbox pattern](#transactional-outbox-pattern) +- using a "function as a service" (in green) - + - a function as a service is sitting between the command and query database + - it will only be triggered when there is a change in the command database + - once triggered, it will go and update the query database + - since it is a function as a service, it only runs when there are updates, thus saving us costs + - doubt - is this essentially the architecture for cdc tools like debezium? + +![cqrs](/assets/img/high-level-design/cqrs.png) + +- cqrs drawbacks - + - we can only guarantee "eventual consistency" between command and query database + - we have additional complexity for two different services and for the logic for synchronization between them +- cqrs + materialized view - + - e.g. when we split our stack into microservices, the data is stored in different databases + - this means complex services will have to hit different databases (via api calls to their services), which can be slow + - so, we use one query service which receives events from "multiple command services" (multiple command services is the key here), and it stores the combined materialized view for all these services at one place + - e.g. one command service for courses, one command service for reviews + - and one query service for the [materialized view](#materialized-view-pattern) that joins the data from both services for an enriched course view + +### Event Sourcing Pattern + +- typically, data in databases is the current state - modifications override the previous state with new state +- sometimes, we need all the events that led to a state - e.g. we need to show all the transactions for a user's bank account +- so, we only store events instead of the current state +- events are "immutable" - we can only "append" events, not change existing ones +- event sourcing has high performance for write intensive workload - in normal databases in case of write heavy workloads, there is a high contention due to concurrent updates for the same tables and rows. with event sourcing, each write is "append-only", which involves lesser locks +- to find the current state, we only have to apply or replay all the events +- we can also store the events in message brokers instead of storing them in databases, but querying message brokers is more difficult than querying databases +- now, replaying all events for all queries every time might not be efficient. so, we can take "snapshots" at certain periods. we still have all the history, but for deriving the current state, we only need the records since the last snapshot +- another popular pattern - cqrs + event sourcing + - the command service just puts the writes to the write events on to the message broker. it can even get rid of its own database + - the query service listens to these events and accordingly populates its e.g. 
in memory database with the snapshot we discussed about for faster reads + - another pattern being used here is [event streaming](#event-driven-architecture) + - remember - cqrs means eventual consistency + +![event sourcing + cqrs](/assets/img/high-level-design/event-sourcing+cqrs.png) + +## Software Extensibility Patterns + +### Sidecar and Ambassador Pattern + +- apart from performing the core functionality based on [features of the system](#features-of-the-system), a service needs to do things like collect metrics, send its log events to a distributed logging service, connect to a service registry for the most up to date ip addresses of its downstream services, etc +- all these functionalities are also "common" across all our services - so we would not want to repeat ourselves +- one solution - we implement all this as a library, which all our services use +- disadvantage - different services might be implemented using different languages. so we would need to support the library for different languages, which is a lot of overhead +- so, we instead use "sidecar pattern" +- the sidecar is "isolated" from the main process - the additional function is run as a separate process / container on the same server +- the communication between the two is also very fast, since they run on the same host +- since the two use the "same resources" like file system, cpu, memory, etc - the sidecar can report the value for these resources easily +- the sidecar can now be implemented in any language of our choice +- after making the changes related to business logic in the main application, we do not need to test the sidecar + +![sidecar pattern](/assets/img/high-level-design/sidecar-pattern.png) + +- "ambassador pattern" is a particular type of sidecar pattern +- in ambassador pattern, the ambassador acts like a proxy +- the service just sends requests to the ambassador, which then forwards these requests to the actual server, by handling things like authentication, [retries](#retry-pattern), [circuit breaker](#circuit-breaker-pattern), etc +- using the ambassador pattern also allows us to perform "distributed tracing" easily + +### Anti Corruption Adapter / Layer Pattern + +- when we migrate from an old monolith to a new set of microservices +- the new set of microservices need to temporarily interact with the old monolith till the migration is complete +- this means that code for old apis and protocols is scattered in the new microservices +- so, we deploy an "anti corruption service" in between, which performs the translation between the new microservices to the old monolith (both request and response, as needed, to and from both microservices and monolith) +- sometimes, the anti corruption layer can be "temporary" or sometimes "permanent" when we cannot get rid of some parts of the legacy system - e.g. downstream services use the legacy application for reporting and are not ready for a migration yet + +![anti corruption adapter layer pattern](/assets/img/high-level-design/anti-corruption-adapter-layer-pattern.png) + +### Backends for Frontends Pattern + +- usually, we have a separate backend in front of our microservices, to serve the frontend +- now, the frontend just has to interact with this one backend, which performs the logic of relaying the request to the right microservice +- now, assume we have to support multiple frontends like desktops vs mobiles. they tend to interact with the api differently - + - e.g. 
mobile screens have lesser real estate so display lesser data than desktops + - mobile devices have lesser resources (ram etc) compared to desktop + - mobile app owners might want additional features like scanning barcode, only want products available in a particular location, etc +- now, our backend service starts to become a monolith - it has to support the additional features for the desktop, mobile app and the shared features between the two +- so, we use the bff or "backends for frontends pattern" +- we use a separate backend for each frontend +- each backend now stays slim, and allows its frontend to make use full use of its feature set +- we can now scale each backend individually as well - more server side computation might be needed for mobile apps then for desktops +- how to implement the shared functionality in these backends, e.g. login and register + - use a shared library - this pattern usually does not scale well, because - + - any change in this shared library affect all the backends that use it + - there is also often a "lack of ownership" with such shared libraries + - spin up another common service called a "shared backend service" +- the "user agent" header in requests helps us tell the device a request is coming from, and by placing an api gateway in front of these backends, we can decide which backend to route the request to based on the device type + +![backends for frontends pattern](/assets/img/high-level-design/backends-for-frontends-pattern.png) + +## Reliability, Error Handling and Recovery Patterns + +### Throttling and Rate Limiting Pattern + +- e.g. one client bombards our systems with multiple requests. this leads to high cpu and memory utilization of our resources. thus, our response time increases / services become unavailable, and we would be unable to serve other clients, thus violating our sla etc +- using "throttling and rate limiting", we set a limit on the number of requests in unit time / bandwidth (amount of bytes) in unit time +- "server side throttling" - we are the service providers and would like to limit our systems from over consumption +- server side throttling use case - we can have different grades of customer - premium and so on, and we would like different limits for these different customers +- "client side throttling" - we are calling external services and would like to set limits on the number of calls made to such services +- client side throttling use case - we can throttle requests for different services at different levels, based on their quotas +- we can handle this using different strategies - + - "drop requests" - status code is 429 (too many requests) + - "slow down the service" - queue the requests in a queue and process them later + - "degrade the service" - e.g. a video streaming platform can reduce the resolution of the video + +### Retry Pattern + +- we retry the same operation in case of a failure +- we should retry only if the failure is temporary and recoverable - e.g. 
not a user error like unauthorized, bad request, etc +- if the request succeeds on a retry, we were able to hide the internal issues from the user successfully +- so, we need to pick the right "delay" and the right "backoff strategy" for this delay + - "fixed delay" - the delay between subsequent requests stays same - 100, 100, 100, 100 + - "incremental delay" - the delay between subsequent requests increases linearly - 100, 200, 300, 400 + - "exponential backoff" - the delay between subsequent requests increases exponentially - 100, 200, 400, 800 +- we can add "jitter" - a random delay between these delays +- e.g. for incremental delay, instead of calculating delay using i * 100, we do i * (100 + random(-15, 15)) +- reason - clients might end up retrying at the same time, thus causing the retry storm. this jitter helps prevent the retry storm +- "retry storm" - some instances of the service were unhealthy, we bombarded the remaining instances with our retry requests and made the entire service unhealthy +- apart from the backoff strategy and delay, we can also configure how many times to retry / how long we should keep retrying for +- note - the operation we retry should be idempotent - the client will not know if the request was lost or the response - and if it was the response that was lost, we cannot retry a non idempotent operation +- retry pattern can be configured in the [ambassador pattern](#sidecar-and-ambassador-pattern) or implemented via popular libraries + +### Circuit Breaker Pattern + +- we were able to recover from temporary and recoverable issues using the [retry pattern](#retry-pattern) +- retry pattern is optimistic, while circuit breaker is pessimistic +- if the errors go above a certain threshold, the circuit breaker does not even allow the requests to go through +- this way, we save on resources and time from calling a service which might anyway be down +- after being in the open state for some time, the circuit breaker automatically goes into the "half open state" +- it allows a small percentage of requests to go through +- if they succeed, the circuit goes back into closed state + +![circuit breaker pattern](/assets/img/high-level-design/circuit-breaker-pattern.png) + +- we can either drop the requests, or save it in one place to be retried later. this approach is called "log and replay". it might be needed for requests that are not just simple get requests, but require calling a mutation endpoint on another service +- we should configure different circuit breakers for different services +- we can also replace the half open state with "asynchronous pings / health checks" to the service - once the health checks start passing, we can mark the circuit as closed. 
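+- a rough sketch of this health check based variant - the error threshold, the `is_healthy()` probe and the `call_downstream()` callable are all assumptions for illustration -
+  ```python
+  import time
+
+  class CircuitBreaker:
+      def __init__(self, error_threshold=5, ping_interval_seconds=10):
+          self.error_threshold = error_threshold
+          self.ping_interval_seconds = ping_interval_seconds
+          self.consecutive_failures = 0
+          self.open = False
+          self.last_ping_at = 0.0
+
+      def call(self, call_downstream, is_healthy):
+          if self.open:
+              # instead of a half open state, periodically ping the service's health endpoint
+              now = time.monotonic()
+              if now - self.last_ping_at >= self.ping_interval_seconds:
+                  self.last_ping_at = now
+                  if is_healthy():
+                      self.open = False
+                      self.consecutive_failures = 0
+          if self.open:
+              raise RuntimeError("circuit open - failing fast without calling the service")
+          try:
+              result = call_downstream()
+              self.consecutive_failures = 0
+              return result
+          except Exception:
+              self.consecutive_failures += 1
+              if self.consecutive_failures >= self.error_threshold:
+                  self.open = True  # trip the circuit after too many consecutive errors
+              raise
+  ```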
we get rid of the half open state in this technique +- this too can be configured in the [ambassador pattern](#sidecar-and-ambassador-pattern) or implemented via popular libraries + +### DLQ (Dead Letter Queue) Pattern + +- helps handle errors involving [message brokers](#message-brokers) + - producer error - the producer cannot put the message on the broker because the queue is already full, the message is too big, etc + - consumer error - the consumer cannot process the message due to some data discrepancy +- so, we introduce another special topic or queue called the "dead letter queue" +- two strategies - + - so, both the producer and consumer on encountering an error move the message to the dead letter queue themselves + - the message broker itself is configured to move messages to the dead letter queue + - producer errors can be identified easily by the message broker - queue is full, message is too big, etc + - for consumer errors, if the message would not be consumed for a long time, message brokers can conclude that the messages are not getting acknowledged, and it can move these messages to the dlq +- best practices - + - add the reason e.g. stacktrace to the message headers before moving the message to the dlq + - use aggressive monitoring and alerting for messages in the dlq + +## Deployment and Production Testing Patterns + +### Rolling Deployment Pattern + +- when deploying a newer version of our application to servers, we bring down the servers during a "maintenance window" +- sometimes, we might not be able to bring our servers down entirely, e.g. during an emergency release, which is not during the maintenance window +- steps - + - stop the load balancer from forwarding traffic to one server + - upgrade the application on this one server + - run some tests on this new version if needed + - allow the load balancer to send traffic to it again +- keep redoing this one after another till this is done for all the servers + +![rolling deployment pattern](/assets/img/high-level-design/rolling-deployment-pattern.png) + +- this way, our application is always up +- when releasing, if we notice any issues / errors, we can follow the same set of steps to perform a rollback +- advantage - + - no extra cost for hardware + - most widely used due to its simplicity +- drawbacks - + - it can result in "cascading failures" e.g. suppose the new servers start failing. now all the traffic will go to the old servers, which can inturn start failing as well due to "overload". now, this brings down our entire service + - if the new version is "incompatible" with the old version, there might be issues - e.g. 
db schema changes + +### Blue Green Deployment Pattern + +- "blue environment" - we keep the old version of our servers running as is throughout the release +- "green environment" - we deploy the new version of our servers to this environment +- we carry out tests on the green environment +- if the tests etc run fine, we shift the load balancer to point to the green environment +- if we see a failure at any point, we can shift the load balancer back to point to the blue environment +- finally, we can terminate the blue environment once we are done + +![blue green deployment pattern](/assets/img/high-level-design/blue-green-deployment-pattern.png) + +- advantages - both disadvantages of [rolling deployments](#rolling-deployment-pattern) - + - both environments have an equal number of servers, so the issue of cascading failures is prevented + - we can only run a single version of our software at a given moment, so the issue of incompatibility is prevented +- disadvantage - both advantages of [rolling deployment](#rolling-deployment-pattern) + - extra cost for hardware + - complicated to implement + +### Canary Testing and A/B Testing Deployment Pattern + +- "canary release" - borrows patterns from both [rolling deployment](#rolling-deployment-pattern) and [blue green deployment](#blue-green-deployment-pattern) +- we deploy the new version of the application to a small set of "existing" servers (instead of one by one to all existing servers like in rolling deployment) +- it is considered safer than rest of the deployment patterns because - + - for canary release, the performance etc is monitored for much longer than in other patterns + - only beta users get the traffic to the new servers - this can be done by the load balancer for e.g. by inspecting the origin header +- "ab testing / deployment" - ab testing works just like canary release +- however, in this case, we deploy with the motive of rolling back to the old version +- use case - we test the new feature and how it performs, but are not fully ready with them yet to go into full scale production +- sometimes, the users who are a part of this ab testing do not even know about it - they might be seeing new features and can be asked for feedback about it. this helps with genuine feedback + +![canary testing](/assets/img/high-level-design/canary-testing.png) + +### Chaos Engineering + +- "chaos engineering" deliberately injects random failures into our production systems +- it helps us find single points of failure, performance bottlenecks, etc +- advantage - + - system becomes more reliable with time + - development team becomes more proficient in monitoring and debugging production issues +- the types of failures we can inject - terminate random services, inject random latencies, etc +- e.g. of a tool - chaos monkey by netflix + +## Multi Tier Architecture + +- organize system into multiple "physical" and "logical" tiers +- "logical separation" - different tiers handle different concerns +- "physical separation" - allows each tier to be separately developed, scaled and upgraded +- multi tier != multi layer architecture +- multi layer is when the same application is broken into different modules +- however, it will still run as a single unit during runtime and will be a single tier architecture +- in a multi tier architecture, the different tiers run on different machines altogether +- restriction - communication cannot be skipped between tiers. 
this helps keep the tiers loosely coupled + +![multi tier constraint](/assets/img/high-level-design/multi-tier-constraint.svg) + +- most common architecture - "three tier architecture" +- tier 1 - "presentation tier" - the ui on web browser, mobile app, desktop gui, etc +- it takes input from the users / shows them the relevant output +- it does not contain business logic +- tier 2 - "application tier", "logic tier", "business tier" +- it has all all the business logic based on the [features of the system](#features-of-the-system) +- tier 3 - "data tier" - responsible for storage and persistence +- it can contain files and or database +- this three tier architecture fits most use cases +- it allows for easy horizontal scaling +- tier 1 does not need any scaling since it runs on user devices +- tier 2 can run behind a load balancer and be scaled easily if it is stateless +- tier 3 can also be scaled well using techniques like partitioning and replication discussed [here](#improve-quality-attributes-of-databases) +- drawback of three tier architecture - tier 2 becomes a monolith +- monolith drawbacks + - high resource (cpu and memory) consumption + - harder to maintain codebase + - a fault can result in the entire system being down +- so, three tier architecture is good for companies who have a small codebase +- "two tier architecture" + - tier 1 - has both ui and business logic + - tier 2 - data tier +- "four tier architecture" - a new tier in the three tier architecture is introduced between tier 1 and tier 2 for [api gateway](#api-gateway), to address caching, security, etc + +## Microservices Architecture + +- recall [monolith drawbacks](#multi-tier-architecture) - high resource consumption, hard maintainability, lack of fault tolerance. microservices removes all these drawbacks - + - "independently deployable" + - each service can be easily scaled horizontally + - unlike monoliths which would relatively be much more resource intensive, microservices are much more efficient to scale and maintain + - we can make the right choice for tech stack for each microservice based on use case + - "loosely coupled" - helps with [organization scalability](#scalability) by breaking down codebase. now, a small team is responsible for this codebase + - helps with [fault tolerance](#fault-tolerance), since faults are now scoped to a smaller component +- disadvantage - + - overhead increases around testing, debugging issues, etc + - latency increases - more so if we do not ensure loose coupling when [decomposing the service](#migration-to-microservices) + - most important - distributed transaction management is much harder when compared to using a single database + +## Migration to Microservices + +- e.g. assume we have a [three tier architecture](#multi-tier-architecture) for our e commerce application, and would like to split the second tier into [microservices](#microservices-architecture) +- 3 principles to follow when creating microservices - + - "cohesive" - elements that are tightly coupled to each other should stay together inside the same microservice, so that each microservice can be developed and maintained independently + - srp or "single responsibility principle" - a microservice should only do one thing. 
this removes "ambiguity" around which microservice should own what piece of functionality + - "loosely coupled" - there should be minimum communication required between different microservices, and a microservice should be able to do its task independently +- size of a microservice does not matter, it is the 3 principles above that should influence decisions +- popular decomposition techniques - + - "business capabilities" - identify what provides value to business. take stakeholders pov + - "domain / subdomain" - also called "domain driven design" - instead of looking at it from a business side, we take the developers pov in this. types of domains / subdomains are - + - "core" - key differentiator / features of system + - "supporting" - integral in delivering the core capabilities, but not a differentiator - e.g. shipping + - "generic" - not specific to any business - can even be bought off the shelf - e.g. payments +- "incremental and continuous" approach should be used - + - identify the parts which will benefit the most from this migration + - parts requiring frequent changes - most important + - parts that have scalability issues +- "strangler fig pattern" - + - we keep a "strangler facade", which can be implemented using an api gateway, that sits between clients and our backend systems + - now, the api gateway initially routes requests to the monolith + - when the microservice is ready, the api gateway is switched to route requests to the microservice instead + - we can also use [canary testing / ab testing pattern](#canary-testing-and-ab-testing-deployment-pattern) here to direct a part of the traffic to the new decomposed microservices and slowly increase this percentage + - finally, the components of the microservice are removed from the monolith +- because of our incremental and continuous approach, the monolith keeps getting smaller and smaller +- original monolith should have a good "test coverage", to ensure this split does not break anything + +![strangler fig pattern](/assets/img/high-level-design/strangler-fig-pattern.png) + +## Microservices Best Patterns + +### Database Per Microservice + +- if we use the same database across different services, it results in "tight coupling" - recall that one of the principles of microservices was loose coupling +- e.g. if the schema changes due to one microservice, this change needs to be propagated to other microservices that use the same database as well +- if we use a database per microservice, each microservice owns its data and does not expose it to any other service +- the database of another microservice cannot be accessed directly - they have to go through the api of the owning microservice +- advantage - we can chose the database optimized for the workload of the microservice +- downsides - + - "added latency" - sending an additional request to the microservice and parsing the response is slower than accessing the data directly. to prevent the overhead of communication, we can cache the response of the responding microservice in the requestor microservice. however, this caching makes our system "eventually consistent" from "strictly consistent" + - cannot perform joins as easily now, since data is spilt across databases - solved by [cqrs](#cqrs-pattern) + - we lose out on acid transactions - performing a distributed transaction is very hard - solved by [saga](#saga-pattern) + - "data duplication" - data is now duplicated across microservices - e.g. 
product information might be duplicated in the orders service
+
+### DRY Principle
+
+- dry - "don't repeat yourself" - we should not repeat ourselves
+- this way, we only need to change the logic in one place
+- by this logic, we might want to package the repeated logic of microservices into a shared library
+- but, this is not a good practice - dry does not hold for microservices
+- sharing a library introduces "tight coupling" - recall that one of the principles of microservices was loose coupling
+- because, e.g., if a team makes changes to the shared library's apis, these changes need to be communicated to the other teams as well
+- another drawback - "dependency hell"
+  - e.g. a microservice uses v1 of a library directly
+  - its shared library uses a different version (v2) of the same library
+  - now, the microservice needs to upgrade the library to v2 because of the shared library, retest the changes, etc
+- solutions -
+  - we can increase the boundary of some microservice to include this shared logic, and other microservices call this microservice for the same
+  - we can spin up a new microservice containing that shared logic
+  - we can use the [sidecar or ambassador pattern](#sidecar-and-ambassador-pattern) as well, e.g. for observability
+- note - shared libraries are a good pattern for sharing data models - request and response dtos
+- for this, we have techniques like code generation tools that can generate implementations for all languages based on an "interface definition"
+
+### Structured Autonomy
+
+- myth - teams can choose their own tech stack, databases, tools, etc
+- doing things differently in different microservices around building, testing, maintaining the codebase, etc introduces a lot of overhead
+- autonomy is allowed but only within certain boundaries, hence the term "structured autonomy"
+- tier 1 - "fully restrictive" - should be uniform across the whole firm - e.g. monitoring and alerting, ci / cd, etc
+- tier 2 - "autonomy within boundaries" - e.g. database technologies
+- tier 3 - "complete autonomy" - e.g. release process
+
+### Microfrontends
+
+- we can split the monolithic frontend just like we [split microservices](#microservices-architecture) - based on domain / subdomain or based on business capabilities
+- each microfrontend is an spa
+- all these microfrontends are assembled inside a "runtime container"
+- the runtime container can also handle things like authentication / authorization
+- now, each microfrontend has its own ci / cd and can be released independently
+- best practices -
+  - microfrontends should be loaded at runtime, and not as compile time dependencies, otherwise the release schedules would still be tied to each other
+  - sharing state should not be done - it is equivalent to [sharing a database between microservices](#database-per-microservice). we should instead use custom events, pass callbacks, use the address bar, etc
+
+## Event Driven Architecture
+
+- three actors are involved - producer, consumer and event
+- use event driven architecture when we can classify actions as "fire and forget" / "asynchronous"
+- events are immutable
+- events can be stored indefinitely in our system (unlike requests in synchronous communication)
+- unlike in the "request response model" - where the sender needs to be aware of the receiver's api, data models, url, etc - in event driven architecture, the publisher does not care about and is not even aware of its consumers - a toy sketch of this is shown below.
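+- a toy in-memory sketch of this publish / subscribe decoupling - in a real system the broker would be something like kafka or rabbitmq, but the shape of the interaction is the same -
+  ```python
+  from collections import defaultdict
+  from dataclasses import dataclass, field
+  from typing import Callable
+
+  @dataclass(frozen=True)  # events are immutable - consumers can read but not change them
+  class Event:
+      type: str
+      payload: dict = field(default_factory=dict)
+
+  class Broker:
+      def __init__(self):
+          self._subscribers = defaultdict(list)
+
+      def subscribe(self, event_type: str, handler: Callable[[Event], None]):
+          self._subscribers[event_type].append(handler)
+
+      def publish(self, event: Event):
+          # the producer only hands the event to the broker - it has no idea
+          # how many consumers exist or what each of them does with the event
+          for handler in self._subscribers[event.type]:
+              handler(event)
+
+  broker = Broker()
+  broker.subscribe("user-registered", lambda e: print("send welcome email to", e.payload["email"]))
+  broker.subscribe("user-registered", lambda e: print("update analytics for", e.payload["email"]))
+  broker.publish(Event("user-registered", {"email": "jane@example.com"}))
+  ```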
this helps achieve "decoupling", [one of the principles in designing microservices](#migration-to-microservices) +- refer [message brokers](#message-brokers) for more points, advantages and examples of this approach +- 2 event delivery patterns are supported - event streaming and publish / subscribe +- "event streaming" + - the message broker acts like a permanent storage + - the consumer can view any number of past events based on use case + - optionally, the message broker can remove the events from storage after a period of time +- "publish / subscribe" + - the message broker acts like a temporary storage + - only new events from the point the consumer joins are visible +- allows for implementing patterns like [event sourcing](#event-sourcing-pattern), [cqrs](#cqrs-pattern), [saga](#saga-pattern) + +## Message Delivery Semantics + +- failures can happen during multiple stages - (draw quickly in interview using arrows) + - the producer sending the message to the broker fails + - the producer sending the message succeeds but the acknowledgement from the broker fails + - the message broker sending the message to the receiver fails + - the receiver receiving the message succeeds but the processing fails + - the receiver processing succeeds but the acknowledgement to the broker fails +- this is what the "message delivery semantics" help addressing +- "at most once delivery" - + - the producer does not wait for acknowledgement from broker - so, if the message is lost from producer to broker, we loose the event + - the consumer sends the acknowledgement immediately to the broker before starting its processing - so, if the consumer crashes after receiving the event, we loose the event + - use case of at most once delivery - when we are fine with data loss + - we can extrapolate lost events - e.g. location updates in a ride sharing service + - advantage - at most once delivery has the least latency and cost +- "at least once delivery semantics" - + - the producer will resend the event if the acknowledgement is not received - so, it can result in duplicate events if the message is received but the acknowledgement is lost + - consumer sends the acknowledgement to the broker only after successfully processing the event - so, if the consumer crashes after processing the event and before the acknowledgement, it can result in duplicate events + - use of at least once delivery - data loss is not acceptable + - e.g. reviews can be overridden if received multiple times + - disadvantage - more latency, e.g. broker and producer need to wait for acknowledgements etc +- "exactly once delivery" - + - very difficult to achieve + - we generate a unique id / the message broker does this for us automatically + - then, the message broker checks if it has already received this id in the past by checking its log + - the consumer needs to check in its database if the event with this id has already been processed, and accordingly handle the event + - understand that the consumer can still receive the message multiple times like in "at least once delivery". however, our consumer code logic is smart and if it sees a duplicate, it simply ignores it and sends an acknowledgement, thus avoiding processing the event multiple times + - so, my understanding - exactly once from producer to message broker might be guaranteed by message broker, but message broker to consumer needs to be guaranteed by us? + - e.g. 
processing of payments need to happen exactly once + - note - kafka guarantees exactly once when transferring data between kafka topics +- so, my final understanding - + - for ensuring that the message reaches the broker from the producer, use [transactional outbox pattern](#transactional-outbox-pattern) + - for ensuring that the message reaches the consumer from the broker, use at least once delivery semantics + - to ensure exactly once, maintain the processed ids of events in the consumer database + +## Testing + +- unit test - + - test a class / method / module in isolation + - advantage - cheap to maintain, fast to execute + - we should have a lot of unit tests + - disadvantage - give the least confidence about overall system +- integration test - + - verify the different systems we integrate with, e.g. databases, message brokers, etc + - disadvantage - run slower + - we should have fewer integration tests + - give more confidence about our system +- functional / end to end test - + - run on the entire system + - works from an end user perspective - so each test should test the entire user journey + - very slow to run + - we should have very few end to end tests + +![testing pyramid](/assets/img/high-level-design/testing-pyramid.png) + +- in microservices, for integration tests, we can use "lightweight mocks" for our upstream services +- disadvantage - mocks will not help us identify changes to the api of the actual upstream services +- so, we can use "contract tests" alongside the integration tests +- the idea is that the downstream service saves the results of its integration tests (the requests and responses it expects) in a contract file +- these tests are then run on the actual upstream service - the requests are replayed, and the responses are asserted using the expected response of the downstream service +- contract testing can be used for asynchronous communication as well - the downstream service tells the message it expects, and the upstream service asserts that the message is triggered when the appropriate functionality is called +- so, contract tests are basically a great addition / alternative to integration tests by themselves in microservices +- e.g. spring cloud contract +- if our company cannot afford functional / end to end tests, we can directly test in production + - using [blue green deployment](#blue-green-deployment-pattern), we can test in the blue environment before we switch traffic of the load balancer from blue environment to green environment + - [canary testing](#canary-testing-and-ab-testing-deployment-pattern) + +## Network Protocols + +- "application layer protocol" - two methods are there - + - "client server protocol" - e.g. http, ftp, smtp, websockets + - everything in client server protocol (including websockets) uses tcp + - http - follows a request response model. the client sends a request, while the server returns a response + - websockets - + - client and server have a bidirectional full duplex communication + - note - websockets are not the same as peer to peer - clients can talk to server, but clients cannot talk with each other + - it is an alternative to inefficient continuous polling using the request response model + - "peer to peer protocol" - e.g. 
web rtc (realtime communication) + - all can talk with each other - even clients can talk to each other + - this makes it fast, since messages need not be "relayed" via the server + - web rtc uses udp, which also makes it fast +- "transport / network layer" - + - tcp - + - transport control protocol + - a single (virtual) connection is maintained + - on this connection, all packets are sent one by one + - maintains an ordering of packets + - receiver sends acknowledgements for every packet + - udp - + - user datagram protocol + - no connection as such is maintained + - packets can be sent in parallel + - no concept of ordering + - this makes it less reliable than tcp + - but, this also makes it faster than tcp + - use case - live streaming - if we miss some bits of a live video call, we will not rewind back to listen what we missed + +## Caching + +- store frequently accessed data in fast memory rather than accessing it every time from slow memory +- it helps reduce latency +- important - it also helps achieve "fault tolerance" +- there are different places where data can be cached - client side (on browser), cdn, api gateway / load balancer and application caching at server side (the focus of this section) +- this application cache (e.g. redis) sits between our server and database +- distributed caching - imagine we had only one cache server - it would become a single point of failure. so, we use [consistent hashing technique](/posts/high-level-design-case-studies/#consistent-hashing) to help scale the cache server easily - now, based on the key that our application server uses, the request would be directed to the right underlying cache server automatically +- the 5 strategies have been discussed below +- vvimp - all strategies below can be explained nicely if using sequence diagrams + +### Cache Aside Strategy + +- if cache hit, return +- if cache miss - + - application to cache - miss + - application to db to fetch data + - application to cache to populate cache + - application returns response to client +- the application continues to work even if the cache goes down - it falls back to database +- our current strategy does not interact with the cache for db writes, only reads. this results in following problems - + - for new data, there will always be a cache miss first + - inconsistency - we do not invalidate cache for updates, so updates to our data will not be reflected in the cache +- note - here, our application server has the logic for interaction with cache - so, we can also modify the data being stored in cache based on our needs to optimize it + +### Read Through Strategy + +- same as [cache aside strategy](#cache-aside-strategy) +- however, now the cache interacts with the database, and we do not get a chance to modify the data being stored in the cache i.e. 
the data inside cache would be the same as the data inside database +- how cache miss works - application will never know if it was actually a cache miss or a hit - + - application to cache - miss + - cache to db to fetch data + - cache populates itself + - cache returns response to application + - application returns response to client + +### Write Around Strategy + +- when writing data to the database - invalidate the cache for the key +- the application removes the key from cache / marks the dirty flag as true for this document +- it is used alongside [read through strategy](#read-through-strategy) or [cache aside strategy](#cache-aside-strategy) +- it basically solves the inconsistency problem we had there + +### Write Through Strategy + +- first write into the cache, and then write the same thing into the database +- "2 phase commit" - we need to ensure that both the operations are performed inside a single transaction - either both pass or both fail +- this too is used alongside [read through strategy](#read-through-strategy) or [cache aside strategy](#cache-aside-strategy) +- advantage over [write around strategy](#write-around-strategy) - fetches for new data would not result in a cache miss - write around only solved the inconsistency problem, not this problem +- drawback - our system is now less fault tolerant - if either db or cache goes down, our application will go down + +### Write Back (or Behind) Strategy + +- unlike [write through strategy](#write-through-strategy) where we synchronously write into the database for a successful write, we asynchronously put the write into a queue after updating the cache in this case +- the data gets written into the database from the queue into the database eventually +- advantage - if our system is write heavy, it helps buffer writes into the database +- it also adds a lot of fault tolerance to our system - we no longer depend on the availability of database +- one failure scenario and its solution - + - a write operation is performed - write is performed on the cache and is put onto the queue + - the db is down for 5 hrs / the db is already performing writes which will take another 5 hrs to complete - so the message just sits in the queue + - the cache ttl is 3 hrs - so after 3 hrs, it tries to fetch the data from the database again + - now, since the write has not been processed by the database yet, it will not return this record to the cache, and our system will think that the data does nt exist in the first place + - solution - make the ttl of cache higher + +## Transaction + +- database transactions should be "acid" compliant + - "atomicity" - either all operations are completed successfully or they are all rolled back + - "consistency" - database should go from one consistent state to another. e.g. - + - some operation should not lead to a state where for e.g. the payer's balance has reduced but receiver's balance has not increased + - operations should not violate "integrity constraints". 
recall - key, domain, entity, referential + - "isolated" - concurrent transactions do not interfere with each other - one transaction will not see the "intermediate state" of another transaction + - "durable" - results are persisted to disk so that they can withstand system failure +- "commit" - make operations of the transaction complete +- "rollback" - revert all changes caused due to a transaction +- "save point" - allows us to rollback parts of transactions +- when executing a transaction, "database locks" are used to lock either tables or rows depending on the database implementation +- so, transactions should be small, since they consume a lot of locks +- when something is locked, other operations will wait on this lock for it to be released +- these concepts work when transactions are local to a particular database. for distributed systems, we can use - + - [2 phase commit](#2-phase-commit) - popular + - [3 phase commit](#3-phase-commit) - not much used due to complexity + - [saga pattern](#saga-pattern) - popular +- both 2 phase and 3 phase commit are said to be "synchronous", while saga pattern is "asynchronous" - because the locks are not held in saga pattern +- typically, saga pattern is used for long transactions, when it is not feasible to keep the locks for such long periods and block all other operations + +## 2 Phase Commit + +- there are two phases - + - voting / prepare phase + - decision / commit / abort phase +- we have a "transaction coordinator" +- all the microservices participating in this flow are called "participants" +- note - before performing any operation - both actors - transaction coordinator and participants write the operation to their local file - i think this is like "write ahead log". before performing any request / any acknowledgements, this file is updated first +- this way, if any of them go down, they can read from this file when they come back up +- voting / prepare phase - all the services are notified about the update to perform. at this point, the services obtain relevant locks for this update and respond with an ok +- if for any reason this operation cannot be performed - e.g. "order service" is the transaction coordinator, but the participant "inventory service" responds that there is not enough stock - then the service responds with a not ok +- based on the response from participants from earlier phase, the transaction coordinator asks all participants to commit / abort the transaction +- disadvantage of 2 phase commit - coordinator service is the single point of failure +- if for some reason the transaction coordinator goes down after the participants have obtained locks, the other transactions performed on the participants would be stalled because of this lock. the lock would be held till the transaction coordinator comes back up and responds + +![two phase commit](/assets/img/high-level-design/two-phase-commit.png) + +## 3 Phase Commit + +- same as [2 phase commit](#2-phase-commit), except that the commit phase is broken into two parts - + - pre commit phase + - commit phase +- during the pre commit phase - the transaction coordinator only sends the decision of commit or abort - this operation is not performed +- the actual commit / abort is only performed the next phase - the commit phase +- now in this pattern, unlike in 2 phase, there is intercommunication between the participants +- this way, if there is a failure at either the pre commit or the commit phase, the participants can make decisions - e.g. 
if any of the participants had received the commit message from the coordinator, it means that the rest of the participants can also kick off the commit phase + +## Database Indexing + +- data pages - + - internally, data is not stored as tables - that is just a representation + - it creates data pages - generally 8kb i.e. 8192 bytes in size + - it has three parts - + - header - 96 bytes - metadata like page number, free space, checksum, etc + - data records - 8192-(96+36) = 8060 bytes - holds the actual data records + - offset - 36 bytes - using an array, each index stores a pointer to the corresponding data record in the data records section described above + - e.g. if a row is 64 bytes, one data page can store 8060/64 = 125 table rows + - so for storing one table, the underlying dbms will manage multiple data pages +- data blocks - + - data pages ultimately get written to data blocks + - a data block is a section in the actual underlying physical memory that can be read from / written to in one i/o operation + - the dbms does not have control over the actual data block, only data page + - a data block can hold one or more data pages + - so, the dbms maintains a mapping of what data page is stored inside what data block +- indexing - it is used to increase the performance of database queries. without indexing, the database would have to - + - load all the data blocks one by one + - go through all the data pages in this block one by one + - go through all the data records this page one by one +- b+ trees - + - databases instead use b+ tree to help achieve logN time, instead of the n described above for crud operations + - b trees vs b+ trees - in b+ trees, the nodes at the leaf node level also maintain links to each other, unlike in b trees + - m order tree or m ary tree means means a node can have m - 1 keys and m pointers + - this tree maintains the sorted property + - the tree is always height balanced + - the actual values are always in leaf nodes + - the values in all other intermediary nodes just help with traversing the tree / reaching the leaf nodes quickly + - right is greater than or equal to, left is strictly lesser than + - notice how the leaf node level is like a sorted array + - the key is the value of the node, which helps us with efficient traversal + - alongside this, an additional pointer is stored in every (mainly leaf) node as well, which points to the actual data page + - now, using the data page to data block mapping, we can fetch the right data block + +![b+ tree](/assets/img/high-level-design/b+-tree.png) + +- types of indexing present in rdbms - clustered indexing and non clustered indexing +- clustered indexing - + - what order the original b+ tree is constructed in is determined by the column we use for clustered indexing + - this is why only one clustered index is allowed - because it affects how the original b+ tree is constructed + - the records in the "data records" section of data page may be jumbled - they are ordered according to insertion time + - however, we want them to be ordered based on our indexed column + - so, we use the offset field - recall that offset is an array + - assume our id insertion order is 1 4 5 2 + - the offset would look like this - (pointer to 1, pointer to 2, pointer to 4, pointer to 5) = (0, 3, 1, 2) + - if we do not specify anything - the primary key is used for clustered index +- non clustered indexing - + - we have many other keys - secondary index, composite index, etc - they all use non clustered indexing under the hood + - 
we can have multiple non clustered indices unlike clustered index + - each non clustered index will use a new b+ tree + - so, while clustered indexing determines how the original b+ tree is constructed, non clustered indexing determines how this additional b+ tree is constructed + - the leaf nodes of this new b+ tree contains pointers to the actual data pages + +## Concurrency Control + +- "critical section" - accessing a shared resource +- e.g. multiple users try to book the same seat, which is seen as free by all of them - and they all try to confirm the same seat +- techniques like using `synchronized` work only for contention among multiple threads of the same process +- so, we need to use "distributed concurrency control" for different processes on potentially different machines +- we have two types of distributed concurrency control - "optimistic concurrency control" and "pessimistic concurrency control" +- "shared locks" - + - shared locks are used for reads + - assume one transaction puts a shared lock on some row + - another transaction can also come in and put a shared lock on this row + - however, another transaction cannot come in and put an exclusive lock on this row - it would have to wait till all the shared locks are removed from this row +- "exclusive locks" - + - exclusive locks are used for writes + - assume one transaction puts a exclusive lock on some row + - another transaction cannot come in - neither with shared nor exclusive lock +- "dirty read problem" - + - both transaction a and transaction b start + - transaction a updates the value of a row to 5 + - transaction b reads this value as 5 + - however, due to some error, transaction a has to rollback its changes back to original value + - so, transaction b reads intermediate, uncommitted data of transaction a +- "non repeatable read" - + - transaction a reads the value of balance as 100 + - transaction b comes in, updates the value to 110 and commits + - when transaction a tries reading the value of the row again, it reads it as 110 + - so, transaction a read different values for the same row during different parts of its transaction +- "phantom read" - + - transaction a sees 500 rows in the database + - transaction b comes in and commits 5 new rows + - transaction a now sees 505 rows in the database + - so, transaction a basically saw different number of rows in the database during different points in the transaction +- isolation level - recall isolation of [acid](#transaction) + +| isolation level | dirty read
possible | non repeatable read
possible | phantom read
possible | +|------------------|--------------------------|-----------------------------------|----------------------------| +| read uncommitted | yes | yes | yes | +| read committed | no | yes | yes | +| repeatable read | no | no | yes | +| serializable | no | no | no | + +- "read uncommitted" - + - no locks are used + - only use when system only involves reads +- "read committed" - + - shared lock is acquired for read but released as soon as read is over + - this explains why we can see committed values by other transactions when we try reading twice + - exclusive lock is acquired for write and kept till the end of the transaction +- "repeatable read" - + - shared lock is acquired for read and kept till the end of the transaction + - exclusive lock is acquired for write and kept till the end of the transaction +- "serializable" - + - works just like repeatable read + - additionally, it puts a "range lock" on the rows that it touches +- typically, we can set the transaction isolation level like so - + ```sql + set transaction isolation level repeatable read; + begin_transaction; + ... + commit transaction; + end_transaction; + ``` +- "optimistic concurrency control" - + - uses isolation level of read committed + - solves concurrency problem using "versions" + - in case of the non repeatable read, transaction a would know that the version has changed (refer example of non repeatable read above) + - advantage - allows much higher levels of concurrency as compared to pessimistic concurrency control + - disadvantage - if we have too many concurrent writes, we would fail at the last step for all of them, thus wasting too many resources +- "pessimistic concurrency control" - + - uses isolation level of repeatable read / serializable + - can have much more deadlock scenarios - + - transaction a and transaction b start off in parallel + - transaction a acquires shared lock on row a + - transaction b acquires shared lock on row b + - transaction a tries to acquire exclusive lock on row b - cannot because of transaction b + - transaction b tries to acquire exclusive lock on row a - cannot because of transaction a + - understand how the exact same scenario discussed above would not have resulted in a deadlock scenario in case of optimistic concurrency control, because it does not hold the shared lock + - database systems are able to detect deadlocks like this and then fail the transactions + +## 2 Phase Locking + +- 2 phase locking is a type of [pessimistic concurrency control](#concurrency-control) +- there are 3 types of 2 phase locking - "basic", "conservative" and "strict" +- "basic 2 phase locking" - + - phase 1 - "growing phase" - transaction can only acquire new locks. 
the lock manager can either grant or reject this request + - phase 2 - "shrinking phase" - transaction cannot acquire any new locks, only release locks +- basic 2 phase locking has two issues - deadlocks and cascading aborts +- "deadlock" example - the exact one we discussed in [pessimistic concurrency control](#concurrency-control) is a good example +- "cascading aborts" example - + - recall how releasing of locks is done one by one in the shrinking phase of basic 2 phase locking + - so, lets say transaction a releases exclusive lock on a row as part of the shrinking phase + - now, lets say transaction b acquires a shared lock on this row as a part of its growing phase + - now, what if transaction a had to be aborted suddenly due to some error + - now, transaction b has an inconsistent value of the row, and it would have to be aborted as well +- deadlocks can be solved by conservative 2 phase locking and wait for graph +- cascading aborts can be solved by strict 2 phase locking +- cascading aborts are considered very expensive, since they can result in a "chain of cascades" +- note - we want to maintain some degree of concurrency as well, not just consistency, like discussed during optimistic and pessimistic concurrency control +- so, we typically use strict 2 phase locking to resolve cascading aborts and wait for graph for resolving deadlocks + +![2 phase locking](/assets/img/high-level-design/2-phase-locking.png) + +- "wait for graph" - + - the scheduler maintains a graph, where the nodes represent the transactions + - e.g. if transaction a is waiting on a lock to be released which has been acquired by transaction b, then there is an edge from transaction a to transaction b + - once there is a cycle that is detected by the scheduler in the graph, it looks for the "victim" in this graph, and then aborts that transaction + - in choosing the victim, it might make considerations like the amount of effort already put in by this transaction, amount of effort to rollback this transaction, how many cycles would be removed by aborting this transaction, etc +- "conservative 2 phase locking" - + - requires transactions to acquire all locks at the beginning itself + - either the scheduler assigns all the locks to the transaction if possible + - or the transaction will have to wait if one or more of the locks are unavailable + - disadvantages - allows very less concurrency, does not prevent [cascading aborts](#cascading-aborts) +- "strict 2 phase locking" - + - all the locks are released at once when the transaction is aborted / committed + - disadvantages - allows very less concurrency, does not prevent [deadlocks](#deadlocks) diff --git a/_posts/2024-04-12-high-level-design-case-studies.md b/_posts/2024-04-12-high-level-design-case-studies.md new file mode 100644 index 0000000..447d4b9 --- /dev/null +++ b/_posts/2024-04-12-high-level-design-case-studies.md @@ -0,0 +1,681 @@ +--- +title: High Level Design Case Studies +--- + +## Steps to Follow + +- abstraction - we need to stay at the abstraction level of the system and not its low level implementation +- there is no one correct solution +- step 1 - ~ 5 mins - gathering functional requirements. it also helps "narrow down" the scope +- step 2 - ~ 5 mins - gathering the non functional requirements. helps ensure our system has the right quality attributes for the given workload +- step 3 - ~ 5 mins - combination of sequence and use case diagram. 
additionally, define the api of system as a part of it +- step 4 - ~ 15 mins - software architecture diagram for functional requirements - what data to store in what type of database, describe the flow of network requests, define the architectural pattern used, etc +- step 5 - ~ 10 mins - optimize the software architecture diagram to address the non functional requirements. add the components, remove the single points of failure, etc +- to help revise efficiently - skipping step 3, merging steps 4 and 5 +- document effectively in the interview - interviewer might take snapshots which would be assessed later + +## Non Functional Requirements Questions to Ask + +following are the typical generic questions to ask - + +- number of users - ask for each actor separately +- performance in terms of percentile distribution graph - of different api calls +- availability percentage +- size of data / objects being stored +- number of events / api calls, etc + +## Social Media Platform + +### Gather Functional Requirements + +- what information should the system store for a particular user? +- what type of media can be shared - only images, or videos / text as well? +- what type of relationship exists between users? - bidirectional i.e. friends, or unidirectional i.e. followers and following - unidirectional +- what kinds of timelines to support? - both user and home page +- social media platforms are typically very read heavy and not so much write heavy +- can anyone post / view, or there is registration / login required +- are the comments organized using a flat list or tree - flat list + +### Software Architecture + +- user service - + - used for storing profile information + - store public information like user name, first name, last name, etc + - store private information like password, email, phone, etc + - store optional fields like location, interests, etc + - instead of storing profile image directly, store the image in an object store, and store the reference to this inside user db + - note - there might be optimizations like resizing around storing thumbnails + - instead of a new followers service, we can also track followers via user service itself + - it has a collection called "followers", which stores the follower id and followee id + - i think we can index on both - + - follower id - retrieve influencers followed by a user + - followee id - retrieve followers of the post's created by + - now, the entire user service can use a sql database, because of the nature of the data and the fact that the number of users should not typically be as much - we can also model the followers table as a many to many constraint + - we saw we can index on followers id, followee id, etc for faster access. optionally, we could have used redis as a layer of cache - + - key = follower id, value = followees - to retrieve all the people this user is following + - key = followee id, value - followers - all the followers of this user + - key = user id, value = profile - we might want to display a short profile of users alongside their posts / comments + - my understanding - we could have used cqrs pattern here to separate the redis into a separate query service. but i think here, its more around caching. cqrs makes sense when we want to use materialized view pattern as well +- posts service - + - a platform like twitter can have a huge number of writes. 
so, we use cassandra / dynamodb for storing posts + - comments are very similar to posts - have the same up votes / down votes fields, the same structure around the content, etc. so, instead of a new comments service, we can store the comments inside the posts service itself + - we can use the post id when sharding for posts + - we can use the post id when sharding for comments as well - this helps us easily retrieve the comments for a post + - second approach - compound index + range shard strategy + - we use a combination of the post id, comment id + - post id is the partition key, and comment id the sort key + - this way, all comments for a post can be fetched fast, since the comments for the same post will be located close to each other +- search service - + - used to help search for users and posts + - using a specialized search service can also help us with type ahead etc + - it will have its own nosql database with text search capabilities - e.g. elasticsearch + - users service and posts service communicate changes to search service via a message broker + - this is the cqrs pattern - hence, it is eventually consistent + - now, instead of always running elasticsearch through our query because searching through elasticsearch can be expensive, we can cache our results in redis as well. reason - there can be hot topics - e.g. elections, and we can cache the elasticsearch response for such queries +- timeline service - + - approach 1 - construct the timeline on the fly every time + - however, it is very inefficient - the complex query to posts service can become a bottleneck + - so, we use the cqrs + materialized view pattern + - we add a new service called the timeline service, which has the timeline for each user stored inside an in memory key value database, where the key is the user id, and value is the timeline i.e. list of posts + - first, the posts service publishes a message to the message broker every time a new post is created + - then, the timeline service calls the user service with the user id of the post, to retrieve all the followers for this user + - finally, it updates the database to add this new post to all these followers + - we use a queue like data structure to keep track of the latest n posts in the timeline for a user - first in first out i.e. 
the oldest post would be removed from the timeline + - so now, the client only needs to request the timeline service for the timeline of the current user + - then, it needs to fetch the posts (in bulk) from posts service + - finally, it needs to fetch all the assets from the object store + - downside - we now have eventual consistency as opposed to the strong consistency earlier + - we can also "archive" the timeline data - people might end up scrolling more, and instead of recomputing everything from scratch, the old timeline data can be stored again inside cassandra +- handling influencers - + - some users can be an influencer - they can have thousands or millions of followers, based on our system + - when the timeline service receives a post from such users, it will have to update the timeline of all of its followers, which means there can be millions of potential writes + - instead, we track for each user if they are an influencer or not via a boolean in the user service + - now as usual, for each post that the timeline service received from the post service, it tries to fetch all the followers for that user + - this time, since the user service knows that this user is an influencer, instead of returning all the followers, it will just return true for the influencer field + - the timeline service will just track the posts of all influencers in a separate collection - the key would be the influencer id, and the value all the posts by this influencer + - note - we can still use the redis for recent posts, and cassandra for archival + - now, when the timeline service is called for getting the timeline, it does not return the timeline directly + - it first fetches all the influencers the current user is following from the user service + - it then merges the timeline of the user with the posts by all the influencers this user follows and return this final list. the merge can be performed using the timestamp field + - a user could be following hundreds of normal people, but a very handful of them would classify as influencers. so, this technique of merging works the best +- scaling timeline service further - + - assume the functional requirement wants to support user timeline as well - we can look at the posts by a user after clicking on a user + - the user timeline will be designed in the same as the "influencer timeline" - the key would be the user id, and the value the entire timeline + - however, we maintain the timeline only for active users - and not for all users + - note - the same can apply to home page timelines as well i.e. we only maintain the home page timeline for active users + - e.g. 
our platform might have 2 billion users, but only 100 million of them are active, rest of them are passive users + - this helps us saves on disk space / infrastructure cost + - the active / inactive flag can be tracked inside user service itself, just like influencer / non influencer + - when an inactive user comes in - we first have to populate redis with the right active data + - then, we generate the entire timeline and populate our key value database with this +- scaling (generic) - + - we can horizontally scale all services and run them behind a load balancer + - sharding for all databases - remember posts service has an sql database, so might or might not be supported + - replication for all databases + - run the system in different geographical regions +- some other considerations - + - we can use connection management as described in [realtime instant messaging](#real-time-instant-messaging-service), for pushing notifications about new posts by followers, when users are tagged in a post, when a user is followed by another user, etc. we use the choreography pattern here to help subscribe to events on the message broker, and then we use websockets to push these events + - assets - for storing media, we can use the optimizations we talked about in [video on demand](#video-on-demand-streaming-service). we can perform compression, perform processing on thumbnails, use cdn, etc. assets of old posts should automatically expire from cdn, thus saving us on the costs for using the cdn + - given that tweets have a character limit, we might want to shorten the urls. we can do this using a [url shortener](#url-shortener) + - to the timeline of a user, we might want to add posts based on their interests. so, we can have analytics running on users and on posts, to tag both these entities based on different parameters. finally, we can add matching posts to the timelines of all the relevant users + +## Video On Demand Streaming Service + +### Gather Functional Requirements + +- can the content be modified - videos can be deleted but not modified +- what kind of information is provided along with the video + - mandatory - title, author, description + - optional - tags + - note - these fields can be updated +- does it support only video on demand or live streaming as well - live streaming is out of scope +- is there any particular video format we would like to support - all formats / codecs should be supported +- what kind of devices do we want to support - all - browsers and apps, with varying network bandwidths +- main functional requirement - close to no buffering +- should we support subscription based content - no + +### Software Architecture + +- challenge - there are different formats, different aspect ratios, different bandwidths, etc that we need to handle - so too many combinations +- handling different formats of videos - + - videos can come inside different formats - each with different formats of audio, video, subtitles, etc + - some videos, e.g. 
ones taken from cameras can have very high resolution, since they use "lossless" compression algorithms + - this makes them ideal for video editing but not for storage / streaming + - so, we can use "lossy" compression algorithms to reduce the amount of data stored and transferred +- supporting varying devices - + - video size is ~ bitrate * video length + - so, to optimize the video size to support different devices and network conditions, we need to optimize the bitrate + - now, bitrate is directly proportional to the resolution and frame rate of the video + - so, for reducing the bitrate, we need to reduce the resolution and frame rate + - so, using the transcoder, we need to generate videos with different resolution and frame rate to support different devices +- supporting varying network conditions - + - network bandwidth is not stable - it can be different when the user started watching vs when the user is halfway through + - so, we can use "adaptive bitrate" or "adaptive streaming" + - we break the transcoded video into segments, each a few seconds long + - the client first downloads a "manifest file", that contains the different resolutions, frame rate, etc available + - the app then based on this manifest and the device information that it has access to, downloads a few segments of the right encoding + - it evaluates the time taken to download these files, and then accordingly switches to a different bitrate based on the network conditions +- finally, the different devices support different kinds of video protocols - + - i think this is around the application layer protocol being used + - so, we need to package our transcoded video chunks according to the different protocols and then forward it to the client +- we use an api gateway to handle authentication and authorization +- video service - + - store the videos in an object store + - store the metadata (title, description, author, tags) in the document store of the video service +- content processing service - + - "pipes and filter pattern" can be used for it + - runs when the raw video has been finished uploading to the object store + - so, the final sequence of steps using a pipes and filters patter would be - + - chunking + - perform appropriate compression + - perform sanitization - flag piracy, only allow legal content, etc + - generate thumbnails for these chunks, generate subtitles using nlp, etc + - transcoding - convert to different resolutions and frame rates + - package for different protocols + - generate manifest and send the link to video service + - after completion of all these steps, the service publishes an event to the video service for it to store the link to manifest file + - finally, we also need to notify the uploader of the successful upload - this entire process is asynchronous because it can be very time consuming, and the uploader should know once the upload is complete +- search service - + - after the video service has all the necessary details for a video, it publishes an event to this search service + - this search service uses an optimized database for text search, and stores this event there + - note - elasticsearch can take care of type ahead, perform fuzzy searching for typos in complex names, etc + - this is also a cqrs pattern - video service is the command service, and the search service is the query service +- get videos flow - + - when a user searches for videos, their requests go to the search service, which returns the video ids + - then, when the user clicks on a video, they request 
for the video from the video service, which returns the manifest file, description of video, etc + - finally, the device will start requesting for the right video chunks from the object store + - my doubt - we can extend the presigned url here as well for subscription based content? +- upload videos flow - approach 1 - + - right now, our request for uploading videos goes through the api gateway to make use of the authorization features etc + - then, the video service routes it to the object storage + - this means we will have to scale our api gateway and video service to just support this uploading functionality +- upload videos flow - approach 2 - + - we can use "presigned urls" - this allows the client to make requests to the object storage directly for a limited time + - so now, the request to the video service does not contain the actual video - just metadata containing title etc + - the video service with its permissions then generates a signed url and sends it back to the client + - the client now uses this signed url to upload the video - thus bypassing the api gateway and video service +- cdn and its optimizations - + - to improve the video delivery time, we use a cdn. "push strategy" might work better because videos cannot be updated + - we need not store all the videos in all the cdns + - we can instead have a few main cdns with the actual content, and the smaller local cdns can only have the content that a user is likely to watch - e.g. german content need not be cached at an indian cdn + - we can pre compute and store these likely videos beforehand using ml models to improve on our non functional requirements + - additionally - all cdns need not be sent all chunks of all formats from the main cdns - we can kind of distribute these chunks among the local cdns, and then these local cdns can use a peer to peer like model to distribute the chunks among themselves - this reduces the load on our main cdn as well +- tagging a video - + - during the processing of video, determine tags for the video based on some machine learning + - then, add these tags to the video + - basically, for each chunk, we would determine some tags + - then, we would group all these tags by video id + - finally, we can choose the top x tags which are the most relevant, highest count, etc to obtain the tags for a video + - we can also tag users based on their watch history etc + - now, based on tags assigned to videos and users, we can send the right recommendations to users + +## Real Time Instant Messaging Service + +### Gather Functional Requirements + +- one to one direct messaging or group messaging? + - three types of mediums need to be supported - one to one, groups and channels + - one to one chats are a specialized form of group chats only involving two users + - channels - other users can join or leave channels. useful inside companies, where employees can join or leave channels +- types of media supported - text based, sharing images / documents, video calls +- persisting chat history - e.g. a user is offline when they receive a message. should they see the message after coming back online? + - users should be notified immediately after receiving a message + - a user should be able to see both - previously read messages and all unread messages after going offline +- other messaging statuses to show - + - if a user is online - in scope. 
a user can click on the profile of a user to see if they are online + - if a user is currently typing a message - out of scope +- system needs to have close to no lag - it is a messaging system, unlike [social media platform](#social-media-platform) etc + +### Software Architecture + +- approach for message delivery - + - user a gets the ip address of user b from our system + - then, user a talks to user b directly, and the communication does not go through our system + - benefit - since the system would not be involved, this approach can be easily scaled to millions of users chatting "one to one" (one to one is key) with each other + - drawback - hard to support groups and channels - the user would have to establish a connection to potentially thousands of users, which is not possible for mobile devices + - another drawback - messages would not be stored and hence be lost for offline users + - so, we proxy the messages instead via our system + - so now, the user sends the message and the target group / channel to our system + - our system then takes care of creating history, pushing it to the right receivers, etc +- delivering the messages - + - in the typical client server model, a request needs to be received first in order to send a response + - so, one approach - clients keep polling the server for messages + - disadvantage - polling from all the end users will lead to too much resource consumption, along with computation for potential inactive channels and chats as well + - so, we instead use "websockets", which allow for full duplex, bidirectional communication + - note - websockets use tcp underneath + - first, the users establish the websocket connection with the server + - then, the server can push the messages / receive the messages using this connection +- user service - + - persist the user information - we can use an sql database + - step 1 - when searching for users, it should return the correct user + - as a part of this, index based on username to make this search faster + - worth pointing out - an additional user search service feels like an overkill - we do not need features like sophisticated autocomplete when searching for users +- groups and channels service - + - used to maintain groups and channels - recall that one to one messaging is a specialized case of group chats + - one groups table - (group_id, created_at, ...) + - one channels table - (channel_id, channel_name, channel_url, owner_id, created_at, ...) - users use channel url to join a channel + - a many to many table for storing which users are a part of which groups + - a many to many table for storing which users are a part of which channels + - index channel table by name for quick searching + - index the two many to many tables by group / channel id to look up all the members in a group / channel quickly + - step 2.1 - a user can make a call to this service to create a group + - step 2.2 - then, they can search for users and add them to the group + - creating a channel has similar steps, just that an extra step of searching and joining the channel might be involved + - step 4.1 - if the user is offline and comes back online later, they first request this service to know about the groups that they are a part of + - optionally, this can be backed by a redis key value store - serves as a cache, retrieving all members of a group becomes very fast - we use group id as key +- chat history service - + - one messages table - (message_id, group_id / channel_id, sender_id, text, created_at, ...) 
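+  - as a quick illustration of the redis backed member lookup in the groups and channels service above - a cache aside sketch in python, where an in memory sqlite table stands in for the sql database and a plain dict stands in for redis (all names here are illustrative) -
+    ```python
+    import sqlite3
+
+    db = sqlite3.connect(":memory:")  # stands in for the groups and channels sql database
+    db.execute("create table group_members (group_id integer, user_id integer)")
+    db.executemany("insert into group_members values (?, ?)", [(1, 101), (1, 102), (2, 103)])
+
+    cache: dict[int, set[int]] = {}  # stands in for redis - key = group id, value = member ids
+
+    def get_group_members(group_id: int) -> set[int]:
+        # cache aside - serve from the cache when possible, otherwise read the
+        # many to many table and warm the cache for the next caller
+        if group_id in cache:
+            return cache[group_id]
+        rows = db.execute("select user_id from group_members where group_id = ?", (group_id,)).fetchall()
+        cache[group_id] = {user_id for (user_id,) in rows}
+        return cache[group_id]
+
+    print(get_group_members(1))  # members of group 1 - the first call hits sqlite, repeats hit the cache
+    ```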
+ - step 4.2 - using the ids of the groups and channels that the user receives from the groups and channels service, the user requests for the chat history from the chat history service + - again, the complex sharding problem might arise - one shard might not be able to fit all the messages for a group, and so we use a compound index using group id, message id - + ```sql + select * + from messages + where group_id = xyz + sort by message_id desc + ``` +- messaging service - + - users establish a bidirectional connection with this messaging service + - step 3 - a user sends a message to the messaging service (group_id / channel_id, message). the messaging service does the following - + - step 3.1 - store it in the chat history service (asynchronously) + - step 3.2 - request for the users part of this group / channel from the groups and channels service + - step 3.3 - push the message to the currently online users via the bidirectional connections + - scaling message service - unlike other services, the messaging service cannot be simply horizontally scaled behind a load balancer, since it is not stateless + - e.g. we use a simple round robin load balancer in front of two instances of the messaging service + - assume user a is connected to instance 1, and user b is connected to instance 2 + - if user a wants to send a message to user b, instance 1 will need to communicate this message to instance 2 +- approach 1 - when our user base is "partitioned" + - if in our use case, we have dedicated groups - one group for company 1, one group for company 2, and so on + - we can have all users for a particular group aka company connect to the same instance + - in this case, we would have ensured all users of a particular company are connected to each other + - the idea is that we can be sure that users of one company would not want to communicate with another company + - we can also vertically scale each instance individually depending on the size of that company + - i feel this approach will work well in use cases like games - whenever there are smaller clusters of users, and users of different clusters cannot talk to each other +- approach 2 - when our user base is not partitioned i.e. 
any user can send a message to any other user + - we introduce a new connection service + - this "manages connections" via a key value store (redis) - it knows which user is connected to which message service instance + - now, when a message service instance receives a message, it asynchronously sends it to the connection service + - advantage of asynchronous communication between messaging service and connection service - the sender does not care about this flow, so it does not have to wait to receive a "message delivered" notification + - then, the connection management service (again asynchronously) asks the chat history service to backup the message and synchronously asks the group and channel service for the members of the group of the message + - finally, it notifies the right message service instances - it knows which message service instance is responsible for which user via its database +- handling race condition - user a sends message and user b comes online simultaneously + - user b's call to chat history service does not return this message because this message had not been persisted yet + - user a's call is unable to push the message to user b because this user has not established an active connection with connection service yet + - one solution - connection service polls chat history service for the messages of users with active connections and pushes them to the messaging service +- asset management - + - should have compression logic, discussed in depth in [video on demand](#video-on-demand-streaming-service) + - once the upload is completed successfully, the message in the connection service needs to be updated with the url before it starts pushing them to messaging / chat history service + - can use cdn for faster delivery, etc +- based on whether or not the messages are end to end encrypted, we can run analytics on message content + +## Type Ahead / Autocomplete for a Search Engine + +### Gather Functional Requirements + +- what criteria to use for suggestions - most popular queries in the last 24 hours +- is spell checking in scope +- how many suggestions to provide - 10 +- what languages to support it for - only english +- maximum length of prefix - 60 characters, beyond this we do not show suggestions + +### Software Architecture + +- we need two api queries + - /complete?query=xyz - to help with autocomplete + - /search?query=xyz - used when the user hits the search button / selects one of the results. 
note - our analytics will be updated based on this, so "get" http verb might feel wrong +- solution 1 - trie + - a subtree contains words with the prefix the same as the path up to it from the root + - we can store the frequency of search in the terminal nodes of this trie + - disadvantage 1 - can become too big - with 60 character being the maximum limit, there can be 26^60 (if considering only lowercase) + - disadvantage 2 - for a given prefix, we would have to traverse all its branches and then even sort them based on popularity +- observations - + - our autocomplete suggestion has to be very quick + - the update corresponding to our search query can be slow + - so, we can use cqrs along with batch processing +- autocomplete service - + - resolves autocomplete related requests + - uses a key value store underneath, where the key is the prefix and the value is the list of results +- autocomplete updater service - + - triggered once the button / result is clicked + - receives a tuple of (search term, timestamp) + - this is then stored in a dfs for further processing +- now our big data processing workflow is as follows - + - read all the (search term, timestamp) tuples populated by the autocomplete updater service + - we have a mapper with the following steps - + - filter the tuples with timestamp within the last 24 hours + - for each prefix of this search query, emit a tuple of (prefix, search query) + - understand that the above mapper will emit all prefixes of a word twice if the autocomplete updater service populated it twice + - now, all the search queries with the same prefix should end up in the same computer, given our mapper outputs the prefix as the key + - then, the reducer can perform an aggregation to find out all the top k (10 in our case) searches for that prefix + - remember to run multiple instances of both mapper and reducer +- cdc - change data capture - + - we use a cdc pipeline to send changes in our dfs i.e. every time the results for a particular prefix change to the autocomplete service + - the autocomplete service reads these events and updates its key value store + - note how we do not surface the autocomplete queries from the dfs directly since it is inefficient + - instead we use the more optimized key value store + - we could also have used the reducer to write the results to the key value store directly, which too would have been very inefficient + - now, cdc will only send over the changes, as opposed to the reducer, which would have done a complete rewrite +- we can horizontally scale both autocomplete service and autocomplete updater service +- shard manager service - + - used for scaling the key value store + - the key value store being used should have sharding given the amount of data + - we can also use read replicas to help distribute the load + - both replication and sharding should be available for databases by default + - despite of all of the above, some shards like ones storing the prefix "how" can be hot, while others would not even be close + - the default replication mechanism will usually use the same number of replicas for every shard, which might not work in our case + - so, one approach is to have a "proxy" between the autocomplete service and key value store, which can dynamically increase the number of replicas for hot shards i.e. 
different shards can have different numbers of replicas based on our workload
+
+## URL Shortener
+
+### Gather Functional Requirements
+
+- short url and actual url mapping should be maintained in a database
+  - generate a short url for a long url
+  - return the long url when queried for a short url
+- traffic per day - 10 million urls per day
+- we will run this service for 100 years
+- characters we can use - alphanumeric
+
+### Software Architecture
+
+- calculation behind how many characters we should use -
+  - characters allowed = 26 + 26 + 10 = 62 characters
+  - number of urls to support = 100 * 365 * 10,000,000 = 365 billion
+  - 62^7 = 3,521,614,606,208 - about 3.5 trillion - so, our url shortener needs at least 7 characters
+- popular hash generation techniques -
+  - md5 hashes are 16 bytes long = 128 (16 * 8) bits = 32 (128 / 4) hexadecimals
+  - sha1 is 20 bytes = 20 * 8 / 4 = 40 hexadecimals
+  - sha256 is 64 (256 / 4) hexadecimals
+  - sha512 is 128 (512 / 4) hexadecimals
+- simply truncating one of these digests to 7 characters does not work - a hex alphabet only gives 16^7 = ~268 million combinations, far short of 365 billion - in hex we would need ceil(log16(365 billion)) = 10 characters, and 7 characters are only enough because our alphabet is base 62 (lowercase + uppercase + digits) - truncation would also produce collisions that we would have to detect and retry
+- one solution - "ticket server" -
+  - we maintain a counter inside redis - remember - redis is an excellent choice for atomic incrementing counters
+  - all instances of our shortener request this instance for the next number
+  - issue - a single redis server might not be able to scale
+  - solution -
+    - e.g. 7 base 62 characters can encode any 41 bit number, since 2^41 < 62^7 < 2^42
+    - imagine we have 8 redis instances, and number them - 000, 001, 010 and so on using 3 bits
+    - the remaining 38 bits are used by each redis instance for its auto incrementing ids - each instance can generate 0 to 2^38 - 1, so the 8 instances together can hand out about 2.2 trillion ids - comfortably more than 365 billion
+    - so now, we have a unique number
+- another solution - "token service" -
+  - when our shortener system comes up, it would ask our token service for a range to use
+  - each instance would be assigned a range - 10000-20000, 20000-30000 and so on
+  - now, the instance can generate 10000 urls before asking for the next range
+  - the token service need not worry too much about scaling - it would take some time for our shortening service instance to exhaust the 10000 range it has been assigned - so, it can use a regular mysql database
+  - a disadvantage - the instance can go down after being assigned a range from the token service and before exhausting this list entirely, and thus we have now wasted a range of numbers
+- till now, we only took care of generating a unique number - now, we encode it as a base 62 string
+- issue - small numbers produce short strings - e.g. with a 0-9a-zA-Z alphabet, the number 16 encodes to the single character "g" - but we would like every short url to be exactly 7 characters
+- solution - pad using a special symbol like `=`
+- finally, we can use cassandra to store the short url to actual url mapping
+- follow up question - analytics - how many times a short url was accessed etc - solution - put messages onto kafka, have a separate analytics service
+
+## Key Value Store
+
+### Gather Functional Requirements
+
+- equivalent question - dynamodb, maybe others like redis, memcached, etc can draw inspiration
+- use cases - customer preferences, shopping cart, etc
+- key value stores are "distributed hash tables"
+- we want our system to be configurable - applications can choose between high consistency and high availability models
+  - note - this cannot be done dynamically, only when instantiating our key value store
+- we should always be able to write - availability > consistency
+- 
scalability - add or remove servers easily with little to no impact on availability +- fault tolerance - operate uninterrupted despite failures +- get(key) - when choosing eventual consistency model, more than one value can be returned +- put(key, value) - server can store additional metadata alongside as well, e.g. version etc +- we can use md5 for generating a hash for the key +- if amount of data becomes too much, we can store the data in s3 and the link to this object in cassandra + +### Consistent Hashing - Scalability + +- we pass our value through a hash function to obtain a hash +- "mod hashing" - used when the size of our hash table is limited - we can perform % m with the hash obtained from above +- problems when using mod hashing - + - what if we suddenly increase the number of shards? - the hash will change! e.g. we had 3 shards initially, our data was stored in shard 1 (3 % 3) + 1, but after the increasing to 4 shards, we would end up looking in shard 4 (3 % 4) + 1 + - even distribution - we might end up having hot shards +- so, to avoid both these problems, we need to use "consistent hashing" instead +- using consistent hashing, when we add or remove a node, we should only have to re balance (1 / n)% of the data, where n is the number of nodes +- imagine a virtual ring like structure + +![consistent hashing](/assets/img/high-level-design/consistent-hashing.png) + +- we set the servers randomly at some point on this ring +- we go clockwise to decide which key should go into which server +- imagine we have 2 servers initially + - data for keys 2, 3, 4, 5 and 6 go into server 7 + - data for keys 7, 8, 9, 10 and 1 go into server 2 +- suppose, we add a new server at 4 +- now, we only need to re balance the data for keys 2 and 3 +- we can similarly work it out in the cases when we remove a server +- now, we have handled issue 1 - change in number of instances without re balancing +- with our current strategy, we can still end up with skewed partitions - + +![consistent hashing disadvantage](/assets/img/high-level-design/consistent-hashing-disadvantage.png) + +- workaround - we replicate the server in random places on the ring +- for calculating where the data should be in the ring - we use a single hash function and move in clockwise direction to find the responsible node +- for calculating where the node should be in the ring - we use multiple hash functions and place all these replicas on the right parts of the ring +- this is called a "virtual server" - our physical server is present on multiple parts of the ring +- another advantage - if we have a heterogenous architecture i.e. the node being added to the mix has more capacity - we can replicate it more times in the ring + +![consistent hashing workaround](/assets/img/high-level-design/consistent-hashing-workaround.png) + +### Replication + +- why an active passive architecture would not work - + - we want a write heavy system, and the active system can become a bottleneck + - there is a lag for acknowledgements from replicas +- therefore, we use an active active / peer to peer architecture +- the data is replicated into (x - 1) other replicas (not all n servers). this x is typically 3 or 5 + - for quorum, it makes sense to have an odd number of replicas - quorum required for 5 replicas is 3, for 6 replicas is 4. 
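+- a minimal python sketch of the ring and virtual servers described above - md5 stands in for the hash function, and the server names / copy counts are illustrative -
+  ```python
+  import bisect
+  import hashlib
+
+  def position(value: str) -> int:
+      # the hash of a key or a virtual server decides its position on the ring
+      return int(hashlib.md5(value.encode()).hexdigest(), 16)
+
+  class Ring:
+      def __init__(self) -> None:
+          self.points: list[int] = []      # sorted positions on the ring
+          self.owner: dict[int, str] = {}  # ring position -> physical server
+
+      def add_server(self, server: str, copies: int = 3) -> None:
+          # each physical server is placed at several points - a bigger server can ask for more copies
+          for i in range(copies):
+              point = position(f"{server}#{i}")
+              bisect.insort(self.points, point)
+              self.owner[point] = server
+
+      def server_for(self, key: str) -> str:
+          # move clockwise from the key's position to the first virtual server
+          index = bisect.bisect(self.points, position(key)) % len(self.points)
+          return self.owner[self.points[index]]
+
+  ring = Ring()
+  ring.add_server("server-a")
+  ring.add_server("server-b")
+  ring.add_server("server-c", copies=6)  # more capacity, so more spots on the ring
+  print(ring.server_for("user:42"))      # whichever server owns the next position clockwise
+  ```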
so we can withstand 2 failures in case of both 5 and 6 replicas +- we find the virtual server responsible for our write in the ring - this is called the "coordinator" +- then, we replicate the data to the next x - 1 replicas in clockwise direction +- note - due to the virtual server concept we discussed, skip the virtual servers which point to physical server where the key is already stored. this way, we have our data stored in x different physical servers + +![consistent hashing replication](/assets/img/high-level-design/consistent-hashing-replication.png) + +### Versioning - Vector Clocks + +- issue - during network partitions / node failures, the version history of the same object might diverge across nodes +- so, we need some reconciliation logic +- so, we need to use versioning +- we can use timestamp, but it is not reliable in a distributed system +- so, we use "vector clocks" +- the idea is like git - if automatic merging is possible then good, else the client has to resolve the conflicts for deciding the ultimate value + - note - the client in our case is the [load balancer / another node](#load-balancer) +- so, now our api works as follows - + - get requests return multiple vector clocks in case of conflicts + - put requests accept a vector clock to help resolve conflicts +- e.g. we have 3 replicas, A B and C +- first write goes to A, all nodes will have (A,1), value E1 +- second write goes to A, all nodes will have (A,2), value E2 - this overwrites E1 +- at this point, we have a network partition, and say A becomes unavailable as well +- third write goes to B, B will have (A,2), (B,1), value E3 +- fourth write goes to C, C will have (A,2), (C,1), value E4 +- now suppose the network partition is resolved +- now, when a write comes in, all nodes will have (A, 3), (B, 1), (C, 1), value E5 + +### Load Balancer + +- "generic load balancer" - + - the load balancer can forward the request to any of the replicas + - all servers are aware of the preference list for all key ranges - [gossip protocol](#fault-tolerance) + - thus, they can forward the request to the right coordinating node + - disadvantage - has more latency, since there is an extra hop to reach the coordinating node +- "partition aware load balancer" - + - the load balancer itself is aware of the right partition to direct the read request to + - disadvantage - non trivial load balancer logic + +### r and w + +- we maintain a "preference list" for each key - this contains the servers where the data is stored +- now, if we want to maintain x replicas, we do not necessarily wait for response from all x replicas for writes / reads +- r - for a get request - wait for r - 1 replicas to respond with their vector clocks and values +- if the histories of the total r replicas are divergent, return everything to the client to merge +- w - for a put request - wait for w - 1 replicas to acknowledge +- replicate to remaining x-w replicas "asynchronously" +- we need to ensure r + w > x + - this way, we ensure that at least one replica between r and w is common + - this way, we ensure we always get the latest value + - this is because at least w of them will have the value updated synchronously + - one of them would end up being used as a part of r +- e.g. 
x is 3, r and w can both be 2
+- if r is 3 and w is 1, we get speedy writes but slower reads
+- similarly, if r is 1 and w is 3, we have slower writes but much faster reads
+
+### Fault Tolerance
+
+- "gossip protocol" - servers are able to communicate things like preference lists using this
+- they send each other heartbeats, and this way the servers know which servers are up and which ones are down
+- we know that nodes are responsible for maintaining ranges of the hash space
+- now, using a "merkle tree", the hash of each key's value is stored at the leaves
+- the remaining nodes of the tree contain the hash of all of their children
+- this way, if the hash of a node in two different merkle trees matches, we do not need to recursively check its subtree and leaves
+- e.g. look how we duplicate the value below if the number of nodes is odd
+
+![merkle tree](/assets/img/high-level-design/merkle-tree.png)
+
+## Ride Sharing Service
+
+### Gather Functional Requirements
+
+- should the system design the payment processing system and automate the payments -
+  - yes, collect payment from users
+  - track bank information of drivers to pay them
+- should we consider eta -
+  - yes, ride matching should reduce the wait time of the rider / reduce the distance traveled by the driver to reach the user
+  - we can use an external eta service
+- can multiple people share a ride -
+  - yes, a driver can be matched to multiple riders if the riders want to share rides and the driver has capacity left in the vehicle
+- how do we calculate the rider's fee and driver compensation -
+  - it has three components - a flat fee, duration of the ride and the distance traveled
+  - the exact formula is not in scope, but we need to track the three parameters above
+- is registering users and drivers supported -
+  - registering and logging in of users is supported
+  - registering of drivers is not supported, since they go through a background check, vehicle inspection, etc
+  - logging in of drivers is however supported
+
+### Sequence Diagram
+
+- driver flow -
+  - the driver logs in
+  - then, the driver hits the "join" button, which adds them to the driver pool
+  - from this point on, their location gets sent to our system every 5-10 secs
+- rider flow -
+  - the rider registers / logs in
+  - then, the rider requests a ride. the request contains their location
+  - the system looks for drivers - close to the rider, about to finish their ride, etc
+  - if the driver accepts the ride, they start sending their coordinates continuously to our system
+  - simultaneously, our system keeps sending the location updates to the rider
+  - finally, once the trip ends, the driver hits the finish trip button
+  - the system sends both the driver and the rider the fare details
+  - once the payment is completed successfully, the driver's account is credited with their pay
+
+### State Diagram
+
+- the driver has 5 states - logged out -> logged in -> available for matching -> matched with rider -> in trip
+- from in trip, they can either jump to matched with rider (if their current trip is about to end but they already have a match) or available for matching (if their previous trip has ended)
+- the driver can transition into the logged in state when they do not wish to accept any further rides
+
+### Software Architecture
+
+- user service -
+  - will contain two tables - riders and drivers.
both should have name, auth information, etc + - note - the drivers table might have additional information like vehicle information + - the user service returns an authentication token on successful login +- payment service - + - store user's billing information + - store driver's bank information + - it will integrate with the different banks / credit card companies with their apis +- driver service - + - maintains a bidirectional websocket connection with the driver + - removes the overhead around establishing a new connection for every location update + - another reason - our system needs to "push" the potential riders to the driver +- rider service - + - just like driver service, a long bidirectional connection is maintained + - helps push updates from the system around driver location updates, etc +- location service - + - receives location updates from the driver service via a message broker + - benefit of this event driven model - we can use separation of concerns / decoupling + - now, we can use the event sourcing pattern from the location service to trace the entire path + - location service has another functionality - to serve the matching service + - the matching service sends it the rider's starting coordinates + - the location service can respond to it with the close by drivers + - given the location of user, we try to build a 1km radius and try to look for all the drivers in this range using the location service + - first challenge - how to optimally find drivers in 1km radius + - a technique used by location based services is to divide the earth's surface into cells + - each cell is given a unique id, and the location service alongside the actual coordinates, stores this cell id as well + - now, we only need to know the cells that fall within the range of a given cell + - we also save ourselves from the complex distance computation involved when using latitude and longitude, which are floating point numbers + - this technique is called "geo hash" + - it is readily available in multiple databases and libraries + - second challenge - what if the direct distance is less than 1km, but the arrival time is more than an hour, given the u turn, some new construction and traffic + - so, we need to use an eta service + - while gathering functional requirements, we were told we can use an external service +- matching service - + - match the driver and rider + - the matching service accepts the user location request from the user service + - the matching service forwards this to the location service and receives the response containing (driver, eta) combinations + - after this, the matching service can contain complex logic - + - favor riders who have not earned anything yet + - consider drivers who are about to finish the trip + - consider sharing of ride + - then this gets sent to the driver service for acceptance + - once it receives the accept from a driver, it will notify the trip service about it + - the trip service can now create an entry in its database about the driver rider combination + - finally, the rider is notified about this as well + - i do not think matching service needs a database +- trip service - + - maintains the trip details + - calculates the fare details +- the entire architecture is "choreography pattern", since there is an event driven workflow between multiple services +- for scalability, we again hit the same issue for both driver service and rider service as in [instant messaging](#real-time-instant-messaging-service). 
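+- a simplified python sketch of the cell based driver lookup in the location service above - a flat grid stands in for a real geo hash / s2 library, and the cell size and names are illustrative -
+  ```python
+  from collections import defaultdict
+
+  CELL_SIZE = 0.01  # roughly 1 km of latitude per cell
+
+  def cell_of(lat: float, lng: float) -> tuple:
+      return (int(lat // CELL_SIZE), int(lng // CELL_SIZE))
+
+  drivers_by_cell = defaultdict(set)  # cell id -> driver ids currently inside it
+
+  def update_driver_location(driver_id: str, lat: float, lng: float) -> None:
+      # called for every location update the driver service publishes
+      # (a fuller version would also remove the driver from their previous cell)
+      drivers_by_cell[cell_of(lat, lng)].add(driver_id)
+
+  def drivers_near(lat: float, lng: float) -> set:
+      # check the rider's cell plus its 8 neighbours instead of scanning every driver
+      row, col = cell_of(lat, lng)
+      nearby = set()
+      for dr in (-1, 0, 1):
+          for dc in (-1, 0, 1):
+              nearby |= drivers_by_cell.get((row + dr, col + dc), set())
+      return nearby
+
+  update_driver_location("driver-7", 12.9716, 77.5946)
+  print(drivers_near(12.9720, 77.5950))  # {'driver-7'}
+  ```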
we solve it by introducing a connection service in between which tracks which user is connected to which instance. the matching service, trip service, etc can communicate with connection service, which calls the right instance of driver / rider service +- we want to minimize the login time - + - essentially as a part of the functional requirement, we want to immediately notify the user (driver or rider) if there are typos + - approach 1 - we use in memory hash set - if the username in the request payload is not present inside the hash set, we avoid performing the expensive database lookup to find the password, profile information, etc and immediately inform the user that the username does not exist + - drawback - storing millions of users in memory in each of our user service instance is infeasible + - solution - we can use bloom filters + - how hash set works - for each hash, it stores a linked list like data structure to handle collisions + - how bloom filter works - for each hash, it just stores a binary 0 or 1 + - drawback - false positives - due to collision, it might happen that a user is actually not present in the database but holds one for the bloom filter + - one solution - multiple hashing functions. thus, the bloom filter will have to have 1 for all the computed hashes + - however, it is a tradeoff we make given our use case + - another drawback - we can only add values to a bloom filter, not remove them. should be fine - + - deleting a user is rare + - the bloom filter is recreated every time a fresh instance of the user service boots up + +## Video Conferencing Service + +### Software Architecture + +- websockets - + - there can be functionalities like calling etc that we want to support + - so, we would need to have a client-server, websocket feature to be able to push such events + - now this connection can additionally be used for validating credentials of meeting (entering the right pass code) etc +- meetings - + - assume there are n participants. 
zoom has to send n-1 video streams to all the n participants + - the participants try to obtain the public ip addresses of each other - using the websocket described above + - finally, they can now start talking to each other directly + - so, we started from client server protocol and eventually switch to a peer to peer model using web rtc (realtime communication) + - udp is used underneath for faster delivery at the sake of lesser reliability - people would not (or sometimes even cannot) go back to hear what they miss +- issues and solutions for meetings - + - we still typically use ipv4, so we need techniques like nat + - recall how we had to exchange the ip address between each other using websockets + - "stun server" - tells a participant what its public ip address is - this would be a combination of the isp's router's public address and the port which helps with port forwarding + - now that a participant knows about its public ip address, it can send this via the websockets to other participants + - now, all the participants can talk using peer to peer + - issue - some nats or isp might or might not allow peer to peer communication directly + - so, we can use "turn server" instead - it is like the intermediary server through which calls are proxied + - so, we are back to the client server model when using turn server +- webinars / large conferences - + - issue with web rtc discussed in meetings - if there is a large conference, one participant (speaker) needs to send to few thousand participants + - here, the problem statement itself is different from meetings - the speaker is broadcasting, and others are just receiving + - handling so many peer to peer communications might not be possible by the small user device + - so, we use rtmp - realtime messaging protocol + - the speaker relays communication via an rtmp server in our system, which then forwards it to all the remaining clients + - now, unlike in meetings, we want more reliability around the speaker's communication here + - so, rtmp uses tcp instead for guaranteed delivery, unlike web rtc described in meetings + - also, it is more client server here, unlike in meetings which was peer to peer + - my understanding - unlike in peer to peer, the transcoding in webinars is heavier - it supports adaptive streaming as well - the speaker's resolution would depend on the participant's bandwidth +- video recording - + - for meetings - clients send it asynchronously in batches to the "recording server", without affecting the actual meeting + - for large meetings - rtmp server writes it asynchronously maybe to a dfs mounted on it, which can then eventually be sent to an object storage depending on use case diff --git a/_sass/addon/commons.scss b/_sass/addon/commons.scss new file mode 100644 index 0000000..86b4899 --- /dev/null +++ b/_sass/addon/commons.scss @@ -0,0 +1,1576 @@ +/* + The common styles +*/ + +html { + @media (prefers-color-scheme: light) { + &:not([data-mode]), + &[data-mode='light'] { + @include light-scheme; + } + + &[data-mode='dark'] { + @include dark-scheme; + } + } + + @media (prefers-color-scheme: dark) { + &:not([data-mode]), + &[data-mode='dark'] { + @include dark-scheme; + } + + &[data-mode='light'] { + @include light-scheme; + } + } + + font-size: 16px; +} + +body { + background: var(--main-bg); + padding: env(safe-area-inset-top) env(safe-area-inset-right) + env(safe-area-inset-bottom) env(safe-area-inset-left); + color: var(--text-color); + -webkit-font-smoothing: antialiased; + font-family: $font-family-base; +} 
+ +/* --- Typography --- */ + +@for $i from 1 through 5 { + h#{$i} { + @extend %heading; + + @if $i > 1 { + @extend %section; + @extend %anchor; + } + + @if $i < 5 { + $factor: 0.18rem; + + @if $i == 1 { + $factor: 0.23rem; + } + + font-size: 1rem + (5 - $i) * $factor; + } @else { + font-size: 1rem; + } + } +} + +a { + @extend %link-color; + + text-decoration: none; +} + +img { + max-width: 100%; + height: auto; + transition: all 0.35s ease-in-out; + + &[data-src] { + &[data-lqip='true'] { + &.lazyload, + &.lazyloading { + -webkit-filter: blur(20px); + filter: blur(20px); + } + } + + &:not([data-lqip='true']) { + &.lazyload, + &.lazyloading { + background: var(--img-bg); + } + + &.lazyloaded { + -webkit-animation: fade-in 0.35s ease-in; + animation: fade-in 0.35s ease-in; + } + } + + &.shadow { + -webkit-filter: drop-shadow(2px 4px 6px rgba(0, 0, 0, 0.08)); + filter: drop-shadow(2px 4px 6px rgba(0, 0, 0, 0.08)); + box-shadow: none !important; /* cover the Bootstrap 4.6.1 styles */ + } + + @extend %img-caption; + } + + @-webkit-keyframes fade-in { + from { + opacity: 0; + } + to { + opacity: 1; + } + } + + @keyframes fade-in { + from { + opacity: 0; + } + to { + opacity: 1; + } + } +} + +blockquote { + border-left: 5px solid var(--blockquote-border-color); + padding-left: 1rem; + color: var(--blockquote-text-color); + + &[class^='prompt-'] { + border-left: 0; + position: relative; + padding: 1rem 1rem 1rem 3rem; + color: var(--prompt-text-color); + + @extend %rounded; + + &::before { + text-align: center; + width: 3rem; + position: absolute; + left: 0.25rem; + margin-top: 0.4rem; + text-rendering: auto; + -webkit-font-smoothing: antialiased; + } + + > p:last-child { + margin-bottom: 0; + } + } + + @include prompt('tip', '\f0eb', 'regular'); + @include prompt('info', '\f06a'); + @include prompt('warning', '\f06a'); + @include prompt('danger', '\f071'); +} + +kbd { + font-family: inherit; + display: inline-block; + vertical-align: middle; + line-height: 1.3rem; + min-width: 1.75rem; + text-align: center; + margin: 0 0.3rem; + padding-top: 0.1rem; + color: var(--kbd-text-color); + background-color: var(--kbd-bg-color); + border-radius: 0.25rem; + border: solid 1px var(--kbd-wrap-color); + box-shadow: inset 0 -2px 0 var(--kbd-wrap-color); +} + +footer { + font-size: 0.8rem; + background-color: var(--main-bg); + + div.d-flex { + height: $footer-height; + line-height: 1.2rem; + padding-bottom: 1rem; + border-top: 1px solid var(--main-border-color); + flex-wrap: wrap; + } + + a { + @extend %text-color; + + &:hover { + @extend %link-hover; + } + } + + p { + width: 100%; + text-align: center; + margin-bottom: 0; + } +} + +/* fontawesome icons */ +i { + &.far, + &.fas { + @extend %no-cursor; + } +} + +/* --- Panels --- */ + +.access { + top: 2rem; + transition: top 0.2s ease-in-out; + margin-top: 3rem; + margin-bottom: 4rem; + + &:only-child { + position: -webkit-sticky; + position: sticky; + } + + > div { + padding-left: 1rem; + border-left: 1px solid var(--main-border-color); + + &:not(:last-child) { + margin-bottom: 4rem; + } + } + + .post-content { + font-size: 0.9rem; + } +} + +#panel-wrapper { + /* the headings */ + .panel-heading { + @include label(inherit); + } + + .post-tag { + line-height: 1.05rem; + font-size: 0.85rem; + border: 1px solid var(--btn-border-color); + border-radius: 0.8rem; + padding: 0.3rem 0.5rem; + margin: 0 0.35rem 0.5rem 0; + + &:hover { + transition: all 0.3s ease-in; + } + } +} + +#access-lastmod { + a { + &:hover { + @extend %link-hover; + } + + @extend 
%no-bottom-border; + + color: inherit; + } +} + +.footnotes > ol { + padding-left: 2rem; + margin-top: 0.5rem; + + > li { + &:not(:last-child) { + margin-bottom: 0.3rem; + } + + > p { + margin-left: 0.25em; + margin-top: 0; + margin-bottom: 0; + } + } +} + +.footnote { + @at-root a#{&} { + @include ml-mr(1px); + @include pl-pr(2px); + + border-bottom-style: none !important; + transition: background-color 1.5s ease-in-out; + } +} + +.reversefootnote { + @at-root a#{&} { + font-size: 0.6rem; + line-height: 1; + position: relative; + bottom: 0.25em; + margin-left: 0.25em; + border-bottom-style: none !important; + } +} + +/* --- Begin of Markdown table style --- */ + +/* it will be created by Liquid */ +.table-wrapper { + overflow-x: auto; + margin-bottom: 1.5rem; + + > table { + min-width: 100%; + overflow-x: auto; + border-spacing: 0; + + thead { + border-bottom: solid 2px rgba(210, 215, 217, 0.75); + + th { + @extend %table-cell; + } + } + + tbody { + tr { + border-bottom: 1px solid var(--tb-border-color); + + &:nth-child(2n) { + background-color: var(--tb-even-bg); + } + + &:nth-child(2n + 1) { + background-color: var(--tb-odd-bg); + } + + td { + @extend %table-cell; + } + } + } /* tbody */ + } /* table */ +} + +/* --- post --- */ + +.post-preview { + @extend %rounded; + + border: 0; + background: var(--card-bg); + box-shadow: var(--card-shadow); + + &::before { + @extend %rounded; + + content: ''; + width: 100%; + height: 100%; + position: absolute; + background-color: var(--card-hovor-bg); + opacity: 0; + transition: opacity 0.35s ease-in-out; + } + + &:hover { + &::before { + opacity: 0.3; + } + } +} + +.post { + h1 { + margin-top: 2rem; + margin-bottom: 1.5rem; + } + + p { + > img[data-src], + > a.popup { + &:not(.normal):not(.left):not(.right) { + @include align-center; + } + } + } +} + +.post-meta { + font-size: 0.85rem; + + a { + &:not([class]):hover { + @extend %link-hover; + } + } + + em { + @extend %normal-font-style; + } +} + +.post-content { + font-size: 1.08rem; + margin-top: 2rem; + overflow-wrap: break-word; + + a { + &.popup { + @extend %no-cursor; + @extend %img-caption; + @include mt-mb(0.5rem); + + cursor: zoom-in; + } + + &:not(.img-link) { + @extend %link-underline; + + &:hover { + @extend %link-hover; + } + } + } + + ol, + ul { + &:not([class]), + &.task-list { + -webkit-padding-start: 1.75rem; + padding-inline-start: 1.75rem; + + li { + margin: 0.25rem 0; + padding-left: 0.25rem; + } + + ol, + ul { + -webkit-padding-start: 1.25rem; + padding-inline-start: 1.25rem; + margin: 0.5rem 0; + } + } + } + + ul.task-list { + -webkit-padding-start: 1.25rem; + padding-inline-start: 1.25rem; + + li { + list-style-type: none; + padding-left: 0; + + /* checkbox icon */ + > i { + width: 2rem; + margin-left: -1.25rem; + color: var(--checkbox-color); + + &.checked { + color: var(--checkbox-checked-color); + } + } + + ul { + -webkit-padding-start: 1.75rem; + padding-inline-start: 1.75rem; + } + } + + input[type='checkbox'] { + margin: 0 0.5rem 0.2rem -1.3rem; + vertical-align: middle; + } + } /* ul */ + + dl > dd { + margin-left: 1rem; + } + + ::marker { + color: var(--text-muted-color); + } +} /* .post-content */ + +.tag:hover { + @extend %tag-hover; +} + +.post-tag { + display: inline-block; + min-width: 2rem; + text-align: center; + border-radius: 0.3rem; + padding: 0 0.4rem; + color: inherit; + line-height: 1.3rem; + + &:not(:last-child) { + margin-right: 0.2rem; + } +} + +.rounded-10 { + border-radius: 10px !important; +} + +.img-link { + color: transparent; + display: 
inline-flex; +} + +.shimmer { + overflow: hidden; + position: relative; + background: var(--img-bg); + + &::before { + content: ''; + position: absolute; + background: var(--shimmer-bg); + height: 100%; + width: 100%; + -webkit-animation: shimmer 1s infinite; + animation: shimmer 1s infinite; + } + + @-webkit-keyframes shimmer { + 0% { + transform: translateX(-100%); + } + 100% { + transform: translateX(100%); + } + } + + @keyframes shimmer { + 0% { + transform: translateX(-100%); + } + 100% { + transform: translateX(100%); + } + } +} + +.embed-video { + width: 100%; + height: 100%; + margin-bottom: 1rem; + + @extend %rounded; + + &.youtube { + aspect-ratio: 16 / 9; + } + + &.twitch { + aspect-ratio: 310 / 189; + } +} + +/* --- buttons --- */ +.btn-lang { + border: 1px solid !important; + padding: 1px 3px; + border-radius: 3px; + color: var(--link-color); + + &:focus { + box-shadow: none; + } +} + +/* --- Effects classes --- */ + +.loaded { + display: block !important; + + @at-root .d-flex#{&} { + display: flex !important; + } +} + +.unloaded { + display: none !important; +} + +.visible { + visibility: visible !important; +} + +.hidden { + visibility: hidden !important; +} + +.flex-grow-1 { + flex-grow: 1 !important; +} + +.btn-box-shadow { + box-shadow: 0 0 8px 0 var(--btn-box-shadow) !important; +} + +/* overwrite bootstrap muted */ +.text-muted { + color: var(--text-muted-color) !important; +} + +/* Overwrite bootstrap tooltip */ +.tooltip-inner { + font-size: 0.7rem; + max-width: 220px; + text-align: left; +} + +/* Overwrite bootstrap outline button */ +.btn.btn-outline-primary { + &:not(.disabled):hover { + border-color: #007bff !important; + } +} + +.disabled { + color: rgb(206, 196, 196); + pointer-events: auto; + cursor: not-allowed; +} + +.hide-border-bottom { + border-bottom: none !important; +} + +.input-focus { + box-shadow: none; + border-color: var(--input-focus-border-color) !important; + background: center !important; + transition: background-color 0.15s ease-in-out, border-color 0.15s ease-in-out; +} + +.left { + float: left; + margin: 0.75rem 1rem 1rem 0 !important; +} + +.right { + float: right; + margin: 0.75rem 0 1rem 1rem !important; +} + +/* --- Overriding --- */ + +/* magnific-popup */ + +figure .mfp-title { + text-align: center; + padding-right: 0; + margin-top: 0.5rem; +} + +.mfp-img { + transition: none; +} + +/* mermaid */ +.mermaid { + text-align: center; +} + +/* MathJax */ +mjx-container { + overflow-y: hidden; + min-width: auto !important; +} + +/* --- sidebar layout --- */ + +$sidebar-display: 'sidebar-display'; +$btn-gap: 0.8rem; // for the bottom icons +$btn-border-width: 3px; +$btn-mb: 0.5rem; + +#sidebar { + @include pl-pr(0); + + position: fixed; + top: 0; + left: 0; + height: 100%; + overflow-y: auto; + width: $sidebar-width; + z-index: 99; + background: var(--sidebar-bg); + + /* Hide scrollbar for Chrome, Safari and Opera */ + &::-webkit-scrollbar { + display: none; + } + + /* Hide scrollbar for IE, Edge and Firefox */ + -ms-overflow-style: none; /* IE and Edge */ + scrollbar-width: none; /* Firefox */ + + %sidebar-link-hover { + &:hover { + color: var(--sidebar-active-color); + } + } + + a { + @extend %sidebar-links; + } + + #avatar { + display: block; + width: 7rem; + height: 7rem; + overflow: hidden; + box-shadow: var(--avatar-border-color) 0 0 0 2px; + transform: translateZ(0); /* fixed the zoom in Safari */ + + img { + transition: transform 0.5s; + + &:hover { + transform: scale(1.2); + } + } + } + + .profile-wrapper { + @include mt-mb(2.5rem); 
+ @extend %clickable-transition; + + padding-left: 2.5rem; + padding-right: 1.25rem; + width: 100%; + } + + .site-title { + font-weight: 900; + font-size: 1.75rem; + line-height: 1.2; + letter-spacing: 0.25px; + color: rgba(134, 133, 133, 0.99); + margin-top: 1.25rem; + margin-bottom: 0.5rem; + + a { + @extend %clickable-transition; + @extend %sidebar-link-hover; + } + } + + .site-subtitle { + font-size: 95%; + color: var(--sidebar-muted-color); + margin-top: 0.25rem; + word-spacing: 1px; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; + } + + ul { + margin-bottom: 2rem; + + li.nav-item { + opacity: 0.9; + width: 100%; + padding-left: 1.5rem; + padding-right: 1.5rem; + + a.nav-link { + @include pt-pb(0.6rem); + + display: flex; + align-items: center; + border-radius: 0.75rem; + font-weight: 600; + + &:hover { + background-color: var(--sidebar-hover-bg); + } + + i { + font-size: 95%; + opacity: 0.8; + margin-right: 1.5rem; + } + + span { + font-size: 90%; + letter-spacing: 0.2px; + } + } + + &.active { + .nav-link { + color: var(--sidebar-active-color); + background-color: var(--sidebar-hover-bg); + + span { + opacity: 1; + } + } + } + + &:not(:first-child) { + margin-top: 0.25rem; + } + } + } + + .sidebar-bottom { + @include pl-pr(2rem); + + margin-bottom: 1.5rem; + + %button { + width: 1.75rem; + height: 1.75rem; + margin-bottom: $btn-mb; // multi line gap + border-radius: 50%; + color: var(--sidebar-btn-color); + background-color: var(--sidebar-btn-bg); + text-align: center; + display: flex; + align-items: center; + justify-content: center; + + &:hover { + background-color: var(--sidebar-hover-bg); + } + } + + a { + @extend %button; + @extend %sidebar-link-hover; + @extend %clickable-transition; + + &:not(:last-child) { + margin-right: $btn-gap; + } + } + + i { + line-height: 1.75rem; + } + + .mode-toggle { + padding: 0; + border: 0; + + @extend %button; + @extend %sidebar-links; + @extend %sidebar-link-hover; + } + + .icon-border { + @extend %no-cursor; + @include ml-mr(calc(($btn-gap - $btn-border-width) / 2)); + + background-color: var(--sidebar-muted-color); + content: ''; + width: $btn-border-width; + height: $btn-border-width; + border-radius: 50%; + margin-bottom: $btn-mb; + } + } /* .sidebar-bottom */ +} /* #sidebar */ + +@media (hover: hover) { + #sidebar ul > li:last-child::after { + transition: top 0.5s ease; + } + + .nav-link { + transition: background-color 0.3s ease-in-out; + } + + .post-preview { + transition: background-color 0.35s ease-in-out; + } +} + +#search-result-wrapper { + display: none; + height: 100%; + width: 100%; + overflow: auto; + + .post-content { + margin-top: 2rem; + } +} + +/* --- top-bar --- */ + +#topbar-wrapper { + height: $topbar-height; + background-color: var(--topbar-bg); +} + +#topbar { + /* icons */ + i { + color: #999999; + } + + #breadcrumb { + font-size: 1rem; + color: gray; + padding-left: 0.5rem; + + a:hover { + @extend %link-hover; + } + + span { + &:not(:last-child) { + &::after { + content: '›'; + padding: 0 0.3rem; + } + } + } + } +} /* #topbar */ + +#sidebar-trigger, +#search-trigger { + display: none; +} + +#search-wrapper { + display: flex; + width: 100%; + border-radius: 1rem; + border: 1px solid var(--search-wrapper-border-color); + background: var(--main-bg); + padding: 0 0.5rem; + + i { + z-index: 2; + font-size: 0.9rem; + color: var(--search-icon-color); + } +} + +/* 'Cancel' link */ +#search-cancel { + color: var(--link-color); + margin-left: 0.75rem; + display: none; + 
white-space: nowrap; + + @extend %cursor-pointer; +} + +#search-input { + background: center; + border: 0; + border-radius: 0; + padding: 0.18rem 0.3rem; + color: var(--text-color); + height: auto; + + &:focus { + box-shadow: none; + + &.form-control { + &::-moz-placeholder { + @include input-placeholder; + } + &::-webkit-input-placeholder { + @include input-placeholder; + } + &:-ms-input-placeholder { + @include input-placeholder; + } + &::-ms-input-placeholder { + @include input-placeholder; + } + &::placeholder { + @include input-placeholder; + } + } + } +} + +#search-hints { + padding: 0 1rem; + + h4 { + margin-bottom: 1.5rem; + } + + .post-tag { + display: inline-block; + line-height: 1rem; + font-size: 1rem; + background: var(--search-tag-bg); + border: none; + padding: 0.5rem; + margin: 0 1.25rem 1rem 0; + + &::before { + content: '#'; + color: var(--text-muted-color); + padding-right: 0.2rem; + } + + @extend %link-color; + } +} + +#search-results { + padding-bottom: 3rem; + + a { + &:hover { + @extend %link-hover; + } + + @extend %link-color; + @extend %no-bottom-border; + @extend %heading; + + font-size: 1.4rem; + line-height: 2.5rem; + } + + > div { + width: 100%; + + &:not(:last-child) { + margin-bottom: 1rem; + } + + /* icons */ + i { + color: #818182; + margin-right: 0.15rem; + font-size: 80%; + } + + > p { + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 3; + -webkit-box-orient: vertical; + } + } +} /* #search-results */ + +#topbar-title { + display: none; + font-size: 1.1rem; + font-weight: 600; + font-family: sans-serif; + color: var(--topbar-text-color); + text-align: center; + width: 70%; + overflow: hidden; + text-overflow: ellipsis; + word-break: keep-all; + white-space: nowrap; +} + +#core-wrapper { + line-height: 1.75; + + .categories, + #tags, + #archives { + a:not(:hover) { + @extend %no-bottom-border; + } + } +} + +#mask { + display: none; + position: fixed; + inset: 0 0 0 0; + height: 100%; + width: 100%; + z-index: 1; + + @at-root [#{$sidebar-display}] & { + display: block !important; + } +} + +/* --- main wrapper --- */ + +#main-wrapper { + background-color: var(--main-bg); + position: relative; + min-height: calc(100vh - $footer-height-mobile); + + @include pl-pr(0); +} + +#topbar-wrapper.row, +#main > .row, +#search-result-wrapper > .row { + @include ml-mr(0); +} + +/* --- button back-to-top --- */ + +#back-to-top { + $size: 3rem; + + display: none; + z-index: 1; + cursor: pointer; + position: fixed; + right: 1rem; + bottom: 2rem; + background: var(--button-bg); + color: var(--btn-backtotop-color); + padding: 0; + width: $size; + height: $size; + border-radius: 50%; + border: 1px solid var(--btn-backtotop-border-color); + transition: transform 0.2s ease-out; + -webkit-transition: transform 0.2s ease-out; + + &:hover { + transform: translate3d(0, -5px, 0); + -webkit-transform: translate3d(0, -5px, 0); + } + + i { + line-height: $size; + position: relative; + bottom: 2px; + } +} + +#notification { + @-webkit-keyframes popup { + from { + opacity: 0; + bottom: 0; + } + } + + @keyframes popup { + from { + opacity: 0; + bottom: 0; + } + } + + .toast-header { + background: none; + border-bottom: none; + color: inherit; + } + + .toast-body { + font-family: Lato, sans-serif; + line-height: 1.25rem; + + button { + font-size: 90%; + min-width: 4rem; + } + } + + &.toast { + &.show { + display: block; + min-width: 20rem; + border-radius: 0.5rem; + -webkit-backdrop-filter: blur(10px); + backdrop-filter: blur(10px); + 
background-color: rgba(255, 255, 255, 0.5); + color: #1b1b1eba; + position: fixed; + left: 50%; + bottom: 20%; + transform: translateX(-50%); + -webkit-animation: popup 0.8s; + animation: popup 0.8s; + } + } +} + +/* + Responsive Design: + + {sidebar, content, panel} >= 1200px screen width + {sidebar, content} >= 850px screen width + {content} <= 849px screen width + +*/ + +@media all and (max-width: 576px) { + #main-wrapper { + min-height: calc(100vh - #{$footer-height-mobile}); + } + + #core-wrapper { + .post-content { + > blockquote[class^='prompt-'] { + @include ml-mr(-1.25rem); + + border-radius: 0; + max-width: none; + } + } + } + + #avatar { + width: 5rem; + height: 5rem; + } +} + +@media all and (max-width: 768px) { + %full-width { + max-width: 100%; + } + + #topbar { + @extend %full-width; + } + + #main { + @extend %full-width; + @include pl-pr(0); + } +} + +/* hide sidebar and panel */ +@media all and (max-width: 849px) { + @mixin slide($append: null) { + $basic: transform 0.4s ease; + + @if $append { + transition: $basic, $append; + } @else { + transition: $basic; + } + } + + html, + body { + overflow-x: hidden; + } + + footer { + @include slide; + + height: $footer-height-mobile; + + div.d-flex { + padding: 1.5rem 0; + line-height: 1.65; + flex-wrap: wrap; + } + } + + [#{$sidebar-display}] { + #sidebar { + transform: translateX(0); + } + + #main-wrapper, + footer { + transform: translateX(#{$sidebar-width}); + } + + #back-to-top { + visibility: hidden; + } + } + + #sidebar { + @include slide; + + transform: translateX(-#{$sidebar-width}); /* hide */ + -webkit-transform: translateX(-#{$sidebar-width}); + } + + #main-wrapper { + @include slide; + } + + #topbar, + #main, + footer > .container { + max-width: 100%; + } + + #search-result-wrapper { + width: 100%; + } + + #breadcrumb, + #search-wrapper { + display: none; + } + + #topbar-wrapper { + @include slide(top 0.2s ease); + + left: 0; + } + + #core-wrapper, + #panel-wrapper { + margin-top: 0; + } + + #topbar-title, + #sidebar-trigger, + #search-trigger { + display: block; + } + + #search-result-wrapper .post-content { + letter-spacing: 0; + } + + #tags { + justify-content: center !important; + } + + h1.dynamic-title { + display: none; + + ~ .post-content { + margin-top: 2.5rem; + } + } +} /* max-width: 849px */ + +/* Phone & Pad */ +@media all and (min-width: 577px) and (max-width: 1199px) { + footer .d-flex > div { + width: 312px; + } +} + +/* Sidebar is visible */ +@media all and (min-width: 850px) { + /* Solved jumping scrollbar */ + html { + overflow-y: scroll; + } + + #main-wrapper, + footer { + margin-left: $sidebar-width; + } + + #main-wrapper { + min-height: calc(100vh - $footer-height); + } + + footer { + p { + width: auto; + &:last-child { + &::before { + content: '-'; + margin: 0 0.75rem; + opacity: 0.8; + } + } + } + } + + #sidebar { + .profile-wrapper { + margin-top: 3rem; + } + } + + #search-hints { + display: none; + } + + #search-wrapper { + max-width: $search-max-width; + } + + #search-result-wrapper { + max-width: $main-content-max-width; + justify-content: start !important; + } + + .post { + h1 { + margin-top: 3rem; + } + } + + div.post-content .table-wrapper > table { + min-width: 70%; + } + + /* button 'back-to-Top' position */ + #back-to-top { + bottom: 5.5rem; + right: 5%; + } + + #topbar-title { + text-align: left; + } +} + +/* Pad horizontal */ +@media all and (min-width: 992px) and (max-width: 1199px) { + #main .col-lg-11 { + flex: 0 0 96%; + max-width: 96%; + } +} + +/* Compact icons in sidebar & 
panel hidden */ +@media all and (min-width: 850px) and (max-width: 1199px) { + #search-results > div { + max-width: 700px; + } + + #breadcrumb { + width: 65%; + overflow: hidden; + text-overflow: ellipsis; + word-break: keep-all; + white-space: nowrap; + } +} + +/* panel hidden */ +@media all and (max-width: 1199px) { + #panel-wrapper { + display: none; + } + + #main > div.row { + justify-content: center !important; + } +} + +/* --- desktop mode, both sidebar and panel are visible --- */ + +@media all and (min-width: 1200px) { + #back-to-top { + bottom: 6.5rem; + } + + #search-wrapper { + margin-right: 4rem; + } + + #search-input { + transition: all 0.3s ease-in-out; + } + + #search-results > div { + width: 46%; + + &:nth-child(odd) { + margin-right: 1.5rem; + } + + &:nth-child(even) { + margin-left: 1.5rem; + } + + &:last-child:nth-child(odd) { + position: relative; + right: 24.3%; + } + } + + .post-content { + font-size: 1.03rem; + } + + footer { + div.d-felx { + width: 85%; + } + } +} + +@media all and (min-width: 1400px) { + #back-to-top { + right: calc((100vw - #{$sidebar-width} - 1140px) / 2 + 3rem); + } +} + +@media all and (min-width: 1650px) { + $icon-gap: 1rem; + + #main-wrapper, + footer { + margin-left: $sidebar-width-large; + } + + #topbar-wrapper { + left: $sidebar-width-large; + } + + #search-wrapper { + margin-right: calc( + #{$main-content-max-width} * 0.25 - #{$search-max-width} - 0.75rem + ); + } + + #main, + footer > .container { + max-width: $main-content-max-width; + padding-left: 1.75rem !important; + padding-right: 1.75rem !important; + } + + #core-wrapper, + #tail-wrapper { + padding-right: 4.5rem !important; + } + + #back-to-top { + right: calc( + (100vw - #{$sidebar-width-large} - #{$main-content-max-width}) / 2 + 2rem + ); + } + + #sidebar { + width: $sidebar-width-large; + + $icon-gap: 1rem; // for the bottom icons + + .profile-wrapper { + margin-top: 3.5rem; + margin-bottom: 2.5rem; + padding-left: 3.5rem; + } + + ul { + li.nav-item { + @include pl-pr(2.75rem); + } + } + + .sidebar-bottom { + padding-left: 2.75rem; + margin-bottom: 1.75rem; + + a:not(:last-child) { + margin-right: $icon-gap; + } + + .icon-border { + @include ml-mr(calc(($icon-gap - $btn-border-width) / 2)); + } + } + } +} /* min-width: 1650px */ diff --git a/_sass/addon/module.scss b/_sass/addon/module.scss new file mode 100644 index 0000000..10e0d69 --- /dev/null +++ b/_sass/addon/module.scss @@ -0,0 +1,173 @@ +/* +* Mainly scss modules, only imported to `assets/css/main.scss` +*/ + +/* ---------- scss placeholder --------- */ + +%heading { + color: var(--heading-color); + font-weight: 400; + font-family: $font-family-heading; +} + +%section { + #core-wrapper & { + margin-top: 2.5rem; + margin-bottom: 1.25rem; + + &:focus { + outline: none; /* avoid outline in Safari */ + } + } +} + +%anchor { + .anchor { + font-size: 80%; + } + + @media (hover: hover) { + .anchor { + visibility: hidden; + opacity: 0; + transition: opacity 0.25s ease-in, visibility 0s ease-in 0.25s; + } + + &:hover { + .anchor { + visibility: visible; + opacity: 1; + transition: opacity 0.25s ease-in, visibility 0s ease-in 0s; + } + } + } +} + +%tag-hover { + background: var(--tag-hover); + transition: background 0.35s ease-in-out; +} + +%table-cell { + padding: 0.4rem 1rem; + font-size: 95%; + white-space: nowrap; +} + +%link-hover { + color: #d2603a !important; + border-bottom: 1px solid #d2603a; + text-decoration: none; +} + +%link-color { + color: var(--link-color); +} + +%link-underline { + border-bottom: 1px solid 
var(--link-underline-color); +} + +%clickable-transition { + transition: all 0.3s ease-in-out; +} + +%no-cursor { + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +%no-bottom-border { + border-bottom: none; +} + +%cursor-pointer { + cursor: pointer; +} + +%normal-font-style { + font-style: normal; +} + +%rounded { + border-radius: $base-radius; +} + +%img-caption { + + em { + display: block; + text-align: center; + font-style: normal; + font-size: 80%; + padding: 0; + color: #6d6c6c; + } +} + +%sidebar-links { + color: rgba(117, 117, 117, 0.9); + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +%text-clip { + display: -webkit-box; + overflow: hidden; + text-overflow: ellipsis; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; +} + +/* ---------- scss mixin --------- */ + +@mixin mt-mb($value) { + margin-top: $value; + margin-bottom: $value; +} + +@mixin ml-mr($value) { + margin-left: $value; + margin-right: $value; +} + +@mixin pt-pb($val) { + padding-top: $val; + padding-bottom: $val; +} + +@mixin pl-pr($val) { + padding-left: $val; + padding-right: $val; +} + +@mixin input-placeholder { + opacity: 0.6; +} + +@mixin label($font-size: 1rem, $font-weight: 600, $color: var(--label-color)) { + color: $color; + font-size: $font-size; + font-weight: $font-weight; +} + +@mixin align-center { + position: relative; + left: 50%; + transform: translateX(-50%); +} + +@mixin prompt($type, $fa-content, $fa-style: 'solid') { + &.prompt-#{$type} { + background-color: var(--prompt-#{$type}-bg); + + &::before { + content: $fa-content; + color: var(--prompt-#{$type}-icon-color); + font: var(--fa-font-#{$fa-style}); + } + } +} diff --git a/_sass/addon/syntax.scss b/_sass/addon/syntax.scss new file mode 100644 index 0000000..df756a7 --- /dev/null +++ b/_sass/addon/syntax.scss @@ -0,0 +1,270 @@ +/* +* The syntax highlight. 
+*/ + +@import 'colors/light-syntax'; +@import 'colors/dark-syntax'; + +html { + @media (prefers-color-scheme: light) { + &:not([data-mode]), + &[data-mode='light'] { + @include light-syntax; + } + + &[data-mode='dark'] { + @include dark-syntax; + } + } + + @media (prefers-color-scheme: dark) { + &:not([data-mode]), + &[data-mode='dark'] { + @include dark-syntax; + } + + &[data-mode='light'] { + @include light-syntax; + } + } +} + +/* -- code snippets -- */ + +%code-snippet-bg { + background-color: var(--highlight-bg-color); +} + +%code-snippet-padding { + padding-left: 1rem; + padding-right: 1.5rem; +} + +.highlighter-rouge { + color: var(--highlighter-rouge-color); + margin-top: 0.5rem; + margin-bottom: 1.2em; /* Override BS Inline-code style */ +} + +.highlight { + @extend %rounded; + @extend %code-snippet-bg; + + @at-root figure#{&} { + @extend %code-snippet-bg; + } + + overflow: auto; + padding-top: 0.5rem; + padding-bottom: 1rem; + + pre { + margin-bottom: 0; + font-size: $code-font-size; + line-height: 1.4rem; + word-wrap: normal; /* Fixed Safari overflow-x */ + } + + table { + td pre { + overflow: visible; /* Fixed iOS safari overflow-x */ + word-break: normal; /* Fixed iOS safari linenos code break */ + } + } + + .lineno { + padding-right: 0.5rem; + min-width: 2.2rem; + text-align: right; + color: var(--highlight-lineno-color); + -webkit-user-select: none; + -moz-user-select: none; + -o-user-select: none; + -ms-user-select: none; + user-select: none; + } +} /* .highlight */ + +code { + -webkit-hyphens: none; + -ms-hyphens: none; + hyphens: none; + + &.highlighter-rouge { + font-size: $code-font-size; + padding: 3px 5px; + word-break: break-word; + border-radius: 4px; + background-color: var(--inline-code-bg); + } + + &.filepath { + background-color: inherit; + color: var(--filepath-text-color); + font-weight: 600; + padding: 0; + } + + a > &.highlighter-rouge { + padding-bottom: 0; /* show link's underlinke */ + color: inherit; + } + + a:hover > &.highlighter-rouge { + border-bottom: none; + } + + blockquote & { + color: inherit; + } +} + +td.rouge-code { + @extend %code-snippet-padding; + + /* + Prevent some browser extends from + changing the URL string of code block. 
+ */ + a { + color: inherit !important; + border-bottom: none !important; + pointer-events: none; + } +} + +div[class^='language-'] { + @extend %rounded; + @extend %code-snippet-bg; + + box-shadow: var(--language-border-color) 0 0 0 1px; + + .post-content > & { + @include ml-mr(-1.25rem); + + border-radius: 0; + } +} + +/* Hide line numbers for default, console, and terminal code snippets */ +div { + &.nolineno, + &.language-plaintext, + &.language-console, + &.language-terminal { + pre.lineno { + display: none; + } + + td.rouge-code { + padding-left: 1.5rem; + } + } +} + +.code-header { + @extend %no-cursor; + + $code-header-height: 2.25rem; + + display: flex; + justify-content: space-between; + align-items: center; + height: $code-header-height; + margin-left: 1rem; + margin-right: 0.5rem; + + /* the label block */ + span { + /* label icon */ + i { + font-size: 1rem; + margin-right: 0.5rem; + color: var(--code-header-icon-color); + + &.small { + font-size: 70%; + } + } + + @at-root [file] #{&} > i { + position: relative; + top: 1px; /* center the file icon */ + } + + /* label text */ + &::after { + content: attr(data-label-text); + font-size: 0.85rem; + font-weight: 600; + color: var(--code-header-text-color); + } + } + + /* clipboard */ + button { + @extend %cursor-pointer; + @extend %rounded; + + border: 1px solid transparent; + height: $code-header-height; + width: $code-header-height; + padding: 0; + background-color: inherit; + + i { + color: var(--code-header-icon-color); + } + + &[timeout] { + &:hover { + border-color: var(--clipboard-checked-color); + } + + i { + color: var(--clipboard-checked-color); + } + } + + &:focus { + outline: none; + } + + &:not([timeout]):hover { + background-color: rgba(128, 128, 128, 0.37); + + i { + color: white; + } + } + } +} + +@media all and (min-width: 576px) { + div[class^='language-'] { + .post-content > & { + @include ml-mr(0); + + border-radius: $base-radius; + } + + .code-header { + @include ml-mr(0); + + &::before { + $dot-size: 0.75rem; + $dot-margin: 0.5rem; + + content: ''; + display: inline-block; + margin-left: 1rem; + width: $dot-size; + height: $dot-size; + border-radius: 50%; + background-color: var(--code-header-muted-color); + box-shadow: ($dot-size + $dot-margin) 0 0 var(--code-header-muted-color), + ($dot-size + $dot-margin) * 2 0 0 var(--code-header-muted-color); + } + } + } +} diff --git a/_sass/addon/variables.scss b/_sass/addon/variables.scss new file mode 100644 index 0000000..0c68281 --- /dev/null +++ b/_sass/addon/variables.scss @@ -0,0 +1,27 @@ +/* + * The SCSS variables + */ + +/* sidebar */ + +$sidebar-width: 260px !default; /* the basic width */ +$sidebar-width-large: 300px !default; /* screen width: >= 1650px */ + +/* other framework sizes */ + +$topbar-height: 3rem !default; +$search-max-width: 210px !default; +$footer-height: 5rem !default; +$footer-height-mobile: 6rem !default; /* screen width: < 850px */ +$main-content-max-width: 1250px !default; +$bottom-min-height: 35rem !default; +$base-radius: 0.5rem; + +/* syntax highlight */ + +$code-font-size: 0.85rem !default; + +/* fonts */ + +$font-family-base: 'Source Sans Pro', 'Microsoft Yahei', sans-serif; +$font-family-heading: Lato, 'Microsoft Yahei', sans-serif; diff --git a/_sass/colors/dark-syntax.scss b/_sass/colors/dark-syntax.scss new file mode 100644 index 0000000..36e9651 --- /dev/null +++ b/_sass/colors/dark-syntax.scss @@ -0,0 +1,91 @@ +/* + * The syntax dark mode styles. 
+ */ + +@mixin dark-syntax { + --language-border-color: rgba(84, 83, 83, 0.27); + --highlight-bg-color: #252525; + --highlighter-rouge-color: #de6b18; + --highlight-lineno-color: #6c6c6d; + --inline-code-bg: #272822; + --code-header-text-color: #6a6a6a; + --code-header-muted-color: rgb(60, 60, 60); + --code-header-icon-color: rgb(86, 86, 86); + --clipboard-checked-color: #2bcc2b; + --filepath-text-color: #bdbdbd; + + /* override Bootstrap */ + pre { + color: #bfbfbf; + } + + .highlight .gp { + color: #818c96; + } + + /* syntax highlight colors from https://raw.githubusercontent.com/jwarby/pygments-css/master/monokai.css */ + + .highlight pre { background-color: var(--highlight-bg-color); } + .highlight .hll { background-color: var(--highlight-bg-color); } + .highlight .c { color: #75715e; } /* Comment */ + .highlight .err { color: #960050; background-color: #1e0010; } /* Error */ + .highlight .k { color: #66d9ef; } /* Keyword */ + .highlight .l { color: #ae81ff; } /* Literal */ + .highlight .n { color: #f8f8f2; } /* Name */ + .highlight .o { color: #f92672; } /* Operator */ + .highlight .p { color: #f8f8f2; } /* Punctuation */ + .highlight .cm { color: #75715e; } /* Comment.Multiline */ + .highlight .cp { color: #75715e; } /* Comment.Preproc */ + .highlight .c1 { color: #75715e; } /* Comment.Single */ + .highlight .cs { color: #75715e; } /* Comment.Special */ + .highlight .ge { color: inherit; font-style: italic; } /* Generic.Emph */ + .highlight .gs { font-weight: bold; } /* Generic.Strong */ + .highlight .kc { color: #66d9ef; } /* Keyword.Constant */ + .highlight .kd { color: #66d9ef; } /* Keyword.Declaration */ + .highlight .kn { color: #f92672; } /* Keyword.Namespace */ + .highlight .kp { color: #66d9ef; } /* Keyword.Pseudo */ + .highlight .kr { color: #66d9ef; } /* Keyword.Reserved */ + .highlight .kt { color: #66d9ef; } /* Keyword.Type */ + .highlight .ld { color: #e6db74; } /* Literal.Date */ + .highlight .m { color: #ae81ff; } /* Literal.Number */ + .highlight .s { color: #e6db74; } /* Literal.String */ + .highlight .na { color: #a6e22e; } /* Name.Attribute */ + .highlight .nb { color: #f8f8f2; } /* Name.Builtin */ + .highlight .nc { color: #a6e22e; } /* Name.Class */ + .highlight .no { color: #66d9ef; } /* Name.Constant */ + .highlight .nd { color: #a6e22e; } /* Name.Decorator */ + .highlight .ni { color: #f8f8f2; } /* Name.Entity */ + .highlight .ne { color: #a6e22e; } /* Name.Exception */ + .highlight .nf { color: #a6e22e; } /* Name.Function */ + .highlight .nl { color: #f8f8f2; } /* Name.Label */ + .highlight .nn { color: #f8f8f2; } /* Name.Namespace */ + .highlight .nx { color: #a6e22e; } /* Name.Other */ + .highlight .py { color: #f8f8f2; } /* Name.Property */ + .highlight .nt { color: #f92672; } /* Name.Tag */ + .highlight .nv { color: #f8f8f2; } /* Name.Variable */ + .highlight .ow { color: #f92672; } /* Operator.Word */ + .highlight .w { color: #f8f8f2; } /* Text.Whitespace */ + .highlight .mf { color: #ae81ff; } /* Literal.Number.Float */ + .highlight .mh { color: #ae81ff; } /* Literal.Number.Hex */ + .highlight .mi { color: #ae81ff; } /* Literal.Number.Integer */ + .highlight .mo { color: #ae81ff; } /* Literal.Number.Oct */ + .highlight .sb { color: #e6db74; } /* Literal.String.Backtick */ + .highlight .sc { color: #e6db74; } /* Literal.String.Char */ + .highlight .sd { color: #e6db74; } /* Literal.String.Doc */ + .highlight .s2 { color: #e6db74; } /* Literal.String.Double */ + .highlight .se { color: #ae81ff; } /* Literal.String.Escape */ + .highlight .sh { color: 
#e6db74; } /* Literal.String.Heredoc */ + .highlight .si { color: #e6db74; } /* Literal.String.Interpol */ + .highlight .sx { color: #e6db74; } /* Literal.String.Other */ + .highlight .sr { color: #e6db74; } /* Literal.String.Regex */ + .highlight .s1 { color: #e6db74; } /* Literal.String.Single */ + .highlight .ss { color: #e6db74; } /* Literal.String.Symbol */ + .highlight .bp { color: #f8f8f2; } /* Name.Builtin.Pseudo */ + .highlight .vc { color: #f8f8f2; } /* Name.Variable.Class */ + .highlight .vg { color: #f8f8f2; } /* Name.Variable.Global */ + .highlight .vi { color: #f8f8f2; } /* Name.Variable.Instance */ + .highlight .il { color: #ae81ff; } /* Literal.Number.Integer.Long */ + .highlight .gu { color: #75715e; } /* Generic.Subheading & Diff Unified/Comment? */ + .highlight .gd { color: #f92672; background-color: #561c08; } /* Generic.Deleted & Diff Deleted */ + .highlight .gi { color: #a6e22e; background-color: #0b5858; } /* Generic.Inserted & Diff Inserted */ +} diff --git a/_sass/colors/dark-typography.scss b/_sass/colors/dark-typography.scss new file mode 100644 index 0000000..310828e --- /dev/null +++ b/_sass/colors/dark-typography.scss @@ -0,0 +1,151 @@ +/* + * The main dark mode styles + */ + +@mixin dark-scheme { + /* Framework color */ + --main-bg: rgb(27, 27, 30); + --mask-bg: rgb(68, 69, 70); + --main-border-color: rgb(44, 45, 45); + + /* Common color */ + --text-color: rgb(175, 176, 177); + --text-muted-color: rgb(107, 116, 124); + --heading-color: #cccccc; + --blockquote-border-color: rgb(66, 66, 66); + --blockquote-text-color: rgb(117, 117, 117); + --link-color: rgb(138, 180, 248); + --link-underline-color: rgb(82, 108, 150); + --button-bg: rgb(39, 40, 43); + --btn-border-color: rgb(63, 65, 68); + --btn-backtotop-color: var(--text-color); + --btn-backtotop-border-color: var(--btn-border-color); + --btn-box-shadow: var(--main-bg); + --card-header-bg: rgb(48, 48, 48); + --label-color: rgb(108, 117, 125); + --checkbox-color: rgb(118, 120, 121); + --checkbox-checked-color: var(--link-color); + --img-bg: radial-gradient(circle, rgb(22, 22, 24) 0%, rgb(32, 32, 32) 100%); + --shimmer-bg: linear-gradient( + 90deg, + rgba(255, 255, 255, 0) 0%, + rgba(58, 55, 55, 0.4) 50%, + rgba(255, 255, 255, 0) 100% + ); + + /* Sidebar */ + --sidebar-bg: radial-gradient(circle, #242424 0%, #1d1f27 100%); + --sidebar-muted-color: #6d6c6b; + --sidebar-active-color: rgb(255, 255, 255, 0.95); + --sidebar-hover-bg: rgb(54, 54, 54, 0.33); + --sidebar-btn-bg: rgb(84, 83, 83, 0.3); + --sidebar-btn-color: #787878; + --avatar-border-color: rgb(206, 206, 206, 0.9); + + /* Topbar */ + --topbar-bg: rgb(27, 27, 30, 0.64); + --topbar-text-color: var(--text-color); + --search-wrapper-border-color: rgb(55, 55, 55); + --search-icon-color: rgb(100, 102, 105); + --input-focus-border-color: rgb(112, 114, 115); + + /* Home page */ + --post-list-text-color: rgb(175, 176, 177); + --btn-patinator-text-color: var(--text-color); + --btn-paginator-hover-color: rgb(64, 65, 66); + --btn-paginator-border-color: var(--btn-border-color); + --btn-text-color: var(--text-color); + + /* Posts */ + --toc-highlight: rgb(116, 178, 243); + --tag-bg: rgb(41, 40, 40); + --tag-hover: rgb(43, 56, 62); + --tb-odd-bg: rgba(42, 47, 53, 0.52); /* odd rows of the posts' table */ + --tb-even-bg: rgb(31, 31, 34); /* even rows of the posts' table */ + --tb-border-color: var(--tb-odd-bg); + --footnote-target-bg: rgb(63, 81, 181); + --btn-share-color: #6c757d; + --btn-share-hover-color: #bfc1ca; + --relate-post-date: var(--text-muted-color); + 
--card-bg: #1e1e1e; + --card-hovor-bg: #464d51; + --card-shadow: rgb(21, 21, 21, 0.72) 0 6px 18px 0, + rgb(137, 135, 135, 0.24) 0 0 0 1px; + --kbd-wrap-color: #6a6a6a; + --kbd-text-color: #d3d3d3; + --kbd-bg-color: #242424; + --prompt-text-color: rgb(216, 212, 212, 0.75); + --prompt-tip-bg: rgb(22, 60, 36, 0.64); + --prompt-tip-icon-color: rgb(15, 164, 15, 0.81); + --prompt-info-bg: rgb(7, 59, 104, 0.8); + --prompt-info-icon-color: #0075d1; + --prompt-warning-bg: rgb(90, 69, 3, 0.88); + --prompt-warning-icon-color: rgb(255, 165, 0, 0.8); + --prompt-danger-bg: rgb(86, 28, 8, 0.8); + --prompt-danger-icon-color: #cd0202; + + /* tags */ + --tag-border: rgb(59, 79, 88); + --tag-shadow: rgb(32, 33, 33); + --search-tag-bg: var(--tag-bg); + --dash-color: rgb(63, 65, 68); + + /* categories */ + --categories-border: rgb(64, 66, 69, 0.5); + --categories-hover-bg: rgb(73, 75, 76); + --categories-icon-hover-color: white; + + /* archives */ + --timeline-node-bg: rgb(150, 152, 156); + --timeline-color: rgb(63, 65, 68); + --timeline-year-dot-color: var(--timeline-color); + + .light { + display: none; + } + + hr { + border-color: var(--main-border-color); + } + + /* categories */ + .categories.card, + .list-group-item { + background-color: var(--card-bg); + } + + .categories { + .card-header { + background-color: var(--card-header-bg); + } + + .list-group-item { + border-left: none; + border-right: none; + padding-left: 2rem; + border-color: var(--categories-border); + + &:last-child { + border-bottom-color: var(--card-bg); + } + } + } + + #archives li:nth-child(odd) { + background-image: linear-gradient( + to left, + rgb(26, 26, 30), + rgb(39, 39, 45), + rgb(39, 39, 45), + rgb(39, 39, 45), + rgb(26, 26, 30) + ); + } + + color-scheme: dark; + + /* stylelint-disable-next-line selector-id-pattern */ + #disqus_thread { + color-scheme: none; + } +} /* dark-scheme */ diff --git a/_sass/colors/light-syntax.scss b/_sass/colors/light-syntax.scss new file mode 100644 index 0000000..040a5f5 --- /dev/null +++ b/_sass/colors/light-syntax.scss @@ -0,0 +1,83 @@ +/* + * The syntax light mode code snippet colors. 
+ */ + +@mixin light-syntax { + /* see: */ + .highlight .hll { background-color: #ffffcc; } + .highlight .c { color: #999988; font-style: italic; } /* Comment */ + .highlight .err { color: #a61717; background-color: #e3d2d2; } /* Error */ + .highlight .k { color: #000000; font-weight: bold; } /* Keyword */ + .highlight .o { color: #000000; font-weight: bold; } /* Operator */ + .highlight .cm { color: #999988; font-style: italic; } /* Comment.Multiline */ + .highlight .cp { color: #999999; font-weight: bold; font-style: italic; } /* Comment.Preproc */ + .highlight .c1 { color: #999988; font-style: italic; } /* Comment.Single */ + .highlight .cs { color: #999999; font-weight: bold; font-style: italic; } /* Comment.Special */ + .highlight .gd { color: #d01040; background-color: #ffdddd; } /* Generic.Deleted */ + .highlight .ge { color: #000000; font-style: italic; } /* Generic.Emph */ + .highlight .gr { color: #aa0000; } /* Generic.Error */ + .highlight .gh { color: #999999; } /* Generic.Heading */ + .highlight .gi { color: #008080; background-color: #ddffdd; } /* Generic.Inserted */ + .highlight .go { color: #888888; } /* Generic.Output */ + .highlight .gp { color: #555555; } /* Generic.Prompt */ + .highlight .gs { font-weight: bold; } /* Generic.Strong */ + .highlight .gu { color: #aaaaaa; } /* Generic.Subheading */ + .highlight .gt { color: #aa0000; } /* Generic.Traceback */ + .highlight .kc { color: #000000; font-weight: bold; } /* Keyword.Constant */ + .highlight .kd { color: #000000; font-weight: bold; } /* Keyword.Declaration */ + .highlight .kn { color: #000000; font-weight: bold; } /* Keyword.Namespace */ + .highlight .kp { color: #000000; font-weight: bold; } /* Keyword.Pseudo */ + .highlight .kr { color: #000000; font-weight: bold; } /* Keyword.Reserved */ + .highlight .kt { color: #445588; font-weight: bold; } /* Keyword.Type */ + .highlight .m { color: #009999; } /* Literal.Number */ + .highlight .s { color: #d01040; } /* Literal.String */ + .highlight .na { color: #008080; } /* Name.Attribute */ + .highlight .nb { color: #0086b3; } /* Name.Builtin */ + .highlight .nc { color: #445588; font-weight: bold; } /* Name.Class */ + .highlight .no { color: #008080; } /* Name.Constant */ + .highlight .nd { color: #3c5d5d; font-weight: bold; } /* Name.Decorator */ + .highlight .ni { color: #800080; } /* Name.Entity */ + .highlight .ne { color: #990000; font-weight: bold; } /* Name.Exception */ + .highlight .nf { color: #990000; font-weight: bold; } /* Name.Function */ + .highlight .nl { color: #990000; font-weight: bold; } /* Name.Label */ + .highlight .nn { color: #555555; } /* Name.Namespace */ + .highlight .nt { color: #000080; } /* Name.Tag */ + .highlight .nv { color: #008080; } /* Name.Variable */ + .highlight .ow { color: #000000; font-weight: bold; } /* Operator.Word */ + .highlight .w { color: #bbbbbb; } /* Text.Whitespace */ + .highlight .mf { color: #009999; } /* Literal.Number.Float */ + .highlight .mh { color: #009999; } /* Literal.Number.Hex */ + .highlight .mi { color: #009999; } /* Literal.Number.Integer */ + .highlight .mo { color: #009999; } /* Literal.Number.Oct */ + .highlight .sb { color: #d01040; } /* Literal.String.Backtick */ + .highlight .sc { color: #d01040; } /* Literal.String.Char */ + .highlight .sd { color: #d01040; } /* Literal.String.Doc */ + .highlight .s2 { color: #d01040; } /* Literal.String.Double */ + .highlight .se { color: #d01040; } /* Literal.String.Escape */ + .highlight .sh { color: #d01040; } /* Literal.String.Heredoc */ + .highlight .si { color: 
#d01040; } /* Literal.String.Interpol */ + .highlight .sx { color: #d01040; } /* Literal.String.Other */ + .highlight .sr { color: #009926; } /* Literal.String.Regex */ + .highlight .s1 { color: #d01040; } /* Literal.String.Single */ + .highlight .ss { color: #990073; } /* Literal.String.Symbol */ + .highlight .bp { color: #999999; } /* Name.Builtin.Pseudo */ + .highlight .vc { color: #008080; } /* Name.Variable.Class */ + .highlight .vg { color: #008080; } /* Name.Variable.Global */ + .highlight .vi { color: #008080; } /* Name.Variable.Instance */ + .highlight .il { color: #009999; } /* Literal.Number.Integer.Long */ + + /* --- custom light colors --- */ + --language-border-color: rgba(172, 169, 169, 0.2); + --highlight-bg-color: #f7f7f7; + --highlighter-rouge-color: #3f596f; + --highlight-lineno-color: #c2c6cc; + --inline-code-bg: #f6f6f7; + --code-header-text-color: #a3a3b1; + --code-header-muted-color: #ebebeb; + --code-header-icon-color: #d1d1d1; + --clipboard-checked-color: #43c743; + + [class^='prompt-'] { + --inline-code-bg: #fbfafa; + } +} /* light-syntax */ diff --git a/_sass/colors/light-typography.scss b/_sass/colors/light-typography.scss new file mode 100644 index 0000000..9fc8162 --- /dev/null +++ b/_sass/colors/light-typography.scss @@ -0,0 +1,109 @@ +/* + * The syntax light mode typography colors + */ + +@mixin light-scheme { + /* Framework color */ + --main-bg: white; + --mask-bg: #c1c3c5; + --main-border-color: #f3f3f3; + + /* Common color */ + --text-color: #34343c; + --text-muted-color: #8e8e8e; + --heading-color: black; + --blockquote-border-color: #eeeeee; + --blockquote-text-color: #9a9a9a; + --link-color: #0153ab; + --link-underline-color: #dee2e6; + --button-bg: #ffffff; + --btn-border-color: #e9ecef; + --btn-backtotop-color: #686868; + --btn-backtotop-border-color: #f1f1f1; + --btn-box-shadow: #eaeaea; + --checkbox-color: #c5c5c5; + --checkbox-checked-color: #07a8f7; + --img-bg: radial-gradient( + circle, + rgb(255, 255, 255) 0%, + rgb(239, 239, 239) 100% + ); + --shimmer-bg: linear-gradient( + 90deg, + rgba(250, 250, 250, 0) 0%, + rgba(232, 230, 230, 1) 50%, + rgba(250, 250, 250, 0) 100% + ); + + /* Sidebar */ + --sidebar-bg: #f6f8fa; + --sidebar-muted-color: #a2a19f; + --sidebar-active-color: #1d1d1d; + --sidebar-hover-bg: rgb(223, 233, 241, 0.64); + --sidebar-btn-bg: white; + --sidebar-btn-color: #8e8e8e; + --avatar-border-color: white; + + /* Topbar */ + --topbar-bg: rgb(255, 255, 255, 0.7); + --topbar-text-color: rgb(78, 78, 78); + --search-wrapper-border-color: rgb(240, 240, 240); + --search-tag-bg: #f8f9fa; + --search-icon-color: #c2c6cc; + --input-focus-border-color: #b8b8b8; + + /* Home page */ + --post-list-text-color: dimgray; + --btn-patinator-text-color: #555555; + --btn-paginator-hover-color: var(--sidebar-bg); + --btn-paginator-border-color: var(--sidebar-bg); + --btn-text-color: #676666; + + /* Posts */ + --toc-highlight: #563d7c; + --btn-share-hover-color: var(--link-color); + --card-bg: white; + --card-hovor-bg: #e2e2e2; + --card-shadow: rgb(104, 104, 104, 0.05) 0 2px 6px 0, + rgba(211, 209, 209, 0.15) 0 0 0 1px; + --label-color: #616161; + --relate-post-date: rgba(30, 55, 70, 0.4); + --footnote-target-bg: lightcyan; + --tag-bg: rgba(0, 0, 0, 0.075); + --tag-border: #dee2e6; + --tag-shadow: var(--btn-border-color); + --tag-hover: rgb(222, 226, 230); + --tb-odd-bg: #fbfcfd; + --tb-border-color: #eaeaea; + --dash-color: silver; + --kbd-wrap-color: #bdbdbd; + --kbd-text-color: var(--text-color); + --kbd-bg-color: white; + --prompt-text-color: 
rgb(46, 46, 46, 0.77); + --prompt-tip-bg: rgb(123, 247, 144, 0.2); + --prompt-tip-icon-color: #03b303; + --prompt-info-bg: #e1f5fe; + --prompt-info-icon-color: #0070cb; + --prompt-warning-bg: rgb(255, 243, 205); + --prompt-warning-icon-color: #ef9c03; + --prompt-danger-bg: rgb(248, 215, 218, 0.56); + --prompt-danger-icon-color: #df3c30; + + [class^='prompt-'] { + --link-underline-color: rgb(219, 216, 216); + } + + .dark { + display: none; + } + + /* Categories */ + --categories-border: rgba(0, 0, 0, 0.125); + --categories-hover-bg: var(--btn-border-color); + --categories-icon-hover-color: darkslategray; + + /* Archive */ + --timeline-color: rgba(0, 0, 0, 0.075); + --timeline-node-bg: #c2c6cc; + --timeline-year-dot-color: #ffffff; +} /* light-scheme */ diff --git a/_sass/jekyll-theme-chirpy.scss b/_sass/jekyll-theme-chirpy.scss new file mode 100644 index 0000000..83cf7e3 --- /dev/null +++ b/_sass/jekyll-theme-chirpy.scss @@ -0,0 +1,19 @@ +/*! + * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy) + * © 2019 Cotes Chung + * MIT Licensed + */ + +@import 'colors/light-typography'; +@import 'colors/dark-typography'; +@import 'addon/variables'; +@import 'variables-hook'; +@import 'addon/module'; +@import 'addon/syntax'; +@import 'addon/commons'; +@import 'layout/home'; +@import 'layout/post'; +@import 'layout/tags'; +@import 'layout/archives'; +@import 'layout/categories'; +@import 'layout/category-tag'; diff --git a/_sass/layout/archives.scss b/_sass/layout/archives.scss new file mode 100644 index 0000000..3a2e86b --- /dev/null +++ b/_sass/layout/archives.scss @@ -0,0 +1,144 @@ +/* + Style for Archives +*/ + +#archives { + letter-spacing: 0.03rem; + + $timeline-width: 4px; + + %timeline { + content: ''; + width: $timeline-width; + position: relative; + float: left; + background-color: var(--timeline-color); + } + + .year { + height: 3.5rem; + font-size: 1.5rem; + position: relative; + left: 2px; + margin-left: -$timeline-width; + + &::before { + @extend %timeline; + + height: 72px; + left: 79px; + bottom: 16px; + } + + &:first-child::before { + @extend %timeline; + + height: 32px; + top: 24px; + } + + /* Year dot */ + &::after { + content: ''; + display: inline-block; + position: relative; + border-radius: 50%; + width: 12px; + height: 12px; + left: 21.5px; + border: 3px solid; + background-color: var(--timeline-year-dot-color); + border-color: var(--timeline-node-bg); + box-shadow: 0 0 2px 0 #c2c6cc; + z-index: 1; + } + } + + ul { + li { + font-size: 1.1rem; + line-height: 3rem; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + + &:nth-child(odd) { + background-color: var(--main-bg, #ffffff); + background-image: linear-gradient( + to left, + #ffffff, + #fbfbfb, + #fbfbfb, + #fbfbfb, + #ffffff + ); + } + + &::before { + @extend %timeline; + + top: 0; + left: 77px; + height: 3.1rem; + } + } + + &:last-child li:last-child::before { + height: 1.5rem; + } + } /* #archives ul */ + + .date { + white-space: nowrap; + display: inline-block; + position: relative; + right: 0.5rem; + + &.month { + width: 1.4rem; + text-align: center; + } + + &.day { + font-size: 85%; + font-family: Lato, sans-serif; + } + } + + a { + /* post title in Archvies */ + margin-left: 2.5rem; + position: relative; + top: 0.1rem; + + &:hover { + border-bottom: none; + } + + &::before { + /* the dot before post title */ + content: ''; + display: inline-block; + position: relative; + border-radius: 50%; + width: 8px; + height: 8px; + float: left; + top: 1.35rem; + left: 71px; + background-color: 
var(--timeline-node-bg); + box-shadow: 0 0 3px 0 #c2c6cc; + z-index: 1; + } + } +} /* #archives */ + +@media all and (max-width: 576px) { + #archives { + margin-top: -1rem; + + ul { + letter-spacing: 0; + } + } +} diff --git a/_sass/layout/categories.scss b/_sass/layout/categories.scss new file mode 100644 index 0000000..330d3d3 --- /dev/null +++ b/_sass/layout/categories.scss @@ -0,0 +1,83 @@ +/* + Style for Tab Categories +*/ + +%category-icon-color { + color: gray; +} + +.categories { + margin-bottom: 2rem; + border-color: var(--categories-border); + + &.card, + .list-group { + @extend %rounded; + } + + .card-header { + $radius: calc($base-radius - 1px); + + padding: 0.75rem; + border-radius: $radius; + border-bottom: 0; + + &.hide-border-bottom { + border-bottom-left-radius: 0; + border-bottom-right-radius: 0; + } + } + + i { + @extend %category-icon-color; + + font-size: 86%; /* fontawesome icons */ + } + + .list-group-item { + border-left: none; + border-right: none; + padding-left: 2rem; + + &:first-child { + border-top-left-radius: 0; + border-top-right-radius: 0; + } + + &:last-child { + border-bottom: 0; + } + } +} /* .categories */ + +.category-trigger { + width: 1.7rem; + height: 1.7rem; + border-radius: 50%; + text-align: center; + color: #6c757d !important; + + i { + position: relative; + height: 0.7rem; + width: 1rem; + transition: transform 300ms ease; + } + + &:hover { + i { + color: var(--categories-icon-hover-color); + } + } +} + +/* only works on desktop */ +@media (hover: hover) { + .category-trigger:hover { + background-color: var(--categories-hover-bg); + } +} + +.rotate { + transform: rotate(-90deg); +} diff --git a/_sass/layout/category-tag.scss b/_sass/layout/category-tag.scss new file mode 100644 index 0000000..3b25db5 --- /dev/null +++ b/_sass/layout/category-tag.scss @@ -0,0 +1,77 @@ +/* + Style for page Category and Tag +*/ + +.dash { + margin: 0 0.5rem 0.6rem 0.5rem; + border-bottom: 2px dotted var(--dash-color); +} + +#page-category, +#page-tag { + ul > li { + line-height: 1.5rem; + padding: 0.6rem 0; + + /* dot */ + &::before { + background: #999999; + width: 5px; + height: 5px; + border-radius: 50%; + display: block; + content: ''; + position: relative; + top: 0.6rem; + margin-right: 0.5rem; + } + + /* post's title */ + > a { + @extend %no-bottom-border; + + font-size: 1.1rem; + } + + /* post's date */ + > span:last-child { + white-space: nowrap; + } + } +} + +/* tag icon */ +#page-tag h1 > i { + font-size: 1.2rem; +} + +#page-category h1 > i { + font-size: 1.25rem; +} + +#page-category, +#page-tag, +#access-lastmod { + a:hover { + @extend %link-hover; + + margin-bottom: -1px; /* Avoid jumping */ + } +} + +@media all and (max-width: 576px) { + #page-category, + #page-tag { + ul > li { + &::before { + margin: 0 0.5rem; + } + + > a { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + } + } + } +} diff --git a/_sass/layout/home.scss b/_sass/layout/home.scss new file mode 100644 index 0000000..499de47 --- /dev/null +++ b/_sass/layout/home.scss @@ -0,0 +1,219 @@ +/* + Style for Homepage +*/ + +#post-list { + margin-top: 2rem; + + a.card-wrapper { + display: block; + + &:hover { + text-decoration: none; + } + + &:not(:last-child) { + margin-bottom: 1.25rem; + } + } + + .card { + %img-radius { + border-radius: $base-radius $base-radius 0 0; + } + + .preview-img { + height: 10rem; + + @extend %img-radius; + + img { + width: 100%; + height: 100%; + -o-object-fit: cover; + object-fit: cover; + + @extend %img-radius; + } + } + + .card-body { + 
min-height: 10.5rem; + padding: 1rem; + + .card-title { + @extend %text-clip; + + font-size: 1.25rem; + } + + %muted { + color: var(--text-muted-color) !important; + } + + .card-text.post-content { + @extend %muted; + + p { + @extend %text-clip; + + line-height: 1.5; + margin: 0; + } + } + + .post-meta { + @extend %muted; + + i { + &:not(:first-child) { + margin-left: 1.5rem; + } + } + + em { + @extend %normal-font-style; + + color: inherit; + } + + > div:first-child { + display: block; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + } + } + } + } +} /* #post-list */ + +.pagination { + color: var(--btn-patinator-text-color); + font-family: Lato, sans-serif; + + a:hover { + text-decoration: none; + } + + .page-item { + .page-link { + color: inherit; + width: 2.5rem; + height: 2.5rem; + padding: 0; + display: -webkit-box; + -webkit-box-pack: center; + -webkit-box-align: center; + border-radius: 50%; + border: 1px solid var(--btn-paginator-border-color); + background-color: var(--button-bg); + + &:hover { + background-color: var(--btn-paginator-hover-color); + } + } + + &.active { + .page-link { + background-color: var(--btn-paginator-hover-color); + color: var(--btn-text-color); + } + } + + &.disabled { + cursor: not-allowed; + + .page-link { + color: rgba(108, 117, 125, 0.57); + border-color: var(--btn-paginator-border-color); + background-color: var(--button-bg); + } + } + + &:first-child .page-link, + &:last-child .page-link { + border-radius: 50%; + } + } /* .page-item */ +} /* .pagination */ + +/* Tablet */ +@media all and (min-width: 768px) { + #post-list { + %img-radius { + border-radius: 0 $base-radius $base-radius 0; + } + + .card { + .preview-img { + width: 20rem; + height: 11.55rem; // can hold 2 lines each for title and content + } + + .card-body { + min-height: 10.75rem; + width: 60%; + padding: 1.75rem 1.75rem 1.25rem 1.75rem; + + .card-text { + display: inherit !important; + } + + .post-meta { + i { + &:not(:first-child) { + margin-left: 1.75rem; + } + } + } + } + } + } +} + +/* Hide SideBar and TOC */ +@media all and (max-width: 830px) { + .pagination { + justify-content: space-evenly; + + .page-item { + &:not(:first-child):not(:last-child) { + display: none; + } + } + } +} + +/* Sidebar is visible */ +@media all and (min-width: 831px) { + #post-list { + margin-top: 2.5rem; + } + + .pagination { + font-size: 0.85rem; + + .page-item { + &:not(:last-child) { + margin-right: 0.7rem; + } + + .page-link { + width: 2rem; + height: 2rem; + } + } + + .page-index { + display: none; + } + } /* .pagination */ +} + +/* Panel is visible */ +@media all and (min-width: 1200px) { + #post-list { + padding-right: 0.5rem; + } +} diff --git a/_sass/layout/post.scss b/_sass/layout/post.scss new file mode 100644 index 0000000..3d01b4d --- /dev/null +++ b/_sass/layout/post.scss @@ -0,0 +1,417 @@ +/* + Post-specific style +*/ + +@mixin btn-sharing-color($light-color, $important: false) { + @if $important { + color: var(--btn-share-color, $light-color) !important; + } @else { + color: var(--btn-share-color, $light-color); + } +} + +%btn-post-nav { + width: 50%; + position: relative; + border-color: var(--btn-border-color); +} + +@mixin dot($pl: 0.25rem, $pr: 0.25rem) { + content: '\2022'; + padding-left: $pl; + padding-right: $pr; +} + +%text-color { + color: var(--text-color); +} + +.preview-img { + overflow: hidden; + aspect-ratio: 40 / 21; + + @extend %rounded; + + &:not(.no-bg) { + img.lazyloaded { + background: var(--img-bg); + } + } + + img { + -o-object-fit: cover; + 
object-fit: cover; + + @extend %rounded; + } +} + +h1 + .post-meta { + span + span::before { + @include dot; + } + + em { + @extend %text-color; + + a { + @extend %text-color; + } + } +} + +.post-tail-wrapper { + margin-top: 6rem; + border-bottom: 1px double var(--main-border-color); + font-size: 0.85rem; + + .post-tail-bottom a { + color: inherit; + } + + .license-wrapper { + line-height: 1.2rem; + + > a { + color: var(--text-color); + + &:hover { + @extend %link-hover; + } + } + + span:last-child { + font-size: 0.85rem; + } + } /* .license-wrapper */ + + .post-meta a:not(:hover) { + @extend %link-underline; + } + + .share-wrapper { + vertical-align: middle; + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; + + .share-icons { + font-size: 1.2rem; + + > i { + position: relative; + bottom: 1px; + + @extend %cursor-pointer; + + &:hover { + @extend %btn-share-hovor; + } + } + + a { + &:not(:last-child) { + margin-right: 0.25rem; + } + + &:hover { + text-decoration: none; + + > i { + @extend %btn-share-hovor; + } + } + } + + .fab { + &.fa-twitter { + @include btn-sharing-color(rgba(29, 161, 242, 1)); + } + + &.fa-facebook-square { + @include btn-sharing-color(rgb(66, 95, 156)); + } + + &.fa-telegram { + @include btn-sharing-color(rgb(39, 159, 217)); + } + + &.fa-linkedin { + @include btn-sharing-color(rgb(0, 119, 181)); + } + + &.fa-weibo { + @include btn-sharing-color(rgb(229, 20, 43)); + } + } + } /* .share-icons */ + + .fas.fa-link { + @include btn-sharing-color(rgb(171, 171, 171)); + } + } /* .share-wrapper */ +} + +.post-tags { + line-height: 2rem; + + .post-tag { + background: var(--tag-bg); + + &:hover { + @extend %link-hover; + @extend %tag-hover; + @extend %no-bottom-border; + } + } +} + +.post-navigation { + padding-top: 3rem; + padding-bottom: 4rem; + + .btn { + @extend %btn-post-nav; + + &:not(:hover) { + color: var(--link-color); + } + + &:hover { + &:not(.disabled)::before { + color: whitesmoke; + } + } + + &.disabled { + @extend %btn-post-nav; + + pointer-events: auto; + cursor: not-allowed; + background: none; + color: gray; + } + + &.btn-outline-primary.disabled:focus { + box-shadow: none; + } + + &::before { + color: var(--text-muted-color); + font-size: 0.65rem; + text-transform: uppercase; + content: attr(prompt); + } + + &:first-child { + border-radius: $base-radius 0 0 $base-radius; + left: 0.5px; + } + + &:last-child { + border-radius: 0 $base-radius $base-radius 0; + right: 0.5px; + } + } + + p { + font-size: 1.1rem; + line-height: 1.5rem; + margin-top: 0.3rem; + white-space: normal; + } +} /* .post-navigation */ + +@media (hover: hover) { + .post-navigation { + .btn, + .btn::before { + transition: all 0.35s ease-in-out; + } + } +} + +@-webkit-keyframes fade-up { + from { + opacity: 0; + position: relative; + top: 2rem; + } + + to { + opacity: 1; + position: relative; + top: 0; + } +} + +@keyframes fade-up { + from { + opacity: 0; + position: relative; + top: 2rem; + } + + to { + opacity: 1; + position: relative; + top: 0; + } +} + +#toc-wrapper { + border-left: 1px solid rgba(158, 158, 158, 0.17); + position: -webkit-sticky; + position: sticky; + top: 4rem; + transition: top 0.2s ease-in-out; + -webkit-animation: fade-up 0.8s; + animation: fade-up 0.8s; + + ul { + list-style: none; + font-size: 0.85rem; + line-height: 1.25; + padding-left: 0; + + li { + &:not(:last-child) { + margin: 0.4rem 0; + } + + a { + padding: 0.2rem 0 0.2rem 1.25rem; + } + } + + /* Overwrite TOC plugin style */ + + .toc-link { + display: block; 
+ white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + + &:hover { + color: var(--toc-highlight); + text-decoration: none; + } + + &::before { + display: none; + } + } + + .is-active-link { + color: var(--toc-highlight) !important; + font-weight: 600; + + &::before { + display: inline-block; + width: 1px; + left: -1px; + height: 1.25rem; + background-color: var(--toc-highlight) !important; + } + } + + ul { + a { + padding-left: 2rem; + } + } + } +} + +/* --- Related Posts --- */ + +#related-posts { + > h3 { + @include label(1.1rem, 600); + } + + em { + @extend %normal-font-style; + + color: var(--relate-post-date); + } + + p { + font-size: 0.9rem; + margin-bottom: 0.5rem; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; + } + + .card { + h4 { + @extend %text-color; + @extend %text-clip; + } + } +} + +#tail-wrapper { + min-height: 2rem; + + > div:last-of-type { + margin-bottom: 2rem; + } + + /* stylelint-disable-next-line selector-id-pattern */ + #disqus_thread { + min-height: 8.5rem; + } +} + +%btn-share-hovor { + color: var(--btn-share-hover-color) !important; +} + +.share-label { + @include label(inherit, 400, inherit); + + &::after { + content: ':'; + } +} + +@media all and (max-width: 576px) { + .preview-img[data-src] { + margin-top: 2.2rem; + } + + .post-tail-bottom { + flex-wrap: wrap-reverse !important; + + > div:first-child { + width: 100%; + margin-top: 1rem; + } + } +} + +@media all and (max-width: 768px) { + .post-content > p > img { + max-width: calc(100% + 1rem); + } +} + +/* Hide SideBar and TOC */ +@media all and (max-width: 849px) { + .post-navigation { + padding-left: 0; + padding-right: 0; + margin-left: -0.5rem; + margin-right: -0.5rem; + } + + .preview-img[data-src] { + max-width: 100vw; + border-radius: 0; + } +} diff --git a/_sass/layout/tags.scss b/_sass/layout/tags.scss new file mode 100644 index 0000000..4cf5d3b --- /dev/null +++ b/_sass/layout/tags.scss @@ -0,0 +1,19 @@ +/* + Styles for Tab Tags +*/ + +.tag { + border-radius: 0.7em; + padding: 6px 8px 7px; + margin-right: 0.8rem; + line-height: 3rem; + letter-spacing: 0; + border: 1px solid var(--tag-border) !important; + box-shadow: 0 0 3px 0 var(--tag-shadow); + + span { + margin-left: 0.6em; + font-size: 0.7em; + font-family: Oswald, sans-serif; + } +} diff --git a/_sass/variables-hook.scss b/_sass/variables-hook.scss new file mode 100644 index 0000000..f27e0eb --- /dev/null +++ b/_sass/variables-hook.scss @@ -0,0 +1,3 @@ +/* + Appending custom SCSS variables will override the default ones in `_sass/addon/variables.scsss` +*/ diff --git a/_tabs/projects.md b/_tabs/projects.md new file mode 100644 index 0000000..826d49b --- /dev/null +++ b/_tabs/projects.md @@ -0,0 +1,5 @@ +--- +# the default layout is 'page' +icon: fas fa-diagram-project +order: 3 +--- diff --git a/_tabs/resume.md b/_tabs/resume.md new file mode 100644 index 0000000..5f94c5c --- /dev/null +++ b/_tabs/resume.md @@ -0,0 +1,11 @@ +--- +# the default layout is 'page' +icon: fas fa-file +order: 2 +--- + +
+ +
diff --git a/assets/404.html b/assets/404.html new file mode 100644 index 0000000..0337bac --- /dev/null +++ b/assets/404.html @@ -0,0 +1,16 @@ +--- +layout: page +title: "404: Page not found" +permalink: /404.html + +redirect_from: + - /norobots/ + - /assets/ + - /posts/ +--- + +{% include lang.html %} + +
+{{ site.data.locales[lang].not_found.statment }}
diff --git a/assets/Shameek-Agarwal-Resume.pdf b/assets/Shameek-Agarwal-Resume.pdf new file mode 100644 index 0000000..ccc4881 Binary files /dev/null and b/assets/Shameek-Agarwal-Resume.pdf differ diff --git a/assets/css/style.scss b/assets/css/style.scss new file mode 100644 index 0000000..a8805f4 --- /dev/null +++ b/assets/css/style.scss @@ -0,0 +1,6 @@ +--- +--- + +@import '{{ site.theme }}'; + +/* append your custom style below */ diff --git a/assets/feed.xml b/assets/feed.xml new file mode 100644 index 0000000..a244a56 --- /dev/null +++ b/assets/feed.xml @@ -0,0 +1,61 @@ +--- +layout: compress +permalink: /feed.xml +# Atom Feed, reference: https://validator.w3.org/feed/docs/atom.html +--- + +{% capture source %} + + {{ "/" | absolute_url }} + {{ site.title }} + {{ site.description }} + {{ site.time | date_to_xmlschema }} + + {{ site.social.name }} + {{ "/" | absolute_url }} + + + + Jekyll + © {{ 'now' | date: '%Y' }} {{ site.social.name }} + {{ site.baseurl }}/assets/img/favicons/favicon.ico + {{ site.baseurl }}/assets/img/favicons/favicon-96x96.png + +{% for post in site.posts limit: 5 %} + {% assign post_absolute_url = post.url | absolute_url %} + + {{ post.title }} + + {{ post.date | date_to_xmlschema }} + {% if post.last_modified_at %} + {{ post.last_modified_at | date_to_xmlschema }} + {% else %} + {{ post.date | date_to_xmlschema }} + {% endif %} + {{ post_absolute_url }} + + + {{ post.author | default: site.social.name }} + + + {% if post.categories %} + {% for category in post.categories %} + + {% endfor %} + {% endif %} + + {% if post.summary %} + {{ post.summary | strip }} + {% else %} + + {% include no-linenos.html content=post.content %} + {{ content | strip_html | truncate: 400 }} + + {% endif %} + + +{% endfor %} + +{% endcapture %} +{{ source | replace: '&', '&' }} diff --git a/assets/img/docker-and-kubernetes/docker-vs-vm.drawio b/assets/img/docker-and-kubernetes/docker-vs-vm.drawio new file mode 100644 index 0000000..43b9d1e --- /dev/null +++ b/assets/img/docker-and-kubernetes/docker-vs-vm.drawio @@ -0,0 +1 @@ +5Zpdb5swFIZ/TS4ngd2Q5LJN03YXVbVF2rRLFxywZmxkTBP263dYDAmYqllUCg03kTk2OH5e+3yETPAy3t0rkkSPMqB8gpxgN8G3E4Tm3hw+C0O+N0xdb28IFQv2JvdgWLM/1BgdY81YQNPaQC0l1yypG30pBPV1zUaUktv6sI3k9VkTElLLsPYJt60/WaAjsyw0O9gfKAujcmbXW+x7YlIONitJIxLI7ZEJryZ4qaTU+1a8W1JesCu57O+7e6W3+mKKCn3KDcHT/TeZ5Rv9dDdDYR59Z+TXF/OUF8Izs+AJ8jg87yZNiIB2WLS/io0iqVaZrzNFyxEw1fEgs0qdl+iUzERAi9kd6N5GTNN1Qvyidwt7BWyRjjlcudXdL1Rpunt1gW6FDbYblTHVKoch5oYrA9rsNDQ319uDbq5nbNGRZuV9xGyVsHrygSY0DND/gOudBPcpoYpoJkIYus5TTePPAhj1DXhuAX7IgeYLS6UaGq2KTG+0Spd6EYe9Ot2DOe0uasE7NEq9H1kXW5TaHGCDGxDQdTiwQeVvupQcTjq+FVLAyJsN47xhIpyFAi59IEbBflPwZBDfr01HzIKgmKZVjbpe7yHIoinI1BbkqkUQ3JkgVyd5haUUmjABCJGzEiG0PotnGIDjbUsEeqY0rUNq8Z5tiLzOEM0sRCRJLtcPnIcfdYbfzqSeGRxpeJrD2fNodHCdvoVYWEKEGU01mGQ6Hhla0pQPlQHZdfGPx6H57Ja49rGQBpjwunhYcQ3Z6e5Fx7Uz+Xe3R+3sdhyBrSlE75GtLHRGFtksHXoPbXYxMoDQ1sTUe2yz65HeGTV/Ozj1SHdW1yK7arjo4HauAN3tUrtaGEd0s5To26tiu2A4+sVsYI6j76QMD7BwwLOBOVc8ssrhXAG626VjLR0sJXp3rnbKOhzn2qTVu3O1M9civ3eulV+wMC91L3XvNsTA2Fajciwf8noN2zny8au0caiCFtO3ZcHvIwtcHv5T9a/v6I9pePUX \ No newline at end of file diff --git a/assets/img/docker-and-kubernetes/docker-vs-vm.drawio.png b/assets/img/docker-and-kubernetes/docker-vs-vm.drawio.png new file mode 100644 index 0000000..5bbd5a3 Binary files /dev/null and b/assets/img/docker-and-kubernetes/docker-vs-vm.drawio.png differ diff --git a/assets/img/docker-and-kubernetes/networks.drawio b/assets/img/docker-and-kubernetes/networks.drawio new file mode 100644 index 0000000..2bff8ce --- /dev/null +++ 
b/assets/img/docker-and-kubernetes/networks.drawio @@ -0,0 +1 @@ +3ZbdjpswEIWfhsuusElIctnQbFeqKm0VVb12sQNWjI2MKaFP33EwEC9s1UpNhVaRIvvM+O87AziIkuLyUZMy/6woEwEO6SWIPgQYb+Mt/Fuh7YQ1ijsh05x2EhqFI//JnBg6teaUVV6iUUoYXvpiqqRkqfE0orVq/LSTEv6qJcnYRDimREzVb5ya3B0Lb0b9ifEs71dG8a6LFKRPdiepckJVcyNFhyBKtFKmaxWXhAnLrufSjXt8JTpsTDNp/mQATb4envnzU9a0fPdllXw/40/v3Cw/iKjdgd1mTdsT0KqWlNlJwiDaNzk37FiS1EYbsBy03BQCegiabjqmDbu8uk80nB6qhqmCGd1Cihuwcrxav9uM9FGPNL8lv3EicY5nw8wjFGg4Ln/BCE8YofABwQ8WfMSrCTCYEqoTOnuwvLRiKlRN/wu8tQ9vPQMvnIF3N3bR8uoLh0srsNV8geElFtjwYl5Kha2n8Hb4AcVb+4iiCTs4ufGhVEarM0uUUBoUqaQFe+JCvJCI4Jm0rIEUA31vOXL4Urx3gYJTapeZdcGv8n9gxMb3YfDlxoh4xofoXj7Ev/Nh+tS/GR8GoksxYjMxwuTMDrK4JLHjJTON0mcrVterCrUJ6uSFJClYZeFBSmIn5NrU1+FVw02adyoz6dv19sXXFEX3e9lBd7wKXmM39+no8As= \ No newline at end of file diff --git a/assets/img/docker-and-kubernetes/networks.drawio.png b/assets/img/docker-and-kubernetes/networks.drawio.png new file mode 100644 index 0000000..ee70b6a Binary files /dev/null and b/assets/img/docker-and-kubernetes/networks.drawio.png differ diff --git a/assets/img/docker-and-kubernetes/pod-creation.drawio b/assets/img/docker-and-kubernetes/pod-creation.drawio new file mode 100644 index 0000000..bd4042d --- /dev/null +++ b/assets/img/docker-and-kubernetes/pod-creation.drawio @@ -0,0 +1 @@ +mxfile host="Electron" modified="2023-06-30T11:18:18.069Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.3 Chrome/102.0.5005.63 Electron/19.0.3 Safari/537.36" etag="IFS0dK-y9SUW6tHiZXQp" version="19.0.3" type="device">5ZrNctowEICfhmM72MIQjglJfw7tpENn2h5Va4M1CMuR5QB9+kpYNlgyCSE4ZsyFQWt5Je/ut7sy9NBksfoscBJ94wRYz++TVQ/d9nz/anilPrVgnQsCb5gLZoKSXORtBVP6D4ywb6QZJZBWJkrOmaRJVRjyOIZQVmRYCL6sTnvgrLpqgmfgCKYhZq70FyUyMo/lj7byL0BnUbGyNxznVxa4mGyeJI0w4csdEbrroYngXObfFqsJMG27wi75fZ/2XC03JiCWh9zwdbS8i39+J17qzeePD9Pk/sf0g9HyhFlmHnie/VV2ZGbPcl0YQvAsJqB19XvoZhlRCdMEh/rqUnleySK5YGrkqa9GKwgJq73b9UojqOABvgAp1mqKuWFgzLauDpc7TjCiaMf+xTRs3D4r9W4to74Y47zCUL5jKMdCEJNrHXFqFPMYqhapmk/ZQKx/68HHoBj+MTM3g9vV7szbtRnlawJxYtayqoIEixnIlzzvWn/HvkGNfQuZAIYlfapuo87oZoV7TtUGS+deWc613ZbyTIRgbtqNaUuPF1QVeSNLUW4GR9EmBMqnPj4qkBMVOKE6KkCo4G+dIW94XhANugYRahMiv38iivxRuxQFTlikYQQkY2eAkJOqWkZo2DWEgjYRQv6JEELjdhEaOWGh+zgGsnWABnaWahmgq64BNGoToIHdph8LUOC1C9DYCQvCw/kZFKDATlEt81P4qTsAjdsEKLBb9GMBGqJ2AfLcVwnPxUXIcJrS8LnQONTFZ3KI9dCRnrP7eEdR057zHc8JeMwg1XuUXH2EArAEvSNtf04cx6r0JquuTKXgc5hwxsU2CzxQxiwRZnQW63hQPlXZFt3oZElDzK7NhQUlRC9Tm2rrAuZtJ2bLF+PDsq3tsdNl29cdmTtAld2SO3nsaKzeOyG6R7UllmHU02+hVV/Rz2LtrFmsXLOhKu0uVr7d3SGXqxK99wHLPTFdGFj+8ERgOYqaBss9wuUg6XIVQVGi8tKlK1bMCWzMLDV8mKjilpezMMm0INazF7DgJq4uA8Ga0laLoO3d0yHonrk6jqB9Wi6RfCuCjqKGESyW21vb8nLW3ylwGxqpTEseuwoaGuxJjy/0kEFTnBVYXS5n5TvIt3LmKGqaM/cHxiwhefHKi1wqscw63DnaNKEDaWqscfQv7kTmVK1jG0f7XZejqGmaan5oTKhuG7Ok7B1DHktM45o3v51hamD7wW+MKTXc/sMp9+P2b2Lo7j8= \ No newline at end of file diff --git a/assets/img/docker-and-kubernetes/pod-creation.drawio.png b/assets/img/docker-and-kubernetes/pod-creation.drawio.png new file mode 100644 index 0000000..80692bd Binary files /dev/null and b/assets/img/docker-and-kubernetes/pod-creation.drawio.png differ diff --git a/assets/img/docker-and-kubernetes/replica-set-creation.drawio b/assets/img/docker-and-kubernetes/replica-set-creation.drawio new file mode 100644 index 0000000..e5c4fa4 --- /dev/null +++ b/assets/img/docker-and-kubernetes/replica-set-creation.drawio @@ -0,0 +1 @@ 
+5VhNU9swEP01PjYT2SQkR0iA9tCZDsy05SisTSyQrVRWcNJfX8mWPySZFlJCmHDJaFfyavXe7mqjIJqlmyuBV8lXToAF4ZBsgmgehOFkPFG/WrGtFCM0rhRLQUmlQq3ihv4Goxwa7ZoSyK2FknMm6cpWxjzLIJaWDgvBC3vZgjN71xVegqe4iTHztT8okYk5Vnja6j8DXSb1zmg8rWZSXC82J8kTTHjRUUUXQTQTnMtqlG5mwDR2NS7Vd5dPzDaOCcjkcz4oRleT++js+vvtff5lO59eTxZ3n4yVR8zW5sAP6zuFIzM+y20NhODrjIC2NQyi8yKhEm5WONazhWJe6RKZMiUhNTRWQUjYPOkuakBQwQM8BSm2aon54MTAtrXFokOCUSUd/Otl2NC+bOy2yKiBAecFQIUeUHhFNUIg1DEPjhYavy+4Ig8uAStGY1xCJquElYIz9g7Aa6B5J+CdeOB5EEFGznR1U1LGM7AhsfGDDZU/9XgwMtKtWafH801n2XxbCxm5pNrpUqp2B+JVSgdg5SFfixj+VW98IjpIj3qQrnUCGJb00XajD36zwzdOM9nhObR5bi6Z2kTlvvmqW0odQ068NHJtR2KxBOnZKYOhOfXu8TE63vgIDxkfU5vW6Y7REQ4PGx7j4w2P6KDlA71OfERuGXrj+Dh9UXzEDOc5jf8WIs8l+FDEOeUauRf3rontGdozcROPOAG/1pDL8k+R7qkEYAnaoRI0q+FyGFbdkrQ5zVU79gAzzrhoy8JCpbGjwowuMx0YilzVvEXnuvdSG7EzM5FSQvQ2vZ1bX+T8V/PmFG009Ju3RteNrWhf3dv0o6WXW8/Qa12cnqE951dd3zvUFVjGSaDfDYSbUvnx5lSTHDURIz+pTt8yp5D/TvHBkiocv1JSeYb2nVT+y0lzTbX5lPGB+uULTQEnenh3D/FR55jLC+q5uMKeANvbswPqe7Qpm4uSF6pHNNe9RqLJy3EKlRs1ZyWvlGeDweCIeXMajkbu8Bb28bZDcVRi+z5cJWT7yB5d/AE= \ No newline at end of file diff --git a/assets/img/docker-and-kubernetes/replica-set-creation.drawio.png b/assets/img/docker-and-kubernetes/replica-set-creation.drawio.png new file mode 100644 index 0000000..e28bdae Binary files /dev/null and b/assets/img/docker-and-kubernetes/replica-set-creation.drawio.png differ diff --git a/assets/img/docker-and-kubernetes/service-creation.drawio b/assets/img/docker-and-kubernetes/service-creation.drawio new file mode 100644 index 0000000..25fd95b --- /dev/null +++ b/assets/img/docker-and-kubernetes/service-creation.drawio @@ -0,0 +1 @@ +5VrRbpswFP2aSNvDJsBAk8c2addNnTSpD1v35oAb3Do4M6ZJ+vUzwQTwJW3ahhAlL1F8AWPfc459LtBDw+nim8Cz6CcPCes5VrjooVHPcfp+X/1mgWUe8Gw/D0wEDfOQXQZu6TPRQUtHUxqSpHai5JxJOqsHAx7HJJC1GBaCz+un3XNWv+sMTwgI3AaYwehvGspIT8s5K+PXhE6i4s62P8iPTHFxsp5JEuGQzyshdNlDQ8G5zP9NF0PCstwVecmvu9pwdD0wQWK5zQX94O/3ufU7vXueW8jBN9c/xOiLq8cml8WESajmr5tcyIhPeIzZZRm9EDyNQ5L1aqlWec4N5zMVtFXwgUi51GDiVHIViuSU6aMkDs8zaFQz5jHJI1eUMd1lPqZsIBunqkMJT0VAXpifvU60IijhUyLFUl0nCMOSPtX7x5oqk/V560t/caru7FgFqzWkmtNu0S56kFhMiNQXlZCoP5VRlKEVUG8ATY/3CbNUz+AxHSvyMwBmHap5RCW5neFVxuZKrnVYdK9ESLJ4OfEwofoC10iMbs4rytGhqCIa19oMQC13b02Uf+TsPmuF3Y7VLb3PAL3xjGbpIEKRs3OO2/5hkbx/5CQftEJy5HRL8gEguUrgTI9RmRkpOGMHwHaw13XM9sIXHi3d7XYsi2tuzfv2LM2mJUuS4Itl5zx3zU2va547x85z1ArP/a55jjbxPIyTzlnuHxrLXZAtSPuSkwHDSUKDelrqOdyWoTBFlRx4DTkoYrstHW3PSG2uG0DPV0066KhtnnsAOUH+pSTJxqiWFWViBMEyIz7W3p0qXpvgKirLOpyJcj6PZMgZF+UqdK+WICOEGZ3EGScUrsonoYtMGDTA7FwfmNIwXK2KTbJqIs2HlDUw4LChsmyrgVaoNWn5pyYt09Lbg11py+yobW3BCniOZRD1sueX4hTUtH78WyDgdy6n/qnLydnVVgU6altOsNZe702VopuPH0ggoUc7WlE5ne9RDiwKj1xUZhnumOXJu0W15zqnQGrjHnVKykImqg11VaOyWiusHFiGflBZKhVi+afauMsaX72iOVpUD46Wb1SkqhpWjH1pUt4hSRfZ75SuO3ilo7alC0s3Ous5PsvEOFba9SfZP4nHDFrMfT+x8EwX0ofSchtA91tTFnQTOAxh+or1L4GHcBxmdEwZaTi4qp034nHEi6jrGUgjiLTfgLS58e0M6eL+p+NOzIenaFfuBHTU8hKHoLE8sQraM6H0trQkrZl9BB3jnuS0hbVA7iHpzjVB2FZ35m4JOmpbd9B2rkvt1fsPlamAi/CIdYcMBKwG3dl71R18x7KCwvQXxec1ZvxT8rhUF3wGmO39FS348GY7g/AOK6ia5YeZuTjKr1vR5X8= \ No newline at end of file diff --git a/assets/img/docker-and-kubernetes/service-creation.drawio.png b/assets/img/docker-and-kubernetes/service-creation.drawio.png new file mode 100644 index 0000000..b8818ff Binary files /dev/null and b/assets/img/docker-and-kubernetes/service-creation.drawio.png differ diff --git a/assets/img/elasticsearch/data-table.png b/assets/img/elasticsearch/data-table.png new file mode 100644 index 0000000..b25a877 Binary files /dev/null and b/assets/img/elasticsearch/data-table.png differ diff --git 
a/assets/img/elasticsearch/discover.drawio b/assets/img/elasticsearch/discover.drawio new file mode 100644 index 0000000..41380df --- /dev/null +++ b/assets/img/elasticsearch/discover.drawio @@ -0,0 +1,31 @@ diff --git a/assets/img/elasticsearch/discover.drawio.png b/assets/img/elasticsearch/discover.drawio.png new file mode 100644 index 0000000..795dd36 Binary files /dev/null and b/assets/img/elasticsearch/discover.drawio.png differ diff --git a/assets/img/elasticsearch/filters.png b/assets/img/elasticsearch/filters.png new file mode 100644 index 0000000..922e6b1 Binary files /dev/null and b/assets/img/elasticsearch/filters.png differ diff --git a/assets/img/elasticsearch/heat-map.png b/assets/img/elasticsearch/heat-map.png new file mode 100644 index 0000000..3d941cb Binary files /dev/null and b/assets/img/elasticsearch/heat-map.png differ diff --git a/assets/img/elasticsearch/interactivity.png b/assets/img/elasticsearch/interactivity.png new file mode 100644 index 0000000..d97efe9 Binary files /dev/null and b/assets/img/elasticsearch/interactivity.png differ diff --git a/assets/img/elasticsearch/line-chart.png b/assets/img/elasticsearch/line-chart.png new file mode 100644 index 0000000..ab3d294 Binary files /dev/null and b/assets/img/elasticsearch/line-chart.png differ diff --git a/assets/img/elasticsearch/metrics.png b/assets/img/elasticsearch/metrics.png new file mode 100644 index 0000000..2f6e0e5 Binary files /dev/null and b/assets/img/elasticsearch/metrics.png differ diff --git a/assets/img/elasticsearch/tag-clouds.png b/assets/img/elasticsearch/tag-clouds.png new file mode 100644 index 0000000..4ebac0c Binary files /dev/null and b/assets/img/elasticsearch/tag-clouds.png differ diff --git a/assets/img/favicons/android-chrome-192x192.png b/assets/img/favicons/android-chrome-192x192.png new file mode 100644 index 0000000..a949d2f Binary files /dev/null and b/assets/img/favicons/android-chrome-192x192.png differ diff --git a/assets/img/favicons/android-chrome-512x512.png b/assets/img/favicons/android-chrome-512x512.png new file mode 100644 index 0000000..a0cdd95 Binary files /dev/null and b/assets/img/favicons/android-chrome-512x512.png differ diff --git a/assets/img/favicons/apple-touch-icon.png b/assets/img/favicons/apple-touch-icon.png new file mode 100644 index 0000000..648097f Binary files /dev/null and b/assets/img/favicons/apple-touch-icon.png differ diff --git a/assets/img/favicons/browserconfig.xml b/assets/img/favicons/browserconfig.xml new file mode 100644 index 0000000..a02a5c7 --- /dev/null +++ b/assets/img/favicons/browserconfig.xml @@ -0,0 +1,13 @@ +--- +layout: compress +--- + #da532c diff --git a/assets/img/favicons/favicon-16x16.png b/assets/img/favicons/favicon-16x16.png new file mode 100644 index 0000000..f44237a Binary files /dev/null and b/assets/img/favicons/favicon-16x16.png differ diff --git a/assets/img/favicons/favicon-32x32.png b/assets/img/favicons/favicon-32x32.png new file mode 100644 index 0000000..d5d021d Binary files /dev/null and b/assets/img/favicons/favicon-32x32.png differ diff --git a/assets/img/favicons/favicon.ico b/assets/img/favicons/favicon.ico new file mode 100644 index 0000000..5611568 Binary files /dev/null and b/assets/img/favicons/favicon.ico differ diff --git a/assets/img/favicons/mstile-150x150.png b/assets/img/favicons/mstile-150x150.png new file mode 100644 index 0000000..c0d045e Binary files /dev/null and b/assets/img/favicons/mstile-150x150.png differ
diff --git a/assets/img/favicons/site.webmanifest b/assets/img/favicons/site.webmanifest new file mode 100644 index 0000000..03c6113 --- /dev/null +++ b/assets/img/favicons/site.webmanifest @@ -0,0 +1,26 @@ +--- +layout: compress +--- + +{% assign favicon_path = "/assets/img/favicons" | relative_url %} + +{ + "name": "{{ site.title }}", + "short_name": "{{ site.title }}", + "description": "{{ site.description }}", + "icons": [ + { + "src": "{{ favicon_path }}/android-chrome-192x192.png", + "sizes": "192x192", + "type": "image/png" + }, + { + "src": "{{ favicon_path }}/android-chrome-512x512.png", + "sizes": "512x512", + "type": "image/png" + }], + "start_url": "{{ '/index.html' | relative_url }}", + "theme_color": "#2a1e6b", + "background_color": "#ffffff", + "display": "fullscreen" +} diff --git a/assets/img/hadoop/hadoop1.x.drawio b/assets/img/hadoop/hadoop1.x.drawio new file mode 100644 index 0000000..37348e0 --- /dev/null +++ b/assets/img/hadoop/hadoop1.x.drawio @@ -0,0 +1,70 @@ diff --git a/assets/img/hadoop/hadoop1.x.drawio.png b/assets/img/hadoop/hadoop1.x.drawio.png new file mode 100644 index 0000000..a6e8323 Binary files /dev/null and b/assets/img/hadoop/hadoop1.x.drawio.png differ diff --git a/assets/img/hadoop/hadoop2.x.drawio b/assets/img/hadoop/hadoop2.x.drawio new file mode 100644 index 0000000..7dc200b --- /dev/null +++ b/assets/img/hadoop/hadoop2.x.drawio @@ -0,0 +1,70 @@ diff --git a/assets/img/hadoop/hadoop2.x.drawio.png b/assets/img/hadoop/hadoop2.x.drawio.png new file mode 100644 index 0000000..3cc44da Binary files /dev/null and b/assets/img/hadoop/hadoop2.x.drawio.png differ diff --git a/assets/img/hadoop/hdfs.drawio b/assets/img/hadoop/hdfs.drawio new file mode 100644 index 0000000..b104b4d --- /dev/null +++ b/assets/img/hadoop/hdfs.drawio @@ -0,0 +1,86 @@ diff --git a/assets/img/hadoop/hdfs.drawio.png b/assets/img/hadoop/hdfs.drawio.png new file mode 100644 index 0000000..a01b444 Binary files /dev/null and b/assets/img/hadoop/hdfs.drawio.png differ diff --git a/assets/img/high-level-design/2-phase-locking.png b/assets/img/high-level-design/2-phase-locking.png new file mode 100644 index 0000000..dd5f630 Binary files /dev/null and b/assets/img/high-level-design/2-phase-locking.png differ diff --git a/assets/img/high-level-design/anti-corruption-adapter-layer-pattern.png b/assets/img/high-level-design/anti-corruption-adapter-layer-pattern.png new file mode 100644 index 0000000..a58fd08 Binary files /dev/null and b/assets/img/high-level-design/anti-corruption-adapter-layer-pattern.png differ diff --git a/assets/img/high-level-design/b+-tree.png b/assets/img/high-level-design/b+-tree.png new file mode 100644 index 0000000..3db915c Binary files /dev/null and b/assets/img/high-level-design/b+-tree.png differ diff --git a/assets/img/high-level-design/backends-for-frontends-pattern.png b/assets/img/high-level-design/backends-for-frontends-pattern.png new file mode 100644 index 0000000..9cdf2cc Binary files /dev/null and b/assets/img/high-level-design/backends-for-frontends-pattern.png differ diff --git
a/assets/img/high-level-design/blue-green-deployment-pattern.png b/assets/img/high-level-design/blue-green-deployment-pattern.png new file mode 100644 index 0000000..6b7df6c Binary files /dev/null and b/assets/img/high-level-design/blue-green-deployment-pattern.png differ diff --git a/assets/img/high-level-design/canary-testing.png b/assets/img/high-level-design/canary-testing.png new file mode 100644 index 0000000..4b80cae Binary files /dev/null and b/assets/img/high-level-design/canary-testing.png differ diff --git a/assets/img/high-level-design/cap-theorem-introduction.excalidraw b/assets/img/high-level-design/cap-theorem-introduction.excalidraw new file mode 100644 index 0000000..495853c --- /dev/null +++ b/assets/img/high-level-design/cap-theorem-introduction.excalidraw @@ -0,0 +1,1043 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "cZTSQwT7h9aEkKp0RZYUC", + "type": "ellipse", + "x": 470, + "y": 138, + "width": 61, + "height": 64, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1321756799, + "version": 57, + "versionNonce": 1991398737, + "isDeleted": false, + "boundElements": [ + { + "id": "_B02NJtgoNBJYYYLXVhP-", + "type": "arrow" + }, + { + "type": "text", + "id": "jp8VWtYlR34L9aHbxlY4k" + }, + { + "id": "XBVA2zJQJ21mjmvFhum6O", + "type": "arrow" + }, + { + "id": "HfyHktbDks70F3aR6q8_7", + "type": "arrow" + } + ], + "updated": 1710153362633, + "link": null, + "locked": false + }, + { + "id": "jp8VWtYlR34L9aHbxlY4k", + "type": "text", + "x": 494.5738681738103, + "y": 157.87258300203047, + "width": 11.71875, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1694838033, + "version": 4, + "versionNonce": 1802625649, + "isDeleted": false, + "boundElements": null, + "updated": 1710153277350, + "link": null, + "locked": false, + "text": "6", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "cZTSQwT7h9aEkKp0RZYUC", + "originalText": "6", + "lineHeight": 1.2 + }, + { + "type": "ellipse", + "version": 141, + "versionNonce": 701269617, + "isDeleted": false, + "id": "TxBVq2G_GfkJgm8jBI7sm", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 638.5, + "y": 162, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 61, + "height": 64, + "seed": 1603753681, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "id": "hCiqyN7QmOnn0eQKJ4K3s", + "type": "arrow" + }, + { + "type": "text", + "id": "tDLgiNFO90WiS9jCe7Ks5" + }, + { + "id": "XBVA2zJQJ21mjmvFhum6O", + "type": "arrow" + }, + { + "id": "5I4b30WuFigten-WiF8QS", + "type": "arrow" + } + ], + "updated": 1710153388343, + "link": null, + "locked": false + }, + { + "id": "tDLgiNFO90WiS9jCe7Ks5", + "type": "text", + "x": 663.0738681738103, + "y": 181.87258300203047, + "width": 11.71875, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 
1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 431929905, + "version": 73, + "versionNonce": 1479839825, + "isDeleted": false, + "boundElements": null, + "updated": 1710153388343, + "link": null, + "locked": false, + "text": "5", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "TxBVq2G_GfkJgm8jBI7sm", + "originalText": "5", + "lineHeight": 1.2 + }, + { + "type": "ellipse", + "version": 111, + "versionNonce": 262119185, + "isDeleted": false, + "id": "TmpUxOmXkPJBHUH7ixRGh", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 472.5, + "y": 299, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 61, + "height": 64, + "seed": 2073728657, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "type": "text", + "id": "hgBEfZcxjJm0souY2Qz3c" + }, + { + "id": "5I4b30WuFigten-WiF8QS", + "type": "arrow" + }, + { + "id": "HfyHktbDks70F3aR6q8_7", + "type": "arrow" + } + ], + "updated": 1710153374566, + "link": null, + "locked": false + }, + { + "id": "hgBEfZcxjJm0souY2Qz3c", + "type": "text", + "x": 497.0738681738103, + "y": 318.87258300203047, + "width": 11.71875, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1246776145, + "version": 32, + "versionNonce": 291214577, + "isDeleted": false, + "boundElements": null, + "updated": 1710153374566, + "link": null, + "locked": false, + "text": "5", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "TmpUxOmXkPJBHUH7ixRGh", + "originalText": "5", + "lineHeight": 1.2 + }, + { + "type": "ellipse", + "version": 204, + "versionNonce": 14578047, + "isDeleted": false, + "id": "AJ1m4LrxsYWp3Nbzaq4zQ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 310.790999465622, + "y": 170.82009220775217, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 1654595327, + "groupIds": [ + "AY0cinYhwJeho5EwyTham" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "id": "_B02NJtgoNBJYYYLXVhP-", + "type": "arrow" + } + ], + "updated": 1710153265599, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 227, + "versionNonce": 1469753841, + "isDeleted": false, + "id": "zbwXexqCAbSOzZMYRFfXQ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 320.790999465622, + "y": 191.82009220775217, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 343992095, + "groupIds": [ + "AY0cinYhwJeho5EwyTham" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 187, + "versionNonce": 70379935, + "isDeleted": false, + "id": "QbDhJFXPSHETpEc-VRuRb", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 320.790999465622, + "y": 241.82009220775217, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 322218815, + "groupIds": [ + "AY0cinYhwJeho5EwyTham" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 196, + "versionNonce": 2138194897, + "isDeleted": false, + "id": "fQ1ZSjURUSBkq9OhBT8UC", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 321.790999465622, + "y": 247.82009220775217, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 1796926303, + "groupIds": [ + "AY0cinYhwJeho5EwyTham" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 195, + "versionNonce": 446782911, + "isDeleted": false, + "id": "hIscJMbAgasZ483xaVPce", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 321.790999465622, + "y": 212.82009220775217, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 1216620415, + "groupIds": [ + "AY0cinYhwJeho5EwyTham" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 193, + "versionNonce": 1627103665, + "isDeleted": false, + "id": "xayh_HsV0ea-ZNXXO6p26", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 319.790999465622, + "y": 212.82009220775217, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 2041634719, + "groupIds": [ + "AY0cinYhwJeho5EwyTham" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "id": "_B02NJtgoNBJYYYLXVhP-", + "type": "arrow", + "x": 344, + "y": 188, + "width": 116, + "height": 18, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1290861215, + "version": 31, + "versionNonce": 2001554911, + "isDeleted": false, + "boundElements": null, + "updated": 1710153265599, + "link": null, + "locked": false, + "points": [ + 
[ + 0, + 0 + ], + [ + 116, + -18 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "AJ1m4LrxsYWp3Nbzaq4zQ", + "focus": 0.7849771931188008, + "gap": 11.893719232587433 + }, + "endBinding": { + "elementId": "cZTSQwT7h9aEkKp0RZYUC", + "focus": 0.19427677262720397, + "gap": 10 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "type": "ellipse", + "version": 217, + "versionNonce": 683348447, + "isDeleted": false, + "id": "TWyTxagvj4byeb2LOMk_a", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 801.2368813798204, + "y": 213.10194573283195, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 2120917951, + "groupIds": [ + "POSJwaQGe1EUvePNCTKgP" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "id": "hCiqyN7QmOnn0eQKJ4K3s", + "type": "arrow" + } + ], + "updated": 1710153269818, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 240, + "versionNonce": 356923903, + "isDeleted": false, + "id": "fgPkAfF4gi4Y2L-370fbi", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 811.2368813798204, + "y": 234.10194573283195, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 1576915935, + "groupIds": [ + "POSJwaQGe1EUvePNCTKgP" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 200, + "versionNonce": 1959897457, + "isDeleted": false, + "id": "ikqDOf96tyFHfO7pMcCtz", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 811.2368813798204, + "y": 284.10194573283195, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 536215551, + "groupIds": [ + "POSJwaQGe1EUvePNCTKgP" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 209, + "versionNonce": 965517855, + "isDeleted": false, + "id": "QDDr5wktsOOSCWLzBcqGw", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 812.2368813798204, + "y": 290.10194573283195, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 643016735, + "groupIds": [ + "POSJwaQGe1EUvePNCTKgP" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 208, + "versionNonce": 1822249809, + "isDeleted": false, + "id": "nPNIb3-v3DLTZHfgAoFbY", + "fillStyle": "solid", + "strokeWidth": 2, 
+ "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 812.2368813798204, + "y": 255.10194573283195, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 1057442879, + "groupIds": [ + "POSJwaQGe1EUvePNCTKgP" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 206, + "versionNonce": 890308159, + "isDeleted": false, + "id": "u_ZFxAK5vmcAc0BkdI2uf", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 810.2368813798204, + "y": 255.10194573283195, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 636578911, + "groupIds": [ + "POSJwaQGe1EUvePNCTKgP" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710153265599, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "id": "hCiqyN7QmOnn0eQKJ4K3s", + "type": "arrow", + "x": 794.3610349594956, + "y": 220.84369204641044, + "width": 84.77075619139328, + "height": 20.7675972154112, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 2074833681, + "version": 173, + "versionNonce": 240611345, + "isDeleted": false, + "boundElements": null, + "updated": 1710153388343, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -84.77075619139328, + -20.7675972154112 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "TWyTxagvj4byeb2LOMk_a", + "focus": -0.054852948965267594, + "gap": 7.2462859154274 + }, + "endBinding": { + "elementId": "TxBVq2G_GfkJgm8jBI7sm", + "focus": -0.11770655895007504, + "gap": 10.511343268280434 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "XBVA2zJQJ21mjmvFhum6O", + "type": "arrow", + "x": 543.584779481007, + "y": 169.372306767417, + "width": 92.22979777396336, + "height": 32.60606055213128, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1051091057, + "version": 174, + "versionNonce": 630546385, + "isDeleted": false, + "boundElements": null, + "updated": 1710153388343, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 92.22979777396336, + 32.60606055213128 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "cZTSQwT7h9aEkKp0RZYUC", + "focus": -0.4669635423107049, + "gap": 12.589049176495859 + }, + "endBinding": { + "elementId": "TxBVq2G_GfkJgm8jBI7sm", + "focus": -0.5837044278883812, + "gap": 3.553910870509334 + }, + "startArrowhead": "triangle", + "endArrowhead": "triangle" + }, + { + "id": "5I4b30WuFigten-WiF8QS", + "type": "arrow", + "x": 655.915835191837, + "y": 
234.59197018015885, + "width": 119.29816681808734, + "height": 77.75029897767553, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 753889649, + "version": 113, + "versionNonce": 1047158673, + "isDeleted": false, + "boundElements": null, + "updated": 1710153388343, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -119.29816681808734, + 77.75029897767553 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "TxBVq2G_GfkJgm8jBI7sm", + "focus": -0.8504524746394679, + "gap": 10.796795872030433 + }, + "endBinding": { + "elementId": "TmpUxOmXkPJBHUH7ixRGh", + "focus": 0.08632412336566028, + "gap": 7.610268376597304 + }, + "startArrowhead": "triangle", + "endArrowhead": "triangle" + }, + { + "id": "HfyHktbDks70F3aR6q8_7", + "type": "arrow", + "x": 502.70526856282265, + "y": 291.50581223504287, + "width": 4.913678825258955, + "height": 82.9950153189236, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 436366623, + "version": 87, + "versionNonce": 759501937, + "isDeleted": false, + "boundElements": null, + "updated": 1710153374566, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 4.913678825258955, + -82.9950153189236 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "TmpUxOmXkPJBHUH7ixRGh", + "focus": -0.08982127757646, + "gap": 7.495376032460523 + }, + "endBinding": { + "elementId": "cZTSQwT7h9aEkKp0RZYUC", + "focus": -0.3075698292769691, + "gap": 7.2157139089914715 + }, + "startArrowhead": "triangle", + "endArrowhead": "triangle" + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/cap-theorem-introduction.svg b/assets/img/high-level-design/cap-theorem-introduction.svg new file mode 100644 index 0000000..86c4917 --- /dev/null +++ b/assets/img/high-level-design/cap-theorem-introduction.svg @@ -0,0 +1,21 @@ + + + + + + + + 655 \ No newline at end of file diff --git a/assets/img/high-level-design/choreography-pattern.png b/assets/img/high-level-design/choreography-pattern.png new file mode 100644 index 0000000..9a8531d Binary files /dev/null and b/assets/img/high-level-design/choreography-pattern.png differ diff --git a/assets/img/high-level-design/circuit-breaker-pattern.png b/assets/img/high-level-design/circuit-breaker-pattern.png new file mode 100644 index 0000000..63773b5 Binary files /dev/null and b/assets/img/high-level-design/circuit-breaker-pattern.png differ diff --git a/assets/img/high-level-design/consistent-hashing-disadvantage.png b/assets/img/high-level-design/consistent-hashing-disadvantage.png new file mode 100644 index 0000000..327adb6 Binary files /dev/null and b/assets/img/high-level-design/consistent-hashing-disadvantage.png differ diff --git a/assets/img/high-level-design/consistent-hashing-replication.png b/assets/img/high-level-design/consistent-hashing-replication.png new file mode 100644 index 0000000..d15e66d Binary files /dev/null and b/assets/img/high-level-design/consistent-hashing-replication.png differ diff --git 
a/assets/img/high-level-design/consistent-hashing-workaround.png b/assets/img/high-level-design/consistent-hashing-workaround.png new file mode 100644 index 0000000..4b4e0b4 Binary files /dev/null and b/assets/img/high-level-design/consistent-hashing-workaround.png differ diff --git a/assets/img/high-level-design/consistent-hashing.png b/assets/img/high-level-design/consistent-hashing.png new file mode 100644 index 0000000..e87ca94 Binary files /dev/null and b/assets/img/high-level-design/consistent-hashing.png differ diff --git a/assets/img/high-level-design/cqrs.png b/assets/img/high-level-design/cqrs.png new file mode 100644 index 0000000..eaa5e59 Binary files /dev/null and b/assets/img/high-level-design/cqrs.png differ diff --git a/assets/img/high-level-design/degradation-point.excalidraw b/assets/img/high-level-design/degradation-point.excalidraw new file mode 100644 index 0000000..92f574f --- /dev/null +++ b/assets/img/high-level-design/degradation-point.excalidraw @@ -0,0 +1,512 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "8t2bTIYw3YP7wd4y-HPpj", + "type": "arrow", + "x": 251, + "y": 448, + "width": 2.842170943040401e-14, + "height": 213.00000000000003, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 85199810, + "version": 31, + "versionNonce": 1319681986, + "isDeleted": false, + "boundElements": null, + "updated": 1709965946163, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 2.842170943040401e-14, + -213.00000000000003 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "b98LjBuCbkM76BT7J3l-R", + "type": "arrow", + "x": 248, + "y": 446, + "width": 294, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 2113670274, + "version": 79, + "versionNonce": 1561695582, + "isDeleted": false, + "boundElements": null, + "updated": 1709965946163, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 294, + 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "H4nzm3n1M_G5ZG6QIAmPU", + "type": "arrow", + "x": 252, + "y": 344, + "width": 165, + "height": 72, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 496634178, + "version": 287, + "versionNonce": 1488942978, + "isDeleted": false, + "boundElements": null, + "updated": 1709965946163, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 141, + -9 + ], + [ + 165, + -72 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "type": "arrow", + "version": 57, + "versionNonce": 783759774, + "isDeleted": false, + "id": "4YW3uXxy6Ysz1vd7RL3Lw", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 718.622055205726, + "y": 449.86003382032277, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 2.842170943040401e-14, + "height": 213.00000000000003, + "seed": 1405625282, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965946163, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + 2.842170943040401e-14, + -213.00000000000003 + ] + ] + }, + { + "type": "arrow", + "version": 105, + "versionNonce": 648623938, + "isDeleted": false, + "id": "4AmTRAByf5gBnFQYeec2Z", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 715.622055205726, + "y": 447.86003382032277, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 294, + "height": 0, + "seed": 1280465794, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965946163, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + 294, + 0 + ] + ] + }, + { + "type": "arrow", + "version": 477, + "versionNonce": 1562007006, + "isDeleted": false, + "id": "h1qGVoLkhIzWupDVLuO9f", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 721.622055205726, + "y": 445.86003382032277, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 163, + "height": 109, + "seed": 1849662274, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965946163, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 139, + -109 + ], + [ + 163, + -27 + ] + ] + }, + { + "id": "HNqz6AFnms_GyBoXRs2UF", + "type": "text", + "x": 122, + "y": 337, + "width": 93.75, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1556972062, + "version": 27, + "versionNonce": 1489305346, + "isDeleted": false, + "boundElements": null, + "updated": 1709965946163, + "link": null, + "locked": false, + "text": "Response\n Time", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Response\n Time", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 81, + "versionNonce": 1095756318, + "isDeleted": false, + "id": "UenHDOxn87DabKl8oyiRh", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 582.125, + "y": 336, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 117.1875, + "height": 24, + "seed": 1428543362, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709965946163, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": 
"Throughput", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Throughput", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 140, + "versionNonce": 730975938, + "isDeleted": false, + "id": "khqMMsfxhMxULJVXZ2dek", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 356.40625, + "y": 466, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 46.875, + "height": 24, + "seed": 1596700738, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709965946163, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Load", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Load", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 190, + "versionNonce": 1783166558, + "isDeleted": false, + "id": "jzJAjiUGPaaPw0RMU5CGr", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 836.5625, + "y": 466, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 46.875, + "height": 24, + "seed": 754231362, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709965946163, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Load", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Load", + "lineHeight": 1.2 + }, + { + "id": "GZAupg3dL6KpjTS_tiWgD", + "type": "line", + "x": 397, + "y": 332, + "width": 0, + "height": 111, + "angle": 0, + "strokeColor": "#2f9e44", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 433751618, + "version": 33, + "versionNonce": 152994462, + "isDeleted": false, + "boundElements": null, + "updated": 1709965949450, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 111 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "id": "kTWbbC798BWAR78mnq1ui", + "type": "line", + "x": 862, + "y": 336, + "width": 0, + "height": 107, + "angle": 0, + "strokeColor": "#2f9e44", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 1, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1925309954, + "version": 36, + "versionNonce": 1119177630, + "isDeleted": false, + "boundElements": null, + "updated": 1709965956751, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 107 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/degradation-point.svg b/assets/img/high-level-design/degradation-point.svg new file mode 100644 index 0000000..5d0e76c --- /dev/null +++ b/assets/img/high-level-design/degradation-point.svg @@ -0,0 +1,21 @@ + + + + + + + + Response TimeThroughputLoadLoad \ No newline at end of file diff --git a/assets/img/high-level-design/event-sourcing+cqrs.png 
b/assets/img/high-level-design/event-sourcing+cqrs.png new file mode 100644 index 0000000..9aa46c5 Binary files /dev/null and b/assets/img/high-level-design/event-sourcing+cqrs.png differ diff --git a/assets/img/high-level-design/execution-orchestrator-pattern.png b/assets/img/high-level-design/execution-orchestrator-pattern.png new file mode 100644 index 0000000..de0eaf7 Binary files /dev/null and b/assets/img/high-level-design/execution-orchestrator-pattern.png differ diff --git a/assets/img/high-level-design/features-of-the-system.excalidraw b/assets/img/high-level-design/features-of-the-system.excalidraw new file mode 100644 index 0000000..d8443f4 --- /dev/null +++ b/assets/img/high-level-design/features-of-the-system.excalidraw @@ -0,0 +1,2684 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "type": "ellipse", + "version": 250, + "versionNonce": 372959362, + "isDeleted": false, + "id": "9npojYAx0wJ43XtyDbxJq", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 456.40261024851816, + "y": 231.65552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 1428488734, + "groupIds": [ + "yzsDnNDPPeBFCJCKKvwgs" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660308, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 270, + "versionNonce": 2118014622, + "isDeleted": false, + "id": "H9yA11TDFq4sHJPzLHwdc", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 466.40261024851816, + "y": 252.65552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 1941638750, + "groupIds": [ + "yzsDnNDPPeBFCJCKKvwgs" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 230, + "versionNonce": 1282304578, + "isDeleted": false, + "id": "2Y7sy29JCcopGONxKvrNW", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 466.40261024851816, + "y": 302.65552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 608539294, + "groupIds": [ + "yzsDnNDPPeBFCJCKKvwgs" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 239, + "versionNonce": 1203098334, + "isDeleted": false, + "id": "S7tOfMgovXK_1-rOhfcRX", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 467.40261024851816, + "y": 308.65552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 561352414, + "groupIds": [ + "yzsDnNDPPeBFCJCKKvwgs" + ], + "frameId": null, + "roundness": { + "type": 2 + 
}, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 238, + "versionNonce": 359387650, + "isDeleted": false, + "id": "0pyC_3hjq6gYsgo5PFXbq", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 467.40261024851816, + "y": 273.65552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 668730142, + "groupIds": [ + "yzsDnNDPPeBFCJCKKvwgs" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 236, + "versionNonce": 628344606, + "isDeleted": false, + "id": "tbZUJjNhnAHhp8MbEFP_1", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 465.40261024851816, + "y": 273.65552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 971632478, + "groupIds": [ + "yzsDnNDPPeBFCJCKKvwgs" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "type": "ellipse", + "version": 208, + "versionNonce": 1910915522, + "isDeleted": false, + "id": "2skvFOt_-CkUDvAqMlsR2", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 983.9897521165245, + "y": 234.60035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 807231170, + "groupIds": [ + "NuweDgEEev0cYpZrrRiqA" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 232, + "versionNonce": 938029918, + "isDeleted": false, + "id": "RLEHxic60tJA2HsZWTlTC", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 993.9897521165245, + "y": 255.60035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 4732546, + "groupIds": [ + "NuweDgEEev0cYpZrrRiqA" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 192, + "versionNonce": 2096937346, + "isDeleted": false, + "id": "-B7z2E04xqIKsuJmht7so", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 993.9897521165245, + "y": 305.60035292990506, + "strokeColor": 
"#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 2093094466, + "groupIds": [ + "NuweDgEEev0cYpZrrRiqA" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 201, + "versionNonce": 1768363934, + "isDeleted": false, + "id": "hrXr_22oiA10oDFgFlQix", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 994.9897521165245, + "y": 311.60035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 1449786882, + "groupIds": [ + "NuweDgEEev0cYpZrrRiqA" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 200, + "versionNonce": 367933762, + "isDeleted": false, + "id": "OMoL1v_iW-rtxNYfKw0YO", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 994.9897521165245, + "y": 276.60035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 1245638082, + "groupIds": [ + "NuweDgEEev0cYpZrrRiqA" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 198, + "versionNonce": 840856542, + "isDeleted": false, + "id": "Rn85q_jlgfximLNgIQQcL", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 992.9897521165245, + "y": 276.60035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 492823938, + "groupIds": [ + "NuweDgEEev0cYpZrrRiqA" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957660307, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "id": "YjRASZE3oI1VO0vRO-6Bc", + "type": "rectangle", + "x": 645, + "y": 236, + "width": 179, + "height": 67, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 481661378, + "version": 57, + "versionNonce": 1545191682, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "gXzOTe5tvHiWtwELJAkl4" + } + ], + "updated": 1709957660307, + "link": null, + "locked": false + }, + { + "id": "gXzOTe5tvHiWtwELJAkl4", + "type": "text", + "x": 675.90625, + "y": 245.5, + "width": 117.1875, + 
"height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 2067460802, + "version": 30, + "versionNonce": 1545619486, + "isDeleted": false, + "boundElements": null, + "updated": 1709957660308, + "link": null, + "locked": false, + "text": "Hitchikers\nService", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "YjRASZE3oI1VO0vRO-6Bc", + "originalText": "Hitchikers\nService", + "lineHeight": 1.2 + }, + { + "id": "c7cgnoRTqbFCE3N8WvJkF", + "type": "line", + "x": 465, + "y": 315, + "width": 1, + "height": 419, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1703839810, + "version": 99, + "versionNonce": 900563650, + "isDeleted": false, + "boundElements": null, + "updated": 1709957686283, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 1, + 419 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "type": "line", + "version": 164, + "versionNonce": 717005314, + "isDeleted": false, + "id": "hu_WYdmYATCr1zr8-eMB6", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 996.2432590988744, + "y": 318.48996176640503, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 5, + "height": 411, + "seed": 1256573406, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957690757, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 5, + 411 + ] + ] + }, + { + "type": "line", + "version": 144, + "versionNonce": 578320770, + "isDeleted": false, + "id": "zUX0FdHjhwUJfKTA9JJ4O", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 736.2432590988744, + "y": 307.48996176640503, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 428, + "seed": 50284446, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957712794, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 428 + ] + ] + }, + { + "id": "Vcwii2qNDkipOWQe6US6E", + "type": "arrow", + "x": 995, + "y": 340, + "width": 254, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 631603458, + "version": 70, + "versionNonce": 917423774, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "FvQbzILEV6FJgX-DCiHuQ" + } + ], + "updated": 1709957766334, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -254, 
+ 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "FvQbzILEV6FJgX-DCiHuQ", + "type": "text", + "x": 821.125, + "y": 316, + "width": 93.75, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1147329886, + "version": 19, + "versionNonce": 1031881410, + "isDeleted": false, + "boundElements": null, + "updated": 1709957765608, + "link": null, + "locked": false, + "text": "Ready to\nPickup", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Vcwii2qNDkipOWQe6US6E", + "originalText": "Ready to\nPickup", + "lineHeight": 1.2 + }, + { + "type": "arrow", + "version": 161, + "versionNonce": 2032344578, + "isDeleted": false, + "id": "9vDRefa5U4AEbvUOa74CG", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 471.1299963510269, + "y": 384.22480994316584, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 262, + "height": 1, + "seed": 482067806, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "type": "text", + "id": "-Mjz3vGu7DSl5YI7e8XLE" + } + ], + "updated": 1709957775656, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + 262, + -1 + ] + ] + }, + { + "id": "-Mjz3vGu7DSl5YI7e8XLE", + "type": "text", + "x": 561.1143713510269, + "y": 359.72480994316584, + "width": 82.03125, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1887975198, + "version": 14, + "versionNonce": 1210005058, + "isDeleted": false, + "boundElements": null, + "updated": 1709957774706, + "link": null, + "locked": false, + "text": "Request\nRide", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "9vDRefa5U4AEbvUOa74CG", + "originalText": "Request\nRide", + "lineHeight": 1.2 + }, + { + "id": "7CJ7ilsEAyD9scEwqssge", + "type": "arrow", + "x": 727, + "y": 434, + "width": 73, + "height": 115, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 2027163074, + "version": 250, + "versionNonce": 1405554398, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "pTyoM30MvMNNQQHpKkMQm" + } + ], + "updated": 1709957819426, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -63, + 63 + ], + [ + 10, + 115 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "pTyoM30MvMNNQQHpKkMQm", + "type": "text", + "x": 634.703125, + "y": 485, + "width": 58.59375, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": 
"solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1851226910, + "version": 29, + "versionNonce": 825265794, + "isDeleted": false, + "boundElements": null, + "updated": 1709957818506, + "link": null, + "locked": false, + "text": "Match", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "7CJ7ilsEAyD9scEwqssge", + "originalText": "Match", + "lineHeight": 1.2 + }, + { + "id": "IuRfraODVptK8RaA0gCS2", + "type": "arrow", + "x": 736, + "y": 575, + "width": 261, + "height": 1, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 335381278, + "version": 76, + "versionNonce": 1182385282, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "mO6w3l1ixI_yPQ3YNbskJ" + } + ], + "updated": 1709957850008, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 261, + 1 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "mO6w3l1ixI_yPQ3YNbskJ", + "type": "text", + "x": 831.34375, + "y": 563.5, + "width": 70.3125, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 202706626, + "version": 7, + "versionNonce": 301345822, + "isDeleted": false, + "boundElements": null, + "updated": 1709957849070, + "link": null, + "locked": false, + "text": "Notify", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "IuRfraODVptK8RaA0gCS2", + "originalText": "Notify", + "lineHeight": 1.2 + }, + { + "type": "arrow", + "version": 142, + "versionNonce": 318535490, + "isDeleted": false, + "id": "bc8m9-a9EWJXhP9CZmwkn", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 732.5633472323836, + "y": 574.9768911687191, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 262, + "height": 2, + "seed": 231426882, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "type": "text", + "id": "aAhVsjYtdaqkFn_d3L3bj" + } + ], + "updated": 1709957846112, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + -262, + 2 + ] + ] + }, + { + "id": "aAhVsjYtdaqkFn_d3L3bj", + "type": "text", + "x": 566.4070972323836, + "y": 563.9768911687191, + "width": 70.3125, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 795144706, + "version": 9, + "versionNonce": 1741051230, + "isDeleted": false, + "boundElements": null, + "updated": 1709957845515, + "link": null, + "locked": false, + "text": "Notify", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": 
"middle", + "containerId": "bc8m9-a9EWJXhP9CZmwkn", + "originalText": "Notify", + "lineHeight": 1.2 + }, + { + "id": "sIWYQg5q60-uTt2ho2SyA", + "type": "arrow", + "x": 999, + "y": 672, + "width": 264, + "height": 1, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1636951106, + "version": 56, + "versionNonce": 13386114, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "HTaSAWo0f2xlsF6Kldz_2" + } + ], + "updated": 1709957875337, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -264, + -1 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "HTaSAWo0f2xlsF6Kldz_2", + "type": "text", + "x": 837.703125, + "y": 647.5, + "width": 58.59375, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1619056258, + "version": 18, + "versionNonce": 849264414, + "isDeleted": false, + "boundElements": null, + "updated": 1709957874165, + "link": null, + "locked": false, + "text": "Start\nRide", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "sIWYQg5q60-uTt2ho2SyA", + "originalText": "Start\nRide", + "lineHeight": 1.2 + }, + { + "id": "YgFiuY3t1tM920rF3PQKD", + "type": "arrow", + "x": 733, + "y": 721, + "width": 264, + "height": 1, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 109887810, + "version": 76, + "versionNonce": 485816962, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "sDrVl3PHTDXzZWg9eCauH" + } + ], + "updated": 1709957886376, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -264, + 1 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "sDrVl3PHTDXzZWg9eCauH", + "type": "text", + "x": 559.984375, + "y": 697.5, + "width": 82.03125, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 293813890, + "version": 14, + "versionNonce": 206820894, + "isDeleted": false, + "boundElements": null, + "updated": 1709957885517, + "link": null, + "locked": false, + "text": "Ride\nStarted", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "YgFiuY3t1tM920rF3PQKD", + "originalText": "Ride\nStarted", + "lineHeight": 1.2 + }, + { + "id": "9kkMZGdCCdeiqYQrkwCUY", + "type": "text", + "x": 645, + "y": 168, + "width": 175.78125, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + 
"roundness": null, + "seed": 916805662, + "version": 20, + "versionNonce": 955309058, + "isDeleted": false, + "boundElements": null, + "updated": 1709957971242, + "link": null, + "locked": false, + "text": "Ride Initiation", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Ride Initiation", + "lineHeight": 1.2 + }, + { + "type": "ellipse", + "version": 366, + "versionNonce": 312490690, + "isDeleted": false, + "id": "UnwcpXgA4ENFYugusa29-", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1173.6379065477618, + "y": 228.90552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 494097886, + "groupIds": [ + "Hu-9uOIkU2s04N9xqXCyY" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 386, + "versionNonce": 1175425666, + "isDeleted": false, + "id": "IZK4ghzy6V37VhH74RWyW", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1183.6379065477618, + "y": 249.90552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 243853854, + "groupIds": [ + "Hu-9uOIkU2s04N9xqXCyY" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 346, + "versionNonce": 1228796482, + "isDeleted": false, + "id": "PVHSKPy2RRBF0IawFefD2", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1183.6379065477618, + "y": 299.90552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 677508702, + "groupIds": [ + "Hu-9uOIkU2s04N9xqXCyY" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 355, + "versionNonce": 1594513922, + "isDeleted": false, + "id": "9-dfAxvWrbeBrfhU3yKvt", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1184.6379065477618, + "y": 305.90552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 1942977182, + "groupIds": [ + "Hu-9uOIkU2s04N9xqXCyY" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 354, + "versionNonce": 376687042, + "isDeleted": false, + "id": "4u_Y-sH_xZnmnqZ6ldshg", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": 
"solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1184.6379065477618, + "y": 270.90552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 931296990, + "groupIds": [ + "Hu-9uOIkU2s04N9xqXCyY" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 352, + "versionNonce": 1184448898, + "isDeleted": false, + "id": "nlk5aTkI7-JsJmWwbuUII", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1182.6379065477618, + "y": 270.90552048887594, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 1007414046, + "groupIds": [ + "Hu-9uOIkU2s04N9xqXCyY" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "type": "ellipse", + "version": 324, + "versionNonce": 179945794, + "isDeleted": false, + "id": "Gkson0JfePMCO97FcpWrr", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1701.225048415768, + "y": 231.85035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 266131294, + "groupIds": [ + "xoQypCq5g1drMxI4tDsHd" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 348, + "versionNonce": 1043613954, + "isDeleted": false, + "id": "NEnVlNu8U-CKtNk1t3qLs", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1711.225048415768, + "y": 252.85035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 1262893982, + "groupIds": [ + "xoQypCq5g1drMxI4tDsHd" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 308, + "versionNonce": 2022168770, + "isDeleted": false, + "id": "BfAJvotEbAyZ5MuNxiy54", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1711.225048415768, + "y": 302.85035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 1417222110, + "groupIds": [ + "xoQypCq5g1drMxI4tDsHd" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 
18 + ] + ] + }, + { + "type": "line", + "version": 317, + "versionNonce": 156463234, + "isDeleted": false, + "id": "f4cceANDJSTby9EB-LaQA", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1712.225048415768, + "y": 308.85035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 596012062, + "groupIds": [ + "xoQypCq5g1drMxI4tDsHd" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 316, + "versionNonce": 1951797314, + "isDeleted": false, + "id": "ZtGXReFMyvBRZ8Fogwx8G", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1712.225048415768, + "y": 273.85035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 106903646, + "groupIds": [ + "xoQypCq5g1drMxI4tDsHd" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 314, + "versionNonce": 1835972610, + "isDeleted": false, + "id": "M7kif7QDFOUpVvzjynxnT", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1710.225048415768, + "y": 273.85035292990506, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 1691647134, + "groupIds": [ + "xoQypCq5g1drMxI4tDsHd" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "type": "rectangle", + "version": 173, + "versionNonce": 1419536322, + "isDeleted": false, + "id": "3rQUCF_cfcfkQxiNThC-5", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1362.2352962992436, + "y": 233.25, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 179, + "height": 67, + "seed": 77019358, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "uy1gH9F-x9RKgjDft7m-p" + } + ], + "updated": 1709957997854, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 146, + "versionNonce": 838737794, + "isDeleted": false, + "id": "uy1gH9F-x9RKgjDft7m-p", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1393.1415462992436, + "y": 242.75, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 117.1875, + "height": 48, + "seed": 1295543582, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": 
false, + "fontSize": 20, + "fontFamily": 3, + "text": "Hitchikers\nService", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "3rQUCF_cfcfkQxiNThC-5", + "originalText": "Hitchikers\nService", + "lineHeight": 1.2 + }, + { + "type": "line", + "version": 215, + "versionNonce": 835903298, + "isDeleted": false, + "id": "P5Z-Tn02VXsJnNYm0EaZd", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1182.2352962992436, + "y": 312.25, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 1, + "height": 419, + "seed": 204020062, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 1, + 419 + ] + ] + }, + { + "type": "line", + "version": 280, + "versionNonce": 544843522, + "isDeleted": false, + "id": "m8vw9oSY5tqObC4mxRfuW", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1713.4785553981178, + "y": 315.73996176640503, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 5, + "height": 411, + "seed": 951729566, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 5, + 411 + ] + ] + }, + { + "type": "line", + "version": 260, + "versionNonce": 1718712002, + "isDeleted": false, + "id": "aQNhr1D9KLt3VTS4ggQK1", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1453.4785553981178, + "y": 304.73996176640503, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 428, + "seed": 941414878, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 428 + ] + ] + }, + { + "type": "arrow", + "version": 187, + "versionNonce": 2046981598, + "isDeleted": false, + "id": "LLFDeW5JLaIjVX8y8-XD_", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1712.2352962992436, + "y": 337.25, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 254, + "height": 0, + "seed": 560066078, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "type": "text", + "id": "XK_DerYbhKdCZd5uZKCz7" + } + ], + "updated": 1709958057434, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + -254, + 0 + ] + ] + }, + { + "type": "text", + "version": 36, + "versionNonce": 1826707330, + "isDeleted": false, + "id": "XK_DerYbhKdCZd5uZKCz7", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, 
+ "x": 1561.7977962992436, + "y": 313.25, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 46.875, + "height": 48, + "seed": 24803934, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709958056386, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "End\nRide", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "LLFDeW5JLaIjVX8y8-XD_", + "originalText": "End\nRide", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 146, + "versionNonce": 979178690, + "isDeleted": false, + "id": "3sQKk3tER7iDp8a3ermkO", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1362.2352962992436, + "y": 165.25, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 175.78125, + "height": 24, + "seed": 1421623710, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709957997854, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Ride Completion", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Ride Completion", + "lineHeight": 1.2 + }, + { + "id": "2lxHylOaYf2H38G91enbB", + "type": "text", + "x": 958, + "y": 205, + "width": 70.3125, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 132059358, + "version": 7, + "versionNonce": 430639746, + "isDeleted": false, + "boundElements": null, + "updated": 1709958011120, + "link": null, + "locked": false, + "text": "Driver", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Driver", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 133, + "versionNonce": 339014430, + "isDeleted": false, + "id": "p85hjD4e_NI53dKoH67VF", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 430.84375, + "y": 194, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 58.59375, + "height": 24, + "seed": 1361199554, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709958041712, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Rider", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Rider", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 44, + "versionNonce": 1547972802, + "isDeleted": false, + "id": "iz8mt2OjXTPvILaTiZEXN", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1669.84375, + "y": 203, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 70.3125, + "height": 24, + "seed": 1642965442, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709958027685, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Driver", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Driver", + "lineHeight": 1.2 + }, + { + "type": "text", + "version": 204, + "versionNonce": 1580565790, + "isDeleted": false, + "id": "yZKpjZqS2vkFtmR5yCUUJ", 
+ "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1153.703125, + "y": 193, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 58.59375, + "height": 24, + "seed": 1657619358, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709958046109, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Rider", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Rider", + "lineHeight": 1.2 + }, + { + "id": "GrJJbNJO8rwxgcM4Zx_cx", + "type": "arrow", + "x": 1443, + "y": 361, + "width": 63, + "height": 109, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1820398466, + "version": 174, + "versionNonce": 69648578, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "l4EUZGfTjiypCk3rWnRJ3" + } + ], + "updated": 1709958130207, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -56, + 55 + ], + [ + 7, + 109 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "l4EUZGfTjiypCk3rWnRJ3", + "type": "text", + "x": 1339.265625, + "y": 355, + "width": 105.46875, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 335430786, + "version": 21, + "versionNonce": 356097502, + "isDeleted": false, + "boundElements": null, + "updated": 1709958093637, + "link": null, + "locked": false, + "text": "Calculate\nFee", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "GrJJbNJO8rwxgcM4Zx_cx", + "originalText": "Calculate\nFee", + "lineHeight": 1.2 + }, + { + "id": "u1uQn7YP7caRnuJZ1VuWy", + "type": "arrow", + "x": 1455, + "y": 486, + "width": 272, + "height": 2, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 534912194, + "version": 77, + "versionNonce": 2027920514, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "0XyfKyvVvxFo0OAe9S0KY" + } + ], + "updated": 1709958127002, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -272, + 2 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "0XyfKyvVvxFo0OAe9S0KY", + "type": "text", + "x": 1275.984375, + "y": 442, + "width": 82.03125, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 918133662, + "version": 18, + "versionNonce": 1848920258, + "isDeleted": false, + "boundElements": null, + "updated": 1709958124187, + "link": null, + "locked": false, + "text": "Show\nReceipt", + 
"fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "u1uQn7YP7caRnuJZ1VuWy", + "originalText": "Show\nReceipt", + "lineHeight": 1.2 + }, + { + "id": "ZbBevtVtjX60KqM5WGZpy", + "type": "arrow", + "x": 1184, + "y": 557, + "width": 274, + "height": 3, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 56293442, + "version": 71, + "versionNonce": 1848480130, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "rSbhg6j8A1i5PBinMOU49" + } + ], + "updated": 1709958158478, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 274, + 3 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "rSbhg6j8A1i5PBinMOU49", + "type": "text", + "x": 1279.984375, + "y": 534.5, + "width": 82.03125, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1772758658, + "version": 18, + "versionNonce": 1290170142, + "isDeleted": false, + "boundElements": null, + "updated": 1709958157557, + "link": null, + "locked": false, + "text": "Make\nPayment", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "ZbBevtVtjX60KqM5WGZpy", + "originalText": "Make\nPayment", + "lineHeight": 1.2 + }, + { + "type": "arrow", + "version": 113, + "versionNonce": 461637342, + "isDeleted": false, + "id": "aKfYOuNdpo9bEsQ1-lmks", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1455.6393724702298, + "y": 615.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 261, + "height": 3, + "seed": 16707614, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "type": "text", + "id": "BrCTrmppdCGZ283OPrswD" + } + ], + "updated": 1709958166944, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + 261, + 3 + ] + ] + }, + { + "type": "text", + "version": 20, + "versionNonce": 1547681922, + "isDeleted": false, + "id": "BrCTrmppdCGZ283OPrswD", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1456.6237474702298, + "y": 609, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 82.03125, + "height": 48, + "seed": 707214430, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1709958162221, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Make\nPayment", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "aKfYOuNdpo9bEsQ1-lmks", + "originalText": "Make\nPayment", + "lineHeight": 1.2 + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/features-of-the-system.svg 
b/assets/img/high-level-design/features-of-the-system.svg new file mode 100644 index 0000000..15f1cdf --- /dev/null +++ b/assets/img/high-level-design/features-of-the-system.svg @@ -0,0 +1,21 @@ + + + + + + + + HitchikersServiceReady toPickupRequestRideMatchNotifyNotifyStartRideRideStartedRide InitiationHitchikersServiceEndRideRide CompletionDriverRiderDriverRiderCalculateFeeShowReceiptMakePaymentMakePayment \ No newline at end of file diff --git a/assets/img/high-level-design/gslb.excalidraw b/assets/img/high-level-design/gslb.excalidraw new file mode 100644 index 0000000..7d6eac6 --- /dev/null +++ b/assets/img/high-level-design/gslb.excalidraw @@ -0,0 +1,1115 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "UXXIjXc_n_COIAEuc1jYN", + "type": "rectangle", + "x": 684, + "y": 190, + "width": 223, + "height": 112, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 2014098050, + "version": 74, + "versionNonce": 787011166, + "isDeleted": false, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "id": "b1zMfjz4PQHrPV-s4p0-7", + "type": "text", + "x": 718, + "y": 157, + "width": 152.34375, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 14942622, + "version": 66, + "versionNonce": 954508866, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521299, + "link": null, + "locked": false, + "text": "Data Center 1", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Data Center 1", + "lineHeight": 1.2 + }, + { + "id": "u_fpMgzqjvecWHlYWKPrC", + "type": "rectangle", + "x": 719, + "y": 215, + "width": 25, + "height": 51, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 987346946, + "version": 51, + "versionNonce": 835802846, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 72, + "versionNonce": 263884290, + "isDeleted": false, + "id": "tERS0--bJs9ZnKBHETKHg", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 767.5, + "y": 214.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1611708254, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 81, + "versionNonce": 1233046302, + "isDeleted": false, + "id": "nW3fgXHv1VWxjcdrHw2sX", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 811.5, + "y": 215.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1362425822, + 
"groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 130, + "versionNonce": 721255874, + "isDeleted": false, + "id": "6CaGLJT53ySRB_5xhbJOP", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 682.5, + "y": 367.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 223, + "height": 112, + "seed": 1120187458, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "cqyUwqpFGFzJ8JEENsYF9", + "type": "arrow" + } + ], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 125, + "versionNonce": 737392478, + "isDeleted": false, + "id": "whwYbbjqS_cr2QyRaog6H", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 716.5, + "y": 334.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 152.34375, + "height": 24, + "seed": 1047059458, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Data Center 2", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Data Center 2", + "lineHeight": 1.2 + }, + { + "type": "rectangle", + "version": 111, + "versionNonce": 130839938, + "isDeleted": false, + "id": "b-SahyMhODF-7BeWDnxpP", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 717.5, + "y": 392.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1253182402, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 129, + "versionNonce": 25238430, + "isDeleted": false, + "id": "WBYNT8N_6EzaPJHzpVdjR", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 766, + "y": 392, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 499073922, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 138, + "versionNonce": 661026114, + "isDeleted": false, + "id": "JVipqtVoq5gkVj1pei3rw", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 810, + "y": 393, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1372554050, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "id": "bDS5FXYbGUCrp31TkxIrV", + "type": "rectangle", + "x": 657, + "y": 220, + "width": 46, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 
1866839234, + "version": 56, + "versionNonce": 1079506910, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "AtdonHPaYF-8vBMucveaY" + }, + { + "id": "6zpHVOOkHpHxV__KWtg51", + "type": "arrow" + } + ], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "id": "AtdonHPaYF-8vBMucveaY", + "type": "text", + "x": 668.28125, + "y": 232, + "width": 23.4375, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1521528094, + "version": 27, + "versionNonce": 187333890, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521299, + "link": null, + "locked": false, + "text": "LB", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "bDS5FXYbGUCrp31TkxIrV", + "originalText": "LB", + "lineHeight": 1.2 + }, + { + "type": "rectangle", + "version": 88, + "versionNonce": 223126658, + "isDeleted": false, + "id": "N4uuFo8_7y4r0BI1F8SA6", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 653, + "y": 398, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "width": 46, + "height": 48, + "seed": 1790196866, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "ZKMCCztN3sDELsKEDts63" + } + ], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 60, + "versionNonce": 916413598, + "isDeleted": false, + "id": "ZKMCCztN3sDELsKEDts63", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 664.28125, + "y": 410, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "width": 23.4375, + "height": 24, + "seed": 129850434, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "LB", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "N4uuFo8_7y4r0BI1F8SA6", + "originalText": "LB", + "lineHeight": 1.2 + }, + { + "type": "ellipse", + "version": 243, + "versionNonce": 185123906, + "isDeleted": false, + "id": "Mgfgq2GStWm5K9_Hu7hQ1", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 249.47876139779578, + "y": 195.2757201212412, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 671377986, + "groupIds": [ + "mVaVlfYSHkZ0nX0i84oWy" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "id": "qYL2-591_spAbU_-_Mwto", + "type": "arrow" + }, + { + "id": "6zpHVOOkHpHxV__KWtg51", + "type": "arrow" + } + ], + "updated": 1710028521299, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 265, + "versionNonce": 436607198, + "isDeleted": false, + "id": "O2HnYrR1TRGR5cUOzU8PK", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 259.4787613977958, + "y": 216.2757201212412, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 239194626, + "groupIds": [ + "mVaVlfYSHkZ0nX0i84oWy" + ], + 
"frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 225, + "versionNonce": 61533186, + "isDeleted": false, + "id": "J5PAOCdg1ITi-HxTnOADd", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 259.4787613977958, + "y": 266.2757201212412, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 1610367426, + "groupIds": [ + "mVaVlfYSHkZ0nX0i84oWy" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 234, + "versionNonce": 298536222, + "isDeleted": false, + "id": "RE1GcFjSeLbuyyqmnHBGB", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 260.4787613977958, + "y": 272.2757201212412, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 944945538, + "groupIds": [ + "mVaVlfYSHkZ0nX0i84oWy" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 233, + "versionNonce": 1026694082, + "isDeleted": false, + "id": "088NiyMSOQp1djmt55e4u", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 260.4787613977958, + "y": 237.2757201212412, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 1988376898, + "groupIds": [ + "mVaVlfYSHkZ0nX0i84oWy" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 231, + "versionNonce": 2069887326, + "isDeleted": false, + "id": "TC1kYBfIdzvqJ6mynJdAx", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 258.4787613977958, + "y": 237.2757201212412, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 972013826, + "groupIds": [ + "mVaVlfYSHkZ0nX0i84oWy" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710028521299, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "id": "S4wfKl2a6-g1nznNTOju-", + "type": "rectangle", + "x": 497, + "y": 285, + 
"width": 86, + "height": 91, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 409129758, + "version": 78, + "versionNonce": 1706906498, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "z99k6w_yhbpbqmhxbEZsj" + }, + { + "id": "qYL2-591_spAbU_-_Mwto", + "type": "arrow" + }, + { + "id": "4qU48_a-GfM2OSLcfJ-PG", + "type": "arrow" + } + ], + "updated": 1710028521300, + "link": null, + "locked": false + }, + { + "id": "z99k6w_yhbpbqmhxbEZsj", + "type": "text", + "x": 516.5625, + "y": 318.5, + "width": 46.875, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 593963934, + "version": 47, + "versionNonce": 1578442142, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521300, + "link": null, + "locked": false, + "text": "GSLB", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "S4wfKl2a6-g1nznNTOju-", + "originalText": "GSLB", + "lineHeight": 1.2 + }, + { + "id": "qYL2-591_spAbU_-_Mwto", + "type": "arrow", + "x": 284.304796555187, + "y": 209.46276308505884, + "width": 196.695203444813, + "height": 75.21946527657371, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1359819166, + "version": 265, + "versionNonce": 260805442, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "irUzr7fgybSFGb6e4hduT" + } + ], + "updated": 1710028521300, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 196.695203444813, + 75.21946527657371 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Mgfgq2GStWm5K9_Hu7hQ1", + "focus": -0.5246475721126254, + "gap": 12.970914572638272 + }, + "endBinding": { + "elementId": "S4wfKl2a6-g1nznNTOju-", + "focus": 0.37542364141380685, + "gap": 16 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "irUzr7fgybSFGb6e4hduT", + "type": "text", + "x": 309.65239827759353, + "y": 255.4724957233457, + "width": 150, + "height": 19.2, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 353714078, + "version": 35, + "versionNonce": 790346206, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521300, + "link": null, + "locked": false, + "text": "Step 1 - xyz.com", + "fontSize": 16, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "qYL2-591_spAbU_-_Mwto", + "originalText": "Step 1 - xyz.com", + "lineHeight": 1.2 + }, + { + "id": "4qU48_a-GfM2OSLcfJ-PG", + "type": "arrow", + "x": 484, + "y": 366, + "width": 204, + "height": 102, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, 
+ "roundness": { + "type": 2 + }, + "seed": 265266590, + "version": 97, + "versionNonce": 754703106, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "JwWyS8OxK-jcZ1RlY9U2l" + } + ], + "updated": 1710028521300, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -204, + -102 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "S4wfKl2a6-g1nznNTOju-", + "focus": -0.9477611940298507, + "gap": 13 + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "JwWyS8OxK-jcZ1RlY9U2l", + "type": "text", + "x": 307, + "y": 305.4, + "width": 150, + "height": 19.2, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 501470594, + "version": 30, + "versionNonce": 361025054, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521300, + "link": null, + "locked": false, + "text": "Step 2 - 1.2.3.4", + "fontSize": 16, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "4qU48_a-GfM2OSLcfJ-PG", + "originalText": "Step 2 - 1.2.3.4", + "lineHeight": 1.2 + }, + { + "id": "6zpHVOOkHpHxV__KWtg51", + "type": "arrow", + "x": 271, + "y": 192, + "width": 384, + "height": 36, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1026633090, + "version": 67, + "versionNonce": 210578114, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "hCcZGxNhHAvtx89bfzUB-" + } + ], + "updated": 1710028521300, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 384, + 36 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Mgfgq2GStWm5K9_Hu7hQ1", + "focus": -1.3651383659990217, + "gap": 6.810798118321909 + }, + "endBinding": { + "elementId": "bDS5FXYbGUCrp31TkxIrV", + "focus": 0.5221027479091996, + "gap": 2 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "hCcZGxNhHAvtx89bfzUB-", + "type": "text", + "x": 388, + "y": 200.4, + "width": 150, + "height": 19.2, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 464597662, + "version": 26, + "versionNonce": 776034910, + "isDeleted": false, + "boundElements": null, + "updated": 1710028521300, + "link": null, + "locked": false, + "text": "Step 3 - 1.2.3.4", + "fontSize": 16, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "6zpHVOOkHpHxV__KWtg51", + "originalText": "Step 3 - 1.2.3.4", + "lineHeight": 1.2 + }, + { + "id": "ycbPVXn1yfZCwutWotv7F", + "type": "text", + "x": 647, + "y": 269, + "width": 65.625, + "height": 19.2, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1415910878, + "version": 211, + "versionNonce": 1464588062, + "isDeleted": false, + "boundElements": null, + "updated": 1710028527203, + "link": null, + 
"locked": false, + "text": "1.2.3.4", + "fontSize": 16, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "1.2.3.4", + "lineHeight": 1.2 + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/gslb.svg b/assets/img/high-level-design/gslb.svg new file mode 100644 index 0000000..dbbf989 --- /dev/null +++ b/assets/img/high-level-design/gslb.svg @@ -0,0 +1,21 @@ + + + + + + + + Data Center 1Data Center 2LBLBGSLBStep 1 - xyz.comStep 2 - 1.2.3.4Step 3 - 1.2.3.41.2.3.4 \ No newline at end of file diff --git a/assets/img/high-level-design/hw-and-sw-lb-disadvantage.excalidraw b/assets/img/high-level-design/hw-and-sw-lb-disadvantage.excalidraw new file mode 100644 index 0000000..0559ea4 --- /dev/null +++ b/assets/img/high-level-design/hw-and-sw-lb-disadvantage.excalidraw @@ -0,0 +1,449 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "type": "rectangle", + "version": 75, + "versionNonce": 1172515138, + "isDeleted": false, + "id": "UXXIjXc_n_COIAEuc1jYN", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 684, + "y": 190, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 223, + "height": 112, + "seed": 2014098050, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 67, + "versionNonce": 1546683358, + "isDeleted": false, + "id": "b1zMfjz4PQHrPV-s4p0-7", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 718, + "y": 157, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 152.34375, + "height": 24, + "seed": 14942622, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Data Center 1", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Data Center 1", + "lineHeight": 1.2 + }, + { + "type": "rectangle", + "version": 52, + "versionNonce": 535867650, + "isDeleted": false, + "id": "u_fpMgzqjvecWHlYWKPrC", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 719, + "y": 215, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 987346946, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 73, + "versionNonce": 426792990, + "isDeleted": false, + "id": "tERS0--bJs9ZnKBHETKHg", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 767.5, + "y": 214.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1611708254, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 82, + "versionNonce": 913785026, + "isDeleted": false, + "id": 
"nW3fgXHv1VWxjcdrHw2sX", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 811.5, + "y": 215.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1362425822, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 131, + "versionNonce": 2860126, + "isDeleted": false, + "id": "6CaGLJT53ySRB_5xhbJOP", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 682.5, + "y": 367.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 223, + "height": 112, + "seed": 1120187458, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "cqyUwqpFGFzJ8JEENsYF9", + "type": "arrow" + } + ], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 126, + "versionNonce": 622188674, + "isDeleted": false, + "id": "whwYbbjqS_cr2QyRaog6H", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 716.5, + "y": 334.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 152.34375, + "height": 24, + "seed": 1047059458, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "Data Center 2", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Data Center 2", + "lineHeight": 1.2 + }, + { + "type": "rectangle", + "version": 112, + "versionNonce": 1110665374, + "isDeleted": false, + "id": "b-SahyMhODF-7BeWDnxpP", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 717.5, + "y": 392.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1253182402, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "cqyUwqpFGFzJ8JEENsYF9", + "type": "arrow" + } + ], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 130, + "versionNonce": 998996034, + "isDeleted": false, + "id": "WBYNT8N_6EzaPJHzpVdjR", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 766, + "y": 392, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 499073922, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 139, + "versionNonce": 792241374, + "isDeleted": false, + "id": "JVipqtVoq5gkVj1pei3rw", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 810, + "y": 393, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 25, + "height": 51, + "seed": 1372554050, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "rectangle", + 
"version": 57, + "versionNonce": 2042044418, + "isDeleted": false, + "id": "bDS5FXYbGUCrp31TkxIrV", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 657, + "y": 220, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "width": 46, + "height": 48, + "seed": 1866839234, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "AtdonHPaYF-8vBMucveaY" + }, + { + "id": "cqyUwqpFGFzJ8JEENsYF9", + "type": "arrow" + } + ], + "updated": 1710028779368, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 28, + "versionNonce": 2026658078, + "isDeleted": false, + "id": "AtdonHPaYF-8vBMucveaY", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 668.28125, + "y": 232, + "strokeColor": "#1e1e1e", + "backgroundColor": "#ffffff", + "width": 23.4375, + "height": 24, + "seed": 1521528094, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "LB", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "bDS5FXYbGUCrp31TkxIrV", + "originalText": "LB", + "lineHeight": 1.2 + }, + { + "type": "arrow", + "version": 109, + "versionNonce": 194498498, + "isDeleted": false, + "id": "cqyUwqpFGFzJ8JEENsYF9", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 671, + "y": 277, + "strokeColor": "#e03131", + "backgroundColor": "#ffffff", + "width": 35, + "height": 128, + "seed": 1136117406, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710028779368, + "link": null, + "locked": false, + "startBinding": { + "elementId": "bDS5FXYbGUCrp31TkxIrV", + "focus": 0.6096723044397463, + "gap": 9 + }, + "endBinding": { + "elementId": "b-SahyMhODF-7BeWDnxpP", + "focus": -1.049949849548646, + "gap": 11.5 + }, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + 35, + 128 + ] + ] + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/hw-and-sw-lb-disadvantage.svg b/assets/img/high-level-design/hw-and-sw-lb-disadvantage.svg new file mode 100644 index 0000000..61f2659 --- /dev/null +++ b/assets/img/high-level-design/hw-and-sw-lb-disadvantage.svg @@ -0,0 +1,21 @@ + + + + + + + + Data Center 1Data Center 2LB \ No newline at end of file diff --git a/assets/img/high-level-design/lambda-architecture.excalidraw b/assets/img/high-level-design/lambda-architecture.excalidraw new file mode 100644 index 0000000..097e56f --- /dev/null +++ b/assets/img/high-level-design/lambda-architecture.excalidraw @@ -0,0 +1,965 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "1OCcmpRxloJ6DgPF0vyNU", + "type": "rectangle", + "x": 384, + "y": 147, + "width": 350, + "height": 106, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 864043377, + "version": 30, + "versionNonce": 
330994769, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612937, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 55, + "versionNonce": 170153279, + "isDeleted": false, + "id": "8FhIsSxIJlimcgB4RlKYq", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 387, + "y": 333, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 350, + "height": 106, + "seed": 1966809439, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710176612937, + "link": null, + "locked": false + }, + { + "id": "oHO2eTWTvJ-tVFJnoN2bJ", + "type": "rectangle", + "x": 405, + "y": 170, + "width": 55, + "height": 61, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 861559199, + "version": 33, + "versionNonce": 2055558495, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "7dZKI556nnlJfsD4MMMkT" + }, + { + "id": "YbCMBzhJHdU51pSwhoRdF", + "type": "arrow" + } + ], + "updated": 1710176612937, + "link": null, + "locked": false + }, + { + "id": "7dZKI556nnlJfsD4MMMkT", + "type": "text", + "x": 412.88001251220703, + "y": 188, + "width": 39.23997497558594, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 810034463, + "version": 8, + "versionNonce": 1529639953, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612937, + "link": null, + "locked": false, + "text": "DFS", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "oHO2eTWTvJ-tVFJnoN2bJ", + "originalText": "DFS", + "lineHeight": 1.25 + }, + { + "id": "rNWzJ1bQ9jxPVRC1qcs1w", + "type": "rectangle", + "x": 408, + "y": 356, + "width": 137, + "height": 60, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 928640511, + "version": 58, + "versionNonce": 1141187167, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "s9xjJF4xIe1dfzc96sCS5" + }, + { + "id": "6VAkpFxlGmvrRIaHgXfbV", + "type": "arrow" + } + ], + "updated": 1710176636972, + "link": null, + "locked": false + }, + { + "id": "s9xjJF4xIe1dfzc96sCS5", + "type": "text", + "x": 435.36004638671875, + "y": 361, + "width": 82.2799072265625, + "height": 50, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 880530431, + "version": 20, + "versionNonce": 1585826289, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612937, + "link": null, + "locked": false, + "text": "Message\nBroker", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "rNWzJ1bQ9jxPVRC1qcs1w", + "originalText": "Message\nBroker", + "lineHeight": 
1.25 + }, + { + "id": "Qe-iZkdxkm5IFmst6BQQY", + "type": "ellipse", + "x": 631, + "y": 164, + "width": 81.99999999999997, + "height": 69, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 2049677503, + "version": 105, + "versionNonce": 1006199761, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "zhgtKfozjEB4rIo1zvS0q" + }, + { + "id": "sudHYj04fhVRVygtlmUZO", + "type": "arrow" + } + ], + "updated": 1710176612937, + "link": null, + "locked": false + }, + { + "id": "zhgtKfozjEB4rIo1zvS0q", + "type": "text", + "x": 653.0886390611953, + "y": 186.1048160490641, + "width": 37.8399658203125, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1811765489, + "version": 66, + "versionNonce": 630996415, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612937, + "link": null, + "locked": false, + "text": "View", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Qe-iZkdxkm5IFmst6BQQY", + "originalText": "View", + "lineHeight": 1.25 + }, + { + "type": "ellipse", + "version": 142, + "versionNonce": 1067368881, + "isDeleted": false, + "id": "vjKvs7iTUn04a1o6fvV5E", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 635, + "y": 352.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 81.99999999999997, + "height": 69, + "seed": 449226303, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [ + { + "type": "text", + "id": "daxPz2pLLGqFsE07VZ4k-" + }, + { + "id": "6VAkpFxlGmvrRIaHgXfbV", + "type": "arrow" + }, + { + "id": "SL6m5RK9xRUWfj4_UDFBm", + "type": "arrow" + } + ], + "updated": 1710176612937, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 103, + "versionNonce": 535571935, + "isDeleted": false, + "id": "daxPz2pLLGqFsE07VZ4k-", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 657.0886390611953, + "y": 374.6048160490641, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 37.8399658203125, + "height": 25, + "seed": 1152747103, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710176612938, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "View", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "vjKvs7iTUn04a1o6fvV5E", + "originalText": "View", + "lineHeight": 1.25 + }, + { + "id": "YbCMBzhJHdU51pSwhoRdF", + "type": "arrow", + "x": 461, + "y": 200, + "width": 172, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1490124639, + "version": 201, + "versionNonce": 1184719761, + "isDeleted": false, + "boundElements": [], + "updated": 1710176612938, + "link": null, + "locked": false, + "points": [ + [ 
+ 0, + 0 + ], + [ + 172, + 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "oHO2eTWTvJ-tVFJnoN2bJ", + "focus": -0.01639344262295082, + "gap": 1 + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "id": "6VAkpFxlGmvrRIaHgXfbV", + "type": "arrow", + "x": 545, + "y": 386, + "width": 88, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 739439761, + "version": 45, + "versionNonce": 489474417, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612938, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 88, + 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "rNWzJ1bQ9jxPVRC1qcs1w", + "focus": 0, + "gap": 1 + }, + "endBinding": { + "elementId": "vjKvs7iTUn04a1o6fvV5E", + "focus": 0.028985507246376812, + "gap": 2.016107863236094 + }, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "id": "sj-m39OC5idw4UlmM4Ywy", + "type": "rectangle", + "x": 824, + "y": 156, + "width": 121.99999999999997, + "height": 288, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 2035621713, + "version": 124, + "versionNonce": 804487711, + "isDeleted": false, + "boundElements": [ + { + "id": "RuhZKd106tr5EVuwecksu", + "type": "arrow" + }, + { + "id": "sudHYj04fhVRVygtlmUZO", + "type": "arrow" + }, + { + "id": "SL6m5RK9xRUWfj4_UDFBm", + "type": "arrow" + } + ], + "updated": 1710176612938, + "link": null, + "locked": false + }, + { + "id": "lyjE6D0mONYuv5okp0Q3b", + "type": "text", + "x": 407, + "y": 302, + "width": 119.51986694335938, + "height": 25, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1704234879, + "version": 32, + "versionNonce": 2073814591, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612938, + "link": null, + "locked": false, + "text": "Speed Layer", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Speed Layer", + "lineHeight": 1.25 + }, + { + "type": "text", + "version": 109, + "versionNonce": 153870641, + "isDeleted": false, + "id": "rAucD1WceQoQ-_PYcsqjz", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 407.2400665283203, + "y": 115.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 123.33987426757812, + "height": 25, + "seed": 1351620447, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710176612938, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Batch Layer", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Batch Layer", + "lineHeight": 1.25 + }, + { + "type": "text", + "version": 220, + "versionNonce": 1491256927, + "isDeleted": false, + "id": "uOgkU2FqRKgqxvyLj_x9n", + "fillStyle": "solid", + 
"strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 852.3300628662109, + "y": 186.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 65.89993286132812, + "height": 50, + "seed": 910941681, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710176612938, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 1, + "text": "Serving\nLayer", + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Serving\nLayer", + "lineHeight": 1.25 + }, + { + "id": "sudHYj04fhVRVygtlmUZO", + "type": "arrow", + "x": 715, + "y": 200, + "width": 105, + "height": 117, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1957875007, + "version": 68, + "versionNonce": 811242257, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612938, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 105, + 117 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "Qe-iZkdxkm5IFmst6BQQY", + "focus": -0.8107457140822503, + "gap": 2.0362277498507098 + }, + "endBinding": { + "elementId": "sj-m39OC5idw4UlmM4Ywy", + "focus": -0.42188974255290485, + "gap": 4 + }, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "id": "SL6m5RK9xRUWfj4_UDFBm", + "type": "arrow", + "x": 718, + "y": 389.602196, + "width": 100, + "height": 74, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1645434175, + "version": 78, + "versionNonce": 110666367, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612938, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 100, + -74 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "vjKvs7iTUn04a1o6fvV5E", + "focus": 0.7331289930444519, + "gap": 1.1124726488678505 + }, + "endBinding": { + "elementId": "sj-m39OC5idw4UlmM4Ywy", + "focus": 0.17964367135455228, + "gap": 6 + }, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "id": "RuhZKd106tr5EVuwecksu", + "type": "arrow", + "x": 947, + "y": 318.6880364139432, + "width": 55, + "height": 1.6880364139432231, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1738381713, + "version": 146, + "versionNonce": 1184368881, + "isDeleted": false, + "boundElements": null, + "updated": 1710176612938, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 55, + -1.6880364139432231 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "sj-m39OC5idw4UlmM4Ywy", + "focus": 0.14115723095066343, + "gap": 1 + }, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "type": "arrow", + "version": 217, + "versionNonce": 1557001727, + "isDeleted": false, + "id": "r4LDeJQkIEfQ-uqNYv5nX", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + 
"opacity": 100, + "angle": 0, + "x": 563.4277783017823, + "y": 390.3281457532928, + "strokeColor": "#2f9e44", + "backgroundColor": "#ffffff", + "width": 36.62343372214919, + "height": 19.720310465772638, + "seed": 226865791, + "groupIds": [ + "0XtP12tT20JbYfhhqkKxp" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710176636957, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + 11.268748837584363, + -19.720310465772638 + ], + [ + 36.62343372214919, + -8.451561628188273 + ] + ] + }, + { + "type": "arrow", + "version": 229, + "versionNonce": 1390002751, + "isDeleted": false, + "id": "tbcDfIGZMWHqUBrkswwUQ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 600.4536673395595, + "y": 388.3158691751527, + "strokeColor": "#2f9e44", + "backgroundColor": "#ffffff", + "width": 33.00133588149707, + "height": 19.317855150144627, + "seed": 1548290719, + "groupIds": [ + "0XtP12tT20JbYfhhqkKxp" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710176636972, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + -16.903123256376546, + 19.317855150144627 + ], + [ + -33.00133588149707, + 6.8417403656762215 + ] + ] + }, + { + "type": "arrow", + "version": 282, + "versionNonce": 931028081, + "isDeleted": false, + "id": "a0XY26tTZ2eGo2mVj_vQq", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 532.0936391467952, + "y": 201.2707683548221, + "strokeColor": "#2f9e44", + "backgroundColor": "#ffffff", + "width": 36.62343372214919, + "height": 19.720310465772638, + "seed": 309929137, + "groupIds": [ + "Yi5N9VZ8-dP0Tt6o4sTvz" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710176645994, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + 11.268748837584363, + -19.720310465772638 + ], + [ + 36.62343372214919, + -8.451561628188273 + ] + ] + }, + { + "type": "arrow", + "version": 294, + "versionNonce": 1027319377, + "isDeleted": false, + "id": "LvJWMW4uClSiulZvcAiZL", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 569.1195281845725, + "y": 199.25849177668198, + "strokeColor": "#2f9e44", + "backgroundColor": "#ffffff", + "width": 33.00133588149707, + "height": 19.317855150144627, + "seed": 909338257, + "groupIds": [ + "Yi5N9VZ8-dP0Tt6o4sTvz" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710176645994, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + -16.903123256376546, + 19.317855150144627 + ], + [ + -33.00133588149707, + 6.8417403656762215 + ] + ] + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git 
a/assets/img/high-level-design/lambda-architecture.svg b/assets/img/high-level-design/lambda-architecture.svg new file mode 100644 index 0000000..8e7dc7d --- /dev/null +++ b/assets/img/high-level-design/lambda-architecture.svg @@ -0,0 +1,21 @@ + + + + + + + + DFSMessageBrokerViewViewSpeed LayerBatch LayerServingLayer \ No newline at end of file diff --git a/assets/img/high-level-design/load-balancer-vs-api-gateway.png b/assets/img/high-level-design/load-balancer-vs-api-gateway.png new file mode 100644 index 0000000..2207ff0 Binary files /dev/null and b/assets/img/high-level-design/load-balancer-vs-api-gateway.png differ diff --git a/assets/img/high-level-design/load-balancing-microservices.excalidraw b/assets/img/high-level-design/load-balancing-microservices.excalidraw new file mode 100644 index 0000000..4443395 --- /dev/null +++ b/assets/img/high-level-design/load-balancing-microservices.excalidraw @@ -0,0 +1,1242 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "p76ZtehLXDYaYPgy57ZRG", + "type": "rectangle", + "x": 753, + "y": 407, + "width": 31, + "height": 56, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 1944850215, + "version": 56, + "versionNonce": 1731275815, + "isDeleted": false, + "boundElements": [ + { + "id": "HktgtQRc2CmCmbRJR_YjR", + "type": "arrow" + } + ], + "updated": 1710311921140, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 87, + "versionNonce": 1755778887, + "isDeleted": false, + "id": "P-fBKOaRr5fVivKcZJ6rQ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 845.5, + "y": 403, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 1724863817, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710311921140, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 85, + "versionNonce": 62099047, + "isDeleted": false, + "id": "k40rGjU0Mpkw0e57GHh0B", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 798.5, + "y": 409, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 323966729, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710311921140, + "link": null, + "locked": false + }, + { + "id": "O8pnhh7uNvLF3pOKKPtgp", + "type": "rectangle", + "x": 640, + "y": 398, + "width": 52, + "height": 86, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 1105065033, + "version": 71, + "versionNonce": 1112815817, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "esdljNMYHW-InUdBno-br" + }, + { + "id": "HktgtQRc2CmCmbRJR_YjR", + "type": "arrow" + }, + { + "id": "TYrkACaEDcY-dsyLC6tDe", + "type": "arrow" + } + ], + "updated": 1710312024611, + "link": null, + "locked": false + }, + { + "id": "esdljNMYHW-InUdBno-br", + "type": "text", + "x": 654.28125, + 
"y": 429, + "width": 23.4375, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 785417225, + "version": 56, + "versionNonce": 607281319, + "isDeleted": false, + "boundElements": null, + "updated": 1710311921140, + "link": null, + "locked": false, + "text": "LB", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "O8pnhh7uNvLF3pOKKPtgp", + "originalText": "LB", + "lineHeight": 1.2 + }, + { + "id": "HktgtQRc2CmCmbRJR_YjR", + "type": "arrow", + "x": 692, + "y": 439.8973379328761, + "width": 45, + "height": 1.4009383710219936, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 424909543, + "version": 220, + "versionNonce": 1199658633, + "isDeleted": false, + "boundElements": null, + "updated": 1710311921186, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 45, + 1.4009383710219936 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "O8pnhh7uNvLF3pOKKPtgp", + "focus": -0.043645696365462665, + "gap": 1 + }, + "endBinding": { + "elementId": "u-4jZcFvPXUqqmKrNTIHE", + "focus": -0.12533377461999445, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "id": "u-4jZcFvPXUqqmKrNTIHE", + "type": "rectangle", + "x": 738, + "y": 390, + "width": 156, + "height": 95.00000000000001, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "seed": 511094057, + "version": 128, + "versionNonce": 1833390567, + "isDeleted": false, + "boundElements": [ + { + "id": "HktgtQRc2CmCmbRJR_YjR", + "type": "arrow" + }, + { + "id": "3oHvvRYgMDYy5ycTn4p__", + "type": "arrow" + }, + { + "id": "SxLkOvAVf1VEl1A37dfAR", + "type": "arrow" + } + ], + "updated": 1710312017608, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 124, + "versionNonce": 19548361, + "isDeleted": false, + "id": "s6H_2RCtA8wQH_SYtt2h7", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1118, + "y": 274.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 1332715337, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "BGT-pewaOiROgtyZgzfq2", + "type": "arrow" + } + ], + "updated": 1710311917454, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 163, + "versionNonce": 359684871, + "isDeleted": false, + "id": "5Xbtfnpy_hRN0gwU9gliR", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1209.5, + "y": 275.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 950980137, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710311954096, + "link": null, + "locked": false + }, + { + "type": 
"rectangle", + "version": 153, + "versionNonce": 723741321, + "isDeleted": false, + "id": "cs88_UmKvjmgToDTIAli4", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1163.5, + "y": 276.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 1407525129, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710311917454, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 139, + "versionNonce": 1443738153, + "isDeleted": false, + "id": "Ghf4YoX_OUZH-a5qsgZ3x", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1005, + "y": 265.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 52, + "height": 86, + "seed": 512628713, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "dGF-svLrXvTaXDpl-cixn" + }, + { + "id": "BGT-pewaOiROgtyZgzfq2", + "type": "arrow" + }, + { + "id": "3oHvvRYgMDYy5ycTn4p__", + "type": "arrow" + } + ], + "updated": 1710312009874, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 124, + "versionNonce": 1125063753, + "isDeleted": false, + "id": "dGF-svLrXvTaXDpl-cixn", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1019.28125, + "y": 296.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 23.4375, + "height": 24, + "seed": 1521824457, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710311917454, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "LB", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "Ghf4YoX_OUZH-a5qsgZ3x", + "originalText": "LB", + "lineHeight": 1.2 + }, + { + "type": "arrow", + "version": 490, + "versionNonce": 47149895, + "isDeleted": false, + "id": "BGT-pewaOiROgtyZgzfq2", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1058, + "y": 307.25613951408525, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 44, + "height": 1.0816515442621153, + "seed": 2096492969, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710311946648, + "link": null, + "locked": false, + "startBinding": { + "elementId": "Ghf4YoX_OUZH-a5qsgZ3x", + "gap": 1, + "focus": -0.043645696365462665 + }, + "endBinding": { + "elementId": "znbSM2RW6ERSelrgg29fT", + "gap": 1, + "focus": -0.12533377461999445 + }, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + 44, + 1.0816515442621153 + ] + ] + }, + { + "type": "rectangle", + "version": 230, + "versionNonce": 690407687, + "isDeleted": false, + "id": "znbSM2RW6ERSelrgg29fT", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1103, + "y": 257.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 241, + "height": 95.00000000000001, + "seed": 1518064777, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "BGT-pewaOiROgtyZgzfq2", + "type": "arrow" + } + ], + "updated": 
1710311946647, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 44, + "versionNonce": 908576745, + "isDeleted": false, + "id": "XNE-jd-TqyQXuBAtxcTTs", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1146, + "y": 532.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 650977513, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "rFJ7SmnVIlx-RoVk-rDeo", + "type": "arrow" + } + ], + "updated": 1710311925003, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 59, + "versionNonce": 10003049, + "isDeleted": false, + "id": "6bnfiSOAfWWZ6llCHJWx7", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1033, + "y": 523.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 52, + "height": 86, + "seed": 541818249, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "tSkkEOqSNsOKgozCICPKz" + }, + { + "id": "rFJ7SmnVIlx-RoVk-rDeo", + "type": "arrow" + }, + { + "id": "SxLkOvAVf1VEl1A37dfAR", + "type": "arrow" + } + ], + "updated": 1710312015318, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 44, + "versionNonce": 2084834153, + "isDeleted": false, + "id": "tSkkEOqSNsOKgozCICPKz", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1047.28125, + "y": 554.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 23.4375, + "height": 24, + "seed": 922018921, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710311925003, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "LB", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "6bnfiSOAfWWZ6llCHJWx7", + "originalText": "LB", + "lineHeight": 1.2 + }, + { + "type": "arrow", + "version": 242, + "versionNonce": 346085767, + "isDeleted": false, + "id": "rFJ7SmnVIlx-RoVk-rDeo", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1086, + "y": 565.7834377367436, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 44, + "height": 1.9578376538195243, + "seed": 1166962505, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710311932737, + "link": null, + "locked": false, + "startBinding": { + "elementId": "6bnfiSOAfWWZ6llCHJWx7", + "gap": 1, + "focus": -0.043645696365462665 + }, + "endBinding": { + "elementId": "bH1K_t2yFo5p2eJDxyY77", + "gap": 1, + "focus": -0.12533377461999445 + }, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + 44, + 1.9578376538195243 + ] + ] + }, + { + "type": "rectangle", + "version": 142, + "versionNonce": 349224775, + "isDeleted": false, + "id": "bH1K_t2yFo5p2eJDxyY77", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1131, + "y": 515.5, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 60.00000000000001, + "height": 95.00000000000001, + "seed": 578002473, + "groupIds": [], + "frameId": null, + 
"roundness": { + "type": 3 + }, + "boundElements": [ + { + "id": "rFJ7SmnVIlx-RoVk-rDeo", + "type": "arrow" + } + ], + "updated": 1710311932736, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 172, + "versionNonce": 2008939081, + "isDeleted": false, + "id": "65_B2sRowMtlfx3zJYcCk", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1302.5, + "y": 274, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 136316297, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710311944482, + "link": null, + "locked": false + }, + { + "type": "rectangle", + "version": 180, + "versionNonce": 725888073, + "isDeleted": false, + "id": "fCgF8XJzRDQXqouz0zdit", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 1258.5, + "y": 278, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 31, + "height": 56, + "seed": 400619337, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [], + "updated": 1710311942664, + "link": null, + "locked": false + }, + { + "id": "3oHvvRYgMDYy5ycTn4p__", + "type": "arrow", + "x": 899, + "y": 432, + "width": 102, + "height": 116, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1398572359, + "version": 68, + "versionNonce": 344194025, + "isDeleted": false, + "boundElements": null, + "updated": 1710312009874, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 102, + -116 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "u-4jZcFvPXUqqmKrNTIHE", + "focus": 0.652630821276902, + "gap": 5 + }, + "endBinding": { + "elementId": "Ghf4YoX_OUZH-a5qsgZ3x", + "focus": 0.36679275871386113, + "gap": 4 + }, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "type": "arrow", + "version": 140, + "versionNonce": 391397767, + "isDeleted": false, + "id": "SxLkOvAVf1VEl1A37dfAR", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 896.0505365595221, + "y": 438.34703483350563, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "width": 133.0000000000001, + "height": 133, + "seed": 44160583, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312017789, + "link": null, + "locked": false, + "startBinding": { + "elementId": "u-4jZcFvPXUqqmKrNTIHE", + "focus": -0.6311035994104899, + "gap": 2.050536559522129 + }, + "endBinding": { + "elementId": "6bnfiSOAfWWZ6llCHJWx7", + "focus": -0.5042970764345416, + "gap": 3.949463440477757 + }, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "triangle", + "points": [ + [ + 0, + 0 + ], + [ + 133.0000000000001, + 133 + ] + ] + }, + { + "id": "TYrkACaEDcY-dsyLC6tDe", + "type": "arrow", + "x": 502, + "y": 439, + "width": 134, + "height": 1, + "angle": 0, + "strokeColor": "#2f9e44", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + 
}, + "seed": 626576327, + "version": 55, + "versionNonce": 681224489, + "isDeleted": false, + "boundElements": null, + "updated": 1710312029233, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 134, + -1 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": { + "elementId": "O8pnhh7uNvLF3pOKKPtgp", + "focus": 0.07463718037318591, + "gap": 4 + }, + "startArrowhead": null, + "endArrowhead": "triangle" + }, + { + "type": "ellipse", + "version": 222, + "versionNonce": 663092679, + "isDeleted": false, + "id": "RAVQDSncmSo4MNFoUMdsQ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 464.1277520675212, + "y": 394.98628092367204, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 22, + "height": 23, + "seed": 261403943, + "groupIds": [ + "_78nfUriGcxIZRV7-6A56" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312037607, + "link": null, + "locked": false + }, + { + "type": "line", + "version": 246, + "versionNonce": 363900135, + "isDeleted": false, + "id": "HwVccMHbVZRjYsYuDsPEG", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 474.1277520675212, + "y": 415.98628092367204, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 0, + "height": 49, + "seed": 766605383, + "groupIds": [ + "_78nfUriGcxIZRV7-6A56" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312037607, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 49 + ] + ] + }, + { + "type": "line", + "version": 206, + "versionNonce": 1157134343, + "isDeleted": false, + "id": "PBZHWyM7H4JK_uYRG71Dy", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 474.1277520675212, + "y": 465.98628092367204, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 15, + "height": 18, + "seed": 1837684583, + "groupIds": [ + "_78nfUriGcxIZRV7-6A56" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312037607, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -15, + 18 + ] + ] + }, + { + "type": "line", + "version": 215, + "versionNonce": 1264181031, + "isDeleted": false, + "id": "QJN97JyBH2iE1Iz56rLpX", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 475.1277520675212, + "y": 471.98628092367204, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 17, + "seed": 1969705607, + "groupIds": [ + "_78nfUriGcxIZRV7-6A56" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312037607, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 17 + ] + ] + }, + { + "type": "line", + "version": 214, + "versionNonce": 1262576199, + "isDeleted": false, + "id": "6dWePiXtj9AweSHDqr7mU", + 
"fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 475.1277520675212, + "y": 436.98628092367204, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 15, + "seed": 1846110631, + "groupIds": [ + "_78nfUriGcxIZRV7-6A56" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312037607, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 16, + 15 + ] + ] + }, + { + "type": "line", + "version": 212, + "versionNonce": 1703641447, + "isDeleted": false, + "id": "0EOCyD9shjKiQ6JsmKVCQ", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 473.1277520675212, + "y": 436.98628092367204, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 16, + "height": 16, + "seed": 882613447, + "groupIds": [ + "_78nfUriGcxIZRV7-6A56" + ], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710312037607, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + -16, + 16 + ] + ] + }, + { + "id": "Gc5oISZtMQl8Balg3XIze", + "type": "line", + "x": 573, + "y": 235, + "width": 1, + "height": 409, + "angle": 0, + "strokeColor": "#2f9e44", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 988290633, + "version": 63, + "versionNonce": 380029287, + "isDeleted": false, + "boundElements": null, + "updated": 1710312046490, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + -1, + 409 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/load-balancing-microservices.png b/assets/img/high-level-design/load-balancing-microservices.png new file mode 100644 index 0000000..4dcd243 Binary files /dev/null and b/assets/img/high-level-design/load-balancing-microservices.png differ diff --git a/assets/img/high-level-design/merkle-tree.png b/assets/img/high-level-design/merkle-tree.png new file mode 100644 index 0000000..7815755 Binary files /dev/null and b/assets/img/high-level-design/merkle-tree.png differ diff --git a/assets/img/high-level-design/multi-tier-constraint.excalidraw b/assets/img/high-level-design/multi-tier-constraint.excalidraw new file mode 100644 index 0000000..d4e882b --- /dev/null +++ b/assets/img/high-level-design/multi-tier-constraint.excalidraw @@ -0,0 +1,443 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "1v0eZ3rjB5Wq4stkzHgaH", + "type": "rectangle", + "x": 333, + "y": 154, + "width": 123.00000000000001, + "height": 188, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + 
"roundness": { + "type": 3 + }, + "seed": 268692287, + "version": 34, + "versionNonce": 101966591, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "Hu4SIUpFxsXR1MiIwesBV" + }, + { + "id": "bCU7pbVUQgQ_pW3RzMwcJ", + "type": "arrow" + }, + { + "id": "h59C1k10-GIdE-Fg42l0n", + "type": "arrow" + } + ], + "updated": 1710165313869, + "link": null, + "locked": false + }, + { + "id": "Hu4SIUpFxsXR1MiIwesBV", + "type": "text", + "x": 359.34375, + "y": 236, + "width": 70.3125, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 2119886303, + "version": 24, + "versionNonce": 52515807, + "isDeleted": false, + "boundElements": null, + "updated": 1710165341750, + "link": null, + "locked": false, + "text": "tier 1", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "1v0eZ3rjB5Wq4stkzHgaH", + "originalText": "tier 1", + "lineHeight": 1.2 + }, + { + "type": "rectangle", + "version": 75, + "versionNonce": 1110419217, + "isDeleted": false, + "id": "2SKasoWxTW2cEpRs-8PbY", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 574.5, + "y": 153, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 123.00000000000001, + "height": 188, + "seed": 1877396721, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "iUE6T4BwdjkFz-WKOtfMH" + }, + { + "id": "bCU7pbVUQgQ_pW3RzMwcJ", + "type": "arrow" + }, + { + "id": "KcpiOyt-TLOWM6HRm4pri", + "type": "arrow" + } + ], + "updated": 1710165296417, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 67, + "versionNonce": 1171654783, + "isDeleted": false, + "id": "iUE6T4BwdjkFz-WKOtfMH", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 600.84375, + "y": 235, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 70.3125, + "height": 24, + "seed": 541198033, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710165346222, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "tier 2", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "2SKasoWxTW2cEpRs-8PbY", + "originalText": "tier 2", + "lineHeight": 1.2 + }, + { + "type": "rectangle", + "version": 150, + "versionNonce": 726167359, + "isDeleted": false, + "id": "LVQRg06EkSBzz8Qx1MVvR", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 814.5, + "y": 153, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 123.00000000000001, + "height": 188, + "seed": 129709855, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 3 + }, + "boundElements": [ + { + "type": "text", + "id": "SgHDmMNo-NYkx7KwyrC8r" + }, + { + "id": "KcpiOyt-TLOWM6HRm4pri", + "type": "arrow" + }, + { + "id": "h59C1k10-GIdE-Fg42l0n", + "type": "arrow" + } + ], + "updated": 1710165313869, + "link": null, + "locked": false + }, + { + "type": "text", + "version": 142, + "versionNonce": 1575344415, + "isDeleted": false, + "id": "SgHDmMNo-NYkx7KwyrC8r", + "fillStyle": "solid", + "strokeWidth": 2, + 
"strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 840.84375, + "y": 235, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 70.3125, + "height": 24, + "seed": 1252326207, + "groupIds": [], + "frameId": null, + "roundness": null, + "boundElements": [], + "updated": 1710165349783, + "link": null, + "locked": false, + "fontSize": 20, + "fontFamily": 3, + "text": "tier 3", + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "LVQRg06EkSBzz8Qx1MVvR", + "originalText": "tier 3", + "lineHeight": 1.2 + }, + { + "id": "bCU7pbVUQgQ_pW3RzMwcJ", + "type": "arrow", + "x": 463, + "y": 244, + "width": 111, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 784148113, + "version": 39, + "versionNonce": 233390481, + "isDeleted": false, + "boundElements": null, + "updated": 1710165285784, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 111, + 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "1v0eZ3rjB5Wq4stkzHgaH", + "focus": -0.0425531914893617, + "gap": 7 + }, + "endBinding": { + "elementId": "2SKasoWxTW2cEpRs-8PbY", + "focus": 0.031914893617021274, + "gap": 1 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "type": "arrow", + "version": 155, + "versionNonce": 1849825375, + "isDeleted": false, + "id": "KcpiOyt-TLOWM6HRm4pri", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 702.6937630655896, + "y": 245.69837785437568, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 108.30623693441044, + "height": 0, + "seed": 489449535, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1710165307237, + "link": null, + "locked": false, + "startBinding": { + "elementId": "2SKasoWxTW2cEpRs-8PbY", + "focus": -0.013847044102386399, + "gap": 5.193763065589565 + }, + "endBinding": { + "elementId": "LVQRg06EkSBzz8Qx1MVvR", + "focus": 0.013847044102386399, + "gap": 3.5 + }, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": "arrow", + "points": [ + [ + 0, + 0 + ], + [ + 108.30623693441044, + 0 + ] + ] + }, + { + "id": "h59C1k10-GIdE-Fg42l0n", + "type": "arrow", + "x": 418, + "y": 356, + "width": 446, + "height": 57, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 1527060209, + "version": 141, + "versionNonce": 596270129, + "isDeleted": false, + "boundElements": [ + { + "type": "text", + "id": "fGJh9EwrNEEqPPjJzGidW" + } + ], + "updated": 1710165329451, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 211, + 57 + ], + [ + 446, + 5 + ] + ], + "lastCommittedPoint": null, + "startBinding": { + "elementId": "1v0eZ3rjB5Wq4stkzHgaH", + "focus": 0.9189785556674307, + "gap": 14 + }, + "endBinding": { + "elementId": "LVQRg06EkSBzz8Qx1MVvR", + "focus": -1.0347200253084465, + "gap": 20 + }, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "fGJh9EwrNEEqPPjJzGidW", + "type": "text", + "x": 618.453125, + "y": 391.4, + "width": 21.09375, + 
"height": 43.199999999999996, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1462920831, + "version": 5, + "versionNonce": 1942451999, + "isDeleted": false, + "boundElements": null, + "updated": 1710165328167, + "link": null, + "locked": false, + "text": "X", + "fontSize": 36, + "fontFamily": 3, + "textAlign": "center", + "verticalAlign": "middle", + "containerId": "h59C1k10-GIdE-Fg42l0n", + "originalText": "X", + "lineHeight": 1.2 + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/multi-tier-constraint.svg b/assets/img/high-level-design/multi-tier-constraint.svg new file mode 100644 index 0000000..d8a0ee4 --- /dev/null +++ b/assets/img/high-level-design/multi-tier-constraint.svg @@ -0,0 +1,21 @@ + + + + + + + + tier 1tier 2tier 3X \ No newline at end of file diff --git a/assets/img/high-level-design/percentile-distribution-response-time.excalidraw b/assets/img/high-level-design/percentile-distribution-response-time.excalidraw new file mode 100644 index 0000000..fcd9a9e --- /dev/null +++ b/assets/img/high-level-design/percentile-distribution-response-time.excalidraw @@ -0,0 +1,1194 @@ +{ + "type": "excalidraw", + "version": 2, + "source": "https://excalidraw.com", + "elements": [ + { + "id": "JdoxsrWvNODNLpMMTC5uB", + "type": "arrow", + "x": 413, + "y": 451, + "width": 590, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 2110908610, + "version": 121, + "versionNonce": 1359093854, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123932, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 590, + 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "h6afKQg60_TUv760egqOh", + "type": "arrow", + "x": 409, + "y": 452, + "width": 0, + "height": 333, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 2097272414, + "version": 104, + "versionNonce": 1341463682, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123932, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + -333 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": "arrow" + }, + { + "id": "eGOuo0bWAAi7fmHaAv91w", + "type": "text", + "x": 277, + "y": 258, + "width": 105.46875, + "height": 48, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 207665346, + "version": 79, + "versionNonce": 718955678, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123932, + "link": null, + "locked": false, + "text": 
"Response\nTime (ms)", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Response\nTime (ms)", + "lineHeight": 1.2 + }, + { + "id": "5CmJv_sYMYz7VFEorhafN", + "type": "text", + "x": 626, + "y": 484, + "width": 117.1875, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 314589598, + "version": 60, + "versionNonce": 1373781058, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123932, + "link": null, + "locked": false, + "text": "Percentile", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "Percentile", + "lineHeight": 1.2 + }, + { + "id": "bdJisICrQfqp4ltUJcpyj", + "type": "line", + "x": 412, + "y": 389, + "width": 557, + "height": 0, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 829407234, + "version": 110, + "versionNonce": 1175294914, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123932, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 557, + 0 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "type": "line", + "version": 147, + "versionNonce": 437780830, + "isDeleted": false, + "id": "U-4Pzx2USWRWSxEtMsASo", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 413.2104943774641, + "y": 348.47409456744793, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 557, + "height": 0, + "seed": 1786988738, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965123932, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 557, + 0 + ] + ] + }, + { + "type": "line", + "version": 163, + "versionNonce": 879348610, + "isDeleted": false, + "id": "Nge0FoeBIHyrgRt4NukeD", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 415.13378942236307, + "y": 320.8426383104471, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 557, + "height": 0, + "seed": 2083702594, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965123932, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 557, + 0 + ] + ] + }, + { + "type": "line", + "version": 206, + "versionNonce": 2022450590, + "isDeleted": false, + "id": "ubDRZfoHDGNzrr-phmyle", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 407.9821135781705, + "y": 225.50453187003728, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 557, + 
"height": 0, + "seed": 756894466, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965123932, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 557, + 0 + ] + ] + }, + { + "type": "line", + "version": 269, + "versionNonce": 1105791810, + "isDeleted": false, + "id": "atrswTiXD-Xk7eBFvOc6L", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "angle": 0, + "x": 411.2570978663861, + "y": 175.07110832706093, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "width": 557, + "height": 0, + "seed": 1711478110, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "boundElements": [], + "updated": 1709965123932, + "link": null, + "locked": false, + "startBinding": null, + "endBinding": null, + "lastCommittedPoint": null, + "startArrowhead": null, + "endArrowhead": null, + "points": [ + [ + 0, + 0 + ], + [ + 557, + 0 + ] + ] + }, + { + "id": "WEBogZk998fdvopmv9F_a", + "type": "freedraw", + "x": 472, + "y": 387, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 2099526494, + "version": 14, + "versionNonce": 869914142, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "uCZfxwrZXApBJVoaKPZP7", + "type": "freedraw", + "x": 519, + "y": 378, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1809338398, + "version": 14, + "versionNonce": 1877779138, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "y_qBHrT9Ix9xbx6eZSGXX", + "type": "freedraw", + "x": 555, + "y": 375, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 975832862, + "version": 10, + "versionNonce": 1458234078, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "ozP1zcHn272j7jeYSy8Oj", + "type": "freedraw", + "x": 615, + "y": 350, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + 
"opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1602008030, + "version": 10, + "versionNonce": 198164994, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "2EMdBPfkXb5vvfMvcqF1b", + "type": "freedraw", + "x": 650, + "y": 345, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1420662942, + "version": 10, + "versionNonce": 730257182, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "B6q0en6zlWSei-K5e44Ac", + "type": "freedraw", + "x": 679, + "y": 349, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 2127017310, + "version": 10, + "versionNonce": 1447834050, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "yFsk1w-hcYjlvQ_Bzfn3f", + "type": "freedraw", + "x": 709, + "y": 344, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 985008670, + "version": 10, + "versionNonce": 689587038, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "zQCmdl9wD1nW6wd2BkWRJ", + "type": "freedraw", + "x": 731, + "y": 320, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1766037214, + "version": 10, + "versionNonce": 1059367298, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "tx7cv_tWhyFwuU44lx4sV", + "type": "freedraw", + "x": 772, + "y": 269, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 
100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 2065390494, + "version": 10, + "versionNonce": 838215582, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "sWQ1jR8AcR0feKE7ZpGDo", + "type": "freedraw", + "x": 816, + "y": 224, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 2093504606, + "version": 10, + "versionNonce": 764946754, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "hSavKw3rhZ36G4ijvAYvY", + "type": "freedraw", + "x": 800, + "y": 257, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 582323486, + "version": 10, + "versionNonce": 910918622, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "FX-jyeopRjNS15fh_OoK6", + "type": "freedraw", + "x": 861, + "y": 176, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1429076446, + "version": 10, + "versionNonce": 1225109762, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "EZKyrUSQ6K59yqcB_Ccwb", + "type": "freedraw", + "x": 592, + "y": 359, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 862339906, + "version": 9, + "versionNonce": 1961640130, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "ebAQrymXDDrj_XM6AYfGt", + "type": "freedraw", + "x": 441, + "y": 396, + "width": 0.0001, + "height": 0.0001, + "angle": 0, + "strokeColor": "#e03131", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 4, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + 
"groupIds": [], + "frameId": null, + "roundness": null, + "seed": 880226946, + "version": 9, + "versionNonce": 1334223966, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0.0001, + 0.0001 + ] + ], + "pressures": [], + "simulatePressure": true, + "lastCommittedPoint": [ + 0.0001, + 0.0001 + ] + }, + { + "id": "TH-aJYYbte0UrvJxyaQi3", + "type": "line", + "x": 859, + "y": 181, + "width": 0, + "height": 269, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 347391646, + "version": 93, + "versionNonce": 1848535198, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 269 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "id": "sV-foxwcdA4uTTC8FQmEz", + "type": "line", + "x": 815, + "y": 226, + "width": 0, + "height": 220, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 921314434, + "version": 79, + "versionNonce": 549484610, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 220 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "id": "B_HN1pOxcc5mSH71wlGf3", + "type": "line", + "x": 440, + "y": 396, + "width": 0, + "height": 55, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": { + "type": 2 + }, + "seed": 947239426, + "version": 23, + "versionNonce": 601072862, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "points": [ + [ + 0, + 0 + ], + [ + 0, + 55 + ] + ], + "lastCommittedPoint": null, + "startBinding": null, + "endBinding": null, + "startArrowhead": null, + "endArrowhead": null + }, + { + "id": "G0MdFb6wtdBQk96q83qBE", + "type": "text", + "x": 842, + "y": 465, + "width": 35.15625, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 688419550, + "version": 25, + "versionNonce": 1884037122, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "text": "100", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "100", + "lineHeight": 1.2 + }, + { + "id": "gtaBLlrgbKBhyW-Q6dyhA", + "type": "text", + "x": 428, + "y": 468, + "width": 23.4375, + "height": 24, + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "transparent", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": 
"dotted", + "roughness": 1, + "opacity": 100, + "groupIds": [], + "frameId": null, + "roundness": null, + "seed": 1138359454, + "version": 22, + "versionNonce": 570906910, + "isDeleted": false, + "boundElements": null, + "updated": 1709965123933, + "link": null, + "locked": false, + "text": "10", + "fontSize": 20, + "fontFamily": 3, + "textAlign": "left", + "verticalAlign": "top", + "containerId": null, + "originalText": "10", + "lineHeight": 1.2 + } + ], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} \ No newline at end of file diff --git a/assets/img/high-level-design/percentile-distribution-response-time.svg b/assets/img/high-level-design/percentile-distribution-response-time.svg new file mode 100644 index 0000000..9378ba9 --- /dev/null +++ b/assets/img/high-level-design/percentile-distribution-response-time.svg @@ -0,0 +1,21 @@ + + + + + + + + ResponseTime (ms)Percentile10010 \ No newline at end of file diff --git a/assets/img/high-level-design/pipes-and-filters.png b/assets/img/high-level-design/pipes-and-filters.png new file mode 100644 index 0000000..aa7bcda Binary files /dev/null and b/assets/img/high-level-design/pipes-and-filters.png differ diff --git a/assets/img/high-level-design/rolling-deployment-pattern.png b/assets/img/high-level-design/rolling-deployment-pattern.png new file mode 100644 index 0000000..958bb74 Binary files /dev/null and b/assets/img/high-level-design/rolling-deployment-pattern.png differ diff --git a/assets/img/high-level-design/saga-pattern.png b/assets/img/high-level-design/saga-pattern.png new file mode 100644 index 0000000..846f289 Binary files /dev/null and b/assets/img/high-level-design/saga-pattern.png differ diff --git a/assets/img/high-level-design/scatter-gather.png b/assets/img/high-level-design/scatter-gather.png new file mode 100644 index 0000000..33d8a66 Binary files /dev/null and b/assets/img/high-level-design/scatter-gather.png differ diff --git a/assets/img/high-level-design/sidecar-pattern.png b/assets/img/high-level-design/sidecar-pattern.png new file mode 100644 index 0000000..468816b Binary files /dev/null and b/assets/img/high-level-design/sidecar-pattern.png differ diff --git a/assets/img/high-level-design/strangler-fig-pattern.png b/assets/img/high-level-design/strangler-fig-pattern.png new file mode 100644 index 0000000..9c64da5 Binary files /dev/null and b/assets/img/high-level-design/strangler-fig-pattern.png differ diff --git a/assets/img/high-level-design/testing-pyramid.png b/assets/img/high-level-design/testing-pyramid.png new file mode 100644 index 0000000..68c548d Binary files /dev/null and b/assets/img/high-level-design/testing-pyramid.png differ diff --git a/assets/img/high-level-design/transactional-outbox-pattern.png b/assets/img/high-level-design/transactional-outbox-pattern.png new file mode 100644 index 0000000..6ec19f3 Binary files /dev/null and b/assets/img/high-level-design/transactional-outbox-pattern.png differ diff --git a/assets/img/high-level-design/two-phase-commit.png b/assets/img/high-level-design/two-phase-commit.png new file mode 100644 index 0000000..734bd88 Binary files /dev/null and b/assets/img/high-level-design/two-phase-commit.png differ diff --git a/assets/img/java/generics-typecasting.png b/assets/img/java/generics-typecasting.png new file mode 100644 index 0000000..24c647c Binary files /dev/null and b/assets/img/java/generics-typecasting.png differ diff --git a/assets/img/java/inheritance.drawio b/assets/img/java/inheritance.drawio new file mode 
100644 index 0000000..c3ae610 --- /dev/null +++ b/assets/img/java/inheritance.drawio @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/java/inheritance.drawio.png b/assets/img/java/inheritance.drawio.png new file mode 100644 index 0000000..0bdcf55 Binary files /dev/null and b/assets/img/java/inheritance.drawio.png differ diff --git a/assets/img/java/io-bound-architecture.drawio b/assets/img/java/io-bound-architecture.drawio new file mode 100644 index 0000000..35fc0b1 --- /dev/null +++ b/assets/img/java/io-bound-architecture.drawio @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/java/io-bound-architecture.drawio.png b/assets/img/java/io-bound-architecture.drawio.png new file mode 100644 index 0000000..05c9a47 Binary files /dev/null and b/assets/img/java/io-bound-architecture.drawio.png differ diff --git a/assets/img/java/java-main-output.png b/assets/img/java/java-main-output.png new file mode 100644 index 0000000..af0a54f Binary files /dev/null and b/assets/img/java/java-main-output.png differ diff --git a/assets/img/java/multithreading.drawio b/assets/img/java/multithreading.drawio new file mode 100644 index 0000000..016dcb6 --- /dev/null +++ b/assets/img/java/multithreading.drawio @@ -0,0 +1 @@ +zZhdT8IwFIZ/zS5NWLsNuISBeqFG5cLoXbNVtqRbsRQ3/PUW1+6DQoJR2Lkh7duuH0/7nh3m4DArbwRZJfc8psxBg7h08MxBaBSM1O9O2FaC7w4qYSnSuJLcRlikX1SLptsmjem601FyzmS66ooRz3MayY5GhOBFt9s7Z91ZV2RJLWEREWarL2ksE70tNGz0W5ouEzOzG4yrloyYznon64TEvGhJeO7gUHAuq1JWhpTt2Bku1XPXR1rrhQmay1Me+MwnaPb0chNPP4LXN6+cPbzmV3qUT8I2esN6sXJrCAi+yWO6G2Tg4GmRpJIuViTatRbqyJWWyIypmquK7yljIWdcqHrOc9VpqmegQtLy6NLdGoi6SJRnVIqt6mIeMAz1JfJ0tWhOxPO1lrROA421SPQtWNZDN6BUQbP6BTdkcQu5oEpx/8bvP2DhLizfhuUODsAanouVZ7G6g0fJHZ6ICZ8Lk4l4bU5/dOMZONWBucWp1i7DyY5ZADhhHxwnO0bd4d45WffpQHjy8EU5YYvT8+QeHqhx76ACC9Sj4BFdr9XrHhquut5bPEf2vQKTI9QZFJQcAYFMEvYx9Z8kIB+wBy1c/XvQDllgPLifMfTvwSFED+5jAuDBEWAPWrj69+AYrge9AJgHzSnA8uA+pv49iO0/gXA8aOE6owdVtfmE+NPW+g6L598= \ No newline at end of file diff --git a/assets/img/java/multithreading.drawio.png b/assets/img/java/multithreading.drawio.png new file mode 100644 index 0000000..c2a0487 Binary files /dev/null and b/assets/img/java/multithreading.drawio.png differ diff --git a/assets/img/java/phases-and-goals.png b/assets/img/java/phases-and-goals.png new file mode 100644 index 0000000..e8025bd Binary files /dev/null and b/assets/img/java/phases-and-goals.png differ diff --git a/assets/img/java/protected-caveat.png b/assets/img/java/protected-caveat.png new file mode 100644 index 0000000..71cc04c Binary files /dev/null and b/assets/img/java/protected-caveat.png differ diff --git a/assets/img/low-level-design/activity.drawio b/assets/img/low-level-design/activity.drawio new file mode 100644 index 0000000..dc39d22 --- /dev/null +++ b/assets/img/low-level-design/activity.drawio @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/low-level-design/activity.drawio.png b/assets/img/low-level-design/activity.drawio.png new file mode 100644 index 0000000..8fcbb30 Binary files /dev/null and b/assets/img/low-level-design/activity.drawio.png differ diff --git 
a/assets/img/low-level-design/interpreter-ast.drawio b/assets/img/low-level-design/interpreter-ast.drawio new file mode 100644 index 0000000..47381e7 --- /dev/null +++ b/assets/img/low-level-design/interpreter-ast.drawio @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/low-level-design/interpreter-ast.drawio.png b/assets/img/low-level-design/interpreter-ast.drawio.png new file mode 100644 index 0000000..31603c3 Binary files /dev/null and b/assets/img/low-level-design/interpreter-ast.drawio.png differ diff --git a/assets/img/low-level-design/use-case.drawio b/assets/img/low-level-design/use-case.drawio new file mode 100644 index 0000000..59d7f85 --- /dev/null +++ b/assets/img/low-level-design/use-case.drawio @@ -0,0 +1,79 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/low-level-design/use-case.drawio.png b/assets/img/low-level-design/use-case.drawio.png new file mode 100644 index 0000000..2579079 Binary files /dev/null and b/assets/img/low-level-design/use-case.drawio.png differ diff --git a/assets/img/messaging-systems/dead-letter-exchange.png b/assets/img/messaging-systems/dead-letter-exchange.png new file mode 100644 index 0000000..34771a1 Binary files /dev/null and b/assets/img/messaging-systems/dead-letter-exchange.png differ diff --git a/assets/img/messaging-systems/queue-processor-design.png b/assets/img/messaging-systems/queue-processor-design.png new file mode 100644 index 0000000..66a67ba Binary files /dev/null and b/assets/img/messaging-systems/queue-processor-design.png differ diff --git a/assets/img/messaging-systems/queue-router-design.png b/assets/img/messaging-systems/queue-router-design.png new file mode 100644 index 0000000..6825144 Binary files /dev/null and b/assets/img/messaging-systems/queue-router-design.png differ diff --git a/assets/img/relational-databases/er-diagram-example b/assets/img/relational-databases/er-diagram-example new file mode 100644 index 0000000..fe4933d --- /dev/null +++ b/assets/img/relational-databases/er-diagram-example @@ -0,0 +1 @@ 
+7Vtdc5s4FP01fmwGJMDw2DrdbWcnO53JzG7zqBjFMAHkEXJt769fYb6v7IApILeTlwRdCwFHR7rn3gsLvIoPf3KyDR6YT6MFMvzDAt8vEFoiLP9mhmNusLCRGzY89HOTWRsew/9oYSy77UKfpq2OgrFIhNu2cc2ShK5Fy0Y4Z/t2txcWta+6JRuqGB7XJFKt/4a+CHKri5a1/QsNN0F5ZdPx8l9iUnYuniQNiM/2DRP+vMArzpjIj+LDikYZdiUu+Xl/XPi1ujFOE9HnhFX8bL9uv358iP8yAsz/eX1h+w/FKD9ItCseOBU7Pxsxv2dxLIHgbJf4NBvLWOBP+yAU9HFL1tmveznz0haIOJItUx4Wo1Iu6OHi7ZoVCJI8lMVU8KPsUp7gFLgVxHGL5r4xC4UpaEyAVdhIMe+bauAaGnlQoHMFUkhBaoGcSF71004ebLIDeX5hkleorAqS8mKSvXQWFK02iEgziFgBUa5bQeS6RUbC7tJbwko34SwFq4TE9JYQMi3NENln1uRQeF7CKFqxiHHZTlhCx0EM2y3ElrYCmHcGMHsqwJwegCX+x8xv1jA0QGo7AXoIxffil+z4KbPf2UXr/tDodn8sG4l8ju/NRuOsrFmfdmqV5+X3SX3FW4OZkM/CdnxNu3dyQfiGii7fqM5sY+7sN+aO04iI8Ef7ds9NaHGFbyzMvO4l9+cASuRPWZzU9PpgnEpIlWsWjJOjoIxzYlf10MMJt3wnXLWT3zbhkAGkAh7KOBswzp2Xcu475SrPeOOUg0zxBlLOXIKBoAOdmHKeQrktZxtOYoV5c0dRlntborZ0P79YGOWAzVG38DXVuF17cAAxMnUHm6Yasq/lhpKqMM29KjECvlb7slQD82wVamQTAvoXO7ohusF4HLu3BlKfiPy3F1+FHOgWX8b52Z1HfdlwExqqvmygvhR6Tay+zPesxjWk0yr5HQ9wZTmQdEuY2UYzk26azEZNtKcmzzpIV/PsqSTuPKQrdUMn63J0tCXT7JG2OgxVycyBpjlycuMCg4wOBp1nq/kmW0dkXfnQ3XudpZN1nWTpyzoHeGpszMw6Nb9xKtuFiVqw4wGLn3fpdfUWn76QXVRH+T9ZpAJxaE9RDNOU41WOz+U8xoBtxDKVh+7alapqy2yWqmwVNRNycTzY1PBdhe2awH0i6Mw2cNprfEiN5cfxEddojCF6RoeHaLyJo8NDwJrL0CofhgJn5jIfUpMj5VSNKkz60s7UQjqjJ+k8nZyzIVWG1vkcw7lDwGnMXOpDgyJ/n6TBiWnmWxRMBWevVFUocrziLUV06kW4aLT70klbyc1rzxh2nYE7jtEx0IWpl3NBjo1u26xDOgk51FhJkFcqr2XstqqS0K5blberdOtWfE63OiTOAEme0+zf31Ns8n2jz+YmP1/s6fZVFnozHkBZYCgIepfWobuAEmXiLR6rlc4HncrCWOhQFn1Jp/eVNVhQGJrbha9zzJ3bxWrkSRPOshNvzmvAUrJ2pzFH9NldWBlSxBlxwZZfqnStV6zVSSjvXw1NUMI4VkkKTb1g1bLzWMrEwE6beZbldnEva32jPJSPRfnV5Lp10gCZ6g0MIOALHjMXjfG50HFMcVsnwBzc2IQ+SEoZztt1PdmA9GkwErlem5HLpTctI3+XdJtljiRQqo8YdYlitfg8Urrtl/KyuC/ttCbcMCQLdI69vazb4a4Hs0426+9Q8+71x7z48/8= \ No newline at end of file diff --git a/assets/img/relational-databases/er-diagram-example.drawio.png b/assets/img/relational-databases/er-diagram-example.drawio.png new file mode 100644 index 0000000..2836ade Binary files /dev/null and b/assets/img/relational-databases/er-diagram-example.drawio.png differ diff --git a/assets/img/spark/aqe-shuffle-partitions.drawio b/assets/img/spark/aqe-shuffle-partitions.drawio new file mode 100644 index 0000000..42663cc --- /dev/null +++ b/assets/img/spark/aqe-shuffle-partitions.drawio @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/aqe-shuffle-partitions.drawio.png b/assets/img/spark/aqe-shuffle-partitions.drawio.png new file mode 100644 index 0000000..6dd1ac7 Binary files /dev/null and b/assets/img/spark/aqe-shuffle-partitions.drawio.png differ diff --git a/assets/img/spark/aqe-skew-joins.drawio b/assets/img/spark/aqe-skew-joins.drawio new file mode 100644 index 0000000..a3dfc59 --- /dev/null +++ b/assets/img/spark/aqe-skew-joins.drawio @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/aqe-skew-joins.drawio.png b/assets/img/spark/aqe-skew-joins.drawio.png new file mode 100644 index 0000000..6043c9a Binary files /dev/null and b/assets/img/spark/aqe-skew-joins.drawio.png differ diff --git a/assets/img/spark/broadcast-join-working.drawio b/assets/img/spark/broadcast-join-working.drawio new file mode 100644 index 0000000..12944ae --- /dev/null +++ b/assets/img/spark/broadcast-join-working.drawio @@ -0,0 +1,280 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/broadcast-join-working.drawio.png b/assets/img/spark/broadcast-join-working.drawio.png new file mode 100644 index 0000000..b319f41 Binary files /dev/null and b/assets/img/spark/broadcast-join-working.drawio.png differ diff --git a/assets/img/spark/bucket-by-output.png b/assets/img/spark/bucket-by-output.png new file mode 100644 index 0000000..f0bdb82 Binary files /dev/null and b/assets/img/spark/bucket-by-output.png differ diff --git a/assets/img/spark/execution-plan.jpg b/assets/img/spark/execution-plan.jpg new file mode 100644 index 0000000..3b846e9 Binary files /dev/null and b/assets/img/spark/execution-plan.jpg differ diff --git a/assets/img/spark/job-stages-tasks.drawio b/assets/img/spark/job-stages-tasks.drawio new file mode 100644 index 0000000..bf9504f --- /dev/null +++ b/assets/img/spark/job-stages-tasks.drawio @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/job-stages-tasks.drawio.png b/assets/img/spark/job-stages-tasks.drawio.png new file mode 100644 index 0000000..f7235a8 Binary files /dev/null and b/assets/img/spark/job-stages-tasks.drawio.png differ diff --git a/assets/img/spark/list-databases.png b/assets/img/spark/list-databases.png new file mode 100644 index 0000000..c853e2f Binary files /dev/null and b/assets/img/spark/list-databases.png differ diff --git a/assets/img/spark/partition-by-output.png b/assets/img/spark/partition-by-output.png new file mode 100644 index 0000000..ed03f76 Binary files /dev/null and b/assets/img/spark/partition-by-output.png differ diff --git a/assets/img/spark/repartition-output.png b/assets/img/spark/repartition-output.png new file mode 100644 index 0000000..d71d5a4 Binary files /dev/null and b/assets/img/spark/repartition-output.png differ diff --git a/assets/img/spark/shuffle-join-working.drawio b/assets/img/spark/shuffle-join-working.drawio new file mode 100644 index 0000000..7b6647b --- /dev/null +++ b/assets/img/spark/shuffle-join-working.drawio @@ -0,0 +1,310 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/shuffle-join-working.drawio.png b/assets/img/spark/shuffle-join-working.drawio.png new file mode 100644 index 0000000..f954263 Binary files /dev/null and b/assets/img/spark/shuffle-join-working.drawio.png differ diff --git a/assets/img/spark/simple-output.png b/assets/img/spark/simple-output.png new file mode 100644 index 0000000..1a80e8f Binary files /dev/null 
and b/assets/img/spark/simple-output.png differ diff --git a/assets/img/spark/spark-architecture.drawio b/assets/img/spark/spark-architecture.drawio new file mode 100644 index 0000000..8e457c2 --- /dev/null +++ b/assets/img/spark/spark-architecture.drawio @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/spark-architecture.drawio.png b/assets/img/spark/spark-architecture.drawio.png new file mode 100644 index 0000000..a61577f Binary files /dev/null and b/assets/img/spark/spark-architecture.drawio.png differ diff --git a/assets/img/spark/spark-ecosystem.drawio b/assets/img/spark/spark-ecosystem.drawio new file mode 100644 index 0000000..0e881a4 --- /dev/null +++ b/assets/img/spark/spark-ecosystem.drawio @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spark/spark-ecosystem.drawio.png b/assets/img/spark/spark-ecosystem.drawio.png new file mode 100644 index 0000000..0b4758a Binary files /dev/null and b/assets/img/spark/spark-ecosystem.drawio.png differ diff --git a/assets/img/spark/spark-streaming-jobs.png b/assets/img/spark/spark-streaming-jobs.png new file mode 100644 index 0000000..ed8d698 Binary files /dev/null and b/assets/img/spark/spark-streaming-jobs.png differ diff --git a/assets/img/spark/spark-to-java-types.png b/assets/img/spark/spark-to-java-types.png new file mode 100644 index 0000000..283ed14 Binary files /dev/null and b/assets/img/spark/spark-to-java-types.png differ diff --git a/assets/img/spark/streaming-input.png b/assets/img/spark/streaming-input.png new file mode 100644 index 0000000..96c5f29 Binary files /dev/null and b/assets/img/spark/streaming-input.png differ diff --git a/assets/img/spark/streaming-output-complete.png b/assets/img/spark/streaming-output-complete.png new file mode 100644 index 0000000..45b1d53 Binary files /dev/null and b/assets/img/spark/streaming-output-complete.png differ diff --git a/assets/img/spark/streaming-output-update.png b/assets/img/spark/streaming-output-update.png new file mode 100644 index 0000000..d0fbd66 Binary files /dev/null and b/assets/img/spark/streaming-output-update.png differ diff --git a/assets/img/spring-reactive/schedulers.drawio b/assets/img/spring-reactive/schedulers.drawio new file mode 100644 index 0000000..aaf0646 --- /dev/null +++ b/assets/img/spring-reactive/schedulers.drawio @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spring-reactive/schedulers.drawio.png b/assets/img/spring-reactive/schedulers.drawio.png new file mode 100644 index 0000000..c0b9169 Binary files /dev/null and b/assets/img/spring-reactive/schedulers.drawio.png differ diff --git a/assets/img/spring/envers.png b/assets/img/spring/envers.png new file mode 100644 index 0000000..3987569 Binary files /dev/null and b/assets/img/spring/envers.png differ diff --git a/assets/img/spring/join-table-one-to-many.png b/assets/img/spring/join-table-one-to-many.png new file mode 100644 index 0000000..c0f1641 Binary files /dev/null and b/assets/img/spring/join-table-one-to-many.png differ diff --git a/assets/img/spring/joined.png b/assets/img/spring/joined.png new file mode 100644 index 0000000..111df20 Binary files /dev/null and b/assets/img/spring/joined.png differ diff --git 
a/assets/img/spring/many-to-many-with-entity.png b/assets/img/spring/many-to-many-with-entity.png new file mode 100644 index 0000000..691afbc Binary files /dev/null and b/assets/img/spring/many-to-many-with-entity.png differ diff --git a/assets/img/spring/many-to-many.png b/assets/img/spring/many-to-many.png new file mode 100644 index 0000000..02bff3a Binary files /dev/null and b/assets/img/spring/many-to-many.png differ diff --git a/assets/img/spring/mapped-superclass.png b/assets/img/spring/mapped-superclass.png new file mode 100644 index 0000000..04facbf Binary files /dev/null and b/assets/img/spring/mapped-superclass.png differ diff --git a/assets/img/spring/order-column.png b/assets/img/spring/order-column.png new file mode 100644 index 0000000..6eca471 Binary files /dev/null and b/assets/img/spring/order-column.png differ diff --git a/assets/img/spring/secondary-table.png b/assets/img/spring/secondary-table.png new file mode 100644 index 0000000..4fbb3b6 Binary files /dev/null and b/assets/img/spring/secondary-table.png differ diff --git a/assets/img/spring/single-table.png b/assets/img/spring/single-table.png new file mode 100644 index 0000000..549b542 Binary files /dev/null and b/assets/img/spring/single-table.png differ diff --git a/assets/img/spring/spring-security-architecture.drawio b/assets/img/spring/spring-security-architecture.drawio new file mode 100644 index 0000000..71f2c87 --- /dev/null +++ b/assets/img/spring/spring-security-architecture.drawio @@ -0,0 +1,121 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spring/spring-security-architecture.drawio.png b/assets/img/spring/spring-security-architecture.drawio.png new file mode 100644 index 0000000..61c7cf8 Binary files /dev/null and b/assets/img/spring/spring-security-architecture.drawio.png differ diff --git a/assets/img/spring/webmvc-architecture.drawio b/assets/img/spring/webmvc-architecture.drawio new file mode 100644 index 0000000..cd714ee --- /dev/null +++ b/assets/img/spring/webmvc-architecture.drawio @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/spring/webmvc-architecture.drawio.png b/assets/img/spring/webmvc-architecture.drawio.png new file mode 100644 index 0000000..6a645c8 Binary files /dev/null and b/assets/img/spring/webmvc-architecture.drawio.png differ diff --git a/assets/img/warehouse-and-snowflake/b-tree.drawio b/assets/img/warehouse-and-snowflake/b-tree.drawio new file mode 100644 index 0000000..7466061 --- /dev/null +++ b/assets/img/warehouse-and-snowflake/b-tree.drawio @@ -0,0 +1 @@ 
+7VZNb9swDP01BrZDh/gjaXNM7G7doUGxHNaeBsFibaGyFShKbO/Xj6rkD9kpuhYDOhS9WOITRYp8z7K9MC7qb5Ls8mtBgXvBjNZemHhBcLG4wKcGGgPM/cgAmWTUQH4PbNlvsODMogdGYe84KiG4YjsXTEVZQqocjEgpKtftXnA3645kMAG2KeFT9CejKrdlBec9fgUsy9vM/mJpVgrSOttK9jmhohpA4aUXxlIIZWZFHQPXvWv7YvZ9fWK1O5iEUv3NhmSxuf21fLhJo/PrDVnRIN58P7NRjoQfbMHxCu1PiMc4BmYIzRB9tpWopm2PghqTr3NVcAR8nO6VFA8QCy4kIqUo0XN9zzgfQYSzrEQzxeMD4usjSMWw8Su7UDBKdZp1lTMF2x1Jdc4KVYaYFIeSgq5shpYtAgNA/WR3/K7nqFUQBSjZoEsrVMuSlWlkzWrAuYXyAd2hxYhVWdbF7YnAieXiBbwEJ3j5kQyYec9cRC4X/uyNyYhOkLG90mS078c7JsMP/jM25hM2Js2Hkq703a+7yMl+z1KXCrdJUDN1q+df5ta6s356ntQDt6RpjRJLGWzS5t1wrd/2aDUOIUAnH50RHViOOMgUnr+6FZEZqOeukim9A/rmJ+hrMQmcKHZ0j3uKU5vhRjAspFNPuHTV00mlDWHKtLuGX69RoGh0KUz0ZfowCfQosa7s16tu8aG6F6kuelPVLUZiWb5Wdf4oUPSvVIdm/+tn3Pv/5/DyDw== \ No newline at end of file diff --git a/assets/img/warehouse-and-snowflake/b-tree.drawio.png b/assets/img/warehouse-and-snowflake/b-tree.drawio.png new file mode 100644 index 0000000..85be4c2 Binary files /dev/null and b/assets/img/warehouse-and-snowflake/b-tree.drawio.png differ diff --git a/assets/img/warehouse-and-snowflake/data-warehouse.drawio b/assets/img/warehouse-and-snowflake/data-warehouse.drawio new file mode 100644 index 0000000..0af1f00 --- /dev/null +++ b/assets/img/warehouse-and-snowflake/data-warehouse.drawio @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/img/warehouse-and-snowflake/data-warehouse.png b/assets/img/warehouse-and-snowflake/data-warehouse.png new file mode 100644 index 0000000..6057f9e Binary files /dev/null and b/assets/img/warehouse-and-snowflake/data-warehouse.png differ diff --git a/assets/img/warehouse-and-snowflake/role-hierarchy.png b/assets/img/warehouse-and-snowflake/role-hierarchy.png new file mode 100644 index 0000000..bd11e70 Binary files /dev/null and b/assets/img/warehouse-and-snowflake/role-hierarchy.png differ diff --git a/assets/img/warehouse-and-snowflake/warehouse-architecture.jpg b/assets/img/warehouse-and-snowflake/warehouse-architecture.jpg new file mode 100644 index 0000000..5e1a7b1 Binary files /dev/null and b/assets/img/warehouse-and-snowflake/warehouse-architecture.jpg differ diff --git a/assets/js/data/search.json b/assets/js/data/search.json new file mode 100644 index 0000000..2601ed0 --- /dev/null +++ b/assets/js/data/search.json @@ -0,0 +1,20 @@ +--- +layout: compress +swcache: true +--- + +[ + {% for post in site.posts %} + { + "title": {{ post.title | jsonify }}, + "url": {{ post.url | relative_url | jsonify }}, + "categories": {{ post.categories | join: ', ' | jsonify }}, + "tags": {{ post.tags | join: ', ' | jsonify }}, + "date": "{{ post.date }}", + {% include no-linenos.html content=post.content %} + {% assign _content = content | strip_html | strip_newlines %} + "snippet": {{ _content | truncate: 200 | jsonify }}, + "content": {{ _content | jsonify }} + }{% unless forloop.last %},{% endunless %} + {% endfor %} +] diff --git a/assets/js/data/swcache.js b/assets/js/data/swcache.js new file mode 100644 index 0000000..9ff3899 --- /dev/null +++ b/assets/js/data/swcache.js @@ -0,0 +1,50 @@ +--- +layout: compress + +# The list to be cached by PWA +--- + +const resource = [ + /* --- CSS --- */ + '{{ "/assets/css/style.css" | relative_url }}', + + /* --- PWA --- */ + '{{ "/app.js" | relative_url }}', + '{{ "/sw.js" | relative_url }}', + + /* --- HTML --- */ + '{{ "/index.html" | 
relative_url }}', + '{{ "/404.html" | relative_url }}', + + {% for tab in site.tabs %} + '{{ tab.url | relative_url }}', + {% endfor %} + + /* --- Favicons & compressed JS --- */ + {% assign cache_list = site.static_files | where: 'swcache', true %} + {% for file in cache_list %} + '{{ file.path | relative_url }}'{%- unless forloop.last -%},{%- endunless -%} + {% endfor %} +]; + +/* The request url with below domain will be cached */ +const allowedDomains = [ + {% if site.google_analytics.id != empty and site.google_analytics.id %} + 'www.googletagmanager.com', + 'www.google-analytics.com', + {% endif %} + + '{{ site.url | split: "//" | last }}', + + {% if site.img_cdn contains '//' and site.img_cdn %} + '{{ site.img_cdn | split: '//' | last | split: '/' | first }}', + {% endif %} + + 'fonts.gstatic.com', + 'fonts.googleapis.com', + 'cdn.jsdelivr.net', + 'polyfill.io' +]; + +/* Requests that include the following path will be banned */ +const denyUrls = []; diff --git a/assets/js/dist/categories.min.js b/assets/js/dist/categories.min.js new file mode 100644 index 0000000..bebf80f --- /dev/null +++ b/assets/js/dist/categories.min.js @@ -0,0 +1,6 @@ +/*! + * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy/) + * © 2019 Cotes Chung + * MIT Licensed + */ +!function(){"use strict";function e(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function t(e,t){for(var r=0;re.length)&&(t=e.length);for(var r=0,o=new Array(t);r.row"),v=$("#topbar-title"),m=$("#search-wrapper"),g=$("#search-result-wrapper"),y=$("#search-results"),h=$("#search-input"),C=$("#search-hints"),w=$("html,body"),k="loaded",A="unloaded",S="input-focus",T="d-flex",j=function(){function t(){e(this,t)}return r(t,null,[{key:"on",value:function(){t.offset=window.scrollY,w.scrollTop(0)}},{key:"off",value:function(){w.scrollTop(t.offset)}}]),t}();o(j,"offset",0),o(j,"resultVisible",!1);var E=function(){function t(){e(this,t)}return r(t,null,[{key:"on",value:function(){f.addClass(A),v.addClass(A),d.addClass(A),m.addClass(T),p.addClass(k)}},{key:"off",value:function(){p.removeClass(k),m.removeClass(T),f.removeClass(A),v.removeClass(A),d.removeClass(A)}}]),t}(),O=function(){function t(){e(this,t)}return r(t,null,[{key:"on",value:function(){j.resultVisible||(j.on(),g.removeClass(A),b.addClass(A),j.resultVisible=!0)}},{key:"off",value:function(){j.resultVisible&&(y.empty(),C.hasClass(A)&&C.removeClass(A),g.addClass(A),b.removeClass(A),j.off(),h.val(""),j.resultVisible=!1)}}]),t}();function x(){return p.hasClass(k)}var P=$(".collapse");var V,I;$(".code-header>button").children().attr("class"),V=$(window),I=$("#back-to-top"),V.on("scroll",(function(){V.scrollTop()>50?I.fadeIn():I.fadeOut()})),I.on("click",(function(){V.scrollTop(0)})),n(document.querySelectorAll('[data-bs-toggle="tooltip"]')).map((function(e){return new bootstrap.Tooltip(e)})),0!==i.length&&i.off().on("click",(function(e){var t=$(e.target),r=t.prop("tagName")==="button".toUpperCase()?t:t.parent();modeToggle.flipMode(),r.trigger("blur")})),$("#sidebar-trigger").on("click",c.toggle),$("#mask").on("click",c.toggle),d.on("click",(function(){E.on(),O.on(),h.trigger("focus")})),p.on("click",(function(){E.off(),O.off()})),h.on("focus",(function(){m.addClass(S)})),h.on("focusout",(function(){m.removeClass(S)})),h.on("input",(function(){""===h.val()?x()?C.removeClass(A):O.off():(O.on(),x()&&C.addClass(A))})),P.on("hide.bs.collapse",(function(){var e="h_"+$(this).attr("id").substring(2);e&&($("#".concat(e," 
.far.fa-folder-open")).attr("class","far fa-folder fa-fw"),$("#".concat(e," i.fas")).addClass("rotate"),$("#".concat(e)).removeClass("hide-border-bottom"))})),P.on("show.bs.collapse",(function(){var e="h_"+$(this).attr("id").substring(2);e&&($("#".concat(e," .far.fa-folder")).attr("class","far fa-folder-open fa-fw"),$("#".concat(e," i.fas")).removeClass("rotate"),$("#".concat(e)).addClass("hide-border-bottom"))}))}(); diff --git a/assets/js/dist/commons.min.js b/assets/js/dist/commons.min.js new file mode 100644 index 0000000..97d930b --- /dev/null +++ b/assets/js/dist/commons.min.js @@ -0,0 +1,6 @@ +/*! + * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy/) + * © 2019 Cotes Chung + * MIT Licensed + */ +!function(){"use strict";function e(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function t(e,t){for(var r=0;re.length)&&(t=e.length);for(var r=0,n=new Array(t);r.row"),m=$("#topbar-title"),v=$("#search-wrapper"),y=$("#search-result-wrapper"),g=$("#search-results"),h=$("#search-input"),C=$("#search-hints"),w=$("html,body"),k="loaded",A="unloaded",S="input-focus",T="d-flex",j=function(){function t(){e(this,t)}return r(t,null,[{key:"on",value:function(){t.offset=window.scrollY,w.scrollTop(0)}},{key:"off",value:function(){w.scrollTop(t.offset)}}]),t}();n(j,"offset",0),n(j,"resultVisible",!1);var E,O,x=function(){function t(){e(this,t)}return r(t,null,[{key:"on",value:function(){c.addClass(A),m.addClass(A),d.addClass(A),v.addClass(T),p.addClass(k)}},{key:"off",value:function(){p.removeClass(k),v.removeClass(T),c.removeClass(A),m.removeClass(A),d.removeClass(A)}}]),t}(),P=function(){function t(){e(this,t)}return r(t,null,[{key:"on",value:function(){j.resultVisible||(j.on(),y.removeClass(A),b.addClass(A),j.resultVisible=!0)}},{key:"off",value:function(){j.resultVisible&&(g.empty(),C.hasClass(A)&&C.removeClass(A),y.addClass(A),b.removeClass(A),j.off(),h.val(""),j.resultVisible=!1)}}]),t}();function V(){return p.hasClass(k)}E=$(window),O=$("#back-to-top"),E.on("scroll",(function(){E.scrollTop()>50?O.fadeIn():O.fadeOut()})),O.on("click",(function(){E.scrollTop(0)})),o(document.querySelectorAll('[data-bs-toggle="tooltip"]')).map((function(e){return new bootstrap.Tooltip(e)})),0!==l.length&&l.off().on("click",(function(e){var t=$(e.target),r=t.prop("tagName")==="button".toUpperCase()?t:t.parent();modeToggle.flipMode(),r.trigger("blur")})),$("#sidebar-trigger").on("click",f.toggle),$("#mask").on("click",f.toggle),d.on("click",(function(){x.on(),P.on(),h.trigger("focus")})),p.on("click",(function(){x.off(),P.off()})),h.on("focus",(function(){v.addClass(S)})),h.on("focusout",(function(){v.removeClass(S)})),h.on("input",(function(){""===h.val()?V()?C.removeClass(A):P.off():(P.on(),V()&&C.addClass(A))}))}(); diff --git a/assets/js/dist/home.min.js b/assets/js/dist/home.min.js new file mode 100644 index 0000000..f8cd3f1 --- /dev/null +++ b/assets/js/dist/home.min.js @@ -0,0 +1,6 @@ +/*! 
+ * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy/) + * © 2019 Cotes Chung + * MIT Licensed + */ +!function(){"use strict";function t(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}function e(t,e){for(var r=0;rt.length)&&(e=t.length);for(var r=0,n=new Array(e);r.row"),g=$("#topbar-title"),v=$("#search-wrapper"),y=$("#search-result-wrapper"),b=$("#search-results"),h=$("#search-input"),C=$("#search-hints"),w=$("html,body"),k="loaded",T="unloaded",j="input-focus",A="d-flex",S=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){e.offset=window.scrollY,w.scrollTop(0)}},{key:"off",value:function(){w.scrollTop(e.offset)}}]),e}();n(S,"offset",0),n(S,"resultVisible",!1);var x=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){f.addClass(T),g.addClass(T),d.addClass(T),v.addClass(A),m.addClass(k)}},{key:"off",value:function(){m.removeClass(k),v.removeClass(A),f.removeClass(T),g.removeClass(T),d.removeClass(T)}}]),e}(),E=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){S.resultVisible||(S.on(),y.removeClass(T),p.addClass(T),S.resultVisible=!0)}},{key:"off",value:function(){S.resultVisible&&(b.empty(),C.hasClass(T)&&C.removeClass(T),y.addClass(T),p.removeClass(T),S.off(),h.val(""),S.resultVisible=!1)}}]),e}();function F(){return m.hasClass(k)}$(".collapse");function O(t){t.parent().removeClass("shimmer")}$(".code-header>button").children().attr("class");var D,P,V,I=function(){function e(){t(this,e)}return r(e,null,[{key:"attrTimestamp",get:function(){return"data-ts"}},{key:"attrDateFormat",get:function(){return"data-df"}},{key:"locale",get:function(){return $("html").attr("lang").substring(0,2)}},{key:"getTimestamp",value:function(t){return Number(t.attr(e.attrTimestamp))}},{key:"getDateFormat",value:function(t){return t.attr(e.attrDateFormat)}}]),e}();D=$(window),P=$("#back-to-top"),D.on("scroll",(function(){D.scrollTop()>50?P.fadeIn():P.fadeOut()})),P.on("click",(function(){D.scrollTop(0)})),o(document.querySelectorAll('[data-bs-toggle="tooltip"]')).map((function(t){return new bootstrap.Tooltip(t)})),0!==l.length&&l.off().on("click",(function(t){var e=$(t.target),r=e.prop("tagName")==="button".toUpperCase()?e:e.parent();modeToggle.flipMode(),r.trigger("blur")})),$("#sidebar-trigger").on("click",c.toggle),$("#mask").on("click",c.toggle),d.on("click",(function(){x.on(),E.on(),h.trigger("focus")})),m.on("click",(function(){x.off(),E.off()})),h.on("focus",(function(){v.addClass(j)})),h.on("focusout",(function(){v.removeClass(j)})),h.on("input",(function(){""===h.val()?F()?C.removeClass(T):E.off():(E.on(),F()&&C.addClass(T))})),dayjs.locale(I.locale),dayjs.extend(window.dayjs_plugin_localizedFormat),$("[".concat(I.attrTimestamp,"]")).each((function(){var t=dayjs.unix(I.getTimestamp($(this))),e=t.format(I.getDateFormat($(this)));$(this).text(e),$(this).removeAttr(I.attrTimestamp),$(this).removeAttr(I.attrDateFormat);var r=$(this).attr("data-bs-toggle");if(void 0!==r&&"tooltip"===r){var n=t.format("llll");$(this).attr("data-bs-title",n),new bootstrap.Tooltip($(this))}})),(V=$("#core-wrapper img[data-src]")).length<=0||(document.addEventListener("lazyloaded",(function(t){O($(t.target))})),V.each((function(){$(this).hasClass("ls-is-cached")&&O($(this))})))}(); diff --git a/assets/js/dist/misc.min.js b/assets/js/dist/misc.min.js new file mode 100644 index 0000000..f365a6f --- /dev/null +++ b/assets/js/dist/misc.min.js @@ -0,0 +1,6 @@ +/*! 
+ * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy/) + * © 2019 Cotes Chung + * MIT Licensed + */ +!function(){"use strict";function t(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}function e(t,e){for(var r=0;rt.length)&&(e=t.length);for(var r=0,n=new Array(e);r.row"),v=$("#topbar-title"),g=$("#search-wrapper"),y=$("#search-result-wrapper"),b=$("#search-results"),h=$("#search-input"),C=$("#search-hints"),k=$("html,body"),w="loaded",T="unloaded",j="input-focus",A="d-flex",S=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){e.offset=window.scrollY,k.scrollTop(0)}},{key:"off",value:function(){k.scrollTop(e.offset)}}]),e}();n(S,"offset",0),n(S,"resultVisible",!1);var x=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){f.addClass(T),v.addClass(T),d.addClass(T),g.addClass(A),m.addClass(w)}},{key:"off",value:function(){m.removeClass(w),g.removeClass(A),f.removeClass(T),v.removeClass(T),d.removeClass(T)}}]),e}(),E=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){S.resultVisible||(S.on(),y.removeClass(T),p.addClass(T),S.resultVisible=!0)}},{key:"off",value:function(){S.resultVisible&&(b.empty(),C.hasClass(T)&&C.removeClass(T),y.addClass(T),p.removeClass(T),S.off(),h.val(""),S.resultVisible=!1)}}]),e}();function F(){return m.hasClass(w)}$(".collapse");$(".code-header>button").children().attr("class");var O,D,P=function(){function e(){t(this,e)}return r(e,null,[{key:"attrTimestamp",get:function(){return"data-ts"}},{key:"attrDateFormat",get:function(){return"data-df"}},{key:"locale",get:function(){return $("html").attr("lang").substring(0,2)}},{key:"getTimestamp",value:function(t){return Number(t.attr(e.attrTimestamp))}},{key:"getDateFormat",value:function(t){return t.attr(e.attrDateFormat)}}]),e}();O=$(window),D=$("#back-to-top"),O.on("scroll",(function(){O.scrollTop()>50?D.fadeIn():D.fadeOut()})),D.on("click",(function(){O.scrollTop(0)})),o(document.querySelectorAll('[data-bs-toggle="tooltip"]')).map((function(t){return new bootstrap.Tooltip(t)})),0!==l.length&&l.off().on("click",(function(t){var e=$(t.target),r=e.prop("tagName")==="button".toUpperCase()?e:e.parent();modeToggle.flipMode(),r.trigger("blur")})),$("#sidebar-trigger").on("click",c.toggle),$("#mask").on("click",c.toggle),d.on("click",(function(){x.on(),E.on(),h.trigger("focus")})),m.on("click",(function(){x.off(),E.off()})),h.on("focus",(function(){g.addClass(j)})),h.on("focusout",(function(){g.removeClass(j)})),h.on("input",(function(){""===h.val()?F()?C.removeClass(T):E.off():(E.on(),F()&&C.addClass(T))})),dayjs.locale(P.locale),dayjs.extend(window.dayjs_plugin_localizedFormat),$("[".concat(P.attrTimestamp,"]")).each((function(){var t=dayjs.unix(P.getTimestamp($(this))),e=t.format(P.getDateFormat($(this)));$(this).text(e),$(this).removeAttr(P.attrTimestamp),$(this).removeAttr(P.attrDateFormat);var r=$(this).attr("data-bs-toggle");if(void 0!==r&&"tooltip"===r){var n=t.format("llll");$(this).attr("data-bs-title",n),new bootstrap.Tooltip($(this))}}))}(); diff --git a/assets/js/dist/page.min.js b/assets/js/dist/page.min.js new file mode 100644 index 0000000..dcce2df --- /dev/null +++ b/assets/js/dist/page.min.js @@ -0,0 +1,6 @@ +/*! 
+ * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy/) + * © 2019 Cotes Chung + * MIT Licensed + */ +!function(){"use strict";function t(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}function e(t,e){for(var n=0;nt.length)&&(e=t.length);for(var n=0,r=new Array(e);n.row"),v=$("#topbar-title"),g=$("#search-wrapper"),b=$("#search-result-wrapper"),h=$("#search-results"),y=$("#search-input"),C=$("#search-hints"),w=$("html,body"),k="loaded",S="unloaded",A="input-focus",T="d-flex",E=function(){function e(){t(this,e)}return n(e,null,[{key:"on",value:function(){e.offset=window.scrollY,w.scrollTop(0)}},{key:"off",value:function(){w.scrollTop(e.offset)}}]),e}();r(E,"offset",0),r(E,"resultVisible",!1);var j=function(){function e(){t(this,e)}return n(e,null,[{key:"on",value:function(){f.addClass(S),v.addClass(S),d.addClass(S),g.addClass(T),p.addClass(k)}},{key:"off",value:function(){p.removeClass(k),g.removeClass(T),f.removeClass(S),v.removeClass(S),d.removeClass(S)}}]),e}(),x=function(){function e(){t(this,e)}return n(e,null,[{key:"on",value:function(){E.resultVisible||(E.on(),b.removeClass(S),m.addClass(S),E.resultVisible=!0)}},{key:"off",value:function(){E.resultVisible&&(h.empty(),C.hasClass(S)&&C.removeClass(S),b.addClass(S),m.removeClass(S),E.off(),y.val(""),E.resultVisible=!1)}}]),e}();function O(){return p.hasClass(k)}$(".collapse");var P=".code-header>button",V="fas fa-check",I="timeout",N="data-title-succeed",q="data-bs-original-title",z=2e3;function D(t){if($(t)[0].hasAttribute(I)){var e=$(t).attr(I);if(Number(e)>Date.now())return!0}return!1}function M(t){$(t).attr(I,Date.now()+z)}function U(t){$(t).removeAttr(I)}var B,J,L,Y=$(P).children().attr("class");function F(t){t.parent().removeClass("shimmer")}B=$(window),J=$("#back-to-top"),B.on("scroll",(function(){B.scrollTop()>50?J.fadeIn():J.fadeOut()})),J.on("click",(function(){B.scrollTop(0)})),o(document.querySelectorAll('[data-bs-toggle="tooltip"]')).map((function(t){return new bootstrap.Tooltip(t)})),0!==l.length&&l.off().on("click",(function(t){var e=$(t.target),n=e.prop("tagName")==="button".toUpperCase()?e:e.parent();modeToggle.flipMode(),n.trigger("blur")})),$("#sidebar-trigger").on("click",c.toggle),$("#mask").on("click",c.toggle),d.on("click",(function(){j.on(),x.on(),y.trigger("focus")})),p.on("click",(function(){j.off(),x.off()})),y.on("focus",(function(){g.addClass(A)})),y.on("focusout",(function(){g.removeClass(A)})),y.on("input",(function(){""===y.val()?O()?C.removeClass(S):x.off():(x.on(),O()&&C.addClass(S))})),(L=$("#core-wrapper img[data-src]")).length<=0||(document.addEventListener("lazyloaded",(function(t){F($(t.target))})),L.each((function(){$(this).hasClass("ls-is-cached")&&F($(this))}))),$(".popup")<=0||$(".popup").magnificPopup({type:"image",closeOnContentClick:!0,showCloseBtn:!1,zoom:{enabled:!0,duration:300,easing:"ease-in-out"}}),function(){if($(P).length){var t=new ClipboardJS(P,{target:function(t){return t.parentNode.nextElementSibling.querySelector("code .rouge-code")}});o(document.querySelectorAll(P)).map((function(t){return new bootstrap.Tooltip(t,{placement:"left"})})),t.on("success",(function(t){t.clearSelection();var e=t.trigger;D(e)||(!function(t){$(t).children().attr("class",V)}(e),function(t){var e=$(t).attr(N);$(t).attr(q,e).tooltip("show")}(e),M(e),setTimeout((function(){!function(t){$(t).tooltip("hide").removeAttr(q)}(e),function(t){$(t).children().attr("class",Y)}(e),U(e)}),z))}))}$("#copy-link").on("click",(function(t){var 
e=$(t.target);D(e)||navigator.clipboard.writeText(window.location.href).then((function(){var t=e.attr(q),n=e.attr(N);e.attr(q,n).tooltip("show"),M(e),setTimeout((function(){e.attr(q,t),U(e)}),z)}))}))}()}(); diff --git a/assets/js/dist/post.min.js b/assets/js/dist/post.min.js new file mode 100644 index 0000000..916e367 --- /dev/null +++ b/assets/js/dist/post.min.js @@ -0,0 +1,6 @@ +/*! + * Chirpy v6.1.0 (https://github.com/cotes2020/jekyll-theme-chirpy/) + * © 2019 Cotes Chung + * MIT Licensed + */ +!function(){"use strict";function t(t,e){if(!(t instanceof e))throw new TypeError("Cannot call a class as a function")}function e(t,e){for(var r=0;rt.length)&&(e=t.length);for(var r=0,n=new Array(e);r.row"),g=$("#topbar-title"),v=$("#search-wrapper"),h=$("#search-result-wrapper"),b=$("#search-results"),y=$("#search-input"),w=$("#search-hints"),C=$("html,body"),k="loaded",S="unloaded",T="input-focus",A="d-flex",j=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){e.offset=window.scrollY,C.scrollTop(0)}},{key:"off",value:function(){C.scrollTop(e.offset)}}]),e}();n(j,"offset",0),n(j,"resultVisible",!1);var x=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){f.addClass(S),g.addClass(S),d.addClass(S),v.addClass(A),p.addClass(k)}},{key:"off",value:function(){p.removeClass(k),v.removeClass(A),f.removeClass(S),g.removeClass(S),d.removeClass(S)}}]),e}(),E=function(){function e(){t(this,e)}return r(e,null,[{key:"on",value:function(){j.resultVisible||(j.on(),h.removeClass(S),m.addClass(S),j.resultVisible=!0)}},{key:"off",value:function(){j.resultVisible&&(b.empty(),w.hasClass(S)&&w.removeClass(S),h.addClass(S),m.removeClass(S),j.off(),y.val(""),j.resultVisible=!1)}}]),e}();function D(){return p.hasClass(k)}$(".collapse");var O=".code-header>button",F="fas fa-check",P="timeout",N="data-title-succeed",V="data-bs-original-title",q=2e3;function I(t){if($(t)[0].hasAttribute(P)){var e=$(t).attr(P);if(Number(e)>Date.now())return!0}return!1}function z(t){$(t).attr(P,Date.now()+q)}function L(t){$(t).removeAttr(P)}var M=$(O).children().attr("class");function U(t){t.parent().removeClass("shimmer")}var _,B,J,Y=function(){function e(){t(this,e)}return r(e,null,[{key:"attrTimestamp",get:function(){return"data-ts"}},{key:"attrDateFormat",get:function(){return"data-df"}},{key:"locale",get:function(){return $("html").attr("lang").substring(0,2)}},{key:"getTimestamp",value:function(t){return Number(t.attr(e.attrTimestamp))}},{key:"getDateFormat",value:function(t){return t.attr(e.attrDateFormat)}}]),e}();_=$(window),B=$("#back-to-top"),_.on("scroll",(function(){_.scrollTop()>50?B.fadeIn():B.fadeOut()})),B.on("click",(function(){_.scrollTop(0)})),o(document.querySelectorAll('[data-bs-toggle="tooltip"]')).map((function(t){return new bootstrap.Tooltip(t)})),0!==l.length&&l.off().on("click",(function(t){var e=$(t.target),r=e.prop("tagName")==="button".toUpperCase()?e:e.parent();modeToggle.flipMode(),r.trigger("blur")})),$("#sidebar-trigger").on("click",u.toggle),$("#mask").on("click",u.toggle),d.on("click",(function(){x.on(),E.on(),y.trigger("focus")})),p.on("click",(function(){x.off(),E.off()})),y.on("focus",(function(){v.addClass(T)})),y.on("focusout",(function(){v.removeClass(T)})),y.on("input",(function(){""===y.val()?D()?w.removeClass(S):E.off():(E.on(),D()&&w.addClass(S))})),(J=$("#core-wrapper 
img[data-src]")).length<=0||(document.addEventListener("lazyloaded",(function(t){U($(t.target))})),J.each((function(){$(this).hasClass("ls-is-cached")&&U($(this))}))),$(".popup")<=0||$(".popup").magnificPopup({type:"image",closeOnContentClick:!0,showCloseBtn:!1,zoom:{enabled:!0,duration:300,easing:"ease-in-out"}}),dayjs.locale(Y.locale),dayjs.extend(window.dayjs_plugin_localizedFormat),$("[".concat(Y.attrTimestamp,"]")).each((function(){var t=dayjs.unix(Y.getTimestamp($(this))),e=t.format(Y.getDateFormat($(this)));$(this).text(e),$(this).removeAttr(Y.attrTimestamp),$(this).removeAttr(Y.attrDateFormat);var r=$(this).attr("data-bs-toggle");if(void 0!==r&&"tooltip"===r){var n=t.format("llll");$(this).attr("data-bs-title",n),new bootstrap.Tooltip($(this))}})),function(){if($(O).length){var t=new ClipboardJS(O,{target:function(t){return t.parentNode.nextElementSibling.querySelector("code .rouge-code")}});o(document.querySelectorAll(O)).map((function(t){return new bootstrap.Tooltip(t,{placement:"left"})})),t.on("success",(function(t){t.clearSelection();var e=t.trigger;I(e)||(!function(t){$(t).children().attr("class",F)}(e),function(t){var e=$(t).attr(N);$(t).attr(V,e).tooltip("show")}(e),z(e),setTimeout((function(){!function(t){$(t).tooltip("hide").removeAttr(V)}(e),function(t){$(t).children().attr("class",M)}(e),L(e)}),q))}))}$("#copy-link").on("click",(function(t){var e=$(t.target);I(e)||navigator.clipboard.writeText(window.location.href).then((function(){var t=e.attr(V),r=e.attr(N);e.attr(V,r).tooltip("show"),z(e),setTimeout((function(){e.attr(V,t),L(e)}),q)}))}))}(),document.querySelector("#core-wrapper h2,#core-wrapper h3")&&tocbot.init({tocSelector:"#toc",contentSelector:".post-content",ignoreSelector:"[data-toc-skip]",headingSelector:"h2, h3",orderedList:!1,scrollSmooth:!1})}(); diff --git a/assets/js/pwa/app.js b/assets/js/pwa/app.js new file mode 100644 index 0000000..c798fe2 --- /dev/null +++ b/assets/js/pwa/app.js @@ -0,0 +1,47 @@ +--- +layout: compress +permalink: '/app.js' +--- + +const $notification = $('#notification'); +const $btnRefresh = $('#notification .toast-body>button'); + +if ('serviceWorker' in navigator) { + /* Registering Service Worker */ + navigator.serviceWorker.register('{{ "/sw.js" | relative_url }}') + .then(registration => { + + /* in case the user ignores the notification */ + if (registration.waiting) { + $notification.toast('show'); + } + + registration.addEventListener('updatefound', () => { + registration.installing.addEventListener('statechange', () => { + if (registration.waiting) { + if (navigator.serviceWorker.controller) { + $notification.toast('show'); + } + } + }); + }); + + $btnRefresh.click(() => { + if (registration.waiting) { + registration.waiting.postMessage('SKIP_WAITING'); + } + $notification.toast('hide'); + }); + }); + + let refreshing = false; + + /* Detect controller change and refresh all the opened tabs */ + navigator.serviceWorker.addEventListener('controllerchange', () => { + if (!refreshing) { + window.location.reload(); + refreshing = true; + } + }); +} + diff --git a/assets/js/pwa/sw.js b/assets/js/pwa/sw.js new file mode 100644 index 0000000..3213b4f --- /dev/null +++ b/assets/js/pwa/sw.js @@ -0,0 +1,90 @@ +--- +layout: compress +permalink: '/sw.js' +# PWA service worker +--- + +self.importScripts('{{ "/assets/js/data/swcache.js" | relative_url }}'); + +const cacheName = 'chirpy-{{ "now" | date: "%Y%m%d.%H%M%S" }}'; + +function verifyDomain(url) { + for (const domain of allowedDomains) { + const regex = 
RegExp(`^http(s)?:\/\/${domain}\/`); + if (regex.test(url)) { + return true; + } + } + + return false; +} + +function isExcluded(url) { + for (const item of denyUrls) { + if (url === item) { + return true; + } + } + return false; +} + +self.addEventListener('install', event => { + event.waitUntil( + caches.open(cacheName).then(cache => { + return cache.addAll(resource); + }) + ); +}); + +self.addEventListener('activate', event => { + event.waitUntil( + caches.keys().then(keyList => { + return Promise.all( + keyList.map(key => { + if (key !== cacheName) { + return caches.delete(key); + } + }) + ); + }) + ); +}); + +self.addEventListener('message', (event) => { + if (event.data === 'SKIP_WAITING') { + self.skipWaiting(); + } +}); + +self.addEventListener('fetch', event => { + event.respondWith( + caches.match(event.request).then(response => { + if (response) { + return response; + } + + return fetch(event.request).then(response => { + const url = event.request.url; + + if (event.request.method !== 'GET' || + !verifyDomain(url) || + isExcluded(url)) { + return response; + } + + /* + see: + */ + let responseToCache = response.clone(); + + caches.open(cacheName).then(cache => { + /* console.log('[sw] Caching new resource: ' + event.request.url); */ + cache.put(event.request, responseToCache); + }); + + return response; + }); + }) + ); +}); + diff --git a/assets/js/pwa/unregister.js b/assets/js/pwa/unregister.js new file mode 100644 index 0000000..bd91150 --- /dev/null +++ b/assets/js/pwa/unregister.js @@ -0,0 +1,12 @@ +--- +layout: compress +permalink: '/unregister.js' +--- + +if ('serviceWorker' in navigator) { + navigator.serviceWorker.getRegistrations().then((registrations) => { + for (let reg of registrations) { + reg.unregister(); + } + }); +} diff --git a/assets/robots.txt b/assets/robots.txt new file mode 100644 index 0000000..45c34e0 --- /dev/null +++ b/assets/robots.txt @@ -0,0 +1,10 @@ +--- +permalink: /robots.txt +# The robots rules +--- + +User-agent: * + +Disallow: /norobots/ + +Sitemap: {{ '/sitemap.xml' | absolute_url }} diff --git a/index.html b/index.html new file mode 100644 index 0000000..1357b08 --- /dev/null +++ b/index.html @@ -0,0 +1,4 @@ +--- +layout: home +# Index page +--- diff --git a/jekyll-theme-chirpy.gemspec b/jekyll-theme-chirpy.gemspec new file mode 100644 index 0000000..c577306 --- /dev/null +++ b/jekyll-theme-chirpy.gemspec @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +Gem::Specification.new do |spec| + spec.name = "jekyll-theme-chirpy" + spec.version = "6.1.0" + spec.authors = ["Cotes Chung"] + spec.email = ["cotes.chung@gmail.com"] + + spec.summary = "A minimal, responsive and feature-rich Jekyll theme for technical writing." 
+ spec.homepage = "https://github.com/cotes2020/jekyll-theme-chirpy" + spec.license = "MIT" + + spec.files = `git ls-files -z`.split("\x0").select { |f| + f.match(%r!^((_(includes|layouts|sass|data)|assets)\/|README|LICENSE)!i) + } + + spec.metadata = { + "bug_tracker_uri" => "https://github.com/cotes2020/jekyll-theme-chirpy/issues", + "documentation_uri" => "https://github.com/cotes2020/jekyll-theme-chirpy/#readme", + "homepage_uri" => "https://cotes2020.github.io/chirpy-demo", + "source_code_uri" => "https://github.com/cotes2020/jekyll-theme-chirpy", + "wiki_uri" => "https://github.com/cotes2020/jekyll-theme-chirpy/wiki", + "plugin_type" => "theme" + } + + spec.required_ruby_version = ">= 2.6" + + spec.add_runtime_dependency "jekyll", "~> 4.3" + spec.add_runtime_dependency "jekyll-paginate", "~> 1.1" + spec.add_runtime_dependency "jekyll-redirect-from", "~> 0.16" + spec.add_runtime_dependency "jekyll-seo-tag", "~> 2.7" + spec.add_runtime_dependency "jekyll-archives", "~> 2.2" + spec.add_runtime_dependency "jekyll-sitemap", "~> 1.4" + spec.add_runtime_dependency "jekyll-include-cache", "~> 0.2" + +end diff --git a/package.json b/package.json new file mode 100644 index 0000000..674f76e --- /dev/null +++ b/package.json @@ -0,0 +1,35 @@ +{ + "name": "jekyll-theme-chirpy", + "version": "6.1.0", + "description": "A minimal, responsive and feature-rich Jekyll theme for technical writing.", + "repository": { + "type": "git", + "url": "git+https://github.com/cotes2020/jekyll-theme-chirpy.git" + }, + "author": "Cotes Chung", + "license": "MIT", + "bugs": { + "url": "https://github.com/cotes2020/jekyll-theme-chirpy/issues" + }, + "homepage": "https://github.com/cotes2020/jekyll-theme-chirpy/", + "scripts": { + "prebuild": "npx rimraf assets/js/dist", + "build": "NODE_ENV=production npx rollup -c --bundleConfigAsCjs", + "prewatch": "npx rimraf assets/js/dist", + "watch": "npx rollup -c --bundleConfigAsCjs -w", + "test": "npx stylelint _sass/**/*.scss", + "fixlint": "npm run test -- --fix" + }, + "devDependencies": { + "@babel/core": "^7.21.3", + "@babel/plugin-proposal-class-properties": "^7.18.6", + "@babel/preset-env": "^7.20.2", + "@rollup/plugin-babel": "^6.0.3", + "@rollup/plugin-terser": "^0.4.0", + "rimraf": "^5.0.1", + "rollup": "^3.20.2", + "rollup-plugin-license": "^3.0.1", + "stylelint": "^15.3.0", + "stylelint-config-standard-scss": "^9.0.0" + } +} diff --git a/rollup.config.js b/rollup.config.js new file mode 100644 index 0000000..907ca3e --- /dev/null +++ b/rollup.config.js @@ -0,0 +1,46 @@ +import babel from '@rollup/plugin-babel'; +import terser from '@rollup/plugin-terser'; +import license from 'rollup-plugin-license'; +import path from 'path'; + +const JS_SRC = '_javascript'; +const JS_DIST = 'assets/js/dist'; +const isProd = process.env.NODE_ENV === 'production'; + +function build(filename) { + return { + input: [`${JS_SRC}/${filename}.js`], + output: { + file: `${JS_DIST}/${filename}.min.js`, + format: 'iife', + name: 'Chirpy', + sourcemap: !isProd + }, + watch: { + include: `${JS_SRC}/**` + }, + plugins: [ + babel({ + babelHelpers: 'bundled', + presets: ['@babel/env'], + plugins: ['@babel/plugin-proposal-class-properties'] + }), + license({ + banner: { + commentStyle: 'ignored', + content: { file: path.join(__dirname, JS_SRC, '_copyright') } + } + }), + isProd && terser() + ] + }; +} + +export default [ + build('commons'), + build('home'), + build('categories'), + build('page'), + build('post'), + build('misc') +]; diff --git a/tools/init b/tools/init new file mode 100755 index 
0000000..5baac5d --- /dev/null +++ b/tools/init @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# +# Init the environment for new user. + +set -eu + +# CLI Dependencies +CLI=("git" "npm") + +ACTIONS_WORKFLOW=pages-deploy.yml + +# temporary file suffixes that make `sed -i` compatible with BSD and Linux +TEMP_SUFFIX="to-delete" + +_no_gh=false + +help() { + echo "Usage:" + echo + echo " bash /path/to/init [options]" + echo + echo "Options:" + echo " --no-gh Do not deploy to Github." + echo " -h, --help Print this help information." +} + +# BSD and GNU compatible sed +_sedi() { + regex=$1 + file=$2 + sed -i.$TEMP_SUFFIX "$regex" "$file" + rm -f "$file".$TEMP_SUFFIX +} + +_check_cli() { + for i in "${!CLI[@]}"; do + cli="${CLI[$i]}" + if ! command -v "$cli" &>/dev/null; then + echo "Command '$cli' not found! Hint: you should install it." + exit 1 + fi + done +} + +_check_status() { + if [[ -n $(git status . -s) ]]; then + echo "Error: Commit unstaged files first, and then run this tool again." + exit 1 + fi +} + +_check_init() { + local _has_inited=false + + if [[ ! -d .github ]]; then # using option `--no-gh` + _has_inited=true + else + if [[ -f .github/workflows/$ACTIONS_WORKFLOW ]]; then + # on BSD, the `wc` could contains blank + local _count + _count=$(find .github/workflows/ -type f -name "*.yml" | wc -l) + if [[ ${_count//[[:blank:]]/} == 1 ]]; then + _has_inited=true + fi + fi + fi + + if $_has_inited; then + echo "Already initialized." + exit 0 + fi +} + +check_env() { + _check_cli + _check_status + _check_init +} + +checkout_latest_release() { + hash=$(git log --grep="chore(release):" -1 --pretty="%H") + git reset --hard "$hash" +} + +init_files() { + if $_no_gh; then + rm -rf .github + else + ## Change the files of `.github` + mv .github/workflows/$ACTIONS_WORKFLOW.hook . + rm -rf .github + mkdir -p .github/workflows + mv ./${ACTIONS_WORKFLOW}.hook .github/workflows/${ACTIONS_WORKFLOW} + + ## Cleanup image settings in site config + _sedi "s/^img_cdn:.*/img_cdn:/;s/^avatar:.*/avatar:/" _config.yml + fi + + # remove the other files + rm -rf _posts/* + + # build assets + npm i && npm run build + + # track the js output + _sedi "/^assets.*\/dist/d" .gitignore +} + +commit() { + git add -A + git commit -m "chore: initialize the environment" -q + echo -e "\n[INFO] Initialization successful!\n" +} + +main() { + check_env + checkout_latest_release + init_files + commit +} + +while (($#)); do + opt="$1" + case $opt in + --no-gh) + _no_gh=true + shift + ;; + -h | --help) + help + exit 0 + ;; + *) + # unknown option + help + exit 1 + ;; + esac +done + +main diff --git a/tools/release b/tools/release new file mode 100755 index 0000000..43182eb --- /dev/null +++ b/tools/release @@ -0,0 +1,240 @@ +#!/usr/bin/env bash +# +# Release a new version to the GitLab flow production branch. +# +# For a new major/minor version, bump version on the main branch, and then merge into the production branch. +# +# For a patch version, bump the version number on the patch branch, then merge that branch into the main branch +# and production branch. 
+# +# +# Usage: run on main branch or the patch branch +# +# Requires: Git, NPM and RubyGems + +set -eu + +opt_pre=false # preview mode option +opt_skip_ver=false # option for skip versioning + +working_branch="$(git branch --show-current)" + +STAGING_BRANCH="$(git symbolic-ref refs/remotes/origin/HEAD | sed 's@^refs/remotes/origin/@@')" + +PROD_BRANCH="production" + +GEM_SPEC="jekyll-theme-chirpy.gemspec" + +NODE_CONFIG="package.json" + +JS_DIST="assets/js/dist" +BACKUP_PATH="$(mktemp -d)" + +FILES=( + "_sass/jekyll-theme-chirpy.scss" + "$GEM_SPEC" + "$NODE_CONFIG" +) + +TOOLS=( + "git" + "npm" + "standard-version" + "gem" +) + +help() { + echo "A tool to release new version Chirpy gem" + echo + echo "Usage:" + echo + echo " bash ./tools/release [options]" + echo + echo "Options:" + echo " -k, --skip-versioning Skip the step of generating the version number." + echo " -p, --preview Enable preview mode, only package, and will not modify the branches" + echo " -h, --help Print this information." +} + +_check_git() { + # ensure nothing is uncommitted + if [[ -n $(git status . -s) ]]; then + echo "Abort: Commit the staged files first, and then run this tool again." + exit 1 + fi + + # ensure the working branch is the main/patch branch + if [[ $working_branch != "$STAGING_BRANCH" && $working_branch != hotfix/* ]]; then + echo "Abort: Please run on the main branch or patch branches." + exit 1 + fi +} + +_check_src() { + for i in "${!FILES[@]}"; do + _src="${FILES[$i]}" + if [[ ! -f $_src && ! -d $_src ]]; then + echo -e "Error: Missing file \"$_src\"!\n" + exit 1 + fi + done + +} + +_check_command() { + for i in "${!TOOLS[@]}"; do + cli="${TOOLS[$i]}" + if ! command -v "$cli" &>/dev/null; then + echo "Command '$cli' not found!" + exit 1 + fi + done +} + +_check_node_packages() { + if [[ ! -d node_modules || "$(du node_modules | awk '{print $1}')" == "0" ]]; then + npm i + fi +} + +check() { + _check_command + _check_git + _check_src + _check_node_packages +} + +_bump_files() { + for i in "${!FILES[@]}"; do + if [[ ${FILES[$i]} == "$NODE_CONFIG" ]]; then + continue + fi + + sed -i "s/v[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+/v$1/" "${FILES[$i]}" + done + + npm run build +} + +_bump_gemspec() { + sed -i "s/[[:digit:]]\+\.[[:digit:]]\+\.[[:digit:]]\+/$1/" "$GEM_SPEC" +} + +# 1. Bump latest version number to the following files: +# +# - _sass/jekyll-theme-chirpy.scss +# - _javascript/copyright +# - assets/js/dist/*.js (will be built by gulp later) +# - jekyll-theme-chirpy.gemspec +# +# 2. Create a commit to save the changes. +bump() { + _bump_files "$1" + _bump_gemspec "$1" + + if [[ $opt_pre = false && -n $(git status . -s) ]]; then + git add . + git commit -m "chore(release): $1" + fi +} + +## Remove unnecessary theme settings +cleanup_config() { + cp _config.yml _config.yml.bak + sed -i "s/^img_cdn:.*/img_cdn:/;s/^avatar:.*/avatar:/" _config.yml +} + +resume_config() { + mv _config.yml.bak _config.yml +} + +# build a gem package +build_gem() { + echo -e "Build the gem package for v$_version\n" + cleanup_config + rm -f ./*.gem + git add "$JS_DIST" -f # add JS dist to gem + gem build "$GEM_SPEC" + cp "$JS_DIST"/* "$BACKUP_PATH" + git restore --staged "$JS_DIST" # resume the git status + resume_config +} + +# Update the git branch graph, tag, and then build the gem package. 
+release() { + _version="$1" # X.Y.Z + + git checkout "$PROD_BRANCH" + git merge --no-ff --no-edit "$working_branch" + + # Create a new tag on working branch + echo -e "Create tag v$_version\n" + git tag "v$_version" + + # Merge from patch branch to the staging branch + if [[ $working_branch == hotfix/* ]]; then + git checkout "$STAGING_BRANCH" + git merge --no-ff --no-edit "$working_branch" + git branch -D "$working_branch" + fi +} + +main() { + if [[ $opt_skip_ver = false ]]; then + check + + # auto-generate a new version number to the file 'package.json' + if $opt_pre; then + standard-version --prerelease rc + else + standard-version + fi + fi + + # Change heading of Patch version to level 2 (a bug from `standard-version`) + sed -i "s/^### \[/## \[/g" CHANGELOG.md + # Replace multiple empty lines with a single empty line + sed -i "/^$/N;/^\n$/D" CHANGELOG.md + + _version="$(grep '"version":' "$NODE_CONFIG" | sed 's/.*: "//;s/".*//')" + + echo -e "Bump version number to $_version\n" + bump "$_version" + + build_gem + + if [[ $opt_pre = true ]]; then + # Undo all changes on Git + git reset --hard && git clean -fd + else + release "$_version" + fi + + # restore the dist files for future development + mkdir -p "$JS_DIST" && cp "$BACKUP_PATH"/* "$JS_DIST" +} + +while (($#)); do + opt="$1" + case $opt in + -p | --preview) + opt_pre=true + shift + ;; + -k | --skip-versioning) + opt_skip_ver=true + shift + ;; + -h | --help) + help + exit 0 + ;; + *) + # unknown option + help + exit 1 + ;; + esac +done + +main diff --git a/tools/run b/tools/run new file mode 100755 index 0000000..8072e41 --- /dev/null +++ b/tools/run @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# +# Run jekyll serve and then launch the site + +bundle exec jekyll s -H 0.0.0.0 -l diff --git a/tools/test b/tools/test new file mode 100755 index 0000000..83a9490 --- /dev/null +++ b/tools/test @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# +# Build and test the site content +# +# Requirement: html-proofer, jekyll +# +# Usage: See help information + +set -eu + +SITE_DIR="_site" + +_config="_config.yml" + +_baseurl="" + +help() { + echo "Build and test the site content" + echo + echo "Usage:" + echo + echo " bash ./tools/test [options]" + echo + echo "Options:" + echo ' -c, --config "" Specify config file(s)' + echo " -h, --help Print this information." +} + +read_baseurl() { + if [[ $_config == *","* ]]; then + # multiple config + IFS="," + read -ra config_array <<<"$_config" + + # reverse loop the config files + for ((i = ${#config_array[@]} - 1; i >= 0; i--)); do + _tmp_baseurl="$(grep '^baseurl:' "${config_array[i]}" | sed "s/.*: *//;s/['\"]//g;s/#.*//")" + + if [[ -n $_tmp_baseurl ]]; then + _baseurl="$_tmp_baseurl" + break + fi + done + + else + # single config + _baseurl="$(grep '^baseurl:' "$_config" | sed "s/.*: *//;s/['\"]//g;s/#.*//")" + fi +} + +main() { + # clean up + if [[ -d $SITE_DIR ]]; then + rm -rf "$SITE_DIR" + fi + + read_baseurl + + # build + JEKYLL_ENV=production bundle exec jekyll b \ + -d "$SITE_DIR$_baseurl" -c "$_config" + + # test + bundle exec htmlproofer "$SITE_DIR" \ + --disable-external \ + --check-html \ + --allow_hash_href +} + +while (($#)); do + opt="$1" + case $opt in + -c | --config) + _config="$2" + shift + shift + ;; + -h | --help) + help + exit 0 + ;; + *) + # unknown option + help + exit 1 + ;; + esac +done + +main
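A minimal local-usage sketch for the tooling added above (assumptions: the consuming site supplies a Gemfile, and Ruby/Bundler plus Node.js are already installed; neither is included in this diff):

  # install Ruby gems and Node packages
  bundle install
  npm install

  # bundle the _javascript/ sources into assets/js/dist/ (rollup + babel + terser, per rollup.config.js)
  npm run build

  # serve the site locally; wraps `bundle exec jekyll s -H 0.0.0.0 -l` (tools/run)
  bash tools/run

  # production build plus html-proofer checks (tools/test)
  bash tools/test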