From 7c9feba3bae74b278716386ccafabd2c4c8d977d Mon Sep 17 00:00:00 2001
From: R4V3N <o.joisten@live.se>
Date: Tue, 28 Mar 2023 12:38:49 +0200
Subject: [PATCH 1/2] Added Swedish Translation (#68)

* 1.1.23

* 1.1.24

* Added Swedish language

---------

Co-authored-by: di-sukharev <dim.sukharev@gmail.com>
---
 package-lock.json | 4 ++--
 package.json      | 2 +-
 src/i18n/index.ts | 8 ++++++--
 src/i18n/sv.json  | 6 ++++++
 4 files changed, 15 insertions(+), 5 deletions(-)
 create mode 100644 src/i18n/sv.json

diff --git a/package-lock.json b/package-lock.json
index 5edc1ca..0cc093e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "opencommit",
-  "version": "1.1.22",
+  "version": "1.1.24",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "opencommit",
-      "version": "1.1.22",
+      "version": "1.1.24",
       "license": "MIT",
       "dependencies": {
         "@clack/prompts": "^0.6.1",
diff --git a/package.json b/package.json
index 62505d8..760b0c1 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "opencommit",
-  "version": "1.1.22",
+  "version": "1.1.24",
   "description": "GPT CLI to auto-generate impressive commits in 1 second. Killing lame commits with AI 🤯🔫",
   "keywords": [
     "git",
diff --git a/src/i18n/index.ts b/src/i18n/index.ts
index 53594c4..079e05e 100644
--- a/src/i18n/index.ts
+++ b/src/i18n/index.ts
@@ -9,6 +9,7 @@ import ja from '../i18n/ja.json' assert { type: 'json' };
 import pt_br from '../i18n/pt_br.json' assert { type: 'json' };
 import vi_VN from '../i18n/vi_VN.json' assert { type: 'json' };
 import es_ES from '../i18n/es_ES.json' assert { type: 'json' };
+import sv from '../i18n/sv.json' assert { type: 'json' };
 
 export enum I18nLocals {
   'en' = 'en',
@@ -20,7 +21,8 @@ export enum I18nLocals {
   'it' = 'it',
   'ko' = 'ko',
   'pt_br' = 'pt_br',
-  'es_ES' = 'es_ES'
+  'es_ES' = 'es_ES',
+  'sv' = 'sv',
 };
 
 export const i18n = {
@@ -34,7 +36,8 @@ export const i18n = {
   ko,
   pt_br,
   vi_VN,
-  es_ES
+  es_ES,
+  sv
 };
 
 export const I18N_CONFIG_ALIAS: { [key: string]: string[] } = {
@@ -49,6 +52,7 @@ export const I18N_CONFIG_ALIAS: { [key: string]: string[] } = {
   vi_VN: ['vi_VN', 'Vietnamese', 'tiếng Việt'],
   en: ['en', 'English', 'english'],
   es_ES: ['es_ES', 'Spanish', 'español'],
+  sv: ['sv', 'Swedish', 'Svenska'],
 };
 
 export function getI18nLocal(value: string): string | boolean {
diff --git a/src/i18n/sv.json b/src/i18n/sv.json
new file mode 100644
index 0000000..eb86aee
--- /dev/null
+++ b/src/i18n/sv.json
@@ -0,0 +1,6 @@
+{
+"localLanguage": "svenska",
+"commitFix": "fixa(server.ts): ändra variabelnamnet för port från små bokstäver till stora bokstäver PORT",
+"commitFeat": "nyhet(server.ts): lägg till stöd för process.env.PORT miljövariabel",
+"commitDescription": "Variabeln som innehåller portnumret heter nu PORT vilket förbättrar konsekvensen med namngivningskonventionerna eftersom PORT är en konstant. Stöd för en miljövariabel gör att applikationen kan vara mer flexibel då den nu kan köras på vilken port som helst som specificeras via miljövariabeln process.env.PORT."
+}
\ No newline at end of file

From 3103ae18b8035bd5046c2f72798dfcdc901b9c60 Mon Sep 17 00:00:00 2001
From: Raymond <mathsgod@yahoo.com>
Date: Tue, 28 Mar 2023 18:43:02 +0800
Subject: [PATCH 2/2] Count file diff by token, not by length of string (#63)

* 1.1.23

* 1.1.24

* feat(package.json): add @dqbd/tiktoken dependency

refactor(generateCommitMessageFromGitDiff.ts): add tokenCount function to count the number of tokens in a string
refactor(generateCommitMessageFromGitDiff.ts): change the way the length of INIT_MESSAGES_PROMPT is calculated to use tokenCount function
refactor(generateCommitMessageFromGitDiff.ts): change the way the length of diff is calculated to use tokenCount function

refactor(generateCommitMessageFromGitDiff.ts): rename function parameter from diff to fileDiff and update function calls accordingly
feat(generateCommitMessageFromGitDiff.ts): add tokenCount function to count tokens in fileDiff and use it to check if fileDiff is bigger than MAX_REQ_TOKENS

feat(utils): add tokenCount function to count the number of tokens in a string
refactor(utils/mergeStrings.ts): use tokenCount function to count the number of tokens in a string instead of checking the length of the concatenated string

---------

Co-authored-by: di-sukharev <dim.sukharev@gmail.com>
---
 package-lock.json                       |  6 ++++++
 package.json                            |  1 +
 src/generateCommitMessageFromGitDiff.ts | 21 ++++++++++-----------
 src/utils/mergeStrings.ts               |  3 ++-
 src/utils/tokenCount.ts                 | 14 ++++++++++++++
 5 files changed, 33 insertions(+), 12 deletions(-)
 create mode 100644 src/utils/tokenCount.ts

diff --git a/package-lock.json b/package-lock.json
index 0cc093e..d799cb4 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,6 +10,7 @@
       "license": "MIT",
       "dependencies": {
         "@clack/prompts": "^0.6.1",
+        "@dqbd/tiktoken": "^1.0.2",
         "axios": "^1.3.4",
         "chalk": "^5.2.0",
         "cleye": "^1.3.2",
@@ -83,6 +84,11 @@
         "node": ">=12"
       }
     },
+    "node_modules/@dqbd/tiktoken": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.2.tgz",
+      "integrity": "sha512-AjGTBRWsMoVmVeN55NLyupyM8TNamOUBl6tj5t/leLDVup3CFGO9tVagNL1jf3GyZLkWZSTmYVbPQ/M2LEcNzw=="
+    },
     "node_modules/@esbuild/android-arm": {
       "version": "0.15.18",
       "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.15.18.tgz",
diff --git a/package.json b/package.json
index 760b0c1..0ce4593 100644
--- a/package.json
+++ b/package.json
@@ -60,6 +60,7 @@
   },
   "dependencies": {
     "@clack/prompts": "^0.6.1",
+    "@dqbd/tiktoken": "^1.0.2",
     "axios": "^1.3.4",
     "chalk": "^5.2.0",
     "cleye": "^1.3.2",
diff --git a/src/generateCommitMessageFromGitDiff.ts b/src/generateCommitMessageFromGitDiff.ts
index 69100b0..af26956 100644
--- a/src/generateCommitMessageFromGitDiff.ts
+++ b/src/generateCommitMessageFromGitDiff.ts
@@ -6,6 +6,7 @@ import { api } from './api';
 import { getConfig } from './commands/config';
 import { mergeStrings } from './utils/mergeStrings';
 import { i18n, I18nLocals } from './i18n';
+import { tokenCount } from './utils/tokenCount';
 
 const config = getConfig();
 const translation = i18n[(config?.language as I18nLocals) || 'en'];
@@ -13,12 +14,10 @@ const translation = i18n[(config?.language as I18nLocals) || 'en'];
 const INIT_MESSAGES_PROMPT: Array<ChatCompletionRequestMessage> = [
   {
     role: ChatCompletionRequestMessageRoleEnum.System,
-    content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${
-      config?.emoji
-        ? 'Use Gitmoji convention to preface the commit'
-        : 'Do not preface the commit with anything'
-    }, use the present tense. ${
-      config?.description
+    content: `You are to act as the author of a commit message in git. Your mission is to create clean and comprehensive commit messages in the conventional commit convention. I'll send you an output of 'git diff --staged' command, and you convert it into a commit message. ${config?.emoji
+      ? 'Use Gitmoji convention to preface the commit'
+      : 'Do not preface the commit with anything'
+      }, use the present tense. ${config?.description
         ? 'Add a short description of what commit is about after the commit message. Don\'t start it with "This commit", just describe the changes.'
         : "Don't add any descriptions to the commit, only commit message."
     } Use ${translation.localLanguage} to answer.`
@@ -80,16 +79,16 @@ interface GenerateCommitMessageError {
 }
 
 const INIT_MESSAGES_PROMPT_LENGTH = INIT_MESSAGES_PROMPT.map(
-  (msg) => msg.content
-).join('').length;
+  (msg) => tokenCount(msg.content) + 4
+).reduce((a, b) => a + b, 0);
 
 const MAX_REQ_TOKENS = 3900 - INIT_MESSAGES_PROMPT_LENGTH;
 
 export const generateCommitMessageWithChatCompletion = async (
   diff: string
 ): Promise<string | GenerateCommitMessageError> => {
-  try {
-    if (diff.length >= MAX_REQ_TOKENS) {
+ try {
+    if (tokenCount(diff) >= MAX_REQ_TOKENS) {
       const commitMessagePromises = getCommitMsgsPromisesFromFileDiffs(diff);
 
       const commitMessages = await Promise.all(commitMessagePromises);
@@ -144,7 +143,7 @@ function getCommitMsgsPromisesFromFileDiffs(diff: string) {
   const commitMessagePromises = [];
 
   for (const fileDiff of mergedFilesDiffs) {
-    if (fileDiff.length >= MAX_REQ_TOKENS) {
+    if (tokenCount(fileDiff) >= MAX_REQ_TOKENS) {
       // if file-diff is bigger than gpt context — split fileDiff into lineDiff
       const messagesPromises = getMessagesPromisesByLines(fileDiff, separator);
 
diff --git a/src/utils/mergeStrings.ts b/src/utils/mergeStrings.ts
index 7b55a99..ee35f50 100644
--- a/src/utils/mergeStrings.ts
+++ b/src/utils/mergeStrings.ts
@@ -1,8 +1,9 @@
+import { tokenCount } from './tokenCount'
 export function mergeStrings(arr: string[], maxStringLength: number): string[] {
   const mergedArr: string[] = [];
   let currentItem: string = arr[0];
   for (const item of arr.slice(1)) {
-    if (currentItem.length + item.length <= maxStringLength) {
+    if (tokenCount(currentItem + item) <= maxStringLength) {
       currentItem += item;
     } else {
       mergedArr.push(currentItem);
diff --git a/src/utils/tokenCount.ts b/src/utils/tokenCount.ts
new file mode 100644
index 0000000..84e4f23
--- /dev/null
+++ b/src/utils/tokenCount.ts
@@ -0,0 +1,14 @@
+import { Tiktoken } from "@dqbd/tiktoken/lite"
+import cl100k_base from "@dqbd/tiktoken/encoders/cl100k_base.json" assert{type: "json"}
+
+export function tokenCount(content: string): number {
+    const encoding = new Tiktoken(
+        cl100k_base.bpe_ranks,
+        cl100k_base.special_tokens,
+        cl100k_base.pat_str
+    );
+    const tokens = encoding.encode(content);
+    encoding.free();
+
+    return tokens.length;
+}