Skip to content

Commit

Permalink
Merge pull request #352 from PESchoenberg/develop
Browse files Browse the repository at this point in the history
New functions.
  • Loading branch information
PESchoenberg authored Mar 23, 2024
2 parents 43bccf4 + ebd9830 commit de90495
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 8 deletions.
105 changes: 101 additions & 4 deletions grsp0.scm
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@

(define-module (grsp grsp0)
#:use-module (grsp grsp3)
#:use-module (grsp grsp1)
#:use-module (grsp grsp1)
#:use-module (grsp grsp11)
#:use-module (ice-9 string-fun)
#:use-module (ice-9 futures)
#:use-module (ice-9 binary-ports)
Expand Down Expand Up @@ -182,7 +183,7 @@
grsp-sldvls
grsp-clear-on-demand
grsp-ldlc
grsp-padlr
;;grsp-padlr
grsp-pad-lr
grsp-menuv
grsp-menup
Expand All @@ -194,7 +195,10 @@
grsp-movc
plinerc
grsp-file-isolate-name
grsp-pg-psql1))
grsp-pg-psql1
grsp-count-words
grsp-string-lo
grsp-substring-replace))


;;;; pline - Displays string p_s1 p_l1 times in one line at the console.
Expand Down Expand Up @@ -2551,7 +2555,7 @@
res1))


;;;; grsp-sldvls - Speparator, Line, display value, line, separator.
;;;; grsp-sldvls - Separator, Line, display value, line, separator.
;; Displays p_n1 blank lines before a separator defined by p_s2, string
;; p_s1 and p_n2 blank lines after p_s1 and separator p_s3.
;;
Expand Down Expand Up @@ -3014,3 +3018,96 @@

res1))


;;;; grsp-count-words - Counts the words in string p_s1 and returns a two
;; element list:
;;
;; - Elem 0: number of words.
;; - Elem 1: a list containing each word in p_s1 as a separate element.
;;
;; Keywords:
;;
;; - words, terms
;;
;; Parameters:
;;
;; - p_s1: string.
;;
;; Notes:
;;
;; - Words are delimited by the space character only.
;; - Empty tokens produced by an empty p_s1 or by leading, trailing or
;;   consecutive spaces are discarded, so they are neither counted nor
;;   returned as words.
;;
(define (grsp-count-words p_s1)
  (let ((res1 '())
        (l1 '()))

    ;; Split on spaces and drop the empty strings that string-split emits
    ;; between adjacent delimiters and at the string boundaries.
    (set! l1 (filter (lambda (s1) (not (string-null? s1)))
                     (string-split p_s1 #\space)))

    ;; Compose results.
    (set! res1 (list (length l1) l1))

    res1))


;;;; grsp-string-lo - From string p_s1, leaves only the characters contained in
;; list p_l1, purging it from everything else.
;;
;; Keywords:
;;
;; - strings, alphanumeric
;;
;; Parameters:
;;
;; - p_s1: string.
;; - p_l1: list of one-character strings ("a" "b", etc.).
;;
;; Output:
;;
;; - A new string holding, in their original order, the characters of p_s1
;;   whose one-character string form is an element of p_l1.
;;
;; Notes:
;;
;; - No placeholder character is used internally, so any character
;;   (including "|") is kept if and only if it is listed in p_l1.
;;
(define (grsp-string-lo p_s1 p_l1)
  (let ((res1 ""))

    ;; Single pass over the characters. member compares with equal?, so
    ;; each character is converted to a one-character string before the
    ;; lookup in p_l1.
    (set! res1 (list->string
                (filter (lambda (c1) (member (string c1) p_l1))
                        (string->list p_s1))))

    res1))


;;;; grsp-substring-replace - Replaces all p_s2 substrings with substring p_s3
;; in string p_s1, repeating the replacement until no p_s2 is left.
;;
;; Keywords:
;;
;; - replacing, strings
;;
;; Parameters:
;;
;; - p_s1: string.
;; - p_s2: string to look for.
;; - p_s3: string to put in place of p_s2.
;;
;; Output:
;;
;; - The resulting string.
;;
;; Notes:
;;
;; - Passes are repeated so that occurrences of p_s2 created by a previous
;;   pass are also replaced (e.g. collapsing runs of spaces).
;; - If p_s2 is empty, p_s1 is returned unchanged.
;; - If p_s3 itself contains p_s2, repeating could never finish, so a
;;   single replacement pass is performed.
;; - Other pairs for which each pass keeps recreating p_s2 across the
;;   replacement boundaries are not detected.
;;
(define (grsp-substring-replace p_s1 p_s2 p_s3)
  (let ((res1 p_s1))

    ;; An empty search string matches everywhere and would never be
    ;; exhausted, so it is treated as "nothing to replace".
    (unless (string-null? p_s2)

      (if (string-contains p_s3 p_s2)

          ;; The replacement reintroduces the search string: one pass only.
          (set! res1 (string-replace-substring res1 p_s2 p_s3))

          ;; Repeat until no occurrence of p_s2 remains.
          (while (string-contains res1 p_s2)
            (set! res1 (string-replace-substring res1 p_s2 p_s3)))))

    res1))

42 changes: 40 additions & 2 deletions grsp11.scm
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@
grsp-ly2code
grsp-lal-deletee
grsp-lal-esubstr
grsp-lal-extract-from-lp))
grsp-lal-extract-from-lp
grsp-lal-exists))


;;;; grsp-lal-rel - Replace element in list. Replace element p_j1 of list
Expand Down Expand Up @@ -821,7 +822,7 @@

;;;; grsp-lal-supp - Given list p_l1 with n elements, it returns a
;; list that contains one instance per each element value contained in
;; p_11. That is, it elimitates repeated instances of the elements of
;; p_l1. That is, it eliminates repeated instances of the elements of
;; p_l1 and returns its domain subset.
;;
;; Keywords:
Expand Down Expand Up @@ -1262,3 +1263,40 @@
(loop (+ j1 1)))))

res1))


;;;; grsp-lal-exists - Finds out if string p_s1 is in list p_l1.
;;
;; Keywords:
;;
;; - strings, alphanumeric
;;
;; Parameters:
;;
;; - p_s1: string.
;; - p_l1: list of strings.
;;
;; Output:
;;
;; - #t if p_s1 is in the list.
;; - #f otherwise (including when p_l1 is empty).
;;
(define (grsp-lal-exists p_s1 p_l1)
  (let ((res1 #f))

    ;; member walks the list once, comparing with equal?, and returns the
    ;; matching tail or #f; normalize that to a strict boolean.
    (set! res1 (if (member p_s1 p_l1) #t #f))

    res1))
1 change: 1 addition & 0 deletions grsp2.scm
Original file line number Diff line number Diff line change
Expand Up @@ -3514,3 +3514,4 @@

res1))


77 changes: 75 additions & 2 deletions grsp3.scm
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,8 @@
#:use-module (grsp grsp4)
#:use-module (grsp grsp11)
#:use-module (ice-9 threads)
#:use-module (ice-9 futures)
#:use-module (ice-9 futures)
#:use-module (ice-9 string-fun)
#:export (grsp-lm
grsp-hm
grsp-ln
Expand Down Expand Up @@ -517,7 +518,8 @@
grsp-matrix-row-col-setk
grsp-matrix-find-if-prkey-exists
grsp-matrix-row-subexpk
grsp-matrix-row-insert))
grsp-matrix-row-insert
grsp-matrix-tf-idf))


;;;; grsp-lm - Short form of (grsp-matrix-esi 1 p_a1).
Expand Down Expand Up @@ -13480,3 +13482,74 @@
(set! res1 (grsp-ms2my res1))))

res1))


;;;; grsp-matrix-tf-idf - Creates a matrix for TF-IDF.
;;
;; Keywords:
;;
;; - frequency, embeddings, words, terms
;;
;; Parameters:
;;
;; - p_a1: col vector, string, list of documents.
;;
;; Output:
;;
;; - Matrix built with (grsp-matrix-create 0 (grsp-tm a1) (grsp-tm a2)),
;;   where a1 is the cleaned working copy of p_a1 and a2 is the terms
;;   vector.
;;
;; Notes:
;;
;; - https://www.turing.com/kb/guide-on-word-embeddings-in-nlp
;; - Work in progress: the term frequency, trivial term elimination and
;;   document count steps are only outlined as comments, and a2 is never
;;   filled.
;; - p_a1 is not modified; all cleaning is done on a copy.
;;
(define (grsp-matrix-tf-idf p_a1)
  (let ((res1 0)
        (s1 "")
        (s2 "")
        (td 0)
        (a1 0)
        (a2 0)
        (tn 0)
        (l1 '("a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "ñ" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
              "á" "é" "í" "ó" "ú" " "))
        (l2 '()))

    ;; Create matrices.
    (set! a1 (grsp-matrix-cpy p_a1))
    (set! a2 (grsp-matrix-create 0 0 1))

    ;; Add a column to a1 to contain the results of td calculation.
    (set! a1 (grsp-matrix-subexp a1 0 1))

    ;; Downcase and clean strings of all elements in the first col
    ;; of a1.
    (let loop ((i1 (grsp-lm a1)))
      (if (<= i1 (grsp-hm a1))

          ;; First we downcase, trim and clean the string.
          (begin (set! s1 (string-downcase (array-ref a1 i1 0)))
                 (set! s1 (string-trim-both s1))

                 ;; Collapse runs of spaces into a single space so that
                 ;; splitting on spaces below yields one token per word.
                 ;; NOTE(review): assumes collapsing (not deleting) spaces
                 ;; was the intent, since words are split on spaces later
                 ;; and " " is in the allowed list l1 - confirm.
                 (set! s1 (grsp-substring-replace s1 "  " " "))
                 (set! s1 (grsp-string-lo s1 l1))
                 (array-set! a1 s1 i1 0)

                 ;; Calculate the number of terms in each document (td) and
                 ;; place the results for each document on the second column
                 ;; of a1. grsp-count-words returns (count words); only the
                 ;; count is stored.
                 (array-set! a1 (car (grsp-count-words s1)) i1 1)

                 ;; Analyze each document and identify each different word.
                 ;; Place them in a column vector a2 once. If a term already
                 ;; exists in a2, then do not add a second instance of it.
                 (set! l2 (string-split s1 #\space))
                 (set! tn (length l2))

                 (loop (+ i1 1)))))

    ;; Build results matrix.
    (set! res1 (grsp-matrix-create 0 (grsp-tm a1) (grsp-tm a2)))

    ;; Calculate each term's frequency on each document. Insert the result
    ;; for each term in each document in res1.

    ;; Eliminate trivial terms.

    ;; Count the number of documents containing each non-trivial term.

    res1))

0 comments on commit de90495

Please sign in to comment.