Rosa Delima Mendrofa / SearchEngine / Commits / 8f565798

Commit 8f565798 authored May 28, 2020 by Yolanda Nainggolan
    added searching, fixed result and lyrics

parent 2d25e3c9
Showing 11 changed files with 466 additions and 532 deletions (+466 / -532)
views.cpython-37.pyc   ...e/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc   +0 -0
main.cpython-37.pyc    ...edIndexSimulator/inverted/__pycache__/main.cpython-37.pyc   +0 -0
main.py                SearchEngine/InvertedIndexSimulator/inverted/main.py           +233 -162
dataframe.min.css      ...nvertedIndexSimulator/static/assets/css/dataframe.min.css   +5 -12
trying.min.css         ...e/InvertedIndexSimulator/static/assets/css/trying.min.css   +159 -0
dataframe.html         ...gine/InvertedIndexSimulator/templates/apps/dataframe.html   +5 -68
indexing.html          ...ngine/InvertedIndexSimulator/templates/apps/indexing.html   +6 -44
lyric.html             ...chEngine/InvertedIndexSimulator/templates/apps/lyric.html   +7 -41
result.html            ...hEngine/InvertedIndexSimulator/templates/apps/result.html   +6 -21
views.py               SearchEngine/InvertedIndexSimulator/views.py                   +45 -184
urls.cpython-37.pyc    SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc      +0 -0
SearchEngine/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/main.py

@@ -2,19 +2,86 @@ resource_package = __name__
 import string
 import re
+import collections
+import math
+import pandas as pd
+import json
+import xml.dom.minidom as minidom
+import xml.etree.ElementTree as et
+from xml.etree.ElementTree import ElementTree
 from sklearn.feature_extraction.text import CountVectorizer
 from nltk.corpus import stopwords
 from nltk.tokenize import sent_tokenize, word_tokenize
 from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
 from itertools import count
-import collections
-import math
-import xml.etree.ElementTree as et
-from xml.etree.ElementTree import ElementTree
 try:
     from future_builtins import zip
 except ImportError:  # not 2.6+ or is 3.x
     try:
         from itertools import izip as zip  # < 2.5 or 3.x
     except ImportError:
         pass
+##############Show Dataframe########################
+def show_dataframe(parse_data):
+    data = parse_data.getroot()
+    df_cols = ["DOCNO", "SONG", "ARTIST", "LYRICS"]
+    rows = []
+    for node in data:
+        s_docno = node.find("DOCNO").text if node is not None else None
+        s_song = node.find("SONG").text if node is not None else None
+        s_artist = node.find("ARTIST").text if node is not None else None
+        s_lyrics = node.find("LYRICS").text if node is not None else None
+        rows.append({"DOCNO": s_docno, "SONG": s_song, "ARTIST": s_artist, "LYRICS": s_lyrics})
+    DataFrame = pd.DataFrame(rows, columns=df_cols)
+    dictionary = DataFrame.set_index('DOCNO').T.to_dict('list')
+    ##############Remove Punctuation, URL and Tokenize###################
+    nilai = list(dictionary.values())
+    nomornya = list(dictionary.keys())
+    for i in range(0, len(nomornya)):
+        nomornya[i] = int(nomornya[i])
+    lagunya = [sublist[0] for sublist in nilai]
+    artisnya = [sublist[1] for sublist in nilai]
+    liriknya = [sublist[2] for sublist in nilai]
+    context = {"DOCNO": nomornya, "SONG": lagunya, "ARTIST": artisnya, "LYRICS": liriknya}
+    return context
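The set_index('DOCNO').T.to_dict('list') step above is what lets show_dataframe pull song, artist and lyrics back out by position (sublist[0] through sublist[2]). A minimal sketch of that transformation on made-up rows (illustration only, not part of the commit):

# Illustration only: the shape produced by set_index('DOCNO').T.to_dict('list').
import pandas as pd

rows = [
    {"DOCNO": "1", "SONG": "Song A", "ARTIST": "Artist A", "LYRICS": "la la la"},
    {"DOCNO": "2", "SONG": "Song B", "ARTIST": "Artist B", "LYRICS": "na na na"},
]
df = pd.DataFrame(rows, columns=["DOCNO", "SONG", "ARTIST", "LYRICS"])

# Transposing after set_index turns each document into a column, so to_dict('list')
# maps DOCNO -> [SONG, ARTIST, LYRICS], which is why the code indexes sublist[0..2].
dictionary = df.set_index("DOCNO").T.to_dict("list")
print(dictionary)
# {'1': ['Song A', 'Artist A', 'la la la'], '2': ['Song B', 'Artist B', 'na na na']}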
+##############N_DOC########################
+def data_var(tree):
+    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
+    all_doc_no = []
+    all_song = []
+    all_lyrics = []
+    for node in tree.iter("DOCNO"):
+        all_doc_no.append(node.text)
+    for node in tree.iter("SONG"):
+        all_song.append(node.text)
+    for node in tree.iter("LYRICS"):
+        all_lyrics.append(node.text)
+    N_DOC = len(all_lyrics)
+    all_sentence_doc = []
+    for i in range(N_DOC):
+        all_sentence_doc.append(all_song[i] + all_lyrics[i])
+    return all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc
+##############Remove Punctuation###################
 def remove_punc_tokenize(sentence):
     tokens = []
     for punctuation in string.punctuation:
         sentence = sentence.replace(punctuation, " ")
...
@@ -30,23 +97,79 @@ def to_lower(tokens):
     tokens = [x.lower() for x in tokens]
     return tokens

-def generate_ngrams(data, n):
-    ngram = []
-    result = []
-    #menampilkan hasil n-gram per dokumen
-    for i in range(len(data)):
-        sequences = [data[i][j:] for j in range(n)]
-        temp = zip(*sequences)
-        lst = list(temp)
-        result.append([" ".join(lst) for lst in lst])
-    #menggabungkan n-gram semua dokumen dalam bentuk array
-    for i in range(len(result)):
-        for j in range(len(result[i])):
-            ngram.append(result[i][j])
-    return ngram, result
+##############Load Data########################
+def load_data(dcmnt_xml):
+    all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
+    all_profile = dcmnt_xml.getElementsByTagName('SONG')
+    all_date = dcmnt_xml.getElementsByTagName('ARTIST')
+    all_text = dcmnt_xml.getElementsByTagName('LYRICS')
+    all_pub = dcmnt_xml.getElementsByTagName('PUB')
+    all_page = dcmnt_xml.getElementsByTagName('PAGE')
+    N_DOC = len(all_doc_no)
+    all_sentence_doc_sample = []
+    for i in range(N_DOC):
+        sentence_doc_sample = ' ' + all_text[i].firstChild.data
+        all_sentence_doc_sample.append(sentence_doc_sample)
+    return all_doc_no, N_DOC, all_sentence_doc_sample
+
+##############Indexing########################
+def indexing(N_DOC, tokens_doc, all_doc_no):
+    all_tokens = []
+    for i in range(N_DOC):
+        for w in tokens_doc[i]:
+            all_tokens.append(w)
+    new_sentence = ' '.join([w for w in all_tokens])
+    for w in CountVectorizer().build_tokenizer()(new_sentence):
+        all_tokens.append(w)
+    all_tokens = set(all_tokens)
+    proximity_index = {}
+    for token in all_tokens:
+        dict_doc_position = {}
+        for n in range(N_DOC):
+            if (token in tokens_doc[n]):
+                dict_doc_position[all_doc_no[n].firstChild.data] = [i+1 for i, j in zip(count(), tokens_doc[n]) if j == token]
+        proximity_index[token] = dict_doc_position
+    proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
+    indexnya = json.loads(json.dumps(proximity_index))
+    words = indexnya.keys()
+    freq = indexnya.values()
+    freq = list(freq)
+    hasil = {}
+    for key in words:
+        for value in freq:
+            hasil[key] = value
+            freq.remove(value)
+            break
+    numb = []
+    idx = []
+    for i, j in hasil.items():
+        numb.append(i)
+        idx.append(j)
+    res = {}
+    for key in numb:
+        for value in idx:
+            res[key] = value
+            idx.remove(value)
+            break
+    return res

 from nltk.corpus import stopwords
 stop_words = set(stopwords.words('english'))
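The indexing() helper added above builds a positional ("proximity") index: for each token it records, per document number, the 1-based positions at which the token occurs. A self-contained sketch of the same idea on toy data (illustration only; the names below are not from the repository):

# Illustration only: a minimal positional index like the one indexing() produces.
from itertools import count
import collections

docs = {
    "1": ["love", "me", "do", "love"],
    "2": ["let", "it", "be"],
}

proximity_index = {}
for doc_no, tokens in docs.items():
    for token in set(tokens):
        # 1-based positions of the token inside this document
        positions = [i + 1 for i, t in zip(count(), tokens) if t == token]
        proximity_index.setdefault(token, {})[doc_no] = positions

proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
print(proximity_index["love"])  # {'1': [1, 4]}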
...
@@ -64,30 +187,19 @@ def stemming(tokens):
     return tokens

-def main(query):
-    tree = ElementTree()
-    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    all_doc_no = []
-    all_song = []
-    all_text = []
-    for node in tree.iter("DOCNO"):
-        all_doc_no.append(node.text)
-    for node in tree.iter("SONG"):
-        all_song.append(node.text)
-    for node in tree.iter("LYRICS"):
-        all_text.append(node.text)
-    N_DOC = len(all_text)
-    all_sentence_doc = []
-    for i in range(N_DOC):
-        all_sentence_doc.append(all_song[i] + all_text[i])
-    tokens_doc = []
-    for i in range(N_DOC):
-        tokens_doc.append(remove_punc_tokenize(all_sentence_doc[i]))
+def searching(dcmnt_xml, query):
+    all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
+    all_song = dcmnt_xml.getElementsByTagName('SONG')
+    all_lyrics = dcmnt_xml.getElementsByTagName('LYRICS')
+    N_DOC = len(all_doc_no)
+    all_sentence_doc = []
+    for i in range(N_DOC):
+        sentence_doc = all_song[i].firstChild.data + ' ' + all_lyrics[i].firstChild.data
+        all_sentence_doc.append(sentence_doc)
+    tokens_doc = []
+    for i in range(N_DOC):
+        tokens_doc.append(remove_punc_tokenize(all_sentence_doc[i]))
...
@@ -95,158 +207,118 @@ def main(query):
     for i in range(N_DOC):
         tokens_doc[i] = to_lower(tokens_doc[i])
-    stop_words = set(stopwords.words('english'))
-    stopping = []
-    for i in range(N_DOC):
-        temp = []
-        for j in tokens_doc[i]:
-            if j not in stop_words:
-                temp.append(j)
-        stopping.append(temp)
-    for i in range(N_DOC):
-        tokens_doc[i] = ([w for w in stopping[i] if not any(j.isdigit() for j in w)])
-    factory = StemmerFactory()
-    stemmer = factory.create_stemmer()
-    stemming = []
-    for i in range(N_DOC):
-        temp = []
-        for j in tokens_doc[i]:
-            temp.append(stemmer.stem(j))
-        stemming.append(temp)
-    all_tokens = []
-    for i in range(N_DOC):
-        for w in stemming[i]:
-            all_tokens.append(w)
-    new_sentence = ' '.join([w for w in all_tokens])
-    for w in CountVectorizer().build_tokenizer()(new_sentence):
-        all_tokens.append(w)
-    all_tokens = set(all_tokens)
-    queri = []
-    spl = query.split()
-    for i in range(len(spl)):
-        if not spl[i].isdigit():
-            queri.append(spl[i])
-    punc = []
-    for i in range(len(queri)):
-        no_punc = ""
-        for j in range(len(queri[i])):
-            if queri[i][j] not in string.punctuation:
-                no_punc = no_punc + queri[i][j]
-        punc.append(no_punc)
-    lower = []
-    for i in range(len(punc)):
-        lower.append(punc[i].lower())
-    stop = []
-    for i in range(len(lower)):
-        if lower[i] not in stop_words:
-            stop.append(lower[i])
-    stem = []
-    for i in range(len(stop)):
-        stem.append(stemmer.stem(stop[i]))
-    join_word = ' '.join([w for w in stem])
-    ngram, ngram_doc = generate_ngrams(stemming, len(stem))
-    n_gram_index = {}
-    for ngram_token in ngram:
-        doc_no = []
-        for i in range(N_DOC):
-            if (ngram_token in ngram_doc[i]):
-                doc_no.append(all_doc_no[i])
-        n_gram_index[ngram_token] = doc_no
-    df = []
-    for i in range(N_DOC):
-        count = 0
-        for j in range(len(ngram_doc[i])):
-            if join_word == ngram_doc[i][j]:
-                count += 1
-        df.append(count)
-    idf = []
-    for i in range(len(df)):
-        try:
-            idf.append(math.log10(N_DOC/df[i]))
-        except ZeroDivisionError:
-            idf.append(str(0))
-    #w(t, d)
-    #t = term
-    #d = document
-    wtd = []
-    l = []
-    for i in range(N_DOC):
-        dic = {}
-        tf = ngram_doc[i].count(join_word)  # menghitung nilai tf
-        if tf != 0:
-            score = math.log10(tf)  #log10(tf(t,d))
-            score += 1  # 1 + log(tf(t,d))
-            score *= idf[i]  #tf * idf
-            idx = all_doc_no[i]
-            judul = all_song[i]
-            dic['docno'] = idx
-            dic['judul'] = judul
-            dic['score'] = score
-            l.append(dic)
-    wtd.append(l)  # [i+1] = defenisi nomor dokumen; score = wtd
-    # print(score)
-    hasil = []
-    hasil.append(sorted(wtd[0], key=lambda x: x['score'], reverse=True))
-    return hasil
+    for i in range(N_DOC):
+        tokens_doc[i] = stop_word_token(tokens_doc[i])
+    for i in range(N_DOC):
+        tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
+    for i in range(N_DOC):
+        tokens_doc[i] = stemming(tokens_doc[i])
+    all_tokens = []
+    for i in range(N_DOC):
+        for j in tokens_doc[i]:
+            # print(j)
+            all_tokens.append(j)
+    new_sentences = ' '.join([w for w in all_tokens])
+    for j in CountVectorizer().build_tokenizer()(new_sentences):
+        all_tokens.append(j)
+    all_tokens = set(all_tokens)
+    alls = []
+    for i in all_tokens:
+        alls.append(i)
+    proximity_index = {}
+    for token in all_tokens:
+        dict_doc_position = {}
+        for n in range(N_DOC):
+            if (token in tokens_doc[n]):
+                dict_doc_position[all_doc_no[n].firstChild.data] = [i+1 for i, j in zip(count(), tokens_doc[n]) if j == token]
+        proximity_index[token] = dict_doc_position
+    import collections
+    proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
+    kunci = []
+    nilai = []
+    for key, value in proximity_index[query].items():
+        kunci.append(key)
+        nilai.append(value)
+    dict = {}
+    for key in kunci:
+        for value in nilai:
+            dict[key] = value
+            nilai.remove(value)
+            break
+    xtree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
+    xroot = xtree.getroot()
+    df_cols = ["SONG"]
+    rows = []
+    for node in xroot:
+        lirik = node.find("SONG").text if node is not None else None
+        rows.append({"SONG": lirik})
+    df = pd.DataFrame(rows, columns=df_cols)
+    nomor = []
+    for i in dict:
+        nomor.append(int(i))
+    judul = []
+    for i in nomor:
+        judul.append(df['SONG'][i-1])
+    hasil = {}
+    for key in nomor:
+        for value in judul:
+            hasil[key] = value
+            judul.remove(value)
+            break
+    numb = []
+    tit = []
+    for i, j in hasil.items():
+        numb.append(i)
+        tit.append(j)
+    res = {}
+    for key in numb:
+        for value in tit:
+            res[key] = value
+            tit.remove(value)
+            break
+    return res

-def detail(nomor):
-    tree = et()
-    tree.parse("apps/data/dataset_STBI.xml")
-    all_doc_no = []
-    all_song = []
-    all_text = []
-    for node in tree.iter("DOCNO"):
-        all_doc_no.append(node.text)
-    for node in tree.iter("SONG"):
-        # all_song.append(node.text.replace("\n"," "))
-        all_song.append(node.text)
-    head = all_song
-    for node in tree.iter("LYRICS"):
-        # all_text.append(node.text.replace("\n"," "))
-        all_text.append(node.text)
-    N_DOC = len(all_text)
-    text = []
-    judul = []
-    hasil = []
-    id = str(nomor)
-    for i in range(N_DOC):
-        check = all_doc_no[i]
-        if check == id:
-            text = all_text[i]
-            judul = all_song[i]
-    return text, judul
+def detail(id):
+    import pandas as pd
+    import xml.etree.ElementTree as et
+    import numpy as np
+    xtree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
+    xroot = xtree.getroot()
+    df_cols = ["SONG", "LYRICS"]
+    rows = []
+    for node in xroot:
+        judul = node.find("SONG").text if node is not None else None
+        lirik = node.find("LYRICS").text if node is not None else None
+        rows.append({"SONG": judul, "LYRICS": lirik})
+    df = pd.DataFrame(rows, columns=df_cols)
+    lyrics = df['LYRICS'][id-1]
+    judul = df['SONG'][id-1]
+    return lyrics, judul
\ No newline at end of file
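For reference, the removed main() scored documents with log-weighted tf-idf, w(t, d) = (1 + log10 tf(t, d)) * log10(N / df(t)), treating absent terms as zero. A small standalone sketch of that formula (illustration only, not code from this commit):

# Illustration only: the tf-idf weight used by the removed main().
import math

def tf_idf_weight(tf, df, n_docs):
    """w(t, d) = (1 + log10 tf) * log10(N / df); 0 when the term is absent."""
    if tf == 0 or df == 0:
        return 0.0
    return (1 + math.log10(tf)) * math.log10(n_docs / df)

print(tf_idf_weight(tf=3, df=2, n_docs=10))  # ~1.03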
SearchEngine/InvertedIndexSimulator/static/assets/css/dataframe.min.css

@@ -55,15 +55,13 @@ footer {
   border-radius: 15px;
   padding: 20px;
   margin-top: 10px;
-  width: auto;
+  width: 100%;
 }
-.carda {
-  box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2);
-  border-radius: 15px;
-  padding: 20px;
-  margin-top: 10px;
+table {
+  table-layout: fixed;
+  border: 1px solid black;
+  width: 100px;
+  width: max-content;
 }
 .jumbotron {
...
@@ -155,11 +153,6 @@ button:hover span:after {
   right: 0;
 }
-table, th, td {
-  border: 1px solid black;
-  border-collapse: collapse;
-}
 form button {
   display: inline-block;
...
SearchEngine/InvertedIndexSimulator/static/assets/css/trying.min.css
0 → 100644 (new file)

@import url('https://fonts.googleapis.com/css?family=Quicksand:400,700&display=swap');
body { font-family: sans-serif; }
h2, h3 { color: #00a2c6 }
footer { color: white; background-color: #591a75 }
nav a { font-size: 18px; font-weight: 400; text-decoration: none; }
nav a:hover { font-weight: bold; }
.profile header { text-align: center; }
footer { position: fixed; left: 0; bottom: 0; width: 100%; padding: 5px; color: white; background-color: #440f5c; text-align: center; font-weight: bold; }
.featured-image { width: 100%; max-height: 300px; object-fit: cover; object-position: center; }
.card { box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2); border-radius: 15px; padding: 20px; margin-top: 10px; }
.jumbotron { font-size: 20px; padding: 60px; text-align: center; color: white; background-image: url(https://ak.picdn.net/assets/cms/music_subscription_homepage_banner.jpg); background-size: cover; background-repeat: no-repeat; text-shadow: black 0.3em 0.3em 0.3em; }
nav { background-color: #091729; padding: 5px; position: sticky; top: 0; }
nav a { font-size: 18px; font-weight: 400; text-decoration: none; color: white; }
body { font-family: 'Quicksand', sans-serif; margin: 0; padding: 0; }
main { padding: 15px; overflow: auto; }
#content { width: 100%; }
* { box-sizing: border-box; }
.button { display: inline-block; border-radius: 4px; background-color: #7c1ca6; border: none; color: #FFFFFF; text-align: center; font-size: 15px; padding: 20px; transition: all 0.5s; cursor: pointer; margin: 5px; }
button span { cursor: pointer; display: inline-block; position: relative; transition: 0.5s; }
button span:after { content: '\00bb'; position: absolute; opacity: 0; top: 0; right: -20px; transition: 0.5s; }
button:hover span { padding-right: 25px; }
button:hover span:after { opacity: 1; right: 0; }
form button { display: inline-block; border-radius: 4px; background-color: #7c1ca6; border: none; color: #FFFFFF; text-align: center; font-size: 15px; padding: 10px; transition: all 0.5s; cursor: pointer; margin: 5px; width: 80px; }
\ No newline at end of file
SearchEngine/InvertedIndexSimulator/templates/apps/dataframe.html

@@ -5,23 +5,6 @@
 <meta name="viewport" content="width=device-width, initial-scale=1">
 <title>Song Lyric Search Engine</title>
 <link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
-<style>
-  #leftbox { text-align: center; float: left; white-space: nowrap; }
-  #middlebox { float: left; text-align: center; white-space: nowrap; }
-  #middleboxb { float: left; text-align: left; white-space: nowrap; }
-</style>
 </head>
 <body>
...
@@ -38,69 +21,23 @@
 </div>
 <center><h1>Dataset</h1><br></center>
-<div id = "leftbox">
-  <table>
-    <tr><th>DOCNO</th></tr>
-    {% for i in DOCNO %}
-    <tr><td>{{ i }}</td></tr>
-    {% endfor %}
-  </table>
-</div>
-<div id = "middlebox">
-  <table align="left">
-    <tr><th>SONG</th></tr>
-    {% for i in SONG %}
-    <tr><td>{{ i }}</td></tr>
-    {% endfor %}
-  </table>
-</div>
-<div id = "middlebox">
-  <table>
-    <tr><th>ARTIST</th></tr>
-    {% for i in ARTIST %}
-    <tr><td>{{ i }}</td></tr>
-    {% endfor %}
-  </table>
-</div>
-<div id = "middleboxb">
-  <table>
-    <tr><th>LYRICS</th></tr>
-    {% for i in LYRICS %}
-    <tr><td>{{ i }}</td></tr>
-    {% endfor %}
-  </table>
-</div>
+<article class="carda" style="overflow-x:scroll; overflow-y:scroll;">
+  <table>
+    <tr>
+      <th>DOCNO</th>
+      <th>SONG</th>
+      <th>ARTIST</th>
+      <th>LYRICS</th>
+    </tr>
+    {% for i in DOCNO %}
+    <tr>
+      <td>{{ i }}</td>
+      <td>{{ j }}</td>
+      <td>{{ k }}</td>
+      <td>{{ l }}</td>
+    </tr>
+    {% endfor %}
+  </table>
+</article>
 </article>
 </div>
...
SearchEngine/InvertedIndexSimulator/templates/apps/indexing.html

@@ -4,24 +4,7 @@
 <head>
 <meta name="viewport" content="width=device-width, initial-scale=1">
 <title>Song Lyric Search Engine</title>
-<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
-<style>
-  #leftbox { text-align: center; float: left; white-space: nowrap; }
-  #middlebox { float: left; text-align: center; white-space: nowrap; }
-  #middleboxb { float: left; text-align: left; white-space: nowrap; }
-</style>
+<link href="../../static/assets/css/trying.min.css" rel="stylesheet">
 </head>
 <body>
...
@@ -37,41 +20,20 @@
 </div>
 </div>
-<center><p style="font-size:40px;"><strong>Indexing</strong></p>
-<div id = "leftbox">
-  <table>
-    <tr><th>Token</th></tr>
-    {% for i in words %}
-    <tr><td>{{ i }}</td></tr>
-    {% endfor %}
-  </table>
-</div>
-<div id = "middleboxb">
-  <table align="left">
-    <tr><th>Index</th></tr>
-    {% for i in freq %}
-    <tr><td>{{ i }}</td></tr>
-    {% endfor %}
-  </table>
-</div>
+<center><h1>Proximity Index</h1><br></center>
+<article class="carda" style="overflow-x:scroll; overflow-y:scroll;">
+  <table width="100%"; border="1px solid black">
+    <tr>
+      <th>Token</th>
+      <th>Index</th>
+    </tr>
+    {% for key, values in res.items %}
+    <tr>
+      <td>{{ key }}</td>
+      <td>{{ values }}</td>
+    </tr>
+    {% endfor %}
+  </table>
 </article>
 </div>
...
SearchEngine/InvertedIndexSimulator/templates/apps/lyric.html

 <!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
...
@@ -21,61 +21,27 @@
 <!-- Custom styles for this template -->
 <link href="../../static/assets/css/landing-page.min.css" rel="stylesheet">
 </head>
 <body>
 <!-- Navigation -->
 <nav class="navbar navbar-light bg-light static-top">
   <div class="container">
-    <a class="navbar-brand" href="/">Cari Lagu</a>
+    <a class="navbar-brand" href="/">Search Simulator</a>
     <!-- <a class="btn btn-primary" href="#">Pilih Buku</a> -->
   </div>
 </nav>
-<!-- Masthead -->
-<!-- <header class="masthead text-white text-center">
-  <div class="overlay"></div>
-  <div class="container">
-    <div class="row">
-      <div class="col-xl-9 mx-auto">
-        <h1 class="mb-5">Silahkan masukkan lirik dari lagu yang ingin Anda temukan</h1>
-      </div>
-      <div class="col-md-10 col-lg-8 col-xl-7 mx-auto">
-        <form method="POST" action="/search">
-          <div class="form-row">
-            <div class="col-12 col-md-9 mb-2 mb-md-0">
-              <input type="text" class="form-control form-control-lg" name="querysearch" placeholder="Masukkan Query Anda...">
-            </div>
-            <div class="col-12 col-md-3">
-              <button type="submit" class="btn btn-block btn-lg btn-primary">Cari!</button>
-            </div>
-          </div>
-        </form>
-      </div>
-    </div>
-  </div>
-</header> -->
 <!-- Testimonials -->
 <section class="testimonials text-center bg-light">
   <div class="container">
     <h2 class="mb-3">Lirik Lagu</h2>
-    <h4 class="mb-3">No.{{no}} - {{judul}}</h4>
-    <p>{{text}}</p>
+    <h4 class="mb-3">No. {{ no }} - {{ judul }}</h4>
+    <p>{{ lyrics }}</p>
   </div>
 </section>
 <!-- Bootstrap core JavaScript -->
 <script src="../../static/assets/vendor/jquery/jquery.min.js"></script>
 <script src="../../static/assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
 </body>
 </html>
SearchEngine/InvertedIndexSimulator/templates/apps/result.html

@@ -24,41 +24,26 @@
 </head>
 <body>
 <!-- Navigation -->
 <nav class="navbar navbar-light bg-light static-top">
   <div class="container">
-    <a class="navbar-brand" href="/">CariLagu</a>
+    <a class="navbar-brand" href="/">Search Simulator</a>
     <!-- <a class="btn btn-primary" href="#">Pilih Buku</a> -->
   </div>
 </nav>
 <!-- Testimonials -->
 <section class="testimonials text-center bg-light">
   <div class="container">
-    <h2 class="mb-5">Lagu yang sesuai dengan "{{ query }}"</h2>
-    {% if hasil %}
-    <div class="row">
-      {% for i in hasil %}
-      {% for j in i %}
-      <div class="col-lg-4">
-        <div class="testimonial-item mx-auto mb-5 mb-lg-0">
-          <img class="img-fluid rounded-circle mb-3" src="../../static/img/hkbp.jpg" alt="">
-          <h5><a href="/lyric">Lagu No:{{ j.docno }}</a></h5>
-          <h5>"{{ j.judul }}"</h5>
-          <p class="font-weight-light mb-0">score :{{ j.score }}</p>
-        </div>
-      </div>
-      {% endfor %}
-      {% endfor %}
-    </div>
-    {% else %}
-    <h2 class="mb-5">Lagu dengan lirik: "{{ query }}" tidak ditemukan</h2>
-    {% endif %}
+    <h2 class="mb-5">Lagu yang sesuai dengan query "{{ query }}"</h2>
+    <div class="row">
+      {% for key, values in res.items %}
+      <div class="col-lg-4">
+        <div class="testimonial-item mx-auto mb-5 mb-lg-0">
+          <img class="img-fluid rounded-circle mb-3" src="../../static/img/hkbp.jpg" alt="">
+          <h5><a href="/lyric/{{ key }}">Lagu No: {{ key }}</a></h5>
+          <h5>"{{ values }}"</h5>
+        </div>
+      </div>
+      {% endfor %}
+    </div>
   </div>
 </section>
...
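The new result.html iterates res.items, so the result view has to supply a plain mapping from document number to matching song title alongside the query string. A minimal sketch of the context shape the template expects (values invented for illustration):

# Hypothetical context for apps/result.html after this commit (values are made up).
context = {
    'query': 'love',
    'res': {1: 'Song A', 2: 'Song B'},  # document number -> matching song title
}
# In the template: {% for key, values in res.items %} ... /lyric/{{ key }} ... {{ values }}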
SearchEngine/InvertedIndexSimulator/views.py

 from django.shortcuts import render
 from django.http import HttpResponse
 from InvertedIndexSimulator.inverted import main
+from xml.etree.ElementTree import ElementTree
+from sklearn.feature_extraction.text import CountVectorizer
+from itertools import count
 import pandas as pd
 import xml.etree.ElementTree as et
 import string
 import re
-from sklearn.feature_extraction.text import CountVectorizer
-import json
 import xml.dom.minidom as minidom
 import collections
-from itertools import count
 try:
     from future_builtins import zip
 except ImportError:  # not 2.6+ or is 3.x
...
@@ -17,138 +19,56 @@ except ImportError: # not 2.6+ or is 3.x
     except ImportError:
         pass

 def home(request):
     return render(request, 'apps/home.html')

 def dataframe(request):
     parse_data = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    data = parse_data.getroot()
-    df_cols = ["DOCNO", "SONG", "ARTIST", "LYRICS"]
-    rows = []
-    for node in data:
-        s_docno = node.find("DOCNO").text if node is not None else None
-        s_song = node.find("SONG").text if node is not None else None
-        s_artist = node.find("ARTIST").text if node is not None else None
-        s_lyrics = node.find("LYRICS").text if node is not None else None
-        rows.append({"DOCNO": s_docno, "SONG": s_song, "ARTIST": s_artist, "LYRICS": s_lyrics})
-    DataFrame = pd.DataFrame(rows, columns=df_cols)
-    dictionary = DataFrame.set_index('DOCNO').T.to_dict('list')
-    nilai = list(dictionary.values())
-    nomornya = list(dictionary.keys())
-    lagunya = [sublist[0] for sublist in nilai]
-    artisnya = [sublist[1] for sublist in nilai]
-    liriknya = [sublist[2] for sublist in nilai]
-    context = {"DOCNO": nomornya, "SONG": lagunya, "ARTIST": artisnya, "LYRICS": liriknya}
+    context = main.show_dataframe(parse_data)
     return render(request, 'apps/dataframe.html', context)

 def preprocessing(request):
-    from xml.etree.ElementTree import ElementTree
     tree = ElementTree()
-    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    all_doc_no = []
-    all_song = []
-    all_text = []
-    for node in tree.iter("DOCNO"):
-        all_doc_no.append(node.text)
-    for node in tree.iter("SONG"):
-        all_song.append(node.text)
-    for node in tree.iter("LYRICS"):
-        all_text.append(node.text)
-    N_DOC = len(all_text)
-    all_sentence_doc = []
-    for i in range(N_DOC):
-        all_sentence_doc.append(all_song[i] + all_text[i])
+    all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
     tokens_doc = []
     for i in range(N_DOC):
         tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
     context = {"tokens_doc": tokens_doc}
     return render(request, 'apps/preprocessing.html', context)

 def preprocessing2(request):
-    from xml.etree.ElementTree import ElementTree
     tree = ElementTree()
-    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    all_doc_no = []
-    all_song = []
-    all_text = []
-    for node in tree.iter("DOCNO"):
-        all_doc_no.append(node.text)
-    for node in tree.iter("SONG"):
-        all_song.append(node.text)
-    for node in tree.iter("LYRICS"):
-        all_text.append(node.text)
-    N_DOC = len(all_text)
-    all_sentence_doc = []
-    for i in range(N_DOC):
-        all_sentence_doc.append(all_song[i] + all_text[i])
+    all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
     tokens_doc = []
     for i in range(N_DOC):
         tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
     for i in range(N_DOC):
         tokens_doc[i] = main.to_lower(tokens_doc[i])
     context = {"tokens_doc": tokens_doc}
     return render(request, 'apps/preprocessing2.html', context)

 def preprocessing3(request):
-    from xml.etree.ElementTree import ElementTree
     tree = ElementTree()
-    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    all_doc_no = []
-    all_song = []
-    all_text = []
-    for node in tree.iter("DOCNO"):
-        all_doc_no.append(node.text)
-    for node in tree.iter("SONG"):
-        all_song.append(node.text)
-    for node in tree.iter("LYRICS"):
-        all_text.append(node.text)
-    N_DOC = len(all_text)
-    all_sentence_doc = []
-    for i in range(N_DOC):
-        all_sentence_doc.append(all_song[i] + all_text[i])
+    all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
     tokens_doc = []
     for i in range(N_DOC):
         tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
...
@@ -161,37 +81,18 @@ def preprocessing3(request):
     for i in range(N_DOC):
         tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
     context = {"tokens_doc": tokens_doc}
     return render(request, 'apps/preprocessing3.html', context)

 def preprocessing4(request):
-    from xml.etree.ElementTree import ElementTree
     tree = ElementTree()
-    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    all_doc_no = []
-    all_song = []
-    all_text = []
-    for node in tree.iter("DOCNO"):
-        all_doc_no.append(node.text)
-    for node in tree.iter("SONG"):
-        all_song.append(node.text)
-    for node in tree.iter("LYRICS"):
-        all_text.append(node.text)
-    N_DOC = len(all_text)
-    all_sentence_doc = []
-    for i in range(N_DOC):
-        all_sentence_doc.append(all_song[i] + all_text[i])
+    all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
     tokens_doc = []
     for i in range(N_DOC):
         tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
...
@@ -207,33 +108,17 @@ def preprocessing4(request):
     for i in range(N_DOC):
         tokens_doc[i] = main.stemming(tokens_doc[i])
     context = {"tokens_doc": tokens_doc}
     return render(request, 'apps/preprocessing4.html', context)

 def indexing(request):
-    import string
-    import re
-    from sklearn.feature_extraction.text import CountVectorizer
-    import xml.dom.minidom as minidom
     dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
-    all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
-    all_profile = dcmnt_xml.getElementsByTagName('SONG')
-    all_date = dcmnt_xml.getElementsByTagName('ARTIST')
-    all_text = dcmnt_xml.getElementsByTagName('LYRICS')
-    all_pub = dcmnt_xml.getElementsByTagName('PUB')
-    all_page = dcmnt_xml.getElementsByTagName('PAGE')
-    N_DOC = len(all_doc_no)
-    all_sentence_doc_sample = []
-    for i in range(N_DOC):
-        sentence_doc_sample = ' ' + all_text[i].firstChild.data
-        all_sentence_doc_sample.append(sentence_doc_sample)
+    all_doc_no, N_DOC, all_sentence_doc_sample = main.load_data(dcmnt_xml)
     tokens_doc = []
...
@@ -249,65 +134,40 @@ def indexing(request):
     for i in range(N_DOC):
         tokens_doc[i] = main.stemming(tokens_doc[i])
-    all_tokens = []
-    for i in range(N_DOC):
-        for w in tokens_doc[i]:
-            all_tokens.append(w)
-    new_sentence = ' '.join([w for w in all_tokens])
-    for w in CountVectorizer().build_tokenizer()(new_sentence):
-        all_tokens.append(w)
-    all_tokens = set(all_tokens)
-    proximity_index = {}
-    for token in all_tokens:
-        dict_doc_position = {}
-        for n in range(N_DOC):
-            if (token in tokens_doc[n]):
-                dict_doc_position[all_doc_no[n].firstChild.data] = [i+1 for i, j in zip(count(), tokens_doc[n]) if j == token]
-        proximity_index[token] = dict_doc_position
-    proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
-    import json
-    indexnya = json.loads(json.dumps(proximity_index))
-    words = indexnya.keys()
-    freq = indexnya.values()
-    context = {"words": words, "freq": freq}
+    res = main.indexing(N_DOC, tokens_doc, all_doc_no)
+    context = {"res": res,}
     return render(request, 'apps/indexing.html', context)

 def index(request):
     return render(request, 'apps/index.html')

-def lyric(request, id):
-    text, judul = main.detail(id)
-    content = {'no': id, 'judul': judul, 'text': text}
-    return render(request, 'apps/lyric.html', content)

 def result(request):
+    #%%
+    dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
+    # proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
+    # for key, value in proximity_index.items():
+    #     # print (key, value)
     if request.method == 'POST':
         query = request.POST['querysearch']
-        hasil = main.main(query)
+        res = main.searching(dcmnt_xml, query)
         content = {
-            'hasil': hasil,
+            'res': res,
             'query': query
         }
         return render(request, 'apps/result.html', content)

+def lyric(request, id):
+    lyrics, judul = main.detail(id)
+    content = {'no': id, 'judul': judul, 'lyrics': lyrics,}
+    return render(request, 'apps/lyric.html', content)
\ No newline at end of file
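The compiled SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc below changes as well, but the source urls.py is not part of this diff. A hedged sketch of the kind of routing the new views and templates imply (the actual patterns in the project may differ):

# Illustrative only -- the project's real urls.py is not shown in this commit.
from django.urls import path
from InvertedIndexSimulator import views

urlpatterns = [
    path('', views.home),
    path('dataframe/', views.dataframe),
    path('indexing/', views.indexing),
    path('search', views.result),         # form action="/search" seen in the templates
    path('lyric/<int:id>', views.lyric),  # matches href="/lyric/{{ key }}" in result.html
]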
SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc
No preview for this file type