Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
SearchEngine
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Rosa Delima Mendrofa
SearchEngine
Commits
2d25e3c9
Commit
2d25e3c9
authored
May 08, 2020
by
Yolanda Nainggolan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add indexing
parent
e0d68fdf
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
90 additions
and
46 deletions
+90
-46
views.cpython-37.pyc
...e/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
+0
-0
main.cpython-37.pyc
...edIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
+0
-0
main.py
SearchEngine/InvertedIndexSimulator/inverted/main.py
+0
-0
indexing.html
...ngine/InvertedIndexSimulator/templates/apps/indexing.html
+47
-5
views.py
SearchEngine/InvertedIndexSimulator/views.py
+43
-41
No files found.
SearchEngine/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
View file @
2d25e3c9
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
View file @
2d25e3c9
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/main.py
View file @
2d25e3c9
SearchEngine/InvertedIndexSimulator/templates/apps/indexing.html
View file @
2d25e3c9
...
@@ -5,6 +5,23 @@
...
@@ -5,6 +5,23 @@
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<title>
Song Lyric Search Engine
</title>
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
<style>
#leftbox
{
text-align
:
center
;
float
:
left
;
white-space
:
nowrap
;
}
#middlebox
{
float
:
left
;
text-align
:
center
;
white-space
:
nowrap
;
}
#middleboxb
{
float
:
left
;
text-align
:
left
;
white-space
:
nowrap
;
}
</style>
</head>
</head>
<body>
<body>
...
@@ -19,25 +36,50 @@
...
@@ -19,25 +36,50 @@
<button
onclick=
"pageRedirect_next()"
class=
"button"
style=
"vertical-align:middle"
><span>
Next
</span></button>
<button
onclick=
"pageRedirect_next()"
class=
"button"
style=
"vertical-align:middle"
><span>
Next
</span></button>
</div>
</div>
</div>
</div>
<center><h1>
Indexing
</h1><br></center>
<p><strong>
Dengan Proximity Index
</strong></p><br></center>
<center><h1>
Proximity Index
</h1><br></center>
<table
style=
"width:100%"
>
<article
class=
"carda"
style=
"overflow-x:scroll; overflow-y:scroll;"
>
<div
id =
"leftbox"
>
<table>
<tr>
<tr>
<th>
Apa judulnya ya?
</th>
<th>
Token
</th>
</tr>
</tr>
{% for i in indexnya
%}
{% for i in words
%}
<tr>
<tr>
<td>
{{ i }}
</td>
<td>
{{ i }}
</td>
</tr>
</tr>
{% endfor %}
{% endfor %}
</table>
</table>
</div>
<div
id =
"middleboxb"
>
<table
align=
"left"
>
<tr>
<th>
Index
</th>
</tr>
{% for i in freq %}
<tr>
<td>
{{ i }}
</td>
</tr>
{% endfor %}
</table>
</div>
</article>
</article>
</article>
</div>
</div>
</main>
</main>
<!-- <footer>
<p>© STBI-2020-03</p>
</footer> -->
</body>
</body>
...
...
SearchEngine/InvertedIndexSimulator/views.py
View file @
2d25e3c9
...
@@ -3,6 +3,19 @@ from django.http import HttpResponse
...
@@ -3,6 +3,19 @@ from django.http import HttpResponse
from
InvertedIndexSimulator.inverted
import
main
from
InvertedIndexSimulator.inverted
import
main
import
pandas
as
pd
import
pandas
as
pd
import
xml.etree.ElementTree
as
et
import
xml.etree.ElementTree
as
et
import
string
import
re
from
sklearn.feature_extraction.text
import
CountVectorizer
import
xml.dom.minidom
as
minidom
import
collections
from
itertools
import
count
try
:
from
future_builtins
import
zip
except
ImportError
:
# not 2.6+ or is 3.x
try
:
from
itertools
import
izip
as
zip
# < 2.5 or 3.x
except
ImportError
:
pass
def
home
(
request
):
def
home
(
request
):
return
render
(
request
,
'apps/home.html'
)
return
render
(
request
,
'apps/home.html'
)
...
@@ -201,37 +214,31 @@ def preprocessing4(request):
...
@@ -201,37 +214,31 @@ def preprocessing4(request):
def
indexing
(
request
):
def
indexing
(
request
):
from
sklearn.feature_extraction.text
import
CountVectorizer
from
xml.etree.ElementTree
import
ElementTree
tree
=
ElementTree
()
tree
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
=
[]
import
string
all_song
=
[]
import
re
all_text
=
[]
from
sklearn.feature_extraction.text
import
CountVectorizer
import
xml.dom.minidom
as
minidom
for
node
in
tree
.
iter
(
"DOCNO"
):
dcmnt_xml
=
minidom
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
all_song
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"LYRICS"
):
all_doc_no
=
dcmnt_xml
.
getElementsByTagName
(
'DOCNO'
)
all_text
.
append
(
node
.
text
)
all_profile
=
dcmnt_xml
.
getElementsByTagName
(
'SONG'
)
all_date
=
dcmnt_xml
.
getElementsByTagName
(
'ARTIST'
)
all_text
=
dcmnt_xml
.
getElementsByTagName
(
'LYRICS'
)
all_pub
=
dcmnt_xml
.
getElementsByTagName
(
'PUB'
)
all_page
=
dcmnt_xml
.
getElementsByTagName
(
'PAGE'
)
N_DOC
=
len
(
all_
text
)
N_DOC
=
len
(
all_
doc_no
)
all_sentence_doc
=
[]
all_sentence_doc
_sample
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
all_sentence_doc
.
append
(
all_song
[
i
]
+
all_text
[
i
])
sentence_doc_sample
=
' '
+
all_text
[
i
]
.
firstChild
.
data
all_sentence_doc_sample
.
append
(
sentence_doc_sample
)
tokens_doc
=
[]
tokens_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc_sample
[
i
]))
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
main
.
to_lower
(
tokens_doc
[
i
])
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
main
.
stop_word_token
(
tokens_doc
[
i
])
tokens_doc
[
i
]
=
main
.
stop_word_token
(
tokens_doc
[
i
])
...
@@ -242,27 +249,18 @@ def indexing(request):
...
@@ -242,27 +249,18 @@ def indexing(request):
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
main
.
stemming
(
tokens_doc
[
i
])
tokens_doc
[
i
]
=
main
.
stemming
(
tokens_doc
[
i
])
all_tokens
=
[]
all_tokens
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
for
j
in
tokens_doc
[
i
]:
for
w
in
tokens_doc
[
i
]:
all_tokens
.
append
(
j
)
all_tokens
.
append
(
w
)
new_sentence
s
=
' '
.
join
([
w
for
w
in
all_tokens
])
new_sentence
=
' '
.
join
([
w
for
w
in
all_tokens
])
for
j
in
CountVectorizer
()
.
build_tokenizer
()(
new_sentences
):
for
w
in
CountVectorizer
()
.
build_tokenizer
()(
new_sentence
):
all_tokens
.
append
(
j
)
all_tokens
.
append
(
w
)
all_tokens
=
set
(
all_tokens
)
all_tokens
=
set
(
all_tokens
)
from
itertools
import
count
try
:
from
future_builtins
import
zip
except
ImportError
:
# not 2.6+ or is 3.x
try
:
from
itertools
import
izip
as
zip
# < 2.5 or 3.x
except
ImportError
:
pass
proximity_index
=
{}
proximity_index
=
{}
for
token
in
all_tokens
:
for
token
in
all_tokens
:
dict_doc_position
=
{}
dict_doc_position
=
{}
...
@@ -271,12 +269,16 @@ def indexing(request):
...
@@ -271,12 +269,16 @@ def indexing(request):
dict_doc_position
[
all_doc_no
[
n
]
.
firstChild
.
data
]
=
[
i
+
1
for
i
,
j
in
zip
(
count
(),
tokens_doc
[
n
])
if
j
==
token
]
dict_doc_position
[
all_doc_no
[
n
]
.
firstChild
.
data
]
=
[
i
+
1
for
i
,
j
in
zip
(
count
(),
tokens_doc
[
n
])
if
j
==
token
]
proximity_index
[
token
]
=
dict_doc_position
proximity_index
[
token
]
=
dict_doc_position
import
collections
proximity_index
=
collections
.
OrderedDict
(
sorted
(
proximity_index
.
items
()))
proximity_index
=
collections
.
OrderedDict
(
sorted
(
proximity_index
.
items
()))
for
key
,
value
in
proximity_index
.
items
():
indexnya
=
(
key
,
value
)
context
=
{
"indexnya"
:
indexnya
}
import
json
indexnya
=
json
.
loads
(
json
.
dumps
(
proximity_index
))
words
=
indexnya
.
keys
()
freq
=
indexnya
.
values
()
context
=
{
"words"
:
words
,
"freq"
:
freq
}
return
render
(
request
,
'apps/indexing.html'
,
context
)
return
render
(
request
,
'apps/indexing.html'
,
context
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment