Commit b3b033bf by Yolanda Nainggolan

add phrase search

parent 8f565798
...@@ -187,7 +187,7 @@ def stemming(tokens): ...@@ -187,7 +187,7 @@ def stemming(tokens):
return tokens return tokens
def searching(dcmnt_xml, query): def proximity(dcmnt_xml, query):
all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO') all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
all_song = dcmnt_xml.getElementsByTagName('SONG') all_song = dcmnt_xml.getElementsByTagName('SONG')
...@@ -322,3 +322,110 @@ def detail(id): ...@@ -322,3 +322,110 @@ def detail(id):
return lyrics ,judul return lyrics ,judul
def phrase(dcmnt_xml, query):
    """Find the songs whose preprocessed text contains the bigram ``query``.

    For every document the SONG title and LYRICS text are concatenated and
    run through ``remove_punc_tokenize``, ``to_lower``, ``stop_word_token``,
    a filter that drops tokens containing a digit, and ``stemming``.
    Adjacent surviving tokens are joined with ``_`` to form bigrams, and each
    bigram is indexed against the documents it occurs in.

    Args:
        dcmnt_xml: Parsed XML document exposing ``getElementsByTagName``,
            with parallel DOCNO, SONG and LYRICS elements.
        query: Bigram to look up, written as two tokens joined by an
            underscore (for example ``"sugar_pie"``).

    Returns:
        dict mapping each matching DOCNO (converted to ``int``) to the SONG
        title of that same document, in document order. The dict is empty
        when no document contains the bigram.
    """
    all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
    all_song = dcmnt_xml.getElementsByTagName('SONG')
    all_lyrics = dcmnt_xml.getElementsByTagName('LYRICS')

    # bigram -> [(raw DOCNO text, song title), ...] in document order.
    bi_gram_index = {}
    for doc_no, song, lyrics in zip(all_doc_no, all_song, all_lyrics):
        title = song.firstChild.data
        tokens = remove_punc_tokenize(title + ' ' + lyrics.firstChild.data)
        tokens = to_lower(tokens)
        tokens = stop_word_token(tokens)
        tokens = [w for w in tokens if not any(ch.isdigit() for ch in w)]
        tokens = stemming(tokens)

        # A set gives exact bigram membership (no substring false positives)
        # and lists each document at most once per bigram.
        doc_bigrams = {
            left + '_' + right for left, right in zip(tokens, tokens[1:])
        }
        for bigram in doc_bigrams:
            bi_gram_index.setdefault(bigram, []).append(
                (doc_no.firstChild.data, title)
            )

    # Exact lookup first so every query that matched before still matches;
    # only fall back to a trimmed, lower-cased form when that finds nothing.
    # NOTE(review): the fallback presumes indexed tokens are lower-case after
    # to_lower/stemming -- confirm against those helpers.
    matches = bi_gram_index.get(query)
    if matches is None:
        matches = bi_gram_index.get(query.strip().lower(), [])

    # DOCNO is converted only for the hits, as before.
    return {int(doc_no): title for doc_no, title in matches}
\ No newline at end of file
@import url('https://fonts.googleapis.com/css?family=Quicksand:400,700&display=swap'); @import url('https://fonts.googleapis.com/css?family=Quicksand:400,700&display=swap');
body {
font-family: sans-serif;
}
h2, h3 { h2, h3 {
color: #00a2c6 color: #00a2c6
} }
...@@ -55,13 +50,6 @@ footer { ...@@ -55,13 +50,6 @@ footer {
border-radius: 15px; border-radius: 15px;
padding: 20px; padding: 20px;
margin-top: 10px; margin-top: 10px;
width: 100%;
}
table{
table-layout: fixed;
border: 1px solid black;
width: 100px;
} }
.jumbotron { .jumbotron {
...@@ -101,7 +89,6 @@ main { ...@@ -101,7 +89,6 @@ main {
} }
#content { #content {
width: 100%;
height: 100%; height: 100%;
} }
......
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
<title>Song Lyric Search Engine</title> <title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet"> <link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
</head> </head>
<style>
table {
border-collapse: collapse;
}
table, td, th {
border: 1px solid black;
}
</style>
<body> <body>
<main> <main>
...@@ -20,21 +29,15 @@ ...@@ -20,21 +29,15 @@
</div> </div>
</div> </div>
<center><h1>Dataset</h1><br></center> <center><p style="font-size:40px;"><strong>Data</strong></p>
<table> <table>
<tr> <tr>
<th>DOCNO</th>
<th>ARTIST</th>
<th>SONG</th>
<th>LYRICS</th> <th>LYRICS</th>
</tr> </tr>
{% for i in DOCNO %} {% for i in LYRICS %}
<tr> <tr>
<td>{{ i }}</td> <td>{{ i }}</td>
<td>{{ j }}</td>
<td>{{ k }}</td>
<td>{{ l }}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</table> </table>
......
...@@ -19,11 +19,10 @@ ...@@ -19,11 +19,10 @@
<main> <main>
<div id="content"> <div id="content">
<article class="card"> <article class="card">
<center><h1>Pilih Dataset</h1><br> <center><h1>Pilih Metode Searching</h1><br>
<table> <table>
<tr> <tr>
<th><button onclick="pageRedirect()" class="button" style="vertical-align:middle"><span>International Billboard Song </span></button></th> <td><button onclick="data()"class="button" style="vertical-align:middle"><span> Mulai </span></button></td>
<td><button class="button" style="vertical-align:middle"><span>Indonesian Song </span></button></td>
</tr> </tr>
</table> </table>
</center> </center>
...@@ -39,9 +38,9 @@ ...@@ -39,9 +38,9 @@
</body> </body>
<script> <script>
function pageRedirect() { function data() {
window.location.href = "/dataframe"; window.location.href = "/dataframe";
} }
</script> </script>
</html> </html>
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
</div> </div>
</div> </div>
<div class="row"> <div class="row">
<center><h1 style="font-size:45px">Searching!<br></h1> <center><h1 style="font-size:45px">Proximity Search<br></h1>
<p style="font-size:20px"><strong>Silahkan masukkan lirik dari lagu yang ingin Anda temukan</strong></p> <p style="font-size:20px"><strong>Silahkan masukkan satu kata dalam lirik dari lagu yang ingin Anda temukan</strong></p>
<form method="POST" action="/result/"> <form method="POST" action="/result/">
{% csrf_token %} {% csrf_token %}
......
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
<title>Song Lyric Search Engine</title> <title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/trying.min.css" rel="stylesheet"> <link href="../../static/assets/css/trying.min.css" rel="stylesheet">
</head> </head>
<style>
table {
border-collapse: collapse;
}
table, td, th {
border: 1px solid black;
}
</style>
<body> <body>
<main> <main>
...@@ -21,7 +30,7 @@ ...@@ -21,7 +30,7 @@
</div> </div>
<center><p style="font-size:40px;"><strong>Indexing</strong></p> <center><p style="font-size:40px;"><strong>Indexing</strong></p>
<table width="100%"; border="1px solid black"> <table>
<tr> <tr>
<th>Token</th> <th>Token</th>
<th>Index</th> <th>Index</th>
...@@ -47,7 +56,7 @@ ...@@ -47,7 +56,7 @@
<script> <script>
function pageRedirect_prev() { function pageRedirect_prev() {
window.location.href = "/preprocessing4"; window.location.href = "/search";
} }
function pageRedirect_next() { function pageRedirect_next() {
......
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
</head>
<body>
<main>
<div id="content">
<article class="card">
<div>
<div>
<button onclick="pageRedirect_prev()" class="button" style="vertical-align:middle"><span>Previous</span></button>
</div>
</div>
<div class="row">
<center><h1 style="font-size:45px">Phrase Search<br></h1>
<p style="font-size:20px"><strong>Silahkan masukkan dua kata dalam lirik dari lagu yang ingin Anda temukan (hubungkan dengan <i>underscore</i>)</strong></p>
<p>*Misalnya "sugar_pie" </p>
<form method="POST" action="/resultphrase/">
{% csrf_token %}
<div class="form-row">
<input type="text" name="querysearch" placeholder="Masukkan Query Anda..."> <br>
<button type="submit">Cari!</button>
</div>
</form>
</div>
</center>
</article>
</div>
</main>
</body>
<script>
// "Previous" button handler: navigate back to the search-method chooser.
function pageRedirect_prev() {
  var previousPage = "/search";
  window.location.assign(previousPage);
}
</script>
</html>
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
<title>Song Lyric Search Engine</title> <title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet"> <link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
</head> </head>
<style>
table {
border-collapse: collapse;
}
table, td, th {
border: 1px solid black;
}
</style>
<body> <body>
<main> <main>
......
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
<title>Song Lyric Search Engine</title> <title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet"> <link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
</head> </head>
<style>
table {
border-collapse: collapse;
}
table, td, th {
border: 1px solid black;
}
</style>
<body> <body>
<main> <main>
......
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
<title>Song Lyric Search Engine</title> <title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet"> <link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
</head> </head>
<style>
table {
border-collapse: collapse;
}
table, td, th {
border: 1px solid black;
}
</style>
<body> <body>
<main> <main>
......
...@@ -6,6 +6,15 @@ ...@@ -6,6 +6,15 @@
<title>Song Lyric Search Engine</title> <title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet"> <link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
</head> </head>
<style>
table {
border-collapse: collapse;
}
table, td, th {
border: 1px solid black;
}
</style>
<body> <body>
<main> <main>
...@@ -48,7 +57,7 @@ ...@@ -48,7 +57,7 @@
} }
function pageRedirect_next() { function pageRedirect_next() {
window.location.href = "/indexing"; window.location.href = "/search";
} }
</script> </script>
......
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta name="description" content="">
<meta name="author" content="">
<title>Inverted Index</title>
<!-- Bootstrap core CSS -->
<link href="../../static/assets/vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">
<!-- Custom fonts for this template -->
<link href="../../static/assets/vendor/fontawesome-free/css/all.min.css" rel="stylesheet">
<link href="../../static/assets/vendor/simple-line-icons/css/simple-line-icons.css" rel="stylesheet" type="text/css">
<link href="https://fonts.googleapis.com/css?family=Lato:300,400,700,300italic,400italic,700italic" rel="stylesheet" type="text/css">
<!-- Custom styles for this template -->
<link href="../../static/assets/css/landing-page.min.css" rel="stylesheet">
</head>
<body>
<nav class="navbar navbar-light bg-light static-top">
<div class="container">
<a class="navbar-brand" href="/">Search Simulator</a>
</div>
</nav>
<section class="testimonials text-center bg-light">
<div class="container">
<h2 class="mb-5">Lagu yang sesuai dengan query "{{ query }}"</h2>
<div class="row">
{% for key, values in res.items %}
<div class="col-lg-4">
<div class="testimonial-item mx-auto mb-5 mb-lg-0">
<h5><a href="/lyric/{{ key }}">Lagu No: {{ key }}</a></h5>
<h5>"{{ values }}"</h5>
</div>
</div>
{% endfor %}
</div>
</div>
</section>
<!-- Bootstrap core JavaScript -->
<script src="../../static/assets/vendor/jquery/jquery.min.js"></script>
<script src="../../static/assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
</body>
</html>
<!DOCTYPE html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/trying.min.css" rel="stylesheet">
</head>
<body>
<main>
<div id="content">
<article class="card">
<div>
<div>
<button onclick="pageRedirect_prev()" class="button" style="vertical-align:middle"><span>Previous</span></button>
</div>
</div>
<div class="row">
<center><h1 style="font-size:45px">Searching!<br></h1>
<p style="font-size:20px"><strong>Silahkan pilih metode searching yang anda inginkan</strong></p>
<table>
<tr>
<th><button onclick="proximity()" class="button" style="vertical-align:middle"><span> Proximity Search </span></button></th>
<td><button onclick="phrase()"class="button" style="vertical-align:middle"><span>Phrase Search </span></button></td>
</tr>
</table>
</div>
</center>
</article>
</div>
</main>
</body>
<script>
// "Previous" button handler: go back to the fourth preprocessing page.
// The target must match the registered 'preprocessing4/' route; the earlier
// spelling "/preproseccing4" pointed at a URL that does not exist.
function pageRedirect_prev() {
  window.location.href = "/preprocessing4";
}
// "Proximity Search" button handler: open the indexing page.
function proximity() {
  var target = "/indexing";
  window.location.assign(target);
}
// "Phrase Search" button handler: open the phrase-search form.
function phrase() {
  var target = "/phrases";
  window.location.assign(target);
}
</script>
</html>
...@@ -14,7 +14,10 @@ urlpatterns = [ ...@@ -14,7 +14,10 @@ urlpatterns = [
path('preprocessing3/', views.preprocessing3), path('preprocessing3/', views.preprocessing3),
path('preprocessing4/', views.preprocessing4), path('preprocessing4/', views.preprocessing4),
path('indexing/', views.indexing), path('indexing/', views.indexing),
path('search/', views.search),
path('index/', views.index), path('index/', views.index),
path('phrase/', views.phrase),
path('result/', views.result), path('result/', views.result),
path('resultphrases/', views.resultphrases),
path('lyric/<int:id>', views.lyric, name='lyric'), path('lyric/<int:id>', views.lyric, name='lyric'),
] ]
\ No newline at end of file
...@@ -19,7 +19,6 @@ except ImportError: # not 2.6+ or is 3.x ...@@ -19,7 +19,6 @@ except ImportError: # not 2.6+ or is 3.x
except ImportError: except ImportError:
pass pass
def home(request): def home(request):
return render(request, 'apps/home.html') return render(request, 'apps/home.html')
...@@ -146,12 +145,18 @@ def indexing(request): ...@@ -146,12 +145,18 @@ def indexing(request):
def index(request): def index(request):
return render(request, 'apps/index.html') return render(request, 'apps/index.html')
def phrases(request):
    """Serve the phrase-search form page."""
    template_name = 'apps/phrases.html'
    return render(request, template_name)
def search(request):
    """Serve the page where the user picks a search method."""
    template_name = 'apps/search.html'
    return render(request, template_name)
def result(request): def result(request):
dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml") dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
if request.method == 'POST': if request.method == 'POST':
query = request.POST['querysearch'] query = request.POST['querysearch']
res = main.searching(dcmnt_xml, query) res = main.proximity(dcmnt_xml, query)
content = { content = {
'res':res, 'res':res,
...@@ -160,6 +165,20 @@ def result(request): ...@@ -160,6 +165,20 @@ def result(request):
return render(request, 'apps/result.html', content) return render(request, 'apps/result.html', content)
def resultphrase(request):
    """Run a phrase search for the submitted query and render the results.

    A non-POST request carries no query, so the phrase-search form is shown
    instead of failing on variables that were never assigned.

    Args:
        request: Incoming HTTP request; the query is read from the POST
            field ``querysearch``.

    Returns:
        The rendered ``apps/resultphrase.html`` page with ``res`` (the search
        result) and ``query`` in its context, or the rendered
        ``apps/phrases.html`` form for non-POST requests.
    """
    if request.method != 'POST':
        return render(request, 'apps/phrases.html')

    # A missing field is treated as an empty query rather than an error.
    query = request.POST.get('querysearch', '')

    # Parse the dataset only once we know a search will actually run.
    dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
    try:
        res = main.phrase(dcmnt_xml, query)
    except KeyError:
        # The search looks the query up in a bigram index keyed by phrase;
        # a phrase that is not in the index simply has no matching songs.
        res = {}

    content = {
        'res': res,
        'query': query,
    }
    return render(request, 'apps/resultphrase.html', content)
def lyric(request,id): def lyric(request,id):
lyrics, judul = main.detail(id) lyrics, judul = main.detail(id)
......
...@@ -26,8 +26,11 @@ urlpatterns = [ ...@@ -26,8 +26,11 @@ urlpatterns = [
path('preprocessing3/', views.preprocessing3), path('preprocessing3/', views.preprocessing3),
path('preprocessing4/', views.preprocessing4), path('preprocessing4/', views.preprocessing4),
path('indexing/', views.indexing), path('indexing/', views.indexing),
path('search/', views.search),
path('index/', views.index), path('index/', views.index),
path('phrases/', views.phrases),
path('result/', views.result), path('result/', views.result),
path('resultphrase/', views.resultphrase),
path('lyric/<int:id>', views.lyric, name='lyric'), path('lyric/<int:id>', views.lyric, name='lyric'),
] ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment