Commit 05f62a74 by Sartika Aritonang

First commit

parents
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form inipasti.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for a in soup.find_all('a', href=True): \n",
" if a.text:\n",
" section_list.append(a['href'])\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1 or i.find('psbb')!=-1 or i.find('social-distancing')!=-1:\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" a = parse_html(request_url(i))\n",
" try:\n",
" b = a.find(\"div\", class_ = \"td-author-name vcard author\") \n",
" if b:\n",
" c = b.find(\"a\")\n",
" author.append(c.text)\n",
" else:\n",
" author.append('None')\n",
" except :\n",
" author.append('None')\n",
" \n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" try :\n",
" h1 = soup.select('h1', {'class' : 'entry-title td-module-title'})[0].text.strip()\n",
" titles.append(h1)\n",
" except :\n",
" titles.append('None')\n",
" return titles"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" dates = []\n",
" for i in main_url:\n",
" a = parse_html(request_url(i))\n",
" try:\n",
" b = a.find(\"span\", class_ = \"td-post-date\").text\n",
" c = re.sub(\"Inipasti.com - \", \"\", b)\n",
" dates.append(c)\n",
" except:\n",
" dates.append('None')\n",
" return dates"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], dates = []):\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
" #collect all text with tag <p> from articles \n",
" for i,j in enumerate(main_url):\n",
" a = parse_html(request_url(j))\n",
" try:\n",
" content = a.find('div', class_='td-post-content')\n",
" pragraf = content.find_all('p')\n",
" for k in pragraf:\n",
" paragraf.append(k.text)\n",
" data.append(paragraf)\n",
" paragraf = []\n",
" except:\n",
" data.append('None')\n",
" for i in data:\n",
" join.append(' '.join(i))\n",
" \n",
" #collect articles and urls in one variable \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news' : 'Inipasti.com', 'link' : j, 'title' : titles[i], 'author' : author[i], 'date_time' : dates[i], 'paragraf' : join[i] })\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona_news = find_corona(section)\n",
" titles = title(corona_news)\n",
" author = writer(corona_news)\n",
" datetime = date(corona_news)\n",
" text = collect_text(corona_news, titles, author, datetime)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"main_url=[\"https://inipasti.com/\"]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/category/kesehatan/corona...</td>\n",
" <td>Coronavirus</td>\n",
" <td>None</td>\n",
" <td>Mei 1, 2020 2:10 pm</td>\n",
" <td>N o n e</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/dalam-bayang-bayang-coron...</td>\n",
" <td>Dalam Bayang-bayang Coronavirus, Umat Islam di...</td>\n",
" <td>Inipasti</td>\n",
" <td>April 21, 2020 10:37 am</td>\n",
" <td>INIPASTI.COM, ALGIERS / CAIRO / JAKARTA – Bebe...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/trump-menjanjikan-vaksin-...</td>\n",
" <td>Trump Menjanjikan Vaksin Covid-19 pada Akhir T...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 4, 2020 10:52 am</td>\n",
" <td>INIPASTI.COM, Presiden Donald Trump meluncurka...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/partai-gelora-bagi-bantua...</td>\n",
" <td>Partai Gelora Bagi Bantuan Sembako ke Warga Te...</td>\n",
" <td>Muhammad Seilessy</td>\n",
" <td>Mei 3, 2020 5:48 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR, – DPD Partai Gelora In...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/para-pengajarnya-selebrit...</td>\n",
" <td>Para Pengajarnya Selebritis, Startup Kelas Onl...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 5, 2020 9:08 am</td>\n",
" <td>INIPASTI.COM, MasterClass, sebuah startup yang...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/tanggap-pandemi-satgas-co...</td>\n",
" <td>Tanggap Pandemi, Satgas Covid-19 Unhas Terima ...</td>\n",
" <td>Inipasti</td>\n",
" <td>April 23, 2020 5:50 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR – Satuan Tugas Pencegah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/pasien-eks-covid-19-dikun...</td>\n",
" <td>Pasien Eks Covid-19 dikunjungi Tim Gugus Tugas...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 5, 2020 2:19 pm</td>\n",
" <td>INIPASTI.COM, PASANGKAYU– Satu pasien Covid-19...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/terkait-rencana-psbb-di-s...</td>\n",
" <td>Terkait Rencana PSBB di Sulbar,Ini Masukan Pem...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 4, 2020 4:54 am</td>\n",
" <td>INIPASTI.COM, PASANGKAYU – Pemprov Sulbar ikut...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/psbb-makassar-diperpanjan...</td>\n",
" <td>PSBB Makassar Diperpanjang 14 Hari, Petugas Di...</td>\n",
" <td>Muhammad Seilessy</td>\n",
" <td>Mei 6, 2020 5:08 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR, — Pembatasan Sosial Be...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/ajiep-surati-pimpinan-dpd...</td>\n",
" <td>Ajiep Surati Pimpinan DPD Terkait Postur APBN ...</td>\n",
" <td>Muhammad Seilessy</td>\n",
" <td>April 10, 2020 2:03 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR, — Anggota Dewan Perwak...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/dampak-corona-penumpang-p...</td>\n",
" <td>Dampak Corona, Penumpang Pesawat ke Luar Neger...</td>\n",
" <td>Iin Nurfahraeni</td>\n",
" <td>April 2, 2020 2:16 pm</td>\n",
" <td>INIPASTI.COM, JAKARTA – Badan Pusat Statistik ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/cegah-penyebaran-covid-19...</td>\n",
" <td>Cegah Penyebaran Covid-19, WNA Mulai Dilarang ...</td>\n",
" <td>Iin Nurfahraeni</td>\n",
" <td>April 2, 2020 2:15 pm</td>\n",
" <td>INIPASTI.COM, JAKARTA – Penyebaran virus Coron...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/as-dan-inggris-ingatkan-a...</td>\n",
" <td>AS dan Inggris Ingatkan, Adanya Peretas Dukung...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 6, 2020 9:18 am</td>\n",
" <td>INIPASTI.COM, LONDON / WASHINGTON – Peretas ya...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/jelang-lebaran-di-tengah-...</td>\n",
" <td>Jelang Lebaran di Tengah Pandemi, Muda Mudi To...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 7, 2020 9:50 am</td>\n",
" <td>INIPASTI.COM, MALILI – Melihat pandemi yang su...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/jelang-lebaran-di-tengah-...</td>\n",
" <td>Jelang Lebaran di Tengah Pandemi, Muda Mudi To...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 7, 2020 9:50 am</td>\n",
" <td>INIPASTI.COM, MALILI – Melihat pandemi yang su...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/iqbal-suhaeb-psbb-tahap-k...</td>\n",
" <td>Iqbal Suhaeb: PSBB Tahap Kedua untuk Bangun So...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 6, 2020 5:38 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR – Pj Wali Kota Makassar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/iqbal-suhaeb-psbb-tahap-k...</td>\n",
" <td>Iqbal Suhaeb: PSBB Tahap Kedua untuk Bangun So...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 6, 2020 5:38 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR – Pj Wali Kota Makassar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/perangi-berita-palsu-fron...</td>\n",
" <td>Perangi Berita Palsu: Front Baru dalam Melawan...</td>\n",
" <td>Inipasti</td>\n",
" <td>April 14, 2020 8:07 am</td>\n",
" <td>INIPASTI.COM, ANALISIS – Jaman yang disesaki d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/fakta-baru-virus-corona-y...</td>\n",
" <td>Fakta Baru Virus Corona yang Mengejutkan</td>\n",
" <td>Syakhruddin DN</td>\n",
" <td>Februari 8, 2020 11:50 am</td>\n",
" <td>INIPASTI.COM, JAKARTA –  Satu pasien, dirawat ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/opini-covid-19-mengubah-l...</td>\n",
" <td>[Opini] Covid-19 Mengubah Lanskap Persekolahan...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 5, 2020 9:28 am</td>\n",
" <td>Dr. Naidah Naing, ST., MSi., IAI INIPASTI.COM,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/kenapa-ribut-dengan-satga...</td>\n",
" <td>Kenapa Ribut dengan Satgas COVID dan Ramuan HE...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 4, 2020 11:40 am</td>\n",
" <td>Oleh: Haris Rusly Moti INIPASTI.COM, OPINI – S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/coronavirus-disease-centr...</td>\n",
" <td>“Coronavirus Disease Centre” Pemprov Sulsel</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 2, 2020 8:02 am</td>\n",
" <td>Penulis : Muhammad Zaiyani INIPASTI.COM, OPINI...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/ajak-masyarakat-cegah-cor...</td>\n",
" <td>Ajak Masyarakat Cegah Corona, Appi Tetap Optim...</td>\n",
" <td>Muhammad Seilessy</td>\n",
" <td>April 30, 2020 5:30 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR — Bakal calon Wali Kota...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/bagaimana-coronavirus-dap...</td>\n",
" <td>Bagaimana Coronavirus Dapat Mengubah Tatanan I...</td>\n",
" <td>Inipasti</td>\n",
" <td>Maret 21, 2020 10:31 pm</td>\n",
" <td>INIPASTI.COM – Wabah coronavirus COVID-19 tela...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/gratis-di-tengah-pandemi-...</td>\n",
" <td>Gratis di Tengah Pandemi, Anda Sekarang Dapat ...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 2, 2020 7:44 am</td>\n",
" <td>INIPASTI.COM, TIPS – Google memotong harga apl...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/para-pengajarnya-selebrit...</td>\n",
" <td>Para Pengajarnya Selebritis, Startup Kelas Onl...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 5, 2020 9:08 am</td>\n",
" <td>INIPASTI.COM, MasterClass, sebuah startup yang...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/pasien-eks-covid-19-dikun...</td>\n",
" <td>Pasien Eks Covid-19 dikunjungi Tim Gugus Tugas...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 5, 2020 2:19 pm</td>\n",
" <td>INIPASTI.COM, PASANGKAYU– Satu pasien Covid-19...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/terkait-rencana-psbb-di-s...</td>\n",
" <td>Terkait Rencana PSBB di Sulbar,Ini Masukan Pem...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 4, 2020 4:54 am</td>\n",
" <td>INIPASTI.COM, PASANGKAYU – Pemprov Sulbar ikut...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/kunjungi-posko-covid-19-w...</td>\n",
" <td>Kunjungi Posko Covid 19, Wagub Sulsel Beri Apr...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 6, 2020 12:13 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR, Wakil Gubernur Sulawes...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/kunjungi-posko-covid-19-w...</td>\n",
" <td>Kunjungi Posko Covid 19, Wagub Sulsel Beri Apr...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 6, 2020 12:13 pm</td>\n",
" <td>INIPASTI.COM, MAKASSAR, Wakil Gubernur Sulawes...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/menyakitkan-al-aqsa-ditut...</td>\n",
" <td>‘Menyakitkan’: Al-Aqsa Ditutup Selama Ramadan ...</td>\n",
" <td>Inipasti</td>\n",
" <td>April 17, 2020 2:23 pm</td>\n",
" <td>INIPASTI.COM – Dewan Wakaf Islam Yerusalem mem...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/cegah-meluasnya-wabah-cov...</td>\n",
" <td>Cegah Meluasnya Wabah Covid-19, Mudik Lebaran ...</td>\n",
" <td>Inipasti</td>\n",
" <td>Maret 25, 2020 9:19 am</td>\n",
" <td>INIPASTI.COM, Jakarta –  Mudik lebaran 2020 ak...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/pbb-dunia-harus-mencontoh...</td>\n",
" <td>PBB: Dunia harus Mencontoh Korea Selatan dalam...</td>\n",
" <td>Inipasti</td>\n",
" <td>Mei 1, 2020 2:10 pm</td>\n",
" <td>INIPASTI.COM, Ketua PBB mengatakan bahwasanya ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/hasil-uji-coba-remdesivir...</td>\n",
" <td>Ini Obat Covid-19 Paling Berhasil yang Diuji C...</td>\n",
" <td>Inipasti</td>\n",
" <td>April 30, 2020 2:11 pm</td>\n",
" <td>INIPASTI.COM, Harapan akan terapi obat yang ef...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Inipasti.com</td>\n",
" <td>https://inipasti.com/5-hal-berpenghasilan-yang...</td>\n",
" <td>5 Hal Berpenghasilan yang Dapat Anda Pelajari ...</td>\n",
" <td>Inipasti</td>\n",
" <td>April 21, 2020 7:57 am</td>\n",
" <td>INIPASTI.COM, TIPS – Perintah karantina atau k...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Inipasti.com https://inipasti.com/category/kesehatan/corona... \n",
"1 Inipasti.com https://inipasti.com/dalam-bayang-bayang-coron... \n",
"2 Inipasti.com https://inipasti.com/trump-menjanjikan-vaksin-... \n",
"3 Inipasti.com https://inipasti.com/partai-gelora-bagi-bantua... \n",
"4 Inipasti.com https://inipasti.com/para-pengajarnya-selebrit... \n",
"5 Inipasti.com https://inipasti.com/tanggap-pandemi-satgas-co... \n",
"6 Inipasti.com https://inipasti.com/pasien-eks-covid-19-dikun... \n",
"7 Inipasti.com https://inipasti.com/terkait-rencana-psbb-di-s... \n",
"8 Inipasti.com https://inipasti.com/psbb-makassar-diperpanjan... \n",
"9 Inipasti.com https://inipasti.com/ajiep-surati-pimpinan-dpd... \n",
"10 Inipasti.com https://inipasti.com/dampak-corona-penumpang-p... \n",
"11 Inipasti.com https://inipasti.com/cegah-penyebaran-covid-19... \n",
"12 Inipasti.com https://inipasti.com/as-dan-inggris-ingatkan-a... \n",
"13 Inipasti.com https://inipasti.com/jelang-lebaran-di-tengah-... \n",
"14 Inipasti.com https://inipasti.com/jelang-lebaran-di-tengah-... \n",
"15 Inipasti.com https://inipasti.com/iqbal-suhaeb-psbb-tahap-k... \n",
"16 Inipasti.com https://inipasti.com/iqbal-suhaeb-psbb-tahap-k... \n",
"17 Inipasti.com https://inipasti.com/perangi-berita-palsu-fron... \n",
"18 Inipasti.com https://inipasti.com/fakta-baru-virus-corona-y... \n",
"19 Inipasti.com https://inipasti.com/opini-covid-19-mengubah-l... \n",
"20 Inipasti.com https://inipasti.com/kenapa-ribut-dengan-satga... \n",
"21 Inipasti.com https://inipasti.com/coronavirus-disease-centr... \n",
"22 Inipasti.com https://inipasti.com/ajak-masyarakat-cegah-cor... \n",
"23 Inipasti.com https://inipasti.com/bagaimana-coronavirus-dap... \n",
"24 Inipasti.com https://inipasti.com/gratis-di-tengah-pandemi-... \n",
"25 Inipasti.com https://inipasti.com/para-pengajarnya-selebrit... \n",
"26 Inipasti.com https://inipasti.com/pasien-eks-covid-19-dikun... \n",
"27 Inipasti.com https://inipasti.com/terkait-rencana-psbb-di-s... \n",
"28 Inipasti.com https://inipasti.com/kunjungi-posko-covid-19-w... \n",
"29 Inipasti.com https://inipasti.com/kunjungi-posko-covid-19-w... \n",
"30 Inipasti.com https://inipasti.com/menyakitkan-al-aqsa-ditut... \n",
"31 Inipasti.com https://inipasti.com/cegah-meluasnya-wabah-cov... \n",
"32 Inipasti.com https://inipasti.com/pbb-dunia-harus-mencontoh... \n",
"33 Inipasti.com https://inipasti.com/hasil-uji-coba-remdesivir... \n",
"34 Inipasti.com https://inipasti.com/5-hal-berpenghasilan-yang... \n",
"\n",
" title author \\\n",
"0 Coronavirus None \n",
"1 Dalam Bayang-bayang Coronavirus, Umat Islam di... Inipasti \n",
"2 Trump Menjanjikan Vaksin Covid-19 pada Akhir T... Inipasti \n",
"3 Partai Gelora Bagi Bantuan Sembako ke Warga Te... Muhammad Seilessy \n",
"4 Para Pengajarnya Selebritis, Startup Kelas Onl... Inipasti \n",
"5 Tanggap Pandemi, Satgas Covid-19 Unhas Terima ... Inipasti \n",
"6 Pasien Eks Covid-19 dikunjungi Tim Gugus Tugas... Inipasti \n",
"7 Terkait Rencana PSBB di Sulbar,Ini Masukan Pem... Inipasti \n",
"8 PSBB Makassar Diperpanjang 14 Hari, Petugas Di... Muhammad Seilessy \n",
"9 Ajiep Surati Pimpinan DPD Terkait Postur APBN ... Muhammad Seilessy \n",
"10 Dampak Corona, Penumpang Pesawat ke Luar Neger... Iin Nurfahraeni \n",
"11 Cegah Penyebaran Covid-19, WNA Mulai Dilarang ... Iin Nurfahraeni \n",
"12 AS dan Inggris Ingatkan, Adanya Peretas Dukung... Inipasti \n",
"13 Jelang Lebaran di Tengah Pandemi, Muda Mudi To... Inipasti \n",
"14 Jelang Lebaran di Tengah Pandemi, Muda Mudi To... Inipasti \n",
"15 Iqbal Suhaeb: PSBB Tahap Kedua untuk Bangun So... Inipasti \n",
"16 Iqbal Suhaeb: PSBB Tahap Kedua untuk Bangun So... Inipasti \n",
"17 Perangi Berita Palsu: Front Baru dalam Melawan... Inipasti \n",
"18 Fakta Baru Virus Corona yang Mengejutkan Syakhruddin DN \n",
"19 [Opini] Covid-19 Mengubah Lanskap Persekolahan... Inipasti \n",
"20 Kenapa Ribut dengan Satgas COVID dan Ramuan HE... Inipasti \n",
"21 “Coronavirus Disease Centre” Pemprov Sulsel Inipasti \n",
"22 Ajak Masyarakat Cegah Corona, Appi Tetap Optim... Muhammad Seilessy \n",
"23 Bagaimana Coronavirus Dapat Mengubah Tatanan I... Inipasti \n",
"24 Gratis di Tengah Pandemi, Anda Sekarang Dapat ... Inipasti \n",
"25 Para Pengajarnya Selebritis, Startup Kelas Onl... Inipasti \n",
"26 Pasien Eks Covid-19 dikunjungi Tim Gugus Tugas... Inipasti \n",
"27 Terkait Rencana PSBB di Sulbar,Ini Masukan Pem... Inipasti \n",
"28 Kunjungi Posko Covid 19, Wagub Sulsel Beri Apr... Inipasti \n",
"29 Kunjungi Posko Covid 19, Wagub Sulsel Beri Apr... Inipasti \n",
"30 ‘Menyakitkan’: Al-Aqsa Ditutup Selama Ramadan ... Inipasti \n",
"31 Cegah Meluasnya Wabah Covid-19, Mudik Lebaran ... Inipasti \n",
"32 PBB: Dunia harus Mencontoh Korea Selatan dalam... Inipasti \n",
"33 Ini Obat Covid-19 Paling Berhasil yang Diuji C... Inipasti \n",
"34 5 Hal Berpenghasilan yang Dapat Anda Pelajari ... Inipasti \n",
"\n",
" date_time \\\n",
"0 Mei 1, 2020 2:10 pm \n",
"1 April 21, 2020 10:37 am \n",
"2 Mei 4, 2020 10:52 am \n",
"3 Mei 3, 2020 5:48 pm \n",
"4 Mei 5, 2020 9:08 am \n",
"5 April 23, 2020 5:50 pm \n",
"6 Mei 5, 2020 2:19 pm \n",
"7 Mei 4, 2020 4:54 am \n",
"8 Mei 6, 2020 5:08 pm \n",
"9 April 10, 2020 2:03 pm \n",
"10 April 2, 2020 2:16 pm \n",
"11 April 2, 2020 2:15 pm \n",
"12 Mei 6, 2020 9:18 am \n",
"13 Mei 7, 2020 9:50 am \n",
"14 Mei 7, 2020 9:50 am \n",
"15 Mei 6, 2020 5:38 pm \n",
"16 Mei 6, 2020 5:38 pm \n",
"17 April 14, 2020 8:07 am \n",
"18 Februari 8, 2020 11:50 am \n",
"19 Mei 5, 2020 9:28 am \n",
"20 Mei 4, 2020 11:40 am \n",
"21 Mei 2, 2020 8:02 am \n",
"22 April 30, 2020 5:30 pm \n",
"23 Maret 21, 2020 10:31 pm \n",
"24 Mei 2, 2020 7:44 am \n",
"25 Mei 5, 2020 9:08 am \n",
"26 Mei 5, 2020 2:19 pm \n",
"27 Mei 4, 2020 4:54 am \n",
"28 Mei 6, 2020 12:13 pm \n",
"29 Mei 6, 2020 12:13 pm \n",
"30 April 17, 2020 2:23 pm \n",
"31 Maret 25, 2020 9:19 am \n",
"32 Mei 1, 2020 2:10 pm \n",
"33 April 30, 2020 2:11 pm \n",
"34 April 21, 2020 7:57 am \n",
"\n",
" paragraf \n",
"0 N o n e \n",
"1 INIPASTI.COM, ALGIERS / CAIRO / JAKARTA – Bebe... \n",
"2 INIPASTI.COM, Presiden Donald Trump meluncurka... \n",
"3 INIPASTI.COM, MAKASSAR, – DPD Partai Gelora In... \n",
"4 INIPASTI.COM, MasterClass, sebuah startup yang... \n",
"5 INIPASTI.COM, MAKASSAR – Satuan Tugas Pencegah... \n",
"6 INIPASTI.COM, PASANGKAYU– Satu pasien Covid-19... \n",
"7 INIPASTI.COM, PASANGKAYU – Pemprov Sulbar ikut... \n",
"8 INIPASTI.COM, MAKASSAR, — Pembatasan Sosial Be... \n",
"9 INIPASTI.COM, MAKASSAR, — Anggota Dewan Perwak... \n",
"10 INIPASTI.COM, JAKARTA – Badan Pusat Statistik ... \n",
"11 INIPASTI.COM, JAKARTA – Penyebaran virus Coron... \n",
"12 INIPASTI.COM, LONDON / WASHINGTON – Peretas ya... \n",
"13 INIPASTI.COM, MALILI – Melihat pandemi yang su... \n",
"14 INIPASTI.COM, MALILI – Melihat pandemi yang su... \n",
"15 INIPASTI.COM, MAKASSAR – Pj Wali Kota Makassar... \n",
"16 INIPASTI.COM, MAKASSAR – Pj Wali Kota Makassar... \n",
"17 INIPASTI.COM, ANALISIS – Jaman yang disesaki d... \n",
"18 INIPASTI.COM, JAKARTA –  Satu pasien, dirawat ... \n",
"19 Dr. Naidah Naing, ST., MSi., IAI INIPASTI.COM,... \n",
"20 Oleh: Haris Rusly Moti INIPASTI.COM, OPINI – S... \n",
"21 Penulis : Muhammad Zaiyani INIPASTI.COM, OPINI... \n",
"22 INIPASTI.COM, MAKASSAR — Bakal calon Wali Kota... \n",
"23 INIPASTI.COM – Wabah coronavirus COVID-19 tela... \n",
"24 INIPASTI.COM, TIPS – Google memotong harga apl... \n",
"25 INIPASTI.COM, MasterClass, sebuah startup yang... \n",
"26 INIPASTI.COM, PASANGKAYU– Satu pasien Covid-19... \n",
"27 INIPASTI.COM, PASANGKAYU – Pemprov Sulbar ikut... \n",
"28 INIPASTI.COM, MAKASSAR, Wakil Gubernur Sulawes... \n",
"29 INIPASTI.COM, MAKASSAR, Wakil Gubernur Sulawes... \n",
"30 INIPASTI.COM – Dewan Wakaf Islam Yerusalem mem... \n",
"31 INIPASTI.COM, Jakarta –  Mudik lebaran 2020 ak... \n",
"32 INIPASTI.COM, Ketua PBB mengatakan bahwasanya ... \n",
"33 INIPASTI.COM, Harapan akan terapi obat yang ef... \n",
"34 INIPASTI.COM, TIPS – Perintah karantina atau k... "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# r = get_news(main_url, \"news_2\")\n",
"# r\n",
"\n",
"inipasti_news=get_news(main_url, file_name = 'inipasti_satu')\n",
"inipasti_news"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form kompas.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for h3 in soup.find_all(class_=\"article__title article__title--medium\"):\n",
" section_list.append(h3.a.get('href'))\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1 or i.find('psbb')!=-1:\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" for i in url_list:\n",
" if (i.find('read') == -1):\n",
" not_news.append(i) \n",
" for j in not_news:\n",
" url_list.remove(j)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#take all article pages from urls\n",
"def all_pages(main_url):\n",
" all_pages_articles = []\n",
" for page in main_url:\n",
" soup = parse_html(request_url(page))\n",
" div_class = soup.find_all(\"div\", class_=\"col-bs12-8 col-offset-0\")\n",
" #print(page, div_class)\n",
" if (div_class == []):\n",
" all_pages_articles.append(page)\n",
" else :\n",
" for i in div_class:\n",
" url_class = i.find_all(\"div\", class_=\"paging__item\")\n",
" for j in url_class:\n",
" a_class = j.find_all(\"a\", class_=\"paging__link\")\n",
" for k in a_class:\n",
" #print(k.get('href'))\n",
" all_pages_articles.append(k.get('href'))\n",
" return all_pages_articles"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" a = parse_html(request_url(i)) \n",
" b = a.find(\"div\", class_ = \"read__credit__item\", id = \"penulis\")\n",
" if b:\n",
" c = b.find(\"a\")\n",
" author.append(c.text)\n",
" else:\n",
" d = a.find(\"div\", class_ = \"read__credit__item\")\n",
" author.append(d.text)\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" a = parse_html(request_url(i)) \n",
" b = a.find(\"h1\", class_ = \"read__title\").text\n",
" titles.append(b)\n",
" return titles"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" dates = []\n",
" for i in main_url:\n",
" a = parse_html(request_url(i)) \n",
" b = a.find(\"div\", class_ = \"read__time\").text\n",
" c = re.sub(\"Kompas.com - \", \"\", b)\n",
" dates.append(c)\n",
" return dates"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], dates = []):\n",
" news = []\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
" penulis = []\n",
" #collect all text with tag <p> from articles \n",
" for i in main_url:\n",
" a = parse_html(request_url(i)) \n",
" for k in range(0, len(a.findAll('p'))):\n",
" newparagraf = a.find_all('p')[k].text\n",
" paragraf.append(newparagraf)\n",
" data.append(paragraf)\n",
" paragraf = []\n",
" for i in data:\n",
" join.append(' '.join(i))\n",
" \n",
" for i in main_url:\n",
" news_title = \"Kompas.com\"\n",
" news.append(news_title)\n",
" \n",
" #collect articles and urls in one variable \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news' : news[i], 'link' : j, 'title' : titles[i], 'author' : author[i], 'date_time' : dates[i], 'paragraf' : join[i] })\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona = find_corona(section)\n",
" all_page = all_pages(corona)\n",
" titles = title(all_page)\n",
" author = writer(all_page)\n",
" dates = date(all_page)\n",
" text = collect_text(all_page, titles, author, dates)\n",
" print(text)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file "
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"main_url = ['https://megapolitan.kompas.com/']"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'news': 'Kompas.com', 'link': 'https://megapolitan.kompas.com/read/2020/05/04/20321891/antisipasi-menyebarnya-covid-19-di-kepulauan-seribu-pemkab-berencana-ubah', 'title': 'Antisipasi Menyebarnya Covid-19 di Kepulauan Seribu, Pemkab Berencana Ubah Pulau Sebaru Jadi Rumah Sakit', 'author': 'Jimmy Ramadhan Azhari', 'date_time': '04/05/2020, 20:32 WIB', 'paragraf': 'Antisipasi Menyebarnya Covid-19 di Kepulauan Seribu, Pemkab Berencana Ubah Pulau Sebaru Jadi Rumah Sakit JAKARTA, KOMPAS.com - Pemerintah Kabupaten Kepulauan Seribu berencana mengubah Pulau Sebaru yang biasanya dijadikan sebagai tempat observasi menjadi rumah sakit Covid-19. Wakil Bupati Kepulauan Seribu Junaedi mengatakan, rencana pengubahan fungsi itu dilakukan jika wabah di sana semakin meluas. \"Pulau Sebaru bukan tempat observasi lagi, tapi rencana akan dijadikan semacam rumah sakit apabila terjadi skala buruk Pandemi Covid 19 di Kepulauan Seribu,\" kata Junaedi saat dihubungi Kompas.com, Senin (4/5/2020). Baca juga: Pemprov DKI Laporkan Rencana Pembatasan Warga yang Masuk Jakarta Usai Lebaran ke Kemenhub Junaedi mengatakan, pulau tersebut terbilang layak dijadikan sebagai tempat isolasi pasien karena pernah jadi tempat observasi WNI ABK World Dream beberapa waktu lalu.\\xa0 Bahkan pengelola fasilitas observasi tersebut juga telah menyatakan kesiapannya jika memang skema terburuk itu diberlakukan. Junaedi mengatakan, pemerintah hanya perlu menyediakan tenaga medis dan hal-hal kecil terkait teknis apabila ingin mengubah pulau itu jadi rumah sakit Covid-19. Adapun saat ini, Pemkab Kepulauan Seribu juga telah menyediakan tempat isolasi pasien dengan kategori orang tanpa gejala (OTG) di masing-masing pulau. Baca juga: Kronologi Tertularnya 10 Pasien Positif Covid-19 di Kepulauan Seribu \"Upaya yang sudah dilakukan Gugus Tugas menyedian 39 tempat karantina/isolasi di 12 Pulau. Tempatnya sekolahan, GOR, SKKT. Dan sudah dilengkapi oleh velbed di setiap lokasi karantina,\" ucap Junaedi. Adapun saat ini, sebanyak 10 orang warga Pulau Tidung telah dinyatakan Positif Covid-19. Sembilan di antaranya berasal dari Pulai Tidung yang merupakan cluster dan subcluster dari Tabligh Akbar Kebon Jeruk, Jakarta Barat. Sementara satu pasien lainnya berasal dari Pulau Kelapa yang juga terinfeksi di daratan Jakarta.'}, {'news': 'Kompas.com', 'link': 'https://megapolitan.kompas.com/read/2020/05/04/20282811/kasus-covid-19-stagnan-tiga-hari-walkot-bekasi-klaim-psbb-tahap-kedua', 'title': 'Kasus Covid-19 Stagnan Tiga Hari, Walkot Bekasi Klaim PSBB Tahap Kedua Buahkan Hasil', 'author': 'Cynthia Lova', 'date_time': '04/05/2020, 20:28 WIB', 'paragraf': 'Kasus Covid-19 Stagnan Tiga Hari, Walkot Bekasi Klaim PSBB Tahap Kedua Buahkan Hasil BEKASI, KOMPAS.com - Wali Kota Bekasi Rahmat Effendi mengklaim bahwa penerapan pembatasan sosial berskala besar (PSBB) tahap kedua di Kota Patriot mulai membuahkan hasil. Adapun PSBB tahap kedua sudah mulai berjalan sejak 29 April hingga 12 Mei 2020 mendatang. Hal itu diungkapkan pria dengan sapaan akrab Pepen setelah kasus Covid-19 di Kota Bekasi mulai stagnan atau tak terjadi pertambahan kasus pada tiga hari belakangan, yakni sejak 2 hingga 4 Mei 2020. “Dari 28 April sudah mulai menurun. Nah ini ODP, PDP, dan positif kita lihat, dari tanggal 2 Mei sudah nol penambahannya. Artinya PSBB kita tahap kedua ini dengan bagitu masifnya pencegahan sudah mulai membuahkan hasil. Yang positifnya lihat, sama betul (jumlahnya),” ujar Rahmat di Kota Bekasi, Senin (4/5/2020). Baca juga: Pemkot Bekasi Siapkan 300 Alat Tes Swab bagi Penumpang KRL di Stasiun Bekasi, Besok Rahmat berujar, berbagai upaya penegakan aturan PSBB tahap kedua diklaim mampu menurunkan angka Covid-19 di Kota Bekasi tiga hari belakangan ini. Mulai dari rutin memberikan peringatan kepada warga yang masih berkeliaran ataupun masih nongkrong di warung makan atau di pinggir jalan hingga adanya penjagaan di 32 titik check point. “Alhamdulillah, tidak ada penambahan kasus. Tiga hari ini kelandaian luar biasa, mudah-mudahan sisa delapan hari ini landainya.\" \"Namun kita berdoa saja, minggu ini terjadi penurunan, artinya apa yang selama sudah kita lakukan, kita sudah ke streching sedemikian rupa mudah-mudahan hasilnya nyata,” kata Pepen. Baca juga: Data Penambahan Kasus Covid-19 di Bekasi Hari ke Hari Selama PSBB Adapun berdasarkan data website corona.bekasikota.go.id, mulai 2 Mei hingga 4 Mei 2020 tak ada peningkatan kasus. Adapun saat ini tercatat ada 249 kasus positif Covid. Dari 249 kasus positif Covid-19, ada 114 yang sembuh. Lalu, ada 2.023 yang masih dalam pemantauan. Dari 2.023 ODP, ada 1.511 yang sehat dan 512 orang masih pemantauan. Kemudian, PDP ada 800 orang. Dari 800 orang PDP, ada 344 orang usai diawasi dan 355 orang masih diawasi.'}, {'news': 'Kompas.com', 'link': 'https://megapolitan.kompas.com/read/2020/05/04/19324561/kronologi-tertularnya-10-pasien-positif-covid-19-di-kepulauan-seribu?page=1', 'title': 'Kronologi Tertularnya 10 Pasien Positif Covid-19 di Kepulauan Seribu', 'author': 'Jimmy Ramadhan Azhari', 'date_time': '04/05/2020, 19:32 WIB', 'paragraf': 'Kronologi Tertularnya 10 Pasien Positif Covid-19 di Kepulauan Seribu JAKARTA, KOMPAS.com - Hingga hari ini, sudah ada 10 warga Kabupaten Kepulauan Seribu yang dinyatakan positif Covid-19. Wakil Bupati Kepulauan Seribu Junaedi mengatakan, 10 warga yang terpapar Covid-19 itu berasal dari dua pulau yang berbeda. \"Sembilan dari Pulau Tidung, yang satu dari Pulau Kelapa,\" kata Junaedi saat dihubungi Kompas.com, Senin (4/5/2020). Baca juga: Terindikasi Positif karena Kontak dengan Pasien Covid-19, 8 Warga Kepulauan Seribu Diisolasi Ia kemudian menjelaskan bahwa sembilan orang yang terpapar di Pulau Tidung bermula dari penularan oleh dua orang yang mengikuti tabligh akbar di Kebon Jeruk, Jakarta Barat. Sebelum dinyatakan positif Covid-19, keduanya sempat pulang ke Pulau Tidung dan berkontak dengan kerabatnya di sana. Namun, setelah beberapa orang jemaah tabligh akbar Kebon Jeruk dinyatakan positif Covid-19, keduanya pun diisolasi ke RSD Wisma Atlet Kemayoran dan menjalani swab test. Baca juga: Berkontak dengan Pasien Positif Covid-19, 38 Warga Pulau Tidung Jalani Tes Swab Kemudiam, pada 21 April 2020 lalu dikonfirmasi bahwa pasien tersebut dinyatakan positif Covid-19. Pemerintah setempat kemudian melakukan swab test pada keluarga dan kerabat yang sempat kontak dengan kedua pasien positif tersebut \"Sabtu (25/4/2020) didapatkan tujuh orang positif terkonfirmasi Covid-19 dikarantina di Wisma Atlet,\" ucap Junaedi. Sementara, untuk pasien positif Covid-19 dari Pulau Kelapa, yang bersangkutan dinyatakan positif Covid-19 setelah sebelumnya membesuk menantunya di daratan Jakarta. Baca juga: Masih Banyak Pemuda Nongkrong di Dermaga Kepulauan Seribu Saat PSBB'}, {'news': 'Kompas.com', 'link': 'https://megapolitan.kompas.com/read/2020/05/04/19324561/kronologi-tertularnya-10-pasien-positif-covid-19-di-kepulauan-seribu?page=2', 'title': 'Kronologi Tertularnya 10 Pasien Positif Covid-19 di Kepulauan Seribu', 'author': 'Jimmy Ramadhan Azhari', 'date_time': '04/05/2020, 19:32 WIB', 'paragraf': 'Kronologi Tertularnya 10 Pasien Positif Covid-19 di Kepulauan Seribu Satu orang tersebut saat ini sedang dirawat di RSUD Koja, Jakarta Utara. Menurut Junaedi, warga tersebut tidak sempat kembali ke Pulau Kelapa sebelum dinyatakan positif Covid-19. Namun, pihak Puskesmas masih menelusuri riwayat perjalanan warga tersebut. Adapun pasien positif Covid-19 di Jakarta mencapai 4.472 orang hingga Senin (4/5/2020) ini. Jumlah pasien yang terinfeksi virus corona tipe 2 (SARS-CoV-2) itu bertambah 55 orang dibandingkan data terakhir pada kemarin, yakni 4.417 pasien. Kepala Dinas Kesehatan DKI Jakarta Widyastuti mengatakan, dari total pasien, 650 orang dinyatakan sembuh.'}]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Kompas.com</td>\n",
" <td>https://megapolitan.kompas.com/read/2020/05/04...</td>\n",
" <td>Antisipasi Menyebarnya Covid-19 di Kepulauan S...</td>\n",
" <td>Jimmy Ramadhan Azhari</td>\n",
" <td>04/05/2020, 20:32 WIB</td>\n",
" <td>Antisipasi Menyebarnya Covid-19 di Kepulauan S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Kompas.com</td>\n",
" <td>https://megapolitan.kompas.com/read/2020/05/04...</td>\n",
" <td>Kasus Covid-19 Stagnan Tiga Hari, Walkot Bekas...</td>\n",
" <td>Cynthia Lova</td>\n",
" <td>04/05/2020, 20:28 WIB</td>\n",
" <td>Kasus Covid-19 Stagnan Tiga Hari, Walkot Bekas...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Kompas.com</td>\n",
" <td>https://megapolitan.kompas.com/read/2020/05/04...</td>\n",
" <td>Kronologi Tertularnya 10 Pasien Positif Covid-...</td>\n",
" <td>Jimmy Ramadhan Azhari</td>\n",
" <td>04/05/2020, 19:32 WIB</td>\n",
" <td>Kronologi Tertularnya 10 Pasien Positif Covid-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Kompas.com</td>\n",
" <td>https://megapolitan.kompas.com/read/2020/05/04...</td>\n",
" <td>Kronologi Tertularnya 10 Pasien Positif Covid-...</td>\n",
" <td>Jimmy Ramadhan Azhari</td>\n",
" <td>04/05/2020, 19:32 WIB</td>\n",
" <td>Kronologi Tertularnya 10 Pasien Positif Covid-...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Kompas.com https://megapolitan.kompas.com/read/2020/05/04... \n",
"1 Kompas.com https://megapolitan.kompas.com/read/2020/05/04... \n",
"2 Kompas.com https://megapolitan.kompas.com/read/2020/05/04... \n",
"3 Kompas.com https://megapolitan.kompas.com/read/2020/05/04... \n",
"\n",
" title author \\\n",
"0 Antisipasi Menyebarnya Covid-19 di Kepulauan S... Jimmy Ramadhan Azhari \n",
"1 Kasus Covid-19 Stagnan Tiga Hari, Walkot Bekas... Cynthia Lova \n",
"2 Kronologi Tertularnya 10 Pasien Positif Covid-... Jimmy Ramadhan Azhari \n",
"3 Kronologi Tertularnya 10 Pasien Positif Covid-... Jimmy Ramadhan Azhari \n",
"\n",
" date_time paragraf \n",
"0 04/05/2020, 20:32 WIB Antisipasi Menyebarnya Covid-19 di Kepulauan S... \n",
"1 04/05/2020, 20:28 WIB Kasus Covid-19 Stagnan Tiga Hari, Walkot Bekas... \n",
"2 04/05/2020, 19:32 WIB Kronologi Tertularnya 10 Pasien Positif Covid-... \n",
"3 04/05/2020, 19:32 WIB Kronologi Tertularnya 10 Pasien Positif Covid-... "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r = get_news(main_url, \"news_2\")\n",
"r"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"import xml.etree.cElementTree as ET\n",
"def save_file_xml(file):\n",
" \n",
" data = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
"\n",
" root = ET.Element(\"COLLECTION\")\n",
" for i in range(len(data)):\n",
" news = ET.SubElement(root, \"NEWS\").text = data['news'][i]\n",
" id_news = ET.SubElement(news, \"ID\").text = str(i+1)\n",
" link = ET.SubElement(news, \"LINK\").text = data['link'][i]\n",
" title = ET.SubElement(news, \"TITLE\").text = data['title'][i]\n",
" author =ET.SubElement(news, \"AUTHOR\").text = data['author'][i]\n",
" datetime = ET.SubElement(news, \"DATETIME\").text = data['date_time'][i]\n",
" paragraf = ET.SubElement(news, \"PARAGRAPH\").text = data['paragraf'][i]\n",
" \n",
" tree = ET.ElementTree(root)\n",
" save = tree.write(\"kompas.xml\")\n",
" return save"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form liputan6.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for a in soup.find_all('a', href=True): \n",
" if a.text:\n",
" section_list.append(a['href'])\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1:\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" for i in url_list:\n",
" if (i.find('berita') == -1):\n",
" not_news.append(i) \n",
" for j in not_news:\n",
" url_list.remove(j)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_title = soup.find(\"h1\", class_=\"post-title\")\n",
" if(get_title):\n",
" titles.append(get_title.text)\n",
" else:\n",
" titles.append('None')\n",
" return titles "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_author = soup.find(\"p\", class_=\"text-muted small mt10\")\n",
" if(get_author):\n",
" authorr=get_author.text.partition(\"Editor\")[0]\n",
" author_name = ''.join(authorr.partition(': ')[2:])\n",
" if(author_name == ''):\n",
" authorname = ''.join(authorr.partition('Oleh ')[2:])\n",
" author.append(authorname)\n",
" else:\n",
" author.append(author_name)\n",
" else:\n",
" author.append('None')\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" datetime = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_datetime = soup.find(\"span\", class_=\"article-date\")\n",
" if(get_datetime):\n",
" delete_first_space = ''.join(get_datetime.text.partition(' ')[2:])\n",
" if(delete_first_space[0] == ' '):\n",
" delete_second_space = ''.join(delete_first_space.partition(' ')[2:])\n",
" datetime.append(delete_second_space)\n",
" else:\n",
" datetime.append(delete_first_space)\n",
" else:\n",
" datetime.append('None')\n",
" return datetime"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], datetime = []):\n",
" paragraf = []\n",
" isiteks=[]\n",
" for i,j in enumerate(main_url):\n",
" a = parse_html(request_url(j))\n",
" content = a.find('div', class_=\"post-content clearfix\")\n",
" if(content):\n",
" s = ' '.join(content.text.split())\n",
" del_pewarta = s.partition('Pewarta')[0]\n",
" del_penerjemah = del_pewarta.partition('Penerjemah')[0]\n",
" del_oleh = del_penerjemah.partition('Oleh')[0]\n",
" paragraf.append(del_oleh)\n",
" else:\n",
" paragraf.append('None')\n",
" \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news': 'Antaranews.com', 'link' : j, 'title': titles[i], 'author' : author[i], 'date_time': datetime[i], 'paragraf' : paragraf[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona_news = find_corona(section)\n",
" titles = title(corona_news)\n",
" author = writer(corona_news)\n",
" datetime = date(corona_news)\n",
" text = collect_text(corona_news, titles, author, datetime)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"url=[\"https://www.antaranews.com/\"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468851/peng...</td>\n",
" <td>Pengelola diajak tata ulang destinasi wisata s...</td>\n",
" <td>Hanni Sofia</td>\n",
" <td>Selasa, 5 Mei 2020 19:15 WIB</td>\n",
" <td>Pascapandemi diperkirakan akan terjadi kondisi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468815/lpdb...</td>\n",
" <td>LPDB-KUMKM bantu permodalan koperasi terdampak...</td>\n",
" <td>Hanni Sofia</td>\n",
" <td>Selasa, 5 Mei 2020 19:00 WIB</td>\n",
" <td>Bantuan perkuatan permodalan dari LPDB-KUMKM m...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1454852/depu...</td>\n",
" <td>Deputi BUMN: Transformasi penting meski pandem...</td>\n",
" <td>Mentari Dwi Gayati</td>\n",
" <td>Kamis, 30 April 2020 17:52 WIB</td>\n",
" <td>Kita butuh SDM Unggul yang toleran, yang memba...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1454258/pert...</td>\n",
" <td>Pertamina sulap lapangan bola Simprug jadi rum...</td>\n",
" <td>Afut Syafril Nursyirwan</td>\n",
" <td>Kamis, 30 April 2020 16:10 WIB</td>\n",
" <td>di rumah sakit ini disediakan pula fasilitas p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468719/kasu...</td>\n",
" <td>Kasus positif COVID-19 di Jakarta bertambah 169</td>\n",
" <td>Livia Kristianti</td>\n",
" <td>Selasa, 5 Mei 2020 18:43 WIB</td>\n",
" <td>Jakarta (ANTARA) - Perkembangan virus corona (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468623/pasi...</td>\n",
" <td>Pasien sembuh COVID-19 di Kota Bogor bertambah...</td>\n",
" <td>Riza Harahap</td>\n",
" <td>Selasa, 5 Mei 2020 18:05 WIB</td>\n",
" <td>Bogor (ANTARA) - Pasien yang sembuh dari COVID...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1467981/pulo...</td>\n",
" <td>Pulomas Jaya sediakan 2.100 tes cepat COVID-19</td>\n",
" <td>Andi Firdaus</td>\n",
" <td>Selasa, 5 Mei 2020 15:24 WIB</td>\n",
" <td>Semua orang boleh mengikuti tes cepatJakarta (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1466679/kema...</td>\n",
" <td>Kemarin, puluhan warga positif COVID-19 hingga...</td>\n",
" <td>Andi Firdaus</td>\n",
" <td>Selasa, 5 Mei 2020 07:00 WIB</td>\n",
" <td>Jakarta (ANTARA) - Beragam peristiwa di wilaya...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468323/wand...</td>\n",
" <td>Wander Luiz lelang jersey bantu masyarakat ter...</td>\n",
" <td>Asep Firmansyah</td>\n",
" <td>Selasa, 5 Mei 2020 16:34 WIB</td>\n",
" <td>Lelang dibuka hari Selasa, 5 Mei 2020 pukul 07...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1466463/kalo...</td>\n",
" <td>Kalou diskors Hertha karena langgar aturan ter...</td>\n",
" <td>A Rauf Andar Adipati</td>\n",
" <td>Selasa, 5 Mei 2020 00:57 WIB</td>\n",
" <td>Jakarta (ANTARA) - Salomon Kalou diskors oleh ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1466409/liga...</td>\n",
" <td>Liga Jerman temukan 10 kasus positif corona di...</td>\n",
" <td>Aditya Eko Sigit Wicaksono</td>\n",
" <td>Senin, 4 Mei 2020 23:56 WIB</td>\n",
" <td>Jakarta (ANTARA) - Liga Jerman pada Senin mend...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1438604/bale...</td>\n",
" <td>Bale donasi ratusan ribu poundsterling guna pe...</td>\n",
" <td>Junaydi Suswanto</td>\n",
" <td>Kamis, 23 April 2020 05:45 WIB</td>\n",
" <td>Jakarta (ANTARA) - Kapten timnas sepak bola Wa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1465647/lean...</td>\n",
" <td>Leani Ratri bertahan saat pandemi COVID-19</td>\n",
" <td>Shofi Ayudiana</td>\n",
" <td>Senin, 4 Mei 2020 20:20 WIB</td>\n",
" <td>Saya enggak mau sia-sia udah jauh dari keluarg...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1465563/lean...</td>\n",
" <td>Leani Ratri berlatih bersama adik selama pande...</td>\n",
" <td>Shofi Ayudiana</td>\n",
" <td>Senin, 4 Mei 2020 19:55 WIB</td>\n",
" <td>Jakarta (ANTARA) - Atlet para bulu tangkis Ind...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1462239/keti...</td>\n",
" <td>Ketika pandemi munculkan ide leburnya ATP dan WTA</td>\n",
" <td>Jafar M Sidik</td>\n",
" <td>Minggu, 3 Mei 2020 16:29 WIB</td>\n",
" <td>Jakarta (ANTARA) - Dua bulan tak menjalankan a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1467369/200-...</td>\n",
" <td>200 pelaku olahraga Bekasi jalani rapid test C...</td>\n",
" <td>Pradita Kurniawan Syah</td>\n",
" <td>Selasa, 5 Mei 2020 12:41 WIB</td>\n",
" <td>Alhamdulillah hasilnya sudah keluar, semua din...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1463349/bisa...</td>\n",
" <td>Bisakah virus corona bertahan di paket belanja...</td>\n",
" <td>Natisha Andarningtyas</td>\n",
" <td>Senin, 4 Mei 2020 06:12 WIB</td>\n",
" <td>Jakarta (ANTARA) - Masyarakat mengandalkan bel...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1462287/cara...</td>\n",
" <td>Cara kerja remdesivir pada pasien corona</td>\n",
" <td>Lia Wanadriani Santosa</td>\n",
" <td>Minggu, 3 Mei 2020 16:43 WIB</td>\n",
" <td>Jakarta (ANTARA) - Remdesivir, obat antivirus ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1422121/tips...</td>\n",
" <td>Tips rawat hewan peliharaan di tengah pandemi ...</td>\n",
" <td>Arnidhya Nur Zhafira</td>\n",
" <td>Rabu, 15 April 2020 14:18 WIB</td>\n",
" <td>Jakarta (ANTARA) - Beberapa waktu belakangan, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1464249/mado...</td>\n",
" <td>Madonna pergi ke pesta setelah klaim punya ant...</td>\n",
" <td>Ida Nurcahyani</td>\n",
" <td>Senin, 4 Mei 2020 14:44 WIB</td>\n",
" <td>Jakarta (ANTARA) - Madonna terlihat menghadiri...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1465821/sing...</td>\n",
" <td>Singapura yakini perlu ambil tindakan jangka p...</td>\n",
" <td>Aria Cindyara</td>\n",
" <td>Senin, 4 Mei 2020 20:51 WIB</td>\n",
" <td>Jakarta (ANTARA) - Pemerintah Singapura mengan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468893/alib...</td>\n",
" <td>Alibaba gratiskan buku panduan bagi pengusaha ...</td>\n",
" <td>M. Irfan Ilmie</td>\n",
" <td>Selasa, 5 Mei 2020 19:27 WIB</td>\n",
" <td>Jakarta (ANTARA) - Alibaba Business School mel...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468881/who-...</td>\n",
" <td>WHO: Kasus Desember COVID-19 Prancis \"tidak me...</td>\n",
" <td>Gusti Nur Cahya Aryani</td>\n",
" <td>Selasa, 5 Mei 2020 19:26 WIB</td>\n",
" <td>Mungkin juga ada lebih banyak kasus awal yang ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468863/indo...</td>\n",
" <td>Indonesia mendapat alat deteksi COVID-19 dari ...</td>\n",
" <td>Zeynita Gibbons</td>\n",
" <td>Selasa, 5 Mei 2020 19:21 WIB</td>\n",
" <td>London (ANTARA) - Badan Tenaga Atom Internasio...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468569/kasu...</td>\n",
" <td>Kasus COVID-19 menurun, Hong Kong siap longgar...</td>\n",
" <td>Suwanti</td>\n",
" <td>Selasa, 5 Mei 2020 17:41 WIB</td>\n",
" <td>Hong Kong (ANTARA) - Pemerintah Hong Kong pada...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468551/5000...</td>\n",
" <td>50.000 alat uji PCR diproduksi akhir Mei 2020 ...</td>\n",
" <td>Martha Herlinawati S</td>\n",
" <td>Selasa, 5 Mei 2020 17:37 WIB</td>\n",
" <td>BPPT juga mengembangkan ventilator portabel be...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468425/chin...</td>\n",
" <td>China sebut tiga vaksin COVID-19 masuki tahap ...</td>\n",
" <td>Aria Cindyara</td>\n",
" <td>Selasa, 5 Mei 2020 17:00 WIB</td>\n",
" <td>Jakarta (ANTARA) - Duta Besar China untuk Indo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468299/menr...</td>\n",
" <td>Menristek: 50.000 alat tes COVID-19 non PCR ak...</td>\n",
" <td>Martha Herlinawati S</td>\n",
" <td>Selasa, 5 Mei 2020 16:32 WIB</td>\n",
" <td>Dua perangkat ini diperkirakan akhir Juli 2020...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468149/pasi...</td>\n",
" <td>Pasien COVID-19 yang sembuh bertambah 243 menj...</td>\n",
" <td>Dewanto Samodro</td>\n",
" <td>Selasa, 5 Mei 2020 16:02 WIB</td>\n",
" <td>Jakarta (ANTARA) - Juru Bicara Pemerintah untu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468017/who-...</td>\n",
" <td>WHO peringatkan obat tradisional COVID-19 yang...</td>\n",
" <td>Asri Mayang Sari</td>\n",
" <td>Selasa, 5 Mei 2020 15:28 WIB</td>\n",
" <td>Paris (ANTARA) - Organisasi Kesehatan Dunia (W...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1467759/peny...</td>\n",
" <td>Penyebaran COVID melambat, pemerintah siapkan ...</td>\n",
" <td>Desca Lidya Natalia</td>\n",
" <td>Selasa, 5 Mei 2020 14:31 WIB</td>\n",
" <td>Pemerintah mempersiapkan 'exit strategy' pande...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1467753/ini-...</td>\n",
" <td>Ini rekomendasi UI terkait kebijakan ekonomi s...</td>\n",
" <td>Feru Lantara</td>\n",
" <td>Selasa, 5 Mei 2020 14:30 WIB</td>\n",
" <td>Rekomendasi yang diberikan adalah agar pemerin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1454884/keme...</td>\n",
" <td>Kemenperin ajukan IKM terdampak COVID-19 dapat...</td>\n",
" <td>Sella Panduarsa Gareta</td>\n",
" <td>Kamis, 30 April 2020 18:13 WIB</td>\n",
" <td>IKM cukup terpukul dengan penurunan permintaan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://kalbar.antaranews.com/berita/416589/sa...</td>\n",
" <td>Satu PDP COVID-19 di Singkawang meninggal</td>\n",
" <td>None</td>\n",
" <td>Selasa, 5 Mei 2020 18:58 WIB</td>\n",
" <td>Pontianak (ANTARA) - Satu pasien dalam pengawa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://lampung.antaranews.com/berita/416577/d...</td>\n",
" <td>Dinkes : Tambahan satu pasien terbukti positif...</td>\n",
" <td>None</td>\n",
" <td>Selasa, 5 Mei 2020 17:44 WIB</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://sumbar.antaranews.com/berita/354363/ka...</td>\n",
" <td>Kabar gembira lagi, setengah pasien COVID-19 d...</td>\n",
" <td>None</td>\n",
" <td>Selasa, 5 Mei 2020 14:49 WIB</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://jogja.antaranews.com/berita/423843/pem...</td>\n",
" <td>Pemkab Sleman tutup sementara operasional Indo...</td>\n",
" <td>None</td>\n",
" <td>Selasa, 5 Mei 2020 12:48 WIB</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://papua.antaranews.com/berita/549687/kab...</td>\n",
" <td>Kabar baik, Warga di Papua yang sembuh COVID-1...</td>\n",
" <td>None</td>\n",
" <td>Selasa, 5 Mei 2020 3:20 WIB</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://papuabarat.antaranews.com/berita/7605/...</td>\n",
" <td>Warga Papua Barat diimbau tidak resisten terha...</td>\n",
" <td>None</td>\n",
" <td>Selasa, 5 Mei 2020 0:26 WIB</td>\n",
" <td>Manokwari (ANTARA) - Wakil Gubernur Provinsi P...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://jatim.antaranews.com/berita/376935/gug...</td>\n",
" <td>Gugus Tugas Jatim umumkan 63 karyawan pabrik r...</td>\n",
" <td>None</td>\n",
" <td>Minggu, 3 Mei 2020 17:47 WIB</td>\n",
" <td>Surabaya (ANTARA) - Tim Gugus Tugas Percepatan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://kaltim.antaranews.com/berita/75297/per...</td>\n",
" <td>Peringati hari buruh, KPC galang dana COVID-19</td>\n",
" <td>None</td>\n",
" <td>Sabtu, 2 Mei 2020 3:44 WIB</td>\n",
" <td>Samarinda (ANTARA) - Perusahaan tambang batu b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://ambon.antaranews.com/berita/76350/pasi...</td>\n",
" <td>Pasien positif COVID-19 di Maluku sembuh</td>\n",
" <td>None</td>\n",
" <td>Rabu, 1 April 2020 20:09 WIB</td>\n",
" <td>Ambon (ANTARA) - Satu pasien positif COVID-19 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://sumut.antaranews.com/berita/285538/sat...</td>\n",
" <td>Satu PDP corona yang diisolasi di RSUP Adam Ma...</td>\n",
" <td>None</td>\n",
" <td>Selasa, 17 Maret 2020 22:10 WIB</td>\n",
" <td>Medan (ANTARA) - Seorang Pasien Dalam Pengawas...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468959/peng...</td>\n",
" <td>Pengamat: Banyak pelaku usaha tawarkan hal bar...</td>\n",
" <td>Aji Cakti</td>\n",
" <td>Selasa, 5 Mei 2020 19:42 WIB</td>\n",
" <td>Jakarta (ANTARA) - Pengamat bisnis Lucky Esa d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468929/pmi-...</td>\n",
" <td>PMI salurkan 40 ribu paket sembako kepada warg...</td>\n",
" <td>Aditia Aulia Rohman</td>\n",
" <td>Selasa, 5 Mei 2020 19:32 WIB</td>\n",
" <td>DKI Jakarta (ANTARA) - Palang Merah Indonesia ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468911/ahli...</td>\n",
" <td>Ahli: Terapi plasma darah untuk pasien COVID-1...</td>\n",
" <td>Prisca Triferna Violleta</td>\n",
" <td>Selasa, 5 Mei 2020 19:29 WIB</td>\n",
" <td>Kalau sebagai terapi harus diberikan kepada pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468845/pasi...</td>\n",
" <td>Pasien COVID-19 di Batam sembuh bertambah 1 ja...</td>\n",
" <td>Yuniati Jannatun Naim</td>\n",
" <td>Selasa, 5 Mei 2020 19:10 WIB</td>\n",
" <td>pasien positif COVID-19 nomor 7 dinyatakan sem...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468833/pold...</td>\n",
" <td>Polda Sumsel bentuk pengawasan khusus napi asi...</td>\n",
" <td>Yudi Abdullah</td>\n",
" <td>Selasa, 5 Mei 2020 19:09 WIB</td>\n",
" <td>Palembang (ANTARA) - Kepolisian Daerah Sumater...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468713/peng...</td>\n",
" <td>Pengamat : Usulan Pansus COVID-19 di Surabaya ...</td>\n",
" <td>Abdul Hakim</td>\n",
" <td>Selasa, 5 Mei 2020 18:40 WIB</td>\n",
" <td>Surabaya (ANTARA) - Pengamat politik sekaligus...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1465263/21-w...</td>\n",
" <td>21 warga Kebon Kacang dirujuk ke rumah sakit k...</td>\n",
" <td>Livia Kristianti</td>\n",
" <td>Senin, 4 Mei 2020 18:55 WIB</td>\n",
" <td>Jakarta (ANTARA) - Sebanyak 21 orang warga RW ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1465941/peme...</td>\n",
" <td>Pemerintah berikan kuota 10 GB sebagai insenti...</td>\n",
" <td>TIm JACX dan Kominfo</td>\n",
" <td>Senin, 4 Mei 2020 21:13 WIB</td>\n",
" <td>JAKARTA (ANTARA/JACX) - Sebuah pesan berantai ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1459017/kope...</td>\n",
" <td>Koperasi dan UMKM gelar aksi peduli bersama la...</td>\n",
" <td>Hanni Sofia</td>\n",
" <td>Sabtu, 2 Mei 2020 05:50 WIB</td>\n",
" <td>Jakarta (ANTARA) - Sejumlah pelaku koperasi da...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1454492/ayu-...</td>\n",
" <td>Ayu, sosok relawan COVID-19</td>\n",
" <td>Martha Herlinawati S</td>\n",
" <td>Kamis, 30 April 2020 16:50 WIB</td>\n",
" <td>Jakarta (ANTARA) - Tak terasa hampir sebulan, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468629/bali...</td>\n",
" <td>Balitbangtan lakukan riset potensi eucalyptus ...</td>\n",
" <td>Subagyo</td>\n",
" <td>Selasa, 5 Mei 2020 18:12 WIB</td>\n",
" <td>Hasil penelitian ini dapat menjadi harapan bar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Antaranews.com</td>\n",
" <td>https://www.antaranews.com/berita/1468767/dad-...</td>\n",
" <td>DAD - Polres Bengkayang bantu sembako warga te...</td>\n",
" <td>Dedi</td>\n",
" <td>Selasa, 5 Mei 2020 18:53 WIB</td>\n",
" <td>Pontianak (ANTARA) - Dewan Adat Dayak (DAD) Ka...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Antaranews.com https://www.antaranews.com/berita/1468851/peng... \n",
"1 Antaranews.com https://www.antaranews.com/berita/1468815/lpdb... \n",
"2 Antaranews.com https://www.antaranews.com/berita/1454852/depu... \n",
"3 Antaranews.com https://www.antaranews.com/berita/1454258/pert... \n",
"4 Antaranews.com https://www.antaranews.com/berita/1468719/kasu... \n",
"5 Antaranews.com https://www.antaranews.com/berita/1468623/pasi... \n",
"6 Antaranews.com https://www.antaranews.com/berita/1467981/pulo... \n",
"7 Antaranews.com https://www.antaranews.com/berita/1466679/kema... \n",
"8 Antaranews.com https://www.antaranews.com/berita/1468323/wand... \n",
"9 Antaranews.com https://www.antaranews.com/berita/1466463/kalo... \n",
"10 Antaranews.com https://www.antaranews.com/berita/1466409/liga... \n",
"11 Antaranews.com https://www.antaranews.com/berita/1438604/bale... \n",
"12 Antaranews.com https://www.antaranews.com/berita/1465647/lean... \n",
"13 Antaranews.com https://www.antaranews.com/berita/1465563/lean... \n",
"14 Antaranews.com https://www.antaranews.com/berita/1462239/keti... \n",
"15 Antaranews.com https://www.antaranews.com/berita/1467369/200-... \n",
"16 Antaranews.com https://www.antaranews.com/berita/1463349/bisa... \n",
"17 Antaranews.com https://www.antaranews.com/berita/1462287/cara... \n",
"18 Antaranews.com https://www.antaranews.com/berita/1422121/tips... \n",
"19 Antaranews.com https://www.antaranews.com/berita/1464249/mado... \n",
"20 Antaranews.com https://www.antaranews.com/berita/1465821/sing... \n",
"21 Antaranews.com https://www.antaranews.com/berita/1468893/alib... \n",
"22 Antaranews.com https://www.antaranews.com/berita/1468881/who-... \n",
"23 Antaranews.com https://www.antaranews.com/berita/1468863/indo... \n",
"24 Antaranews.com https://www.antaranews.com/berita/1468569/kasu... \n",
"25 Antaranews.com https://www.antaranews.com/berita/1468551/5000... \n",
"26 Antaranews.com https://www.antaranews.com/berita/1468425/chin... \n",
"27 Antaranews.com https://www.antaranews.com/berita/1468299/menr... \n",
"28 Antaranews.com https://www.antaranews.com/berita/1468149/pasi... \n",
"29 Antaranews.com https://www.antaranews.com/berita/1468017/who-... \n",
"30 Antaranews.com https://www.antaranews.com/berita/1467759/peny... \n",
"31 Antaranews.com https://www.antaranews.com/berita/1467753/ini-... \n",
"32 Antaranews.com https://www.antaranews.com/berita/1454884/keme... \n",
"33 Antaranews.com https://kalbar.antaranews.com/berita/416589/sa... \n",
"34 Antaranews.com https://lampung.antaranews.com/berita/416577/d... \n",
"35 Antaranews.com https://sumbar.antaranews.com/berita/354363/ka... \n",
"36 Antaranews.com https://jogja.antaranews.com/berita/423843/pem... \n",
"37 Antaranews.com https://papua.antaranews.com/berita/549687/kab... \n",
"38 Antaranews.com https://papuabarat.antaranews.com/berita/7605/... \n",
"39 Antaranews.com https://jatim.antaranews.com/berita/376935/gug... \n",
"40 Antaranews.com https://kaltim.antaranews.com/berita/75297/per... \n",
"41 Antaranews.com https://ambon.antaranews.com/berita/76350/pasi... \n",
"42 Antaranews.com https://sumut.antaranews.com/berita/285538/sat... \n",
"43 Antaranews.com https://www.antaranews.com/berita/1468959/peng... \n",
"44 Antaranews.com https://www.antaranews.com/berita/1468929/pmi-... \n",
"45 Antaranews.com https://www.antaranews.com/berita/1468911/ahli... \n",
"46 Antaranews.com https://www.antaranews.com/berita/1468845/pasi... \n",
"47 Antaranews.com https://www.antaranews.com/berita/1468833/pold... \n",
"48 Antaranews.com https://www.antaranews.com/berita/1468713/peng... \n",
"49 Antaranews.com https://www.antaranews.com/berita/1465263/21-w... \n",
"50 Antaranews.com https://www.antaranews.com/berita/1465941/peme... \n",
"51 Antaranews.com https://www.antaranews.com/berita/1459017/kope... \n",
"52 Antaranews.com https://www.antaranews.com/berita/1454492/ayu-... \n",
"53 Antaranews.com https://www.antaranews.com/berita/1468629/bali... \n",
"54 Antaranews.com https://www.antaranews.com/berita/1468767/dad-... \n",
"\n",
" title \\\n",
"0 Pengelola diajak tata ulang destinasi wisata s... \n",
"1 LPDB-KUMKM bantu permodalan koperasi terdampak... \n",
"2 Deputi BUMN: Transformasi penting meski pandem... \n",
"3 Pertamina sulap lapangan bola Simprug jadi rum... \n",
"4 Kasus positif COVID-19 di Jakarta bertambah 169 \n",
"5 Pasien sembuh COVID-19 di Kota Bogor bertambah... \n",
"6 Pulomas Jaya sediakan 2.100 tes cepat COVID-19 \n",
"7 Kemarin, puluhan warga positif COVID-19 hingga... \n",
"8 Wander Luiz lelang jersey bantu masyarakat ter... \n",
"9 Kalou diskors Hertha karena langgar aturan ter... \n",
"10 Liga Jerman temukan 10 kasus positif corona di... \n",
"11 Bale donasi ratusan ribu poundsterling guna pe... \n",
"12 Leani Ratri bertahan saat pandemi COVID-19 \n",
"13 Leani Ratri berlatih bersama adik selama pande... \n",
"14 Ketika pandemi munculkan ide leburnya ATP dan WTA \n",
"15 200 pelaku olahraga Bekasi jalani rapid test C... \n",
"16 Bisakah virus corona bertahan di paket belanja... \n",
"17 Cara kerja remdesivir pada pasien corona \n",
"18 Tips rawat hewan peliharaan di tengah pandemi ... \n",
"19 Madonna pergi ke pesta setelah klaim punya ant... \n",
"20 Singapura yakini perlu ambil tindakan jangka p... \n",
"21 Alibaba gratiskan buku panduan bagi pengusaha ... \n",
"22 WHO: Kasus Desember COVID-19 Prancis \"tidak me... \n",
"23 Indonesia mendapat alat deteksi COVID-19 dari ... \n",
"24 Kasus COVID-19 menurun, Hong Kong siap longgar... \n",
"25 50.000 alat uji PCR diproduksi akhir Mei 2020 ... \n",
"26 China sebut tiga vaksin COVID-19 masuki tahap ... \n",
"27 Menristek: 50.000 alat tes COVID-19 non PCR ak... \n",
"28 Pasien COVID-19 yang sembuh bertambah 243 menj... \n",
"29 WHO peringatkan obat tradisional COVID-19 yang... \n",
"30 Penyebaran COVID melambat, pemerintah siapkan ... \n",
"31 Ini rekomendasi UI terkait kebijakan ekonomi s... \n",
"32 Kemenperin ajukan IKM terdampak COVID-19 dapat... \n",
"33 Satu PDP COVID-19 di Singkawang meninggal \n",
"34 Dinkes : Tambahan satu pasien terbukti positif... \n",
"35 Kabar gembira lagi, setengah pasien COVID-19 d... \n",
"36 Pemkab Sleman tutup sementara operasional Indo... \n",
"37 Kabar baik, Warga di Papua yang sembuh COVID-1... \n",
"38 Warga Papua Barat diimbau tidak resisten terha... \n",
"39 Gugus Tugas Jatim umumkan 63 karyawan pabrik r... \n",
"40 Peringati hari buruh, KPC galang dana COVID-19 \n",
"41 Pasien positif COVID-19 di Maluku sembuh \n",
"42 Satu PDP corona yang diisolasi di RSUP Adam Ma... \n",
"43 Pengamat: Banyak pelaku usaha tawarkan hal bar... \n",
"44 PMI salurkan 40 ribu paket sembako kepada warg... \n",
"45 Ahli: Terapi plasma darah untuk pasien COVID-1... \n",
"46 Pasien COVID-19 di Batam sembuh bertambah 1 ja... \n",
"47 Polda Sumsel bentuk pengawasan khusus napi asi... \n",
"48 Pengamat : Usulan Pansus COVID-19 di Surabaya ... \n",
"49 21 warga Kebon Kacang dirujuk ke rumah sakit k... \n",
"50 Pemerintah berikan kuota 10 GB sebagai insenti... \n",
"51 Koperasi dan UMKM gelar aksi peduli bersama la... \n",
"52 Ayu, sosok relawan COVID-19 \n",
"53 Balitbangtan lakukan riset potensi eucalyptus ... \n",
"54 DAD - Polres Bengkayang bantu sembako warga te... \n",
"\n",
" author date_time \\\n",
"0 Hanni Sofia Selasa, 5 Mei 2020 19:15 WIB \n",
"1 Hanni Sofia Selasa, 5 Mei 2020 19:00 WIB \n",
"2 Mentari Dwi Gayati Kamis, 30 April 2020 17:52 WIB \n",
"3 Afut Syafril Nursyirwan Kamis, 30 April 2020 16:10 WIB \n",
"4 Livia Kristianti Selasa, 5 Mei 2020 18:43 WIB \n",
"5 Riza Harahap Selasa, 5 Mei 2020 18:05 WIB \n",
"6 Andi Firdaus Selasa, 5 Mei 2020 15:24 WIB \n",
"7 Andi Firdaus Selasa, 5 Mei 2020 07:00 WIB \n",
"8 Asep Firmansyah Selasa, 5 Mei 2020 16:34 WIB \n",
"9 A Rauf Andar Adipati Selasa, 5 Mei 2020 00:57 WIB \n",
"10 Aditya Eko Sigit Wicaksono Senin, 4 Mei 2020 23:56 WIB \n",
"11 Junaydi Suswanto Kamis, 23 April 2020 05:45 WIB \n",
"12 Shofi Ayudiana Senin, 4 Mei 2020 20:20 WIB \n",
"13 Shofi Ayudiana Senin, 4 Mei 2020 19:55 WIB \n",
"14 Jafar M Sidik Minggu, 3 Mei 2020 16:29 WIB \n",
"15 Pradita Kurniawan Syah Selasa, 5 Mei 2020 12:41 WIB \n",
"16 Natisha Andarningtyas Senin, 4 Mei 2020 06:12 WIB \n",
"17 Lia Wanadriani Santosa Minggu, 3 Mei 2020 16:43 WIB \n",
"18 Arnidhya Nur Zhafira Rabu, 15 April 2020 14:18 WIB \n",
"19 Ida Nurcahyani Senin, 4 Mei 2020 14:44 WIB \n",
"20 Aria Cindyara Senin, 4 Mei 2020 20:51 WIB \n",
"21 M. Irfan Ilmie Selasa, 5 Mei 2020 19:27 WIB \n",
"22 Gusti Nur Cahya Aryani Selasa, 5 Mei 2020 19:26 WIB \n",
"23 Zeynita Gibbons Selasa, 5 Mei 2020 19:21 WIB \n",
"24 Suwanti Selasa, 5 Mei 2020 17:41 WIB \n",
"25 Martha Herlinawati S Selasa, 5 Mei 2020 17:37 WIB \n",
"26 Aria Cindyara Selasa, 5 Mei 2020 17:00 WIB \n",
"27 Martha Herlinawati S Selasa, 5 Mei 2020 16:32 WIB \n",
"28 Dewanto Samodro Selasa, 5 Mei 2020 16:02 WIB \n",
"29 Asri Mayang Sari Selasa, 5 Mei 2020 15:28 WIB \n",
"30 Desca Lidya Natalia Selasa, 5 Mei 2020 14:31 WIB \n",
"31 Feru Lantara Selasa, 5 Mei 2020 14:30 WIB \n",
"32 Sella Panduarsa Gareta Kamis, 30 April 2020 18:13 WIB \n",
"33 None Selasa, 5 Mei 2020 18:58 WIB \n",
"34 None Selasa, 5 Mei 2020 17:44 WIB \n",
"35 None Selasa, 5 Mei 2020 14:49 WIB \n",
"36 None Selasa, 5 Mei 2020 12:48 WIB \n",
"37 None Selasa, 5 Mei 2020 3:20 WIB \n",
"38 None Selasa, 5 Mei 2020 0:26 WIB \n",
"39 None Minggu, 3 Mei 2020 17:47 WIB \n",
"40 None Sabtu, 2 Mei 2020 3:44 WIB \n",
"41 None Rabu, 1 April 2020 20:09 WIB \n",
"42 None Selasa, 17 Maret 2020 22:10 WIB \n",
"43 Aji Cakti Selasa, 5 Mei 2020 19:42 WIB \n",
"44 Aditia Aulia Rohman Selasa, 5 Mei 2020 19:32 WIB \n",
"45 Prisca Triferna Violleta Selasa, 5 Mei 2020 19:29 WIB \n",
"46 Yuniati Jannatun Naim Selasa, 5 Mei 2020 19:10 WIB \n",
"47 Yudi Abdullah Selasa, 5 Mei 2020 19:09 WIB \n",
"48 Abdul Hakim Selasa, 5 Mei 2020 18:40 WIB \n",
"49 Livia Kristianti Senin, 4 Mei 2020 18:55 WIB \n",
"50 TIm JACX dan Kominfo Senin, 4 Mei 2020 21:13 WIB \n",
"51 Hanni Sofia Sabtu, 2 Mei 2020 05:50 WIB \n",
"52 Martha Herlinawati S Kamis, 30 April 2020 16:50 WIB \n",
"53 Subagyo Selasa, 5 Mei 2020 18:12 WIB \n",
"54 Dedi Selasa, 5 Mei 2020 18:53 WIB \n",
"\n",
" paragraf \n",
"0 Pascapandemi diperkirakan akan terjadi kondisi... \n",
"1 Bantuan perkuatan permodalan dari LPDB-KUMKM m... \n",
"2 Kita butuh SDM Unggul yang toleran, yang memba... \n",
"3 di rumah sakit ini disediakan pula fasilitas p... \n",
"4 Jakarta (ANTARA) - Perkembangan virus corona (... \n",
"5 Bogor (ANTARA) - Pasien yang sembuh dari COVID... \n",
"6 Semua orang boleh mengikuti tes cepatJakarta (... \n",
"7 Jakarta (ANTARA) - Beragam peristiwa di wilaya... \n",
"8 Lelang dibuka hari Selasa, 5 Mei 2020 pukul 07... \n",
"9 Jakarta (ANTARA) - Salomon Kalou diskors oleh ... \n",
"10 Jakarta (ANTARA) - Liga Jerman pada Senin mend... \n",
"11 Jakarta (ANTARA) - Kapten timnas sepak bola Wa... \n",
"12 Saya enggak mau sia-sia udah jauh dari keluarg... \n",
"13 Jakarta (ANTARA) - Atlet para bulu tangkis Ind... \n",
"14 Jakarta (ANTARA) - Dua bulan tak menjalankan a... \n",
"15 Alhamdulillah hasilnya sudah keluar, semua din... \n",
"16 Jakarta (ANTARA) - Masyarakat mengandalkan bel... \n",
"17 Jakarta (ANTARA) - Remdesivir, obat antivirus ... \n",
"18 Jakarta (ANTARA) - Beberapa waktu belakangan, ... \n",
"19 Jakarta (ANTARA) - Madonna terlihat menghadiri... \n",
"20 Jakarta (ANTARA) - Pemerintah Singapura mengan... \n",
"21 Jakarta (ANTARA) - Alibaba Business School mel... \n",
"22 Mungkin juga ada lebih banyak kasus awal yang ... \n",
"23 London (ANTARA) - Badan Tenaga Atom Internasio... \n",
"24 Hong Kong (ANTARA) - Pemerintah Hong Kong pada... \n",
"25 BPPT juga mengembangkan ventilator portabel be... \n",
"26 Jakarta (ANTARA) - Duta Besar China untuk Indo... \n",
"27 Dua perangkat ini diperkirakan akhir Juli 2020... \n",
"28 Jakarta (ANTARA) - Juru Bicara Pemerintah untu... \n",
"29 Paris (ANTARA) - Organisasi Kesehatan Dunia (W... \n",
"30 Pemerintah mempersiapkan 'exit strategy' pande... \n",
"31 Rekomendasi yang diberikan adalah agar pemerin... \n",
"32 IKM cukup terpukul dengan penurunan permintaan... \n",
"33 Pontianak (ANTARA) - Satu pasien dalam pengawa... \n",
"34 None \n",
"35 None \n",
"36 None \n",
"37 None \n",
"38 Manokwari (ANTARA) - Wakil Gubernur Provinsi P... \n",
"39 Surabaya (ANTARA) - Tim Gugus Tugas Percepatan... \n",
"40 Samarinda (ANTARA) - Perusahaan tambang batu b... \n",
"41 Ambon (ANTARA) - Satu pasien positif COVID-19 ... \n",
"42 Medan (ANTARA) - Seorang Pasien Dalam Pengawas... \n",
"43 Jakarta (ANTARA) - Pengamat bisnis Lucky Esa d... \n",
"44 DKI Jakarta (ANTARA) - Palang Merah Indonesia ... \n",
"45 Kalau sebagai terapi harus diberikan kepada pa... \n",
"46 pasien positif COVID-19 nomor 7 dinyatakan sem... \n",
"47 Palembang (ANTARA) - Kepolisian Daerah Sumater... \n",
"48 Surabaya (ANTARA) - Pengamat politik sekaligus... \n",
"49 Jakarta (ANTARA) - Sebanyak 21 orang warga RW ... \n",
"50 JAKARTA (ANTARA/JACX) - Sebuah pesan berantai ... \n",
"51 Jakarta (ANTARA) - Sejumlah pelaku koperasi da... \n",
"52 Jakarta (ANTARA) - Tak terasa hampir sebulan, ... \n",
"53 Hasil penelitian ini dapat menjadi harapan bar... \n",
"54 Pontianak (ANTARA) - Dewan Adat Dayak (DAD) Ka... "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"antaranews_news=get_news(url, file_name = 'antaranews_satu')\n",
"antaranews_news"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#1\n",
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#2\n",
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#3\n",
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1 or i.find('psbb')!=-1:\n",
" url_list.append(i)\n",
" \n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" try:\n",
" a = soup.find(\"h1\", class_=\"detail__title\").text\n",
" for i in a:\n",
" b = re.sub('\\n', '', a)\n",
" titles.append(b) \n",
" except:\n",
" d = soup.find(\"h1\", class_=\"title\").text\n",
" titles.append(d)\n",
" return titles "
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i)) \n",
" try:\n",
" b = soup.find(\"div\", class_= \"detail__author\").text\n",
" author.append(b)\n",
" except:\n",
" d = \"Detik\"\n",
" author.append(d)\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" dates = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" try:\n",
" a = soup.find(\"div\", class_=\"detail__date\").text\n",
" dates.append(a)\n",
" except:\n",
" b = soup.find(\"div\", class_=\"date mt20\").text\n",
" dates.append(b)\n",
" return dates"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"#take all title from detik.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for h3 in soup.find_all(class_= \"media__title\"):\n",
" section_list.append(h3.a.get('href'))\n",
" #print(section_list)\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#7\n",
"#take all article pages from urls\n",
"def all_pages(main_url):\n",
" all_pages_articles = []\n",
" for page in main_url:\n",
" soup = parse_html(request_url(page))\n",
" div_class = soup.find_all(\"div\", class_= \"detail__anchor\")\n",
" #print(page, div_class)\n",
" \n",
" if (div_class == []):\n",
" print(page)\n",
" all_pages_articles.append(page)\n",
" else :\n",
" for i in div_class:\n",
" url_class = i.find_all(\"a\")\n",
" for j in url_class:\n",
" print(j.get('href'))\n",
" all_pages_articles.append(j.get('href'))\n",
" return all_pages_articles"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"def collect_text(main_url, titles = [], author = [], dates = []):\n",
" news = []\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
" for i in main_url:\n",
" a = parse_html(request_url(i))\n",
" b = a.find('p')\n",
" if (b == None):\n",
" c = parse_html(request_url(i))\n",
" u = c.find(\"div\", class_= \"detail__body-text\")\n",
" newparagraf = u.findAll(text=True, recursive=False)\n",
" data.append(newparagraf)\n",
" data = [[ele for ele in sub if ele not in ('s:parallaxindetail','e:parallaxindetail', '\\n', '\\t', '\\r')] for sub in data]\n",
" for i in data:\n",
" join.append(''.join(i))\n",
" data = []\n",
"\n",
" else:\n",
" a = parse_html(request_url(i))\n",
" for k in range(0, len(a.findAll('p'))):\n",
" newparagraf = a.find_all('p')[k].text\n",
" paragraf.append(newparagraf)\n",
" for i in paragraf: \n",
" data.append(re.sub('\\n', ' ', i)) \n",
" join.append(''.join(data))\n",
" data = []\n",
" paragraf = []\n",
" \n",
" for i in main_url:\n",
" news_title = \"Detik.com\"\n",
" news.append(news_title)\n",
"\n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news' : news[i], 'link' : j, 'title' : titles[i], 'author' : author[i], 'date_time' : dates[i], 'paragraf': join[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona = find_corona(section)\n",
" all_page = all_pages(corona)\n",
" titles = title(all_page)\n",
" author = writer(all_page)\n",
" dates = date(all_page)\n",
" text = collect_text(all_page, titles, author, dates)\n",
" print(text)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"main_url = ['https://news.detik.com/indeks?date=05%2F02%2F2020']"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://news.detik.com/berita-jawa-timur/d-5000117/2-karyawan-sampoerna-positif-corona-kampung-kawasan-pabrik-isolasi-diri/1\n",
"https://news.detik.com/berita-jawa-timur/d-5000117/2-karyawan-sampoerna-positif-corona-kampung-kawasan-pabrik-isolasi-diri/2\n",
"https://news.detik.com/berita/d-5000115/penutupan-pasar-di-polman-demi-cegah-corona-sempat-diwarnai-keributan\n",
"https://news.detik.com/berita-jawa-timur/d-5000113/dua-pedagang-positif-corona-pasar-kupang-gunung-ditutup-dua-pekan/1\n",
"https://news.detik.com/berita-jawa-timur/d-5000113/dua-pedagang-positif-corona-pasar-kupang-gunung-ditutup-dua-pekan/2\n",
"https://news.detik.com/berita-jawa-tengah/d-5000109/kasus-corona-di-grobogan-tambah-3-1-di-antaranya-bayi-1-bulan/1\n",
"https://news.detik.com/berita-jawa-tengah/d-5000109/kasus-corona-di-grobogan-tambah-3-1-di-antaranya-bayi-1-bulan/2\n",
"https://news.detik.com/berita-jawa-barat/d-5000082/update-corona-di-banten-hari-ini-5-pasien-sembuh-total-361-kasus-positif\n",
"https://news.detik.com/berita/d-5000081/belajar-dari-covid-19-ini-7-tips-nadiem-untuk-guru-saat-ajari-siswa\n",
"https://news.detik.com/berita/d-5000080/angin-segar-14-provinsi-nihil-kasus-baru-corona-per-2-mei-2020-ini-kuncinya/1\n",
"https://news.detik.com/berita/d-5000080/angin-segar-14-provinsi-nihil-kasus-baru-corona-per-2-mei-2020-ini-kuncinya/2\n",
"[{'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-timur/d-5000117/2-karyawan-sampoerna-positif-corona-kampung-kawasan-pabrik-isolasi-diri/1', 'title': ' 2 Karyawan Sampoerna Positif Corona, Kampung Kawasan Pabrik Isolasi Diri ', 'author': 'Esti Widiyana - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 23:07 WIB', 'paragraf': 'Perkampungan yang bersebelahan dengan pabrik rokok Sampoerna memilih menutup akses jalan. Itu setelah ada dua karyawan meninggal dan ratusan positif Corona.Bahkan, saat detikcom harus melewati portal. Selain itu saat masuk ke kampung untuk menemui RT/RW setempat, beragam pertanyaan dilontarkan dan harus dijawab. Jika tidak memiliki alasan yang jelas, warga yang berjaga di pintu masuk akan menyuruh siapapun untuk kembali lagi.Hal itu dilakukan agar tidak ada penyebaran virus Corona di perkampungan yang lokasinya berdekatan dengan pabrik yang kini tutup dan sementara tak berproduksi. Apalagi, beberapa karyawan yang indekos sekitar pabrik tengah melakukan isolasi mandiri. Mereka yang isolasi mandiri dirasa tidak terjangkit virus Corona.Salah satu warga yang juga istri Ketua RT setempat yang menolak menyebut nama mengaku ada 20 karyawan yang indekos di wilayahnya melakukan isolasi mandiri.\"Mereka yang ndak kena (Virus Corona) isolasi sendiri di kos,\" kata wanita tersebut saat ditemui detikcom di gang kampung, Sabtu (2/5/2020).Menurutnya, karyawan yang kini melakukan isolasi mandiri kemungkinan aman dari virus Corona. Dia juga berharap tak ada yang terjangkit di wilayahnya, baik itu dari karyawan PT HM Sampoerna atau warga sekitar. Selanjutnya Halaman 1 2 birojatim corona surabaya corona jatim corona di jatim klaster pt sampoerna '}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-timur/d-5000117/2-karyawan-sampoerna-positif-corona-kampung-kawasan-pabrik-isolasi-diri/2', 'title': ' 2 Karyawan Sampoerna Positif Corona, Kampung Kawasan Pabrik Isolasi Diri ', 'author': 'Esti Widiyana - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 23:07 WIB', 'paragraf': '\"Kalau yang pulang di kos gini kan kemungkinan aman. Di sini aman kok, semoga tidak ada yang terjangkit,\" jelasnya.Sementara Sukat, warga RT 05 Jalan Rungkut Lor IV mengatakan enam karyawan PT HM Sampoerna saat ini tidak boleh keluar dari kos. Mereka dianjurkan untuk melakukan isolasi mandiri.\"Sekitar enam karyawan. Malah harus di dalam kos tidak boleh keluar. Isolasi sendiri mereka,\" kata Sukat.Namun jika di antara mereka ada yang merasa sakit, tambah dia, dianjurkan segera ke puskesmas untuk memeriksakan diri.\"Kalau sampai ada yang sakit langsung disuruh periksa,\" pungkasnya.Selama ini, kawasan Rungkut dihuni ratusan karyawan pabrik rokok PT HM Sampoerna untuk indekos. Namun setelah ada 2 karyawan positif Corona meninggal, mereka dirumahkan. Sebelum dirumahkan, 506 karyawan menjalani rapid test. Dari jumlah itu, 123 dinyatakan reaktif rapid test. Mereka kemudian melakukan test swab dan hasilnya diketahui secara bertahap.'}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita/d-5000115/penutupan-pasar-di-polman-demi-cegah-corona-sempat-diwarnai-keributan', 'title': ' Penutupan Pasar di Polman Demi Cegah Corona Sempat Diwarnai Keributan ', 'author': 'Abdy Febriady - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 23:04 WIB', 'paragraf': 'Pasar rakyat di Dusun Kannusuang, Desa Pulliwa, Kecamatan Bulo, Kabupaten Polewali Mandar, ditutup warga setempat. Penutupan itu menuai aksi protes pedagang dan memicu keributan.Penutupan pasar dilakukan pada Sabtu (2/5/2020). Detik-detik keributan yang dipicu aksi protes pedagang akibat penutupan pasar ini, terekam kamera warga yang kini beredar luas di media sosial. Banyak pedagang yang tidak terima penutupan pasar tersebut, dengan berbagai macam alasan.\"Kami sebelumnya tidak mendapat informasi kalau pasar akan ditutup, kalau kita tidak jualan pada hari ini, barang-barang jualan kami akan rusak \" ujar salah seorang pedagang.\"Kalau hari ini kita buka pasar, pasar depan jangan ada yang membandel, sampai ada kebijakan baru, kita sepakati itu \" pinta pedagang lainnya.Kepala Dusun Kannusuang Lukman Logawali mengkonfirmasi adanya penutupan itu. Penutupan pasar dilakukan untuk mengantisipasi penularan virus COVID-19, apalagi dua warga di Kabupaten Polewali Mandar telah dinyatakan positif berdasarkan hasil swab.\"Penutupan pasar berdasarkan kesepakatan bersama warga, untuk mengantisipasi terjadinya hal tidak diinginkan, apalagi sudah ada dua warga di daerah ini (Polman) yang tertular virus corona \" kata Lukman kepada wartawan, Sabtu (2/5/2020).Lukman mengaku, rencana penutupan penutupan pasar Kannusuang telah diumumkan sebelumnya melalui media sosial. Penutupan itu berlaku untuk kios-kios yang menjual barang di luar kebutuhan pokok. \"Bahkan ada sejumlah pedagang yang kami telpon langsung dan sampaikan agar tidak usah datang, karena untuk sementara waktu pasar Kannusuang ditutup \" ujarnya.asar Kannusuang telah diumumkan sebelumnya melalui media sosial. Penutupan itu berlaku untuk kios-kios yang menjual barang di luar kebutuhan pokok.\"Pedagang sembako dan ikan tetap boleh berjualan, sedangkan pedagang lain seperti perabot, hiasan, pakaian apalagi pakaian bekas, dan lain-lain untuk sementara waktu, tidak boleh lagi berjualan di sini \" lanjutnya.Lukman mengatakan keributan selesai, setelah warga setempat mengalah dan memberikan kesempatan kepada pedagang untuk kembali berjualan, dengan perjanjian minggu depan pasar ditutup kecuali bagi pedagang sembako, hingga batas waktu yang tidak ditentukan.\"Apalagi penutupan pasar sempat menimbulkan antrean panjang kendaraan, hingga mengganggu aktifitas warga setempat, untuk menghindari keributan, pasar akhirnya di buka khusus untuk hari ini, namun pedagang tetap haru mematuhi aturan yang kami buat,\" kata Lukman.'}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-timur/d-5000113/dua-pedagang-positif-corona-pasar-kupang-gunung-ditutup-dua-pekan/1', 'title': ' Dua Pedagang Positif Corona, Pasar Kupang Gunung Ditutup Dua Pekan ', 'author': 'Esti Widiyana - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:55 WIB', 'paragraf': 'Pasar Kupang Gunung Surabaya ditutup selama 14 hari. Penutupan selama 2 pekan itu karena satu pedagang positif Corona dan orang tuanya yang juga pedagang PDP telah meninggal.Sementara itu pasar mulai tutup mulai pukul 00.00, Minggu (3/5/2020). Sosialisasi penutupan sudah dilakukan sehari sebelumnya.Dari pantauan detikcom, Pasar Kupang Gunung sudah tidak ada aktivitas. Bahkan ada pembatas dilarang melintas garis dari Satpol PP. Sebuah banner warna kuning tampak digantung bertuliskan tidak ada aktivitas di pasar mulai 2 hingga 16 Mei 2020.\"Iya betul (Penutupan pasar). Karena ada satu PDP meninggal dan anaknya yang jualan positif Corona,\" kata Kepala Bagian Perekonomian dan usaha Daerah Pemkot Surabaya Agus Hebi Djuniantoro saat dihubungi detikcom, Sabtu (2/5/2020).Hebi mengatakan, pedagang yang positif Corona itu tidak memiliki gejala atau bisa disebut sebagai OTG. Pedagang berjenis kelamin perempuan itu melakukan isolasi mandiri sambil dipantau oleh puskesmas setempat.\"Dia merasa sehat tapi positif, tidak ada gejala, sekarang isolasi di rumah sambil dipantau puskesmas dan diawasi,\" ujarnya. Selanjutnya Halaman 1 2 birojatim pasar kupang corona surabaya pemkot surabaya corona jatim '}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-timur/d-5000113/dua-pedagang-positif-corona-pasar-kupang-gunung-ditutup-dua-pekan/2', 'title': ' Dua Pedagang Positif Corona, Pasar Kupang Gunung Ditutup Dua Pekan ', 'author': 'Esti Widiyana - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:55 WIB', 'paragraf': 'Tak hanya pedagang positif Corona saja yang melakukan isolasi, pasar dan juga PKL sepanjang jalan diberhentikan sementara aktivitas jual beli. Bahkan ada beberapa pihak yang mengawasi pasar tersebut.\"Untuk pasarnya kita semprot, ada penjagaan di sana ada Satpol PP, LKMK, Polsek juga di sana. Aktivitas berdagang tidak ada, karantina atau isolasi mandiri,\" jelasnya.Dia menjelaskan, jika terdapat pedagang pasar yang positif Corona dan memiliki bukti medis terpaksa harus dilakukan karantina 14 hari. \"Kalau ada yang terkonfirmasi terpapar, ya kita lanjut untuk karantinanya,\" katanya.Intinya, lanjut Hebi, kedisiplinan antara penjual dan pedagang. Seperti selalu menggunakan masker, sering cuci tangan dan hidup bersih.\"Yang penting social distancing, karena di pasar bejubel (banyak orang). Kadang kalau diingatkan ndableg (nakal). Sudah menyangkut hal sakit kita ga boleh main-main,\" urainya.Pihaknya pun juga sudah melakukan sosialisasi sejak 2 bulan lalu untuk physical distancing, tidak bergerombol dan tetap menggunakan masker.\"Orang pakai masker paling lama satu jam. Kalau disiplin kita bisa. Manuto. Saya sosialisasi besoknya kembali lagi,\" keluhnya.'}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-tengah/d-5000109/kasus-corona-di-grobogan-tambah-3-1-di-antaranya-bayi-1-bulan/1', 'title': ' Kasus Corona di Grobogan Tambah 3, 1 Di Antaranya Bayi 1 Bulan ', 'author': 'Febrian Chandra - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:39 WIB', 'paragraf': 'Jumlah kasus positif virus Corona (COVID-19) di Kabupaten Grobogan, Jawa Tengah, bertambah menjadi tiga orang. Salah satu di antaranya merupakan bayi berusia satu bulan.\"Dengan bertambahnya tiga orang, sekarang total warga Grobogan yang terkonfirmasi positif ada 13 orang. Satu diantaranya seorang bayi laki - laki berusia 1 bulan,\" kata Ketua harian gugus tugas percepatan penanganan COVID-19 Kabupaten Grobogan Endang Sulistyoningsih saat dimintai konfirmasi via telepon, Sabtu (02/05/2020).Endang menyebut bayi tersebut tinggal di Kecamatan Tanggungharjo. Saat ini, bayi tersebut sedang menjalani perawatan intensif di RSUD KRMT Wongsonegoro Semarang.Kemudian kasus Corona lainnya seorang pria berusia 50 tahun asal Kecamatan Brati yang dirawat di RS Yakkum Purwodadi. Lalu, pria berusia 38 tahun dari Kecamatan Purwodadi yang sudah diisolasi di RSUD Ki Ageng Selo Wirosari. Selanjutnya Halaman 1 2 grobogan corona di jateng virus corona covid-19 birojatengdiy '}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-tengah/d-5000109/kasus-corona-di-grobogan-tambah-3-1-di-antaranya-bayi-1-bulan/2', 'title': ' Kasus Corona di Grobogan Tambah 3, 1 Di Antaranya Bayi 1 Bulan ', 'author': 'Febrian Chandra - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:39 WIB', 'paragraf': '\"Saat ini kita sedang lakukan tracing terhadap warga atau keluarga yang pernah berinteraksi dengan mereka,\" tutur Endang.Endang menerangkan dari 13 kasus positif virus Corona di Grobogan, dua di antaranya sudah meninggal dunia saat berstatus pasien dalam pengawasan (PDP).\"Total 13 kasus COVID-19 di Grobogan. Rinciannya dua meninggal dunia, dua sembuh, dan sembilan orang masih dirawat di rumah sakit,\" jelasnya.'}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita-jawa-barat/d-5000082/update-corona-di-banten-hari-ini-5-pasien-sembuh-total-361-kasus-positif', 'title': ' Update Corona di Banten Hari Ini: 5 Pasien Sembuh, Total 361 Kasus Positif ', 'author': \"Bahtiar Rifa'i - detikNews\", 'date_time': 'Sabtu, 02 Mei 2020 22:09 WIB', 'paragraf': 'Lima pasien di Provinsi Banten dinyatakan sembuh dari virus Corona pada hari ini, Sabtu (2/5/2020). Tak ada laporan penambahan pasien meninggal. Total ada 361 kasus positif, 218 masih dirawat, 95 sembuh dan 48 meninggal. Data yang dihimpun detikcom melalui situs resmi milik Tim Gugus Tugas Percepatan Penanganan COVID-19 Banten, pasien sembuh berasal dari Kota Tangerang sebanyak 2 orang, Tangsel satu orang, dan dua orang dari Kota Serang. Sementara untuk penambahan jumlah kasus totalnya hari ini ada 7 pasien. Penambahan 2 kasus positif berasal Kota Serang, Kota Tangerang 3 kasus, Kota Tangsel dan Kabupaten Tangerang masing-masing satu kasus. Untuk PDP di Banten jadi total ada 1.502 orang atau bertambah 9 orang. Yang sembuh ada 467 dan meninggal 159 orang. Sedangkan ODP berjumlah 6.614 orang. 1726 orang statusnya masih terus dipantatu dan sisanya sudah dinyatakan sembuh. Tiga besar jumlah kasus positif di Banten masih terjadi di Tangerang Raya. Sementara Lebak sampai hari ini belum melaporkan ada kasus positif Corona. Berikut sebaran jumlah total pasien positif per daerah di Banten: 1. Kota Tangerang 160 kasus.2. Tangerang Selatan 108 kasus.3. Kabupaten Tangerang 81 kasus.4. Kota Serang 6 kasus.5. Pandeglangg 3 kasus.6. Serang 2 kasus. 7. Kota Cilegon 1 kasus. '}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita/d-5000081/belajar-dari-covid-19-ini-7-tips-nadiem-untuk-guru-saat-ajari-siswa', 'title': ' Belajar dari COVID-19, Ini 7 Tips Nadiem untuk Guru Saat Ajari Siswa ', 'author': 'Reyhan Diandri - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:08 WIB', 'paragraf': 'Kementerian Pendidikan dan Kebudayaan (Kemendikbud) menggelar acara bertajuk \\'Belajar Dari COVID-19\\' dalam rangka merayakan Hari Pendidikan Nasional (Hardiknas) 2020. Acara tersebut disiarkan langsung hari ini melalui salah satu stasiun TV nasional.Menteri Pendidikan dan Kebudayaan (Mendikbud) Nadiem Makarim membagikan tujuh tips untuk guru-guru maupun orang tua agar dapat dengan efektif memberikan pelajaran kepada anak dari rumah.\"Saya ingin hari ini memberikan beberapa pikiran saya dan juga beberapa tips-tips bagi pengajar baik guru atau orang tua dalam menghadapi kondisi situasi krisis ini. Di mana banyak sekali murid dan guru yang belajar dari rumah,\" ujar Nadiem, melalui live streaming acara Hardiknas 2020 di YouTube, Sabtu (2/5/2020).Nadiem mengatakan untuk tips yang pertama dan juga yang terpenting adalah jangan stres. Jika masih ada banyak guru atau orang tua yang belum mengenal teknologi, cobalah untuk beradaptasi.\"Ini merupakan masa adaptasi pasti yang tentunya tidak mudah dan penuh dengan kebingungan serta ketidakpastian. Jadi jangan khawatir, yakini bahwa cara terbaik untuk belajar suatu hal baru adalah untuk keluar dari zona nyaman tersebut,\" jelas Nadiem.Kemudian untuk tips kedua adalah membagi kelas menjadi kelompok yang lebih kecil. Dengan membagikan murid atau anak ke dalam beberapa kelompok kecil sesuai kompetensi mereka, maka waktu belajar akan lebih efektif.\"Cobalah membagi kelompok belajar berdasarkan kompetensi yang sama. Sebagai contoh mengapa 5 jam mengajar dalam satu hari harus dilakukan secara bersamaan, padahal itu bisa juga dibagi menjadi 5 kelompok yang lebih kecil melalui video conference. Setelah dibagi tersebut anak-anak dapat dibagi lagi sesuai dengan kompetensi mereka mungkin ada dengan pelajaran yang sulit atau diberi pelajaran yang mereka senangi, \" ujar Nadiem.Untuk tips yang ketiga Nadiem mengatakan para pengajar bisa memberikan group assignment yang menciptakan suatu tantangan dan kolaborasi. Dengan hal tersebut murid-murid dapat belajar bertanggung jawab, karena nilai mereka saling bergantung satu antarlainnya.\"Dan ini melatih empati mereka dan juga kemampuan mereka untuk mendorong kemampuan sesama lainnya. Dan yang terpenting untuk para siswa adalah asas gotong royong mereka itu juga terbentuk,\" ungkap Nadiem.Tips keempat adalah mengalokasikan waktu bagi murid-murid yang tertinggal di dalam kelas. Nadiem mencontohkan jika para guru dapat mengalokasikan waktu mereka kepada murid tersebut dengan baik, maka sang murid dapat lebih percaya diri bergabung lagi dengan yang lainnya nantinya setelah krisis COVID-19 berakhir.\"Mungkinkah ini juga kesempatan untuk melibatkan orang tua dalam lebih memahami dan membantu, seperti apa tantangan anak-anak mereka yang mungkin membuat pembelajaran terganggu atau lebih lambat di suatu topik tertentu,\" jelas Nadiem.Kemudian tips kelima yaitu fokus kepada yang terpenting. Menurut Nadiem, kalau kita mengajarkan semua pembelajaran dalam waktu bersamaan maka anak-anak mungkin tidak dapat memahami dengan baik. Oleh karena itu daripada kejar tayang semua topik, inilah saatnya untuk menguatkan konsep fundamental yang mendasari kemampuan para murid untuk bisa sukses di pelajaran manapun.Sedangkan tips keenam adalah mencontek dari guru-guru lain. Nadiem mengatakan bahwa banyak guru yang berinovasi dengan metode belajar online. Sama seperti murid adapula guru yang lebih cepat beradaptasi dan ada juga guru yang lebih lambat dalam beradaptasi.\"Maka dari itu jangan ragu-ragu untuk meminta pertolongan dari guru lain. Jangan ragu untuk meminta contoh-contoh atau best practice dari guru lain. Seperti bagaimana menyampaikan bahan, bagaimana mereka menggunakan fitur-fitur software. Jadi itulah maksud saya dari mencontek guru lain,\" jelas Nadiem.Dan tips yang terakhir adalah have fun. Nadiem berpendapat bahwa mengajar itu tidak mudah tapi siapa bilang harus membosankan.\"Walaupun kita dalam krisis, ini saatnya kita mencoba hal-hal yang masih diragukan namun dalam hati kita, kita rasa inilah yang terbaik. Inilah saatnya kita mendengarkan insting kita sebagai guru dan orang tua dan bukan hanya mengikuti proses seadanya. Seperti murid, inilah saatnya guru berinovasi dengan melakukan banyak tanya, banyak coba, dan banyak karya,\" pungkasi Nadiem.Sebagai informasi, \\'Belajar Dari COVID-19\\' merupakan sebuah program dari Kemendikbud untuk menginpirasi murid, guru, orang tua, serta masyarakat bahwa dalam kondisi pandemi ini kita masih bisa mendapatkan hikmah pembelajaran untuk bangsa Indonesia kedepannya. Dalam acara ini juga terdapat penampilan musik dari musisi tanah air seperti Tulus, Vidi Aldiano, Rinni Wulandari, Rizky Febian, Lyodra, Naura, Sabyan, Gitabumi Voices, dan Bina Vokalia.Ada berbagai segmen menarik seperti diskusi dari Mendikbud dan Najwa Shihab yang belajar bareng mengenai COVID-19, serta selipan video-video edukasi dan komentar dari para murid, guru, hingga relawan mengenai kegiatan mereka di masa COVID-19.'}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita/d-5000080/angin-segar-14-provinsi-nihil-kasus-baru-corona-per-2-mei-2020-ini-kuncinya/1', 'title': ' Angin Segar 14 Provinsi Nihil Kasus Baru Corona per 2 Mei 2020, Ini Kuncinya ', 'author': 'Hestiana Dharmastuti - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:07 WIB', 'paragraf': 'Pemerintah menyampaikan kabar baik penanganan virus Corona (COVID-19). Ada 14 provinsi yang tidak ada penambahan kasus baru positif Corona.Kabar baik ini disampaikan Juru Bicara Pemerintah untuk Penanganan Wabah Virus Corona, Achmad Yurianto, dalam konferensi pers yag disiarkan di akun YouTube BNPB, Sabtu (2/5/2020).Berdasarkan data dari 1 Mei pukul 12.00 WIB hingga 2 Mei pukul 12.00 WIB, ada 14 provinsi yang tidak ada penambahan kasus positif.Berikut 14 provinsi yang tidak ada penambahan kasus baru positif Corona 2 Mei 2002:1. Aceh: Nihil kasus positif baru (Kasus kumulatif 11)2. Bangka Belitung: Nihil kasus positif baru (Kasus kumulatif 19)3. Bengkulu: Nihil kasus positif baru (Kasus kumulatif 12)4. Jambi: Nihil kasus positif baru (Kasus kumulatif 32)5. Kalimantan Selatan: Nihil kasus positif baru (Kasus kumulatif 179)6. Kepulauan Riau: Nihil kasus positif baru (Kasus kumulatif 89)7. Sumatera Selatan: Nihil kasus positif baru (Kasus kumulatif 156)8. Sulawesi Utara: Nihil kasus positif baru (Kasus kumulatif 45)9 Sumatera Utara: Nihil kasus positif baru (Kasus kumulatif 117)10. Lampung: Nihil kasus positif baru (Kasus kumulatif 50)11. Maluku Utara: Nihil kasus positif baru (Kasus kumulatif 41)12. Maluku: Nihil kasus positif baru (Kasus kumulatif 23)13. Nusa Tenggara Timur: Nihil kasus positif baru (Kasus kumulatif 3)14. Gorontalo: Nihil kasus positif baru (Kasus kumulatif 15) Selanjutnya Halaman 1 2 virus corona corona round-up positif corona '}, {'news': 'Detik.com', 'link': 'https://news.detik.com/berita/d-5000080/angin-segar-14-provinsi-nihil-kasus-baru-corona-per-2-mei-2020-ini-kuncinya/2', 'title': ' Angin Segar 14 Provinsi Nihil Kasus Baru Corona per 2 Mei 2020, Ini Kuncinya ', 'author': 'Hestiana Dharmastuti - detikNews', 'date_time': 'Sabtu, 02 Mei 2020 22:07 WIB', 'paragraf': 'Yuri mengimbau warga untuk tetap di rumah dan tidak mudik. Sebab, hal itu menjadi kunci untuk memutus mata rantai penularan COVID-19.\"Oleh karena itu tetap tinggal di rumah, jangan bepergian, jangan mudik. Ini kunci agar kita pastikan untuk tidak tertular atau tidak menulari orang lain,\" ujarnya.Data kasus positif COVID-19 di Indonesia per Sabtu, 2 Mei 2020, mencapai 10.843 kasus. Sedangkan pasien sembuh Corona berjumlah 1.665 orang dan meninggal 831 orang.Jumlah kasus positif hari ini bertambah 292 orang. Sementara itu, jumlah pasien sembuh bertambah 74 orang dan jumlah pasien yang meninggal bertambah 31 orang.'}]\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-timur/d-500...</td>\n",
" <td>2 Karyawan Sampoerna Positif Corona, K...</td>\n",
" <td>Esti Widiyana - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 23:07 WIB</td>\n",
" <td>Perkampungan yang bersebelahan dengan pabrik r...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-timur/d-500...</td>\n",
" <td>2 Karyawan Sampoerna Positif Corona, K...</td>\n",
" <td>Esti Widiyana - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 23:07 WIB</td>\n",
" <td>\"Kalau yang pulang di kos gini kan kemungkinan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita/d-5000115/penutu...</td>\n",
" <td>Penutupan Pasar di Polman Demi Cegah C...</td>\n",
" <td>Abdy Febriady - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 23:04 WIB</td>\n",
" <td>Pasar rakyat di Dusun Kannusuang, Desa Pulliwa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-timur/d-500...</td>\n",
" <td>Dua Pedagang Positif Corona, Pasar Kup...</td>\n",
" <td>Esti Widiyana - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:55 WIB</td>\n",
" <td>Pasar Kupang Gunung Surabaya ditutup selama 14...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-timur/d-500...</td>\n",
" <td>Dua Pedagang Positif Corona, Pasar Kup...</td>\n",
" <td>Esti Widiyana - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:55 WIB</td>\n",
" <td>Tak hanya pedagang positif Corona saja yang me...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-tengah/d-50...</td>\n",
" <td>Kasus Corona di Grobogan Tambah 3, 1 D...</td>\n",
" <td>Febrian Chandra - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:39 WIB</td>\n",
" <td>Jumlah kasus positif virus Corona (COVID-19) d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-tengah/d-50...</td>\n",
" <td>Kasus Corona di Grobogan Tambah 3, 1 D...</td>\n",
" <td>Febrian Chandra - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:39 WIB</td>\n",
" <td>\"Saat ini kita sedang lakukan tracing terhadap...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita-jawa-barat/d-500...</td>\n",
" <td>Update Corona di Banten Hari Ini: 5 Pa...</td>\n",
" <td>Bahtiar Rifa'i - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:09 WIB</td>\n",
" <td>Lima pasien di Provinsi Banten dinyatakan semb...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita/d-5000081/belaja...</td>\n",
" <td>Belajar dari COVID-19, Ini 7 Tips Nadi...</td>\n",
" <td>Reyhan Diandri - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:08 WIB</td>\n",
" <td>Kementerian Pendidikan dan Kebudayaan (Kemendi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita/d-5000080/angin-...</td>\n",
" <td>Angin Segar 14 Provinsi Nihil Kasus Ba...</td>\n",
" <td>Hestiana Dharmastuti - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:07 WIB</td>\n",
" <td>Pemerintah menyampaikan kabar baik penanganan ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Detik.com</td>\n",
" <td>https://news.detik.com/berita/d-5000080/angin-...</td>\n",
" <td>Angin Segar 14 Provinsi Nihil Kasus Ba...</td>\n",
" <td>Hestiana Dharmastuti - detikNews</td>\n",
" <td>Sabtu, 02 Mei 2020 22:07 WIB</td>\n",
" <td>Yuri mengimbau warga untuk tetap di rumah dan ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Detik.com https://news.detik.com/berita-jawa-timur/d-500... \n",
"1 Detik.com https://news.detik.com/berita-jawa-timur/d-500... \n",
"2 Detik.com https://news.detik.com/berita/d-5000115/penutu... \n",
"3 Detik.com https://news.detik.com/berita-jawa-timur/d-500... \n",
"4 Detik.com https://news.detik.com/berita-jawa-timur/d-500... \n",
"5 Detik.com https://news.detik.com/berita-jawa-tengah/d-50... \n",
"6 Detik.com https://news.detik.com/berita-jawa-tengah/d-50... \n",
"7 Detik.com https://news.detik.com/berita-jawa-barat/d-500... \n",
"8 Detik.com https://news.detik.com/berita/d-5000081/belaja... \n",
"9 Detik.com https://news.detik.com/berita/d-5000080/angin-... \n",
"10 Detik.com https://news.detik.com/berita/d-5000080/angin-... \n",
"\n",
" title \\\n",
"0 2 Karyawan Sampoerna Positif Corona, K... \n",
"1 2 Karyawan Sampoerna Positif Corona, K... \n",
"2 Penutupan Pasar di Polman Demi Cegah C... \n",
"3 Dua Pedagang Positif Corona, Pasar Kup... \n",
"4 Dua Pedagang Positif Corona, Pasar Kup... \n",
"5 Kasus Corona di Grobogan Tambah 3, 1 D... \n",
"6 Kasus Corona di Grobogan Tambah 3, 1 D... \n",
"7 Update Corona di Banten Hari Ini: 5 Pa... \n",
"8 Belajar dari COVID-19, Ini 7 Tips Nadi... \n",
"9 Angin Segar 14 Provinsi Nihil Kasus Ba... \n",
"10 Angin Segar 14 Provinsi Nihil Kasus Ba... \n",
"\n",
" author date_time \\\n",
"0 Esti Widiyana - detikNews Sabtu, 02 Mei 2020 23:07 WIB \n",
"1 Esti Widiyana - detikNews Sabtu, 02 Mei 2020 23:07 WIB \n",
"2 Abdy Febriady - detikNews Sabtu, 02 Mei 2020 23:04 WIB \n",
"3 Esti Widiyana - detikNews Sabtu, 02 Mei 2020 22:55 WIB \n",
"4 Esti Widiyana - detikNews Sabtu, 02 Mei 2020 22:55 WIB \n",
"5 Febrian Chandra - detikNews Sabtu, 02 Mei 2020 22:39 WIB \n",
"6 Febrian Chandra - detikNews Sabtu, 02 Mei 2020 22:39 WIB \n",
"7 Bahtiar Rifa'i - detikNews Sabtu, 02 Mei 2020 22:09 WIB \n",
"8 Reyhan Diandri - detikNews Sabtu, 02 Mei 2020 22:08 WIB \n",
"9 Hestiana Dharmastuti - detikNews Sabtu, 02 Mei 2020 22:07 WIB \n",
"10 Hestiana Dharmastuti - detikNews Sabtu, 02 Mei 2020 22:07 WIB \n",
"\n",
" paragraf \n",
"0 Perkampungan yang bersebelahan dengan pabrik r... \n",
"1 \"Kalau yang pulang di kos gini kan kemungkinan... \n",
"2 Pasar rakyat di Dusun Kannusuang, Desa Pulliwa... \n",
"3 Pasar Kupang Gunung Surabaya ditutup selama 14... \n",
"4 Tak hanya pedagang positif Corona saja yang me... \n",
"5 Jumlah kasus positif virus Corona (COVID-19) d... \n",
"6 \"Saat ini kita sedang lakukan tracing terhadap... \n",
"7 Lima pasien di Provinsi Banten dinyatakan semb... \n",
"8 Kementerian Pendidikan dan Kebudayaan (Kemendi... \n",
"9 Pemerintah menyampaikan kabar baik penanganan ... \n",
"10 Yuri mengimbau warga untuk tetap di rumah dan ... "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d = get_news(main_url, file_name = 'detik_satu')\n",
"d"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for i in soup.find_all(\"li\", class_= \"list-group-item\"):\n",
" b = i.a.get('href')\n",
" section_list.append(b)\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def find_corona(main_url):\n",
" url_list = []\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1 or i.find('psbb')!=-1:\n",
" url_list.append(i)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" a = soup.find(\"h1\").text\n",
" titles.append(a)\n",
" return titles "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i)) \n",
" aut = soup.find('div', class_='content')\n",
" get_author = ' '.join(aut.text.split())\n",
" new = ''.join(get_author.partition('Editor: ')[2:])\n",
" b = (new.partition('Bagikan melalui: ')[0])\n",
" c = (b.partition('googletag.cmd.push(function()')[0])\n",
" author.append(c)\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" datetime = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_datetime = soup.find(\"div\", class_=\"content\")\n",
" get = get_datetime.find('p', class_ = \"date\")\n",
" if(get):\n",
" datetime.append(get.text)\n",
" else:\n",
" datetime.append('None')\n",
" \n",
" \n",
" return datetime"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], datetime = []):\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
"\n",
" for i,j in enumerate(main_url):\n",
" a = parse_html(request_url(j))\n",
" pragraf_text = a.find_all(\"div\", id = \"allcontentgm\")\n",
" pragraf_photos = a.find_all(\"div\", class_=\"read-page--photo-tag--header__content\")\n",
" if(pragraf_text):\n",
" for k in pragraf_text:\n",
" newparagraf = k.find_all('p')\n",
" for x in newparagraf:\n",
" paragraf.append(x.text)\n",
" else:\n",
" for k in pragraf_photos:\n",
" paragraf.append(k.text)\n",
" data.append(paragraf)\n",
" paragraf = []\n",
" for i in data:\n",
" join.append(' '.join(i))\n",
" \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news': 'Glamedianews.com', 'link' : j, 'title': titles[i], 'author' : author[i], 'date_time': datetime[i], 'paragraf' : join[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona_news = find_corona(section)\n",
" titles = title(corona_news)\n",
" author = writer(corona_news)\n",
" datetime = date(corona_news)\n",
" text = collect_text(corona_news, titles, author, datetime)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"main_url=[\"https://www.galamedianews.com/?indeks=yes&tanggal=4&bulan=5&tahun=2020&submit=TAMPILKAN\"]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256313&amp;jud...</td>\n",
" <td>PMI Kota Sukabumi Bentuk Kelurahan Siaga Covid-19</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 23:36 WIB</td>\n",
" <td>PALANG Merah Indonesia (PMI) Kota Sukabumi, me...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256312&amp;jud...</td>\n",
" <td>Sudah 101 Kasus Hoaks Covid-19 di Medsos Diung...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 23:01 WIB</td>\n",
" <td>KASUS penyebaran informasi hoaks soal Covid-19...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256316&amp;jud...</td>\n",
" <td>Panglima TNI Perintahkan Seluruh Jajaran TNI U...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 22:50 WIB</td>\n",
" <td>PANGLIMA TNI Marsekal TNI Dr. (H.C.) Hadi Tjah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256314&amp;jud...</td>\n",
" <td>Presiden Jokowi Ikuti KTT Gerakan Non-Blok Vir...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 22:39 WIB</td>\n",
" <td>PRESIDEN Joko Widodo (Jokowi) mengikuti Konfer...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256309&amp;jud...</td>\n",
" <td>Pemkab Purwakarta Pastikan sebelum PSBB Bansos...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 21:59 WIB</td>\n",
" <td>PEMERINTAH Kabupaten Purwakarta, memastikan ba...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256303&amp;jud...</td>\n",
" <td>Prediksi Trump, Angka Kematian Covid-19 di AS ...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 21:06 WIB</td>\n",
" <td>PRESIDEN Amerika Serikat, Donald Trump, mempre...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256299&amp;jud...</td>\n",
" <td>Terhimpit Masalah Ekonomi Akibat Covid-19, Rat...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 20:50 WIB</td>\n",
" <td>BERBAGAI sektor bisnis terpukul akibat mewabah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256297&amp;jud...</td>\n",
" <td>Anies Bakal \"Hukum\" Warganya yang Nekat Mudik ...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 20:42 WIB</td>\n",
" <td>PANDEMI Covid-19 berhasil membuat masyarakat d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256293&amp;jud...</td>\n",
" <td>Rilis Animasi Tentara Terakota vs Patung Liber...</td>\n",
" <td>Mia Fahrani</td>\n",
" <td>Senin, 4 Mei 2020 | 20:20 WIB</td>\n",
" <td>Cina merilis tayangan animasi pendek berjudul ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256291&amp;jud...</td>\n",
" <td>Terus Merebak Covid-19, Forkopkes Garut Sampai...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 20:05 WIB</td>\n",
" <td>FORUM Komunikasi Organisasi Profesi Kesehatan ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256290&amp;jud...</td>\n",
" <td>Dampak Pandemi Covid-19, 14 Belasan Perusahaan...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 20:05 WIB</td>\n",
" <td>KAMAR Dagang dan Industri (Kadin) Kabupaten Ka...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256288&amp;jud...</td>\n",
" <td>Data dan Fakta Pelaksanaan PSBB di Kota Bandun...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 19:55 WIB</td>\n",
" <td>MENJELANG dua pekan berlangsungnya Pembatasan ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256287&amp;jud...</td>\n",
" <td>Hasil PSBB di Kota Cimahi Saling Bertolak Bela...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 19:47 WIB</td>\n",
" <td>PEMBATASAN Sosial Berskala Besar (PSBB) di Kot...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256284&amp;jud...</td>\n",
" <td>Pemkab Ajak Milenial Cegah Corona, Teh Nia : M...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 18:55 WIB</td>\n",
" <td>PENYEBARAN virus corona telah menjadi masalah ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256283&amp;jud...</td>\n",
" <td>Di Subang, Tiga Orang Pasien Positif Sembuh da...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 18:51 WIB</td>\n",
" <td>PASIEN positif Covid-19 di Kabupaten Subang ya...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256281&amp;jud...</td>\n",
" <td>Cegah Sebaran Covid-19 Kota Tasikmalaya Berlak...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 18:41 WIB</td>\n",
" <td>PEMERINTAH Kota Tasikmalaya akhirnya memberlak...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256278&amp;jud...</td>\n",
" <td>Reproduksi Virus Corona di Kota Bandung Diklai...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 18:34 WIB</td>\n",
" <td>KOORDINATOR Bidang Perencanaan, Data, Kajian d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256277&amp;jud...</td>\n",
" <td>Covid-19 Masih Mengganas, Jepang Perpanjang St...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 18:22 WIB</td>\n",
" <td>PERDANA Menteri Jepang Shinzo Abe pada Senin (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256275&amp;jud...</td>\n",
" <td>Kota Bandung Bakal Ikuti PSBB Provinsi Hingga ...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 17:59 WIB</td>\n",
" <td>PEMKOT Bandung memastikan Pelaksanaan pembatas...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256274&amp;jud...</td>\n",
" <td>Yurianto: Batasi Keluar Rumah untuk Hentikan C...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 17:58 WIB</td>\n",
" <td>JURU Bicara Pemerintah untuk Penanganan Covid-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256272&amp;jud...</td>\n",
" <td>Fokus Penegakan Hukum Sumedang Perpanjang Masa...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 17:47 WIB</td>\n",
" <td>PEMERINTAH Kabupaten Sumedang memperpanjang ma...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256273&amp;jud...</td>\n",
" <td>Wabup Garut : Pelanggar PSBB Akan Diberi Sanks...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 17:42 WIB</td>\n",
" <td>WAKIL Bupati (Wabup) Garut Helmi Budiman menya...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256261&amp;jud...</td>\n",
" <td>Jumlah Pasien Positif dan yang Sembuh Covid-19...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 17:27 WIB</td>\n",
" <td>JUMLAH pasien positif virus corona RI masih be...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256258&amp;jud...</td>\n",
" <td>Grafik Semakin Landai, Ini Dia Data Pasien Cov...</td>\n",
" <td>Kiki Kurnia</td>\n",
" <td>Senin, 4 Mei 2020 | 17:12 WIB</td>\n",
" <td>TIM Gugus Tugas Percepatan Penanganan Covid-19...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256265&amp;jud...</td>\n",
" <td>Pasutri Disarankan Menunda Kehamilan di Masa P...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 17:08 WIB</td>\n",
" <td>BADAN Kependudukan dan Keluarga Berencana Nasi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256260&amp;jud...</td>\n",
" <td>Sah, PSBB se-Jawa Barat Dimulai Tanggal 6 Mei ...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 16:28 WIB</td>\n",
" <td>DUA hari menjelang pemberlakukan PSBB di Wilay...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256255&amp;jud...</td>\n",
" <td>Legislator Pantau Penyaluran Bantuan untuk War...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 15:55 WIB</td>\n",
" <td>SEJUMLAH anggota DPRD Kabupaten Bandung dari D...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256254&amp;jud...</td>\n",
" <td>Gunakan Lego dalam Animasi \"Once Upon a Virus\"...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 15:34 WIB</td>\n",
" <td>CINA merilis film animasi pendek berjudul \"Onc...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256251&amp;jud...</td>\n",
" <td>Menhan Inggris Ungkapkan Pengalaman Tertular C...</td>\n",
" <td>Rosyad Abdullah</td>\n",
" <td>Senin, 4 Mei 2020 | 14:59 WIB</td>\n",
" <td>MENGIDAP COVID-19 \"sangat mengerikan\", ungkap ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256252&amp;jud...</td>\n",
" <td>Emil: Tiga Kunci Tekan Sebaran Covid-19</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 14:54 WIB</td>\n",
" <td>UNTUK menekan persebaran Covid-19 di Jawa Bara...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256237&amp;jud...</td>\n",
" <td>Berangsur Pulih dari Covid-19, Pasar Mobil di ...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 14:52 WIB</td>\n",
" <td>PENJUALAN beberapa merek mobil di China tumbuh...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256249&amp;jud...</td>\n",
" <td>DPR RI Dukung Perppu Covid-19</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 14:38 WIB</td>\n",
" <td>BADAN Anggaran DPR RI mendukung Peraturan Peme...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256248&amp;jud...</td>\n",
" <td>Dampak Covid-19, 70.367 Pekerja Migran Kembali...</td>\n",
" <td>Lucky M. Lukman</td>\n",
" <td>Senin, 4 Mei 2020 | 14:37 WIB</td>\n",
" <td>KETUA Gugus Tugas Percepatan Penanganan Covid-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256243&amp;jud...</td>\n",
" <td>Alhamdulillah, Pasien 01 Covid-19 di Sumedang ...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 14:12 WIB</td>\n",
" <td>SETELAH berulang kali dilakukan test swab  ter...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256242&amp;jud...</td>\n",
" <td>Langgar Protokol Kesehatan Saat PSBB, 168 Pabr...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 13:45 WIB</td>\n",
" <td>KETUA Gugus Tugas Percepatan Penanganan Covid-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256241&amp;jud...</td>\n",
" <td>Pekan Kedua PSBB, Pelanggar Semakin Menurun</td>\n",
" <td>Brilliant Awal</td>\n",
" <td>Senin, 4 Mei 2020 | 13:42 WIB</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256239&amp;jud...</td>\n",
" <td>Tingkat Laju Kasus Baru Pasien Positif Covid-...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 13:33 WIB</td>\n",
" <td>Ketua Gugus Tugas Percepatan Penanganan Covid-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256234&amp;jud...</td>\n",
" <td>Penerapan PSBB, Sejumlah Provinsi Alami Perlam...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 13:17 WIB</td>\n",
" <td>GUGUS Tugas Percepatan Penanganan Covid-19 men...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256227&amp;jud...</td>\n",
" <td>Lima Mahasiswa Indonesia di Rusia Positif Teri...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 12:10 WIB</td>\n",
" <td>LIMA mahasiswa Indonesia di Moskow, Rusia, din...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256223&amp;jud...</td>\n",
" <td>Di Tengah Pandemi Covid-19, Desa Tenjolaya Pas...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 11:50 WIB</td>\n",
" <td>PANDEMI virus corona atau Covid-19 berdampak p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256221&amp;jud...</td>\n",
" <td>Presiden : Pantau Ketat Klaster-Klaster Penula...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 10:53 WIB</td>\n",
" <td>PRESIDEN Joko Widodo minta klaster-klaster pen...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256220&amp;jud...</td>\n",
" <td>Presiden Ingin Tiap Daerah Terapkan PSBB denga...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 10:50 WIB</td>\n",
" <td>PRESIDEN RI Joko Widodo menginginkan setiap da...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256219&amp;jud...</td>\n",
" <td>Brasil Laporkan 4.588 Kasus Baru Virus Corona ...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 10:46 WIB</td>\n",
" <td>KEMENTERIAN Kesehatan Brasil melaporkan 4.588 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256216&amp;jud...</td>\n",
" <td>Positif Covid-19 di AS Naik 29.671 Kasus, Juml...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 09:59 WIB</td>\n",
" <td>PUSAT Pengendalian dan Pencegahan Penyakit (CD...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256212&amp;jud...</td>\n",
" <td>Olahraga Dunia Mencoba Keluar dari Bayang-Baya...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 09:34 WIB</td>\n",
" <td>Peristiwa-peristiwa besar olahraga, termasuk O...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256210&amp;jud...</td>\n",
" <td>Menlu AS : Ada Bukti Siginifikan Virus Corona ...</td>\n",
" <td>Efrie Christianto</td>\n",
" <td>Senin, 4 Mei 2020 | 09:28 WIB</td>\n",
" <td>MENTERI Luar Negeri Amerika Serikat Mike Pompe...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256204&amp;jud...</td>\n",
" <td>DPR: Restrukturisasi Kredit Akibat Pandemi Cov...</td>\n",
" <td>Rosyad Abdullah</td>\n",
" <td>Senin, 4 Mei 2020 | 09:15 WIB</td>\n",
" <td>ANGGOTA Komisi XI DPR RI Puteri Anetta Komarud...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256196&amp;jud...</td>\n",
" <td>KTT Gerakan Negara-Negara Non-Blok Bahas Covid...</td>\n",
" <td>Rosyad Abdullah</td>\n",
" <td>Senin, 4 Mei 2020 | 08:37 WIB</td>\n",
" <td>PERDANA Menteri Malaysia Muhyiddin Yassin akan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>Glamedianews.com</td>\n",
" <td>https://www.galamedianews.com?arsip=256191&amp;jud...</td>\n",
" <td>Bisakah Virus Corona Bertahan di Paket Belanja...</td>\n",
" <td>Brilliant Awal</td>\n",
" <td>Senin, 4 Mei 2020 | 06:55 WIB</td>\n",
" <td>SAAT Pembatasan Sosial Berskala Besar (PSBB), ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Glamedianews.com https://www.galamedianews.com?arsip=256313&jud... \n",
"1 Glamedianews.com https://www.galamedianews.com?arsip=256312&jud... \n",
"2 Glamedianews.com https://www.galamedianews.com?arsip=256316&jud... \n",
"3 Glamedianews.com https://www.galamedianews.com?arsip=256314&jud... \n",
"4 Glamedianews.com https://www.galamedianews.com?arsip=256309&jud... \n",
"5 Glamedianews.com https://www.galamedianews.com?arsip=256303&jud... \n",
"6 Glamedianews.com https://www.galamedianews.com?arsip=256299&jud... \n",
"7 Glamedianews.com https://www.galamedianews.com?arsip=256297&jud... \n",
"8 Glamedianews.com https://www.galamedianews.com?arsip=256293&jud... \n",
"9 Glamedianews.com https://www.galamedianews.com?arsip=256291&jud... \n",
"10 Glamedianews.com https://www.galamedianews.com?arsip=256290&jud... \n",
"11 Glamedianews.com https://www.galamedianews.com?arsip=256288&jud... \n",
"12 Glamedianews.com https://www.galamedianews.com?arsip=256287&jud... \n",
"13 Glamedianews.com https://www.galamedianews.com?arsip=256284&jud... \n",
"14 Glamedianews.com https://www.galamedianews.com?arsip=256283&jud... \n",
"15 Glamedianews.com https://www.galamedianews.com?arsip=256281&jud... \n",
"16 Glamedianews.com https://www.galamedianews.com?arsip=256278&jud... \n",
"17 Glamedianews.com https://www.galamedianews.com?arsip=256277&jud... \n",
"18 Glamedianews.com https://www.galamedianews.com?arsip=256275&jud... \n",
"19 Glamedianews.com https://www.galamedianews.com?arsip=256274&jud... \n",
"20 Glamedianews.com https://www.galamedianews.com?arsip=256272&jud... \n",
"21 Glamedianews.com https://www.galamedianews.com?arsip=256273&jud... \n",
"22 Glamedianews.com https://www.galamedianews.com?arsip=256261&jud... \n",
"23 Glamedianews.com https://www.galamedianews.com?arsip=256258&jud... \n",
"24 Glamedianews.com https://www.galamedianews.com?arsip=256265&jud... \n",
"25 Glamedianews.com https://www.galamedianews.com?arsip=256260&jud... \n",
"26 Glamedianews.com https://www.galamedianews.com?arsip=256255&jud... \n",
"27 Glamedianews.com https://www.galamedianews.com?arsip=256254&jud... \n",
"28 Glamedianews.com https://www.galamedianews.com?arsip=256251&jud... \n",
"29 Glamedianews.com https://www.galamedianews.com?arsip=256252&jud... \n",
"30 Glamedianews.com https://www.galamedianews.com?arsip=256237&jud... \n",
"31 Glamedianews.com https://www.galamedianews.com?arsip=256249&jud... \n",
"32 Glamedianews.com https://www.galamedianews.com?arsip=256248&jud... \n",
"33 Glamedianews.com https://www.galamedianews.com?arsip=256243&jud... \n",
"34 Glamedianews.com https://www.galamedianews.com?arsip=256242&jud... \n",
"35 Glamedianews.com https://www.galamedianews.com?arsip=256241&jud... \n",
"36 Glamedianews.com https://www.galamedianews.com?arsip=256239&jud... \n",
"37 Glamedianews.com https://www.galamedianews.com?arsip=256234&jud... \n",
"38 Glamedianews.com https://www.galamedianews.com?arsip=256227&jud... \n",
"39 Glamedianews.com https://www.galamedianews.com?arsip=256223&jud... \n",
"40 Glamedianews.com https://www.galamedianews.com?arsip=256221&jud... \n",
"41 Glamedianews.com https://www.galamedianews.com?arsip=256220&jud... \n",
"42 Glamedianews.com https://www.galamedianews.com?arsip=256219&jud... \n",
"43 Glamedianews.com https://www.galamedianews.com?arsip=256216&jud... \n",
"44 Glamedianews.com https://www.galamedianews.com?arsip=256212&jud... \n",
"45 Glamedianews.com https://www.galamedianews.com?arsip=256210&jud... \n",
"46 Glamedianews.com https://www.galamedianews.com?arsip=256204&jud... \n",
"47 Glamedianews.com https://www.galamedianews.com?arsip=256196&jud... \n",
"48 Glamedianews.com https://www.galamedianews.com?arsip=256191&jud... \n",
"\n",
" title author \\\n",
"0 PMI Kota Sukabumi Bentuk Kelurahan Siaga Covid-19 Kiki Kurnia \n",
"1 Sudah 101 Kasus Hoaks Covid-19 di Medsos Diung... Kiki Kurnia \n",
"2 Panglima TNI Perintahkan Seluruh Jajaran TNI U... Kiki Kurnia \n",
"3 Presiden Jokowi Ikuti KTT Gerakan Non-Blok Vir... Kiki Kurnia \n",
"4 Pemkab Purwakarta Pastikan sebelum PSBB Bansos... Kiki Kurnia \n",
"5 Prediksi Trump, Angka Kematian Covid-19 di AS ... Lucky M. Lukman \n",
"6 Terhimpit Masalah Ekonomi Akibat Covid-19, Rat... Lucky M. Lukman \n",
"7 Anies Bakal \"Hukum\" Warganya yang Nekat Mudik ... Lucky M. Lukman \n",
"8 Rilis Animasi Tentara Terakota vs Patung Liber... Mia Fahrani \n",
"9 Terus Merebak Covid-19, Forkopkes Garut Sampai... Kiki Kurnia \n",
"10 Dampak Pandemi Covid-19, 14 Belasan Perusahaan... Lucky M. Lukman \n",
"11 Data dan Fakta Pelaksanaan PSBB di Kota Bandun... Lucky M. Lukman \n",
"12 Hasil PSBB di Kota Cimahi Saling Bertolak Bela... Lucky M. Lukman \n",
"13 Pemkab Ajak Milenial Cegah Corona, Teh Nia : M... Kiki Kurnia \n",
"14 Di Subang, Tiga Orang Pasien Positif Sembuh da... Lucky M. Lukman \n",
"15 Cegah Sebaran Covid-19 Kota Tasikmalaya Berlak... Kiki Kurnia \n",
"16 Reproduksi Virus Corona di Kota Bandung Diklai... Lucky M. Lukman \n",
"17 Covid-19 Masih Mengganas, Jepang Perpanjang St... Lucky M. Lukman \n",
"18 Kota Bandung Bakal Ikuti PSBB Provinsi Hingga ... Lucky M. Lukman \n",
"19 Yurianto: Batasi Keluar Rumah untuk Hentikan C... Kiki Kurnia \n",
"20 Fokus Penegakan Hukum Sumedang Perpanjang Masa... Kiki Kurnia \n",
"21 Wabup Garut : Pelanggar PSBB Akan Diberi Sanks... Kiki Kurnia \n",
"22 Jumlah Pasien Positif dan yang Sembuh Covid-19... Kiki Kurnia \n",
"23 Grafik Semakin Landai, Ini Dia Data Pasien Cov... Kiki Kurnia \n",
"24 Pasutri Disarankan Menunda Kehamilan di Masa P... Lucky M. Lukman \n",
"25 Sah, PSBB se-Jawa Barat Dimulai Tanggal 6 Mei ... Lucky M. Lukman \n",
"26 Legislator Pantau Penyaluran Bantuan untuk War... Lucky M. Lukman \n",
"27 Gunakan Lego dalam Animasi \"Once Upon a Virus\"... Lucky M. Lukman \n",
"28 Menhan Inggris Ungkapkan Pengalaman Tertular C... Rosyad Abdullah \n",
"29 Emil: Tiga Kunci Tekan Sebaran Covid-19 Lucky M. Lukman \n",
"30 Berangsur Pulih dari Covid-19, Pasar Mobil di ... Efrie Christianto \n",
"31 DPR RI Dukung Perppu Covid-19 Efrie Christianto \n",
"32 Dampak Covid-19, 70.367 Pekerja Migran Kembali... Lucky M. Lukman \n",
"33 Alhamdulillah, Pasien 01 Covid-19 di Sumedang ... Efrie Christianto \n",
"34 Langgar Protokol Kesehatan Saat PSBB, 168 Pabr... Efrie Christianto \n",
"35 Pekan Kedua PSBB, Pelanggar Semakin Menurun Brilliant Awal \n",
"36 Tingkat Laju Kasus Baru Pasien Positif Covid-... Efrie Christianto \n",
"37 Penerapan PSBB, Sejumlah Provinsi Alami Perlam... Efrie Christianto \n",
"38 Lima Mahasiswa Indonesia di Rusia Positif Teri... Efrie Christianto \n",
"39 Di Tengah Pandemi Covid-19, Desa Tenjolaya Pas... Efrie Christianto \n",
"40 Presiden : Pantau Ketat Klaster-Klaster Penula... Efrie Christianto \n",
"41 Presiden Ingin Tiap Daerah Terapkan PSBB denga... Efrie Christianto \n",
"42 Brasil Laporkan 4.588 Kasus Baru Virus Corona ... Efrie Christianto \n",
"43 Positif Covid-19 di AS Naik 29.671 Kasus, Juml... Efrie Christianto \n",
"44 Olahraga Dunia Mencoba Keluar dari Bayang-Baya... Efrie Christianto \n",
"45 Menlu AS : Ada Bukti Siginifikan Virus Corona ... Efrie Christianto \n",
"46 DPR: Restrukturisasi Kredit Akibat Pandemi Cov... Rosyad Abdullah \n",
"47 KTT Gerakan Negara-Negara Non-Blok Bahas Covid... Rosyad Abdullah \n",
"48 Bisakah Virus Corona Bertahan di Paket Belanja... Brilliant Awal \n",
"\n",
" date_time \\\n",
"0 Senin, 4 Mei 2020 | 23:36 WIB \n",
"1 Senin, 4 Mei 2020 | 23:01 WIB \n",
"2 Senin, 4 Mei 2020 | 22:50 WIB \n",
"3 Senin, 4 Mei 2020 | 22:39 WIB \n",
"4 Senin, 4 Mei 2020 | 21:59 WIB \n",
"5 Senin, 4 Mei 2020 | 21:06 WIB \n",
"6 Senin, 4 Mei 2020 | 20:50 WIB \n",
"7 Senin, 4 Mei 2020 | 20:42 WIB \n",
"8 Senin, 4 Mei 2020 | 20:20 WIB \n",
"9 Senin, 4 Mei 2020 | 20:05 WIB \n",
"10 Senin, 4 Mei 2020 | 20:05 WIB \n",
"11 Senin, 4 Mei 2020 | 19:55 WIB \n",
"12 Senin, 4 Mei 2020 | 19:47 WIB \n",
"13 Senin, 4 Mei 2020 | 18:55 WIB \n",
"14 Senin, 4 Mei 2020 | 18:51 WIB \n",
"15 Senin, 4 Mei 2020 | 18:41 WIB \n",
"16 Senin, 4 Mei 2020 | 18:34 WIB \n",
"17 Senin, 4 Mei 2020 | 18:22 WIB \n",
"18 Senin, 4 Mei 2020 | 17:59 WIB \n",
"19 Senin, 4 Mei 2020 | 17:58 WIB \n",
"20 Senin, 4 Mei 2020 | 17:47 WIB \n",
"21 Senin, 4 Mei 2020 | 17:42 WIB \n",
"22 Senin, 4 Mei 2020 | 17:27 WIB \n",
"23 Senin, 4 Mei 2020 | 17:12 WIB \n",
"24 Senin, 4 Mei 2020 | 17:08 WIB \n",
"25 Senin, 4 Mei 2020 | 16:28 WIB \n",
"26 Senin, 4 Mei 2020 | 15:55 WIB \n",
"27 Senin, 4 Mei 2020 | 15:34 WIB \n",
"28 Senin, 4 Mei 2020 | 14:59 WIB \n",
"29 Senin, 4 Mei 2020 | 14:54 WIB \n",
"30 Senin, 4 Mei 2020 | 14:52 WIB \n",
"31 Senin, 4 Mei 2020 | 14:38 WIB \n",
"32 Senin, 4 Mei 2020 | 14:37 WIB \n",
"33 Senin, 4 Mei 2020 | 14:12 WIB \n",
"34 Senin, 4 Mei 2020 | 13:45 WIB \n",
"35 Senin, 4 Mei 2020 | 13:42 WIB \n",
"36 Senin, 4 Mei 2020 | 13:33 WIB \n",
"37 Senin, 4 Mei 2020 | 13:17 WIB \n",
"38 Senin, 4 Mei 2020 | 12:10 WIB \n",
"39 Senin, 4 Mei 2020 | 11:50 WIB \n",
"40 Senin, 4 Mei 2020 | 10:53 WIB \n",
"41 Senin, 4 Mei 2020 | 10:50 WIB \n",
"42 Senin, 4 Mei 2020 | 10:46 WIB \n",
"43 Senin, 4 Mei 2020 | 09:59 WIB \n",
"44 Senin, 4 Mei 2020 | 09:34 WIB \n",
"45 Senin, 4 Mei 2020 | 09:28 WIB \n",
"46 Senin, 4 Mei 2020 | 09:15 WIB \n",
"47 Senin, 4 Mei 2020 | 08:37 WIB \n",
"48 Senin, 4 Mei 2020 | 06:55 WIB \n",
"\n",
" paragraf \n",
"0 PALANG Merah Indonesia (PMI) Kota Sukabumi, me... \n",
"1 KASUS penyebaran informasi hoaks soal Covid-19... \n",
"2 PANGLIMA TNI Marsekal TNI Dr. (H.C.) Hadi Tjah... \n",
"3 PRESIDEN Joko Widodo (Jokowi) mengikuti Konfer... \n",
"4 PEMERINTAH Kabupaten Purwakarta, memastikan ba... \n",
"5 PRESIDEN Amerika Serikat, Donald Trump, mempre... \n",
"6 BERBAGAI sektor bisnis terpukul akibat mewabah... \n",
"7 PANDEMI Covid-19 berhasil membuat masyarakat d... \n",
"8 Cina merilis tayangan animasi pendek berjudul ... \n",
"9 FORUM Komunikasi Organisasi Profesi Kesehatan ... \n",
"10 KAMAR Dagang dan Industri (Kadin) Kabupaten Ka... \n",
"11 MENJELANG dua pekan berlangsungnya Pembatasan ... \n",
"12 PEMBATASAN Sosial Berskala Besar (PSBB) di Kot... \n",
"13 PENYEBARAN virus corona telah menjadi masalah ... \n",
"14 PASIEN positif Covid-19 di Kabupaten Subang ya... \n",
"15 PEMERINTAH Kota Tasikmalaya akhirnya memberlak... \n",
"16 KOORDINATOR Bidang Perencanaan, Data, Kajian d... \n",
"17 PERDANA Menteri Jepang Shinzo Abe pada Senin (... \n",
"18 PEMKOT Bandung memastikan Pelaksanaan pembatas... \n",
"19 JURU Bicara Pemerintah untuk Penanganan Covid-... \n",
"20 PEMERINTAH Kabupaten Sumedang memperpanjang ma... \n",
"21 WAKIL Bupati (Wabup) Garut Helmi Budiman menya... \n",
"22 JUMLAH pasien positif virus corona RI masih be... \n",
"23 TIM Gugus Tugas Percepatan Penanganan Covid-19... \n",
"24 BADAN Kependudukan dan Keluarga Berencana Nasi... \n",
"25 DUA hari menjelang pemberlakukan PSBB di Wilay... \n",
"26 SEJUMLAH anggota DPRD Kabupaten Bandung dari D... \n",
"27 CINA merilis film animasi pendek berjudul \"Onc... \n",
"28 MENGIDAP COVID-19 \"sangat mengerikan\", ungkap ... \n",
"29 UNTUK menekan persebaran Covid-19 di Jawa Bara... \n",
"30 PENJUALAN beberapa merek mobil di China tumbuh... \n",
"31 BADAN Anggaran DPR RI mendukung Peraturan Peme... \n",
"32 KETUA Gugus Tugas Percepatan Penanganan Covid-... \n",
"33 SETELAH berulang kali dilakukan test swab  ter... \n",
"34 KETUA Gugus Tugas Percepatan Penanganan Covid-... \n",
"35 \n",
"36 Ketua Gugus Tugas Percepatan Penanganan Covid-... \n",
"37 GUGUS Tugas Percepatan Penanganan Covid-19 men... \n",
"38 LIMA mahasiswa Indonesia di Moskow, Rusia, din... \n",
"39 PANDEMI virus corona atau Covid-19 berdampak p... \n",
"40 PRESIDEN Joko Widodo minta klaster-klaster pen... \n",
"41 PRESIDEN RI Joko Widodo menginginkan setiap da... \n",
"42 KEMENTERIAN Kesehatan Brasil melaporkan 4.588 ... \n",
"43 PUSAT Pengendalian dan Pencegahan Penyakit (CD... \n",
"44 Peristiwa-peristiwa besar olahraga, termasuk O... \n",
"45 MENTERI Luar Negeri Amerika Serikat Mike Pompe... \n",
"46 ANGGOTA Komisi XI DPR RI Puteri Anetta Komarud... \n",
"47 PERDANA Menteri Malaysia Muhyiddin Yassin akan... \n",
"48 SAAT Pembatasan Sosial Berskala Besar (PSBB), ... "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"news = get_news(main_url, file_name = 'glamedianews')\n",
"news"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form liputan6.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for a in soup.find_all('a', href=True):\n",
" if a.text:\n",
" section_list.append(a['href'])\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1 or i.find('psbb')!=-1 or i.find('social-distancing')!=-1:\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" for i in url_list:\n",
" if (i.find('read') == -1):\n",
" not_news.append(i) \n",
" for j in not_news:\n",
" url_list.remove(j)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_title = soup.find(\"h1\", class_=\"read-page--header--title entry-title\")\n",
" get_title_photosnews = soup.find(\"h2\", class_=\"read-page--photo-tag--header__title\")\n",
" if(get_title):\n",
" titles.append(get_title.text)\n",
" elif(get_title_photosnews):\n",
" titles.append(get_title_photosnews.text)\n",
" else:\n",
" titles.append('None')\n",
" return titles "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_author = soup.find(\"span\", class_=\"read-page--header--author__name fn\")\n",
" get_editor = soup.find(\"div\", class_=\"read-page--photo-tag--header__credits-user\")\n",
" if(get_author):\n",
" author.append(get_author.text)\n",
" elif(get_editor):\n",
" author.append(get_editor.text)\n",
" else:\n",
" author.append('None')\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" datetime = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_datetime = soup.find(\"time\", class_=\"read-page--header--author__datetime updated\")\n",
" get_datetime_photosnews = soup.find(\"time\", class_=\"read-page--photo-tag--header__datetime updated\")\n",
" if(get_datetime):\n",
" datetime.append(get_datetime.text)\n",
" elif(get_datetime_photosnews):\n",
" datetime.append(get_datetime_photosnews.text)\n",
" else:\n",
" datetime.append('None')\n",
" return datetime"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], datetime = []):\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
"\n",
" for i,j in enumerate(main_url):\n",
" a = parse_html(request_url(j))\n",
" pragraf_text = a.find_all(\"div\", class_=\"article-content-body__item-page\")\n",
" pragraf_photos = a.find_all(\"div\", class_=\"read-page--photo-tag--header__content\")\n",
" if(pragraf_text):\n",
" for k in pragraf_text:\n",
" newparagraf = k.find_all('p')\n",
" for x in newparagraf:\n",
" paragraf.append(x.text)\n",
" else:\n",
" for k in pragraf_photos:\n",
" paragraf.append(k.text)\n",
" data.append(paragraf)\n",
" paragraf = []\n",
" for i in data:\n",
" join.append(' '.join(i))\n",
" \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news': 'Liputan6.com', 'link' : j, 'title': titles[i], 'author' : author[i], 'date_time': datetime[i], 'paragraf' : join[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona_news = find_corona(section)\n",
" titles = title(corona_news)\n",
" author = writer(corona_news)\n",
" datetime = date(corona_news)\n",
" text = collect_text(corona_news, titles, author, datetime)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"url=[\"https://www.liputan6.com/\"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/otomotif/read/4245824...</td>\n",
" <td>Lewat Tembang 'Ojo Mudik', Mendiang Didi Kempo...</td>\n",
" <td>Liputan6.com</td>\n",
" <td>05 Mei 2020, 14:01 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Didi Kempot merupakan ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4216608/lek...</td>\n",
" <td>Lekas Pulih Indonesia, Ayo Bantu Mereka yang T...</td>\n",
" <td>Liputan6.com</td>\n",
" <td>01 Apr 2020, 13:03 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Liputan6.com bersama B...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/lifestyle/read/424571...</td>\n",
" <td>Krisis Corona Covid-19, China Fashion Week 202...</td>\n",
" <td>Putu Elmira</td>\n",
" <td>05 Mei 2020, 14:01 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Ada yang berbeda denga...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/regional/read/4245208...</td>\n",
" <td>Kehabisan Reagen, Gorontalo Menumpang Periksa ...</td>\n",
" <td>Yoseph Ikanubun</td>\n",
" <td>05 Mei 2020, 14:00 WIB</td>\n",
" <td>Liputan6.com, Manado - Laboratorium Polymerase...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/global/read/4245828/p...</td>\n",
" <td>Pandemi Corona COVID-19 Mulai Mereda, Korsel A...</td>\n",
" <td>Tommy Kurnia</td>\n",
" <td>05 Mei 2020, 14:00 WIB</td>\n",
" <td>Liputan6.com, Seoul - Pemerintah Korea Selatan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/citizen6/read/4245695...</td>\n",
" <td>Diklaim Bisa Cegah Corona, Wanita Ini Rutin Mi...</td>\n",
" <td>Camelia</td>\n",
" <td>05 Mei 2020, 14:00 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Pandemi Corona Covid-1...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245103/fot...</td>\n",
" <td>FOTO: Patut Dicontoh, Warga di Perumahan Ini D...</td>\n",
" <td>Johan Fatzry</td>\n",
" <td>04 Mei 2020, 18:00 WIB</td>\n",
" <td>Bahan pangan gratis tersebut diperuntukkan bag...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245178/vid...</td>\n",
" <td>VIDEO: Kabar Baik, Laju Kasus Positif Covid-19...</td>\n",
" <td>Chandra Bayu Witantra</td>\n",
" <td>04 Mei 2020, 20:00 WIB</td>\n",
" <td>Liputan6.com, Jakarta Ketua Gugus Tugas Percep...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/showbiz/read/4245155/...</td>\n",
" <td>Ari Lasso Beri Semangat pada Masyarakat Lalui ...</td>\n",
" <td>Meiristica Nurul</td>\n",
" <td>05 Mei 2020, 13:40 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Ari Lasso tak bisa tin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://hot.liputan6.com/read/4245848/7-potret...</td>\n",
" <td>7 Potret Belanja Sayur Online saat Social Dist...</td>\n",
" <td>Novita Ayuningtyas</td>\n",
" <td>05 Mei 2020, 13:35 WIB</td>\n",
" <td>Liputan6.com, Jakarta Selama pandemi corona CO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245749/fot...</td>\n",
" <td>FOTO: Perawatan Pasien Gangguan Mental di Masa...</td>\n",
" <td>Arny Christika Putri</td>\n",
" <td>05 Mei 2020, 13:30 WIB</td>\n",
" <td>Pihak panti melakukan pencegahan penyebaran vi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245868/ceg...</td>\n",
" <td>Cegah PHK saat Corona, Zulhas Dorong Subsidi G...</td>\n",
" <td>Nanda Perdana Putra</td>\n",
" <td>05 Mei 2020, 13:27 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Ketua Umum PAN Zulkifl...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/ramadan/read/4245209/...</td>\n",
" <td>Ramadan di Tengah Pandemi, Wamenag Zainut Ajak...</td>\n",
" <td>Liputan6.com</td>\n",
" <td>05 Mei 2020, 13:20 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Wakil Menteri Agama (W...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245841/upd...</td>\n",
" <td>Update Corona DKI Jakarta 5 Mei 2020: 4.641 Po...</td>\n",
" <td>Ika Defianti</td>\n",
" <td>05 Mei 2020, 13:18 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Jumlah kasus positif v...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/bisnis/read/4245822/r...</td>\n",
" <td>Rumah Sakit BUMN Telah Rawat 2.000 Pasien Viru...</td>\n",
" <td>Tira Santia</td>\n",
" <td>05 Mei 2020, 13:15 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Kementerian Badan Usah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245842/pem...</td>\n",
" <td>Pemkot Tangerang Jadikan 2 Puskesmas Tempat Is...</td>\n",
" <td>Pramita Tristiawati</td>\n",
" <td>05 Mei 2020, 13:12 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Dua Puskesmas di Kota ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245829/pan...</td>\n",
" <td>Pandemi Corona, Tunjangan PNS DKI Dipotong hin...</td>\n",
" <td>Ika Defianti</td>\n",
" <td>05 Mei 2020, 13:11 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Kepala Badan Kepegawai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/news/read/4245825/men...</td>\n",
" <td>Mendagri Minta Pemkot Depok Selesaikan Masalah...</td>\n",
" <td>Fachrur Rozie</td>\n",
" <td>05 Mei 2020, 13:09 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Menteri Dalam Negeri (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/global/read/4245769/w...</td>\n",
" <td>Wabah Corona COVID-19 Reda, 85 juta Warga Chin...</td>\n",
" <td>Natasha Khairunisa Amani</td>\n",
" <td>05 Mei 2020, 13:02 WIB</td>\n",
" <td>Liputan6.com, Beijing - Kementerian Kebudayaan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/tekno/read/4245801/bs...</td>\n",
" <td>BSA Rilis Panduan Atasi Kejahatan Siber di Ten...</td>\n",
" <td>Andina Librianty</td>\n",
" <td>05 Mei 2020, 13:01 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Software Alliance (BSA...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/ramadan/read/4245759/...</td>\n",
" <td>Kampanye Ramadan, Ibu Negara UEA Minta Rakyat ...</td>\n",
" <td>Tommy Kurnia</td>\n",
" <td>05 Mei 2020, 13:00 WIB</td>\n",
" <td>Liputan6.com, Abu Dhabi - Pemerintah Uni Emira...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/health/read/4245626/a...</td>\n",
" <td>Akurasi Tak Setepat PCR, Gugus Tugas Sebut Rap...</td>\n",
" <td>Giovani Dio Prasasti</td>\n",
" <td>05 Mei 2020, 13:00 WIB</td>\n",
" <td>Liputan6.com, Jakarta Wiku Adisasmito, Ketua T...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/bisnis/read/4245792/s...</td>\n",
" <td>Sulit Jual Produk hingga PHK Karyawan, Ini Daf...</td>\n",
" <td>Tira Santia</td>\n",
" <td>05 Mei 2020, 12:50 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Direktur Jenderal Indu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/cek-fakta/read/424524...</td>\n",
" <td>VIDEO CEK FAKTA: Kriminalitas Meningkat di Mas...</td>\n",
" <td>Ratu Annisaa Suryasumirat</td>\n",
" <td>04 Mei 2020, 20:55 WIB</td>\n",
" <td>Liputan6.com, Jakarta Angka kejahatan meningka...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Liputan6.com</td>\n",
" <td>https://www.liputan6.com/cek-fakta/read/424505...</td>\n",
" <td>Cek Fakta: Tidak Benar Presiden Jokowi Menaikk...</td>\n",
" <td>Hanz Jimenez Salim</td>\n",
" <td>04 Mei 2020, 17:33 WIB</td>\n",
" <td>Liputan6.com, Jakarta - Kabar tentang Presiden...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Liputan6.com https://www.liputan6.com/otomotif/read/4245824... \n",
"1 Liputan6.com https://www.liputan6.com/news/read/4216608/lek... \n",
"2 Liputan6.com https://www.liputan6.com/lifestyle/read/424571... \n",
"3 Liputan6.com https://www.liputan6.com/regional/read/4245208... \n",
"4 Liputan6.com https://www.liputan6.com/global/read/4245828/p... \n",
"5 Liputan6.com https://www.liputan6.com/citizen6/read/4245695... \n",
"6 Liputan6.com https://www.liputan6.com/news/read/4245103/fot... \n",
"7 Liputan6.com https://www.liputan6.com/news/read/4245178/vid... \n",
"8 Liputan6.com https://www.liputan6.com/showbiz/read/4245155/... \n",
"9 Liputan6.com https://hot.liputan6.com/read/4245848/7-potret... \n",
"10 Liputan6.com https://www.liputan6.com/news/read/4245749/fot... \n",
"11 Liputan6.com https://www.liputan6.com/news/read/4245868/ceg... \n",
"12 Liputan6.com https://www.liputan6.com/ramadan/read/4245209/... \n",
"13 Liputan6.com https://www.liputan6.com/news/read/4245841/upd... \n",
"14 Liputan6.com https://www.liputan6.com/bisnis/read/4245822/r... \n",
"15 Liputan6.com https://www.liputan6.com/news/read/4245842/pem... \n",
"16 Liputan6.com https://www.liputan6.com/news/read/4245829/pan... \n",
"17 Liputan6.com https://www.liputan6.com/news/read/4245825/men... \n",
"18 Liputan6.com https://www.liputan6.com/global/read/4245769/w... \n",
"19 Liputan6.com https://www.liputan6.com/tekno/read/4245801/bs... \n",
"20 Liputan6.com https://www.liputan6.com/ramadan/read/4245759/... \n",
"21 Liputan6.com https://www.liputan6.com/health/read/4245626/a... \n",
"22 Liputan6.com https://www.liputan6.com/bisnis/read/4245792/s... \n",
"23 Liputan6.com https://www.liputan6.com/cek-fakta/read/424524... \n",
"24 Liputan6.com https://www.liputan6.com/cek-fakta/read/424505... \n",
"\n",
" title \\\n",
"0 Lewat Tembang 'Ojo Mudik', Mendiang Didi Kempo... \n",
"1 Lekas Pulih Indonesia, Ayo Bantu Mereka yang T... \n",
"2 Krisis Corona Covid-19, China Fashion Week 202... \n",
"3 Kehabisan Reagen, Gorontalo Menumpang Periksa ... \n",
"4 Pandemi Corona COVID-19 Mulai Mereda, Korsel A... \n",
"5 Diklaim Bisa Cegah Corona, Wanita Ini Rutin Mi... \n",
"6 FOTO: Patut Dicontoh, Warga di Perumahan Ini D... \n",
"7 VIDEO: Kabar Baik, Laju Kasus Positif Covid-19... \n",
"8 Ari Lasso Beri Semangat pada Masyarakat Lalui ... \n",
"9 7 Potret Belanja Sayur Online saat Social Dist... \n",
"10 FOTO: Perawatan Pasien Gangguan Mental di Masa... \n",
"11 Cegah PHK saat Corona, Zulhas Dorong Subsidi G... \n",
"12 Ramadan di Tengah Pandemi, Wamenag Zainut Ajak... \n",
"13 Update Corona DKI Jakarta 5 Mei 2020: 4.641 Po... \n",
"14 Rumah Sakit BUMN Telah Rawat 2.000 Pasien Viru... \n",
"15 Pemkot Tangerang Jadikan 2 Puskesmas Tempat Is... \n",
"16 Pandemi Corona, Tunjangan PNS DKI Dipotong hin... \n",
"17 Mendagri Minta Pemkot Depok Selesaikan Masalah... \n",
"18 Wabah Corona COVID-19 Reda, 85 juta Warga Chin... \n",
"19 BSA Rilis Panduan Atasi Kejahatan Siber di Ten... \n",
"20 Kampanye Ramadan, Ibu Negara UEA Minta Rakyat ... \n",
"21 Akurasi Tak Setepat PCR, Gugus Tugas Sebut Rap... \n",
"22 Sulit Jual Produk hingga PHK Karyawan, Ini Daf... \n",
"23 VIDEO CEK FAKTA: Kriminalitas Meningkat di Mas... \n",
"24 Cek Fakta: Tidak Benar Presiden Jokowi Menaikk... \n",
"\n",
" author date_time \\\n",
"0 Liputan6.com 05 Mei 2020, 14:01 WIB \n",
"1 Liputan6.com 01 Apr 2020, 13:03 WIB \n",
"2 Putu Elmira 05 Mei 2020, 14:01 WIB \n",
"3 Yoseph Ikanubun 05 Mei 2020, 14:00 WIB \n",
"4 Tommy Kurnia 05 Mei 2020, 14:00 WIB \n",
"5 Camelia 05 Mei 2020, 14:00 WIB \n",
"6 Johan Fatzry 04 Mei 2020, 18:00 WIB \n",
"7 Chandra Bayu Witantra 04 Mei 2020, 20:00 WIB \n",
"8 Meiristica Nurul 05 Mei 2020, 13:40 WIB \n",
"9 Novita Ayuningtyas 05 Mei 2020, 13:35 WIB \n",
"10 Arny Christika Putri 05 Mei 2020, 13:30 WIB \n",
"11 Nanda Perdana Putra 05 Mei 2020, 13:27 WIB \n",
"12 Liputan6.com 05 Mei 2020, 13:20 WIB \n",
"13 Ika Defianti 05 Mei 2020, 13:18 WIB \n",
"14 Tira Santia 05 Mei 2020, 13:15 WIB \n",
"15 Pramita Tristiawati 05 Mei 2020, 13:12 WIB \n",
"16 Ika Defianti 05 Mei 2020, 13:11 WIB \n",
"17 Fachrur Rozie 05 Mei 2020, 13:09 WIB \n",
"18 Natasha Khairunisa Amani 05 Mei 2020, 13:02 WIB \n",
"19 Andina Librianty 05 Mei 2020, 13:01 WIB \n",
"20 Tommy Kurnia 05 Mei 2020, 13:00 WIB \n",
"21 Giovani Dio Prasasti 05 Mei 2020, 13:00 WIB \n",
"22 Tira Santia 05 Mei 2020, 12:50 WIB \n",
"23 Ratu Annisaa Suryasumirat 04 Mei 2020, 20:55 WIB \n",
"24 Hanz Jimenez Salim 04 Mei 2020, 17:33 WIB \n",
"\n",
" paragraf \n",
"0 Liputan6.com, Jakarta - Didi Kempot merupakan ... \n",
"1 Liputan6.com, Jakarta - Liputan6.com bersama B... \n",
"2 Liputan6.com, Jakarta - Ada yang berbeda denga... \n",
"3 Liputan6.com, Manado - Laboratorium Polymerase... \n",
"4 Liputan6.com, Seoul - Pemerintah Korea Selatan... \n",
"5 Liputan6.com, Jakarta - Pandemi Corona Covid-1... \n",
"6 Bahan pangan gratis tersebut diperuntukkan bag... \n",
"7 Liputan6.com, Jakarta Ketua Gugus Tugas Percep... \n",
"8 Liputan6.com, Jakarta - Ari Lasso tak bisa tin... \n",
"9 Liputan6.com, Jakarta Selama pandemi corona CO... \n",
"10 Pihak panti melakukan pencegahan penyebaran vi... \n",
"11 Liputan6.com, Jakarta - Ketua Umum PAN Zulkifl... \n",
"12 Liputan6.com, Jakarta - Wakil Menteri Agama (W... \n",
"13 Liputan6.com, Jakarta - Jumlah kasus positif v... \n",
"14 Liputan6.com, Jakarta - Kementerian Badan Usah... \n",
"15 Liputan6.com, Jakarta - Dua Puskesmas di Kota ... \n",
"16 Liputan6.com, Jakarta - Kepala Badan Kepegawai... \n",
"17 Liputan6.com, Jakarta - Menteri Dalam Negeri (... \n",
"18 Liputan6.com, Beijing - Kementerian Kebudayaan... \n",
"19 Liputan6.com, Jakarta - Software Alliance (BSA... \n",
"20 Liputan6.com, Abu Dhabi - Pemerintah Uni Emira... \n",
"21 Liputan6.com, Jakarta Wiku Adisasmito, Ketua T... \n",
"22 Liputan6.com, Jakarta - Direktur Jenderal Indu... \n",
"23 Liputan6.com, Jakarta Angka kejahatan meningka... \n",
"24 Liputan6.com, Jakarta - Kabar tentang Presiden... "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"liputan6_news=get_news(url, file_name = 'liputan6_satu')\n",
"liputan6_news"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form liputan6.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for a in soup.find_all('a', href=True):\n",
" if a.text:\n",
" section_list.append(a['href'])\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1:\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" for i in url_list:\n",
" if (i.find('read') == -1):\n",
" not_news.append(i) \n",
" for j in not_news:\n",
" url_list.remove(j)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" datetime = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_datetime = soup.find(\"div\", class_=\"namerep\")\n",
" if(get_datetime):\n",
" datetime.append(get_datetime.b.string)\n",
" else:\n",
" datetime.append('None')\n",
" return datetime"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_author = soup.find(\"div\", class_=\"namerep\")\n",
" if(get_author):\n",
" get_author = ' '.join(get_author.text.split())\n",
" author.append(get_author.partition(\", \")[0])\n",
" else:\n",
" author.append('None')\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" try:\n",
" get_title = soup.find(\"div\", class_=\"title\")\n",
" titles.append(get_title.h1.text)\n",
" except:\n",
" titles.append('None')\n",
" return titles"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], datetime = []):\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
" \n",
" for i,j in enumerate(main_url):\n",
" a = parse_html(request_url(j))\n",
" content = a.find('div', class_='read')\n",
" pragraf = content.find_all('p')\n",
" for k in pragraf:\n",
" s = ' '.join(k.text.split())\n",
" paragraf.append(s)\n",
" data.append(paragraf)\n",
" paragraf = []\n",
" for i in data:\n",
" join.append(' '.join(i))\n",
" \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news': 'Okezone.com', 'link' : j, 'title': titles[i], 'author' : author[i], 'date_time': datetime[i], 'paragraf' : join[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona_news = find_corona(section)\n",
" titles = title(corona_news)\n",
" author = writer(corona_news)\n",
" datetime = date(corona_news)\n",
" text = collect_text(corona_news, titles, author, datetime)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"url=[\"https://www.okezone.com/\"]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://lifestyle.okezone.com/read/2020/05/07/...</td>\n",
" <td>BKKBN Khawatirkan Ledakan Penduduk Pasca-Pande...</td>\n",
" <td>Muhammad Sukardi</td>\n",
" <td>Kamis 07 Mei 2020 12:00 WIB</td>\n",
" <td>PANDEMI virus corona COVID-19 memang membuat b...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://lifestyle.okezone.com/read/2020/05/07/...</td>\n",
" <td>Bermutasi, Virus Corona COVID-19 Makin Jinak?</td>\n",
" <td>Leonardus Selwyn Kangsaputra</td>\n",
" <td>Kamis 07 Mei 2020 11:45 WIB</td>\n",
" <td>PENELITI di Arizona State University (ASU), ba...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://sports.okezone.com/read/2020/05/07/40/...</td>\n",
" <td>Cegah Virus Corona, Menpora Beri Wejangan kepa...</td>\n",
" <td>Rivan Nasri Rachman</td>\n",
" <td>Kamis 07 Mei 2020 11:15 WIB</td>\n",
" <td>JAKARTA – PBSI baru saja merayakan hari jadi m...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://www.okezone.com/tren/read/2020/05/07/6...</td>\n",
" <td>Masjidil Haram Akan Dibuka, Corona di Mekkah M...</td>\n",
" <td>Mohammad Saifulloh</td>\n",
" <td>Kamis 07 Mei 2020 12:08 WIB</td>\n",
" <td>RENCANA pembukaan dua Masjid Suci di Arab Saud...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://lifestyle.okezone.com/read/2020/05/07/...</td>\n",
" <td>Bermutasi, Virus Corona COVID-19 Makin Jinak?</td>\n",
" <td>Leonardus Selwyn Kangsaputra</td>\n",
" <td>Kamis 07 Mei 2020 11:45 WIB</td>\n",
" <td>PENELITI di Arizona State University (ASU), ba...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://news.okezone.com/read/2020/05/07/18/22...</td>\n",
" <td>Wabah COVID-19, Umat Buddha di Berbagai Negara...</td>\n",
" <td>Rahman Asmardika</td>\n",
" <td>Kamis 07 Mei 2020 10:16 WIB</td>\n",
" <td>UMAT Buddha di berbagai belahan dunia merayaka...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://nasional.okezone.com/read/2020/05/07/3...</td>\n",
" <td>Relaksasi Moda Transportasi, Pengamat: Asa Mem...</td>\n",
" <td>Harits Tryan Akhmad</td>\n",
" <td>Kamis 07 Mei 2020 07:03 WIB</td>\n",
" <td>JAKARTA - Pengamat kebijakan publik, Trubus Ra...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://news.okezone.com/read/2020/05/07/18/22...</td>\n",
" <td>Trump: Pandemi Virus Corona Adalah Serangan Te...</td>\n",
" <td>Rahman Asmardika</td>\n",
" <td>Kamis 07 Mei 2020 14:30 WIB</td>\n",
" <td>WASHINGTON - Presiden Amerika Serikat (AS) Don...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://www.okezone.com/tren/read/2020/05/07/6...</td>\n",
" <td>Seperti Umat Islam, Kelompok Ini Berzakat untu...</td>\n",
" <td>Muhammad Sukardi</td>\n",
" <td>Kamis 07 Mei 2020 14:21 WIB</td>\n",
" <td>Sekelompok profesional dari komunitas Dalit te...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://news.okezone.com/read/2020/05/07/512/2...</td>\n",
" <td>Terapkan PSBB, Status Kota Tegal Kembali Hijau...</td>\n",
" <td>Taufik Budi</td>\n",
" <td>Kamis 07 Mei 2020 14:14 WIB</td>\n",
" <td>TEGAL - Pembatasan Sosial Berskala Besar (PSBB...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://megapolitan.okezone.com/read/2020/05/0...</td>\n",
" <td>3 Penumpang Positif Covid-19, Walkot Bekasi: A...</td>\n",
" <td>Wisnu Yusep</td>\n",
" <td>Kamis 07 Mei 2020 14:10 WIB</td>\n",
" <td>BEKASI - Wali Kota Bekasi Rahmat Effendi menga...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://economy.okezone.com/read/2020/05/07/32...</td>\n",
" <td>Terpukul Covid-19, Qatar Airways Bakal PHK Peg...</td>\n",
" <td>Kamis 07 Mei 2020 14:07 WIB</td>\n",
" <td>Kamis 07 Mei 2020 14:07 WIB</td>\n",
" <td>JAKARTA - Maskapai penerbangan internasional Q...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://megapolitan.okezone.com/read/2020/05/0...</td>\n",
" <td>Usaha Dekorasi Pernikahan Banting Setir Buat P...</td>\n",
" <td>Putra Ramadhani Astyawan</td>\n",
" <td>Kamis 07 Mei 2020 13:41 WIB</td>\n",
" <td>BOGOR - Siapa sangka pembuatan peti jenazah kh...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://news.okezone.com/read/2020/05/07/340/2...</td>\n",
" <td>Positif Corona, 13 Warga Desa di Polewali Mand...</td>\n",
" <td>Huzair Zainal</td>\n",
" <td>Kamis 07 Mei 2020 13:41 WIB</td>\n",
" <td>POLEWALI MANDAR – Sebanyak 13 orang yang beras...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://economy.okezone.com/read/2020/05/07/32...</td>\n",
" <td>Relaksasi Kebijakan Mudik Rawan Memperlama Pen...</td>\n",
" <td>Giri Hartomo</td>\n",
" <td>Kamis 07 Mei 2020 13:35 WIB</td>\n",
" <td>JAKARTA - Pemerintah membuka kembali transport...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Okezone.com</td>\n",
" <td>https://economy.okezone.com/read/2020/05/07/32...</td>\n",
" <td>Moda Transportasi Kembali Dibuka, Pandemi Covi...</td>\n",
" <td>Giri Hartomo</td>\n",
" <td>Kamis 07 Mei 2020 13:31 WIB</td>\n",
" <td>JAKARTA - Pemerintah memutuskan untuk membuka ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Okezone.com https://lifestyle.okezone.com/read/2020/05/07/... \n",
"1 Okezone.com https://lifestyle.okezone.com/read/2020/05/07/... \n",
"2 Okezone.com https://sports.okezone.com/read/2020/05/07/40/... \n",
"3 Okezone.com https://www.okezone.com/tren/read/2020/05/07/6... \n",
"4 Okezone.com https://lifestyle.okezone.com/read/2020/05/07/... \n",
"5 Okezone.com https://news.okezone.com/read/2020/05/07/18/22... \n",
"6 Okezone.com https://nasional.okezone.com/read/2020/05/07/3... \n",
"7 Okezone.com https://news.okezone.com/read/2020/05/07/18/22... \n",
"8 Okezone.com https://www.okezone.com/tren/read/2020/05/07/6... \n",
"9 Okezone.com https://news.okezone.com/read/2020/05/07/512/2... \n",
"10 Okezone.com https://megapolitan.okezone.com/read/2020/05/0... \n",
"11 Okezone.com https://economy.okezone.com/read/2020/05/07/32... \n",
"12 Okezone.com https://megapolitan.okezone.com/read/2020/05/0... \n",
"13 Okezone.com https://news.okezone.com/read/2020/05/07/340/2... \n",
"14 Okezone.com https://economy.okezone.com/read/2020/05/07/32... \n",
"15 Okezone.com https://economy.okezone.com/read/2020/05/07/32... \n",
"\n",
" title \\\n",
"0 BKKBN Khawatirkan Ledakan Penduduk Pasca-Pande... \n",
"1 Bermutasi, Virus Corona COVID-19 Makin Jinak? \n",
"2 Cegah Virus Corona, Menpora Beri Wejangan kepa... \n",
"3 Masjidil Haram Akan Dibuka, Corona di Mekkah M... \n",
"4 Bermutasi, Virus Corona COVID-19 Makin Jinak? \n",
"5 Wabah COVID-19, Umat Buddha di Berbagai Negara... \n",
"6 Relaksasi Moda Transportasi, Pengamat: Asa Mem... \n",
"7 Trump: Pandemi Virus Corona Adalah Serangan Te... \n",
"8 Seperti Umat Islam, Kelompok Ini Berzakat untu... \n",
"9 Terapkan PSBB, Status Kota Tegal Kembali Hijau... \n",
"10 3 Penumpang Positif Covid-19, Walkot Bekasi: A... \n",
"11 Terpukul Covid-19, Qatar Airways Bakal PHK Peg... \n",
"12 Usaha Dekorasi Pernikahan Banting Setir Buat P... \n",
"13 Positif Corona, 13 Warga Desa di Polewali Mand... \n",
"14 Relaksasi Kebijakan Mudik Rawan Memperlama Pen... \n",
"15 Moda Transportasi Kembali Dibuka, Pandemi Covi... \n",
"\n",
" author date_time \\\n",
"0 Muhammad Sukardi Kamis 07 Mei 2020 12:00 WIB \n",
"1 Leonardus Selwyn Kangsaputra Kamis 07 Mei 2020 11:45 WIB \n",
"2 Rivan Nasri Rachman Kamis 07 Mei 2020 11:15 WIB \n",
"3 Mohammad Saifulloh Kamis 07 Mei 2020 12:08 WIB \n",
"4 Leonardus Selwyn Kangsaputra Kamis 07 Mei 2020 11:45 WIB \n",
"5 Rahman Asmardika Kamis 07 Mei 2020 10:16 WIB \n",
"6 Harits Tryan Akhmad Kamis 07 Mei 2020 07:03 WIB \n",
"7 Rahman Asmardika Kamis 07 Mei 2020 14:30 WIB \n",
"8 Muhammad Sukardi Kamis 07 Mei 2020 14:21 WIB \n",
"9 Taufik Budi Kamis 07 Mei 2020 14:14 WIB \n",
"10 Wisnu Yusep Kamis 07 Mei 2020 14:10 WIB \n",
"11 Kamis 07 Mei 2020 14:07 WIB Kamis 07 Mei 2020 14:07 WIB \n",
"12 Putra Ramadhani Astyawan Kamis 07 Mei 2020 13:41 WIB \n",
"13 Huzair Zainal Kamis 07 Mei 2020 13:41 WIB \n",
"14 Giri Hartomo Kamis 07 Mei 2020 13:35 WIB \n",
"15 Giri Hartomo Kamis 07 Mei 2020 13:31 WIB \n",
"\n",
" paragraf \n",
"0 PANDEMI virus corona COVID-19 memang membuat b... \n",
"1 PENELITI di Arizona State University (ASU), ba... \n",
"2 JAKARTA – PBSI baru saja merayakan hari jadi m... \n",
"3 RENCANA pembukaan dua Masjid Suci di Arab Saud... \n",
"4 PENELITI di Arizona State University (ASU), ba... \n",
"5 UMAT Buddha di berbagai belahan dunia merayaka... \n",
"6 JAKARTA - Pengamat kebijakan publik, Trubus Ra... \n",
"7 WASHINGTON - Presiden Amerika Serikat (AS) Don... \n",
"8 Sekelompok profesional dari komunitas Dalit te... \n",
"9 TEGAL - Pembatasan Sosial Berskala Besar (PSBB... \n",
"10 BEKASI - Wali Kota Bekasi Rahmat Effendi menga... \n",
"11 JAKARTA - Maskapai penerbangan internasional Q... \n",
"12 BOGOR - Siapa sangka pembuatan peti jenazah kh... \n",
"13 POLEWALI MANDAR – Sebanyak 13 orang yang beras... \n",
"14 JAKARTA - Pemerintah membuka kembali transport... \n",
"15 JAKARTA - Pemerintah memutuskan untuk membuka ... "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"okezone_news=get_news(url, file_name = 'okezone_tiga')\n",
"okezone_news"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form liputan6.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for a in soup.find_all('a', href=True): \n",
" if a.text:\n",
" section_list.append(a['href'])\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1 or i.find('pandemi')!=-1:\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" for i in url_list:\n",
" if (i.find('tag') != -1 or i.find('https://') == -1 or i.find('topic')!=-1):\n",
" not_news.append(i) \n",
" for j in not_news:\n",
" url_list.remove(j)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" try :\n",
" h1 = soup.select('h1', {'class' : 'f50 black2 f400 crismon'})[0].text.strip()\n",
" titles.append(h1)\n",
" except:\n",
" titles.append('None')\n",
" return titles "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" try: \n",
" get_author = soup.find(\"div\", id=\"penulis\")\n",
" get_editor = soup.find(\"div\", id=\"editor\")\n",
" if(get_author):\n",
" get_author = ' '.join(get_author.text.split())\n",
" author.append(''.join(get_author.partition('Penulis: ')[2:]))\n",
" elif(get_editor):\n",
" get_editor = ' '.join(get_editor.text.split())\n",
" author.append(''.join(get_editor.partition('Editor: ')[2:]))\n",
" else:\n",
" author.append('None')\n",
" except:\n",
" author.append('None')\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" datetime = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" try:\n",
" get_datetime = soup.find(\"time\", class_=\"grey\")\n",
" if(get_datetime):\n",
" datetime.append(get_datetime.text)\n",
" else:\n",
" datetime.append('None')\n",
" except:\n",
" datetime.append('None')\n",
" return datetime"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], datetime = []):\n",
" paragraf = []\n",
" data = []\n",
" join =[]\n",
" isiteks = []\n",
" \n",
" for i,j in enumerate(main_url):\n",
" a = parse_html(request_url(j))\n",
" try:\n",
" content = a.find('div', class_='side-article txt-article')\n",
" pragraf = content.find_all('p')\n",
" for k in pragraf:\n",
" s = ' '.join(k.text.split())\n",
" paragraf.append(s)\n",
" data.append(paragraf)\n",
" paragraf = []\n",
" except:\n",
" data.append(paragraf)\n",
" for i in data:\n",
" join.append(' '.join(i))\n",
" \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news': 'Tribunnews.com', 'link' : j, 'title': titles[i], 'author' : author[i], 'date_time': datetime[i], 'paragraf' : join[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"#take all article pages from urls\n",
"def all_pages(main_url):\n",
" all_pages_articles = []\n",
" for page in main_url:\n",
" soup = parse_html(request_url(page))\n",
" div_class = soup.find_all(\"div\", class_= \"pt10 pb5 plr10 ovh bggrey\")\n",
" \n",
" if (div_class == []):\n",
" all_pages_articles.append(page)\n",
" else :\n",
" for i in div_class:\n",
" url_class = i.find_all(\"a\")\n",
" for j in url_class:\n",
" all_pages_articles.append(j.get('href'))\n",
" return all_pages_articles"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona = find_corona(section)\n",
" all_page = all_pages(corona)\n",
" titles = title(all_page)\n",
" author = writer(all_page)\n",
" dates = date(all_page)\n",
" text = collect_text(all_page, titles, author, dates)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file "
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"url=[\"https://www.tribunnews.com/\"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.covid19.go.id/</td>\n",
" <td>Tips Atasi Rasa Jenuh dan cemas dari Presiden ...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>Laporan Wartawan Tribunnews.com, Larasati Dyah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona</td>\n",
" <td>Data Covid-19 di Indonesia</td>\n",
" <td>None</td>\n",
" <td>1 hari lalu</td>\n",
" <td>Laporan Wartawan Tribunnews.com, Larasati Dyah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/w...</td>\n",
" <td>WNI Kabur Dari Pusat Karantina Covid-19 di Mal...</td>\n",
" <td>Larasati Dyah Utami</td>\n",
" <td>Kamis, 7 Mei 2020 21:50 WIB</td>\n",
" <td>Laporan Wartawan Tribunnews.com, Larasati Dyah...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/lifestyle/2020/05/0...</td>\n",
" <td>Anak Rentan Stres di Masa Pandemi Covid-19, In...</td>\n",
" <td>Widyadewi Metta Adya Irani</td>\n",
" <td>Kamis, 7 Mei 2020 21:45 WIB</td>\n",
" <td>TRIBUNNEWS.COM - Berikut cara mencegah dan men...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/lifestyle/2020/05/0...</td>\n",
" <td>Anak Rentan Stres di Masa Pandemi Covid-19, In...</td>\n",
" <td>Widyadewi Metta Adya Irani</td>\n",
" <td>Kamis, 7 Mei 2020 21:45 WIB</td>\n",
" <td>Sehingga, adanya kebijakan Pembatasan Sosial B...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/lifestyle/2020/05/0...</td>\n",
" <td>Anak Rentan Stres di Masa Pandemi Covid-19, In...</td>\n",
" <td>Widyadewi Metta Adya Irani</td>\n",
" <td>Kamis, 7 Mei 2020 21:45 WIB</td>\n",
" <td>Berikut cara mencegah stres pada anak: 1. Memb...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/lifestyle/2020/05/0...</td>\n",
" <td>Anak Rentan Stres di Masa Pandemi Covid-19, In...</td>\n",
" <td>Widyadewi Metta Adya Irani</td>\n",
" <td>Kamis, 7 Mei 2020 21:45 WIB</td>\n",
" <td>Pasalnya, menurut Hudan, stres yang dialami or...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/k...</td>\n",
" <td>Karena Pandemi Corona, Gadis Kirgiztan Terjeba...</td>\n",
" <td>Talitha Desena Darenti</td>\n",
" <td>Kamis, 7 Mei 2020 20:44 WIB</td>\n",
" <td>TRIBUNNEWS.COM - Di balik pandemi virus Corona...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/g...</td>\n",
" <td>Gubernur Ganjar Bentuk Tim Khusus Cari Orang Y...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:31 WIB</td>\n",
" <td>TRIBUNNEWS.COM, SEMARANG - Alumni ijtima dunia...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/g...</td>\n",
" <td>Gubernur Ganjar Bentuk Tim Khusus Cari Orang Y...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:31 WIB</td>\n",
" <td>Ada kekhawatiran yang akan terjadi jika para a...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/g...</td>\n",
" <td>Gubernur Ganjar Bentuk Tim Khusus Cari Orang Y...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:31 WIB</td>\n",
" <td>Karena itu, masyarakat harus jujur. Diperkirak...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/a...</td>\n",
" <td>Anies Baswedan: Pemprov DKI Telah Menyiapkan A...</td>\n",
" <td>Adi Suhendi</td>\n",
" <td>Kamis, 7 Mei 2020 22:30 WIB</td>\n",
" <td>TRIBUNNEWS.COM, JAKARTA - Pemerintah Provinsi ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/a...</td>\n",
" <td>Anies Baswedan: Pemprov DKI Telah Menyiapkan A...</td>\n",
" <td>Adi Suhendi</td>\n",
" <td>Kamis, 7 Mei 2020 22:30 WIB</td>\n",
" <td>Sampai April, anggaran BTT yang telah dialokas...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/a...</td>\n",
" <td>Anies Baswedan: Pemprov DKI Telah Menyiapkan A...</td>\n",
" <td>Adi Suhendi</td>\n",
" <td>Kamis, 7 Mei 2020 22:30 WIB</td>\n",
" <td>Namun, lantaran DKI meminta 1,1 juta warganya ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/regional/2020/05/07...</td>\n",
" <td>Tansuli Jatuh dan Meninggal, Warga Tak Berani ...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:18 WIB</td>\n",
" <td>TRIBUNNEWS.COM, BANDAR LAMPUNG -- Seorang pria...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/regional/2020/05/07...</td>\n",
" <td>Tansuli Jatuh dan Meninggal, Warga Tak Berani ...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:18 WIB</td>\n",
" <td>Warga Tanjungkarang, Enggal, Bandar Lampung, T...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/b...</td>\n",
" <td>Banyak Jenis Bansos yang Disalurkan Pemerintah...</td>\n",
" <td>Mafani Fidesya Hutauruk</td>\n",
" <td>Kamis, 7 Mei 2020 22:16 WIB</td>\n",
" <td>Laporan wartawan Tribunnews.com, Mafani Fidesy...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/b...</td>\n",
" <td>Banyak Jenis Bansos yang Disalurkan Pemerintah...</td>\n",
" <td>Mafani Fidesya Hutauruk</td>\n",
" <td>Kamis, 7 Mei 2020 22:16 WIB</td>\n",
" <td>“Dalam kondisi normal pun kehidupannya sudah p...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/k...</td>\n",
" <td>Kisah Cewek Kirgizstan 'Cinlok' dengan Pria La...</td>\n",
" <td>Hasanudin Aco</td>\n",
" <td>Kamis, 7 Mei 2020 12:14 WIB</td>\n",
" <td>TRIBUNNEWS.COM, LAMPUNG - Lockdown di China sa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/k...</td>\n",
" <td>Kisah Cewek Kirgizstan 'Cinlok' dengan Pria La...</td>\n",
" <td>Hasanudin Aco</td>\n",
" <td>Kamis, 7 Mei 2020 12:14 WIB</td>\n",
" <td>Namun, kebijakan lockdown oleh pemerintah Chin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/k...</td>\n",
" <td>Kisah Cewek Kirgizstan 'Cinlok' dengan Pria La...</td>\n",
" <td>Hasanudin Aco</td>\n",
" <td>Kamis, 7 Mei 2020 12:14 WIB</td>\n",
" <td>Meski baru sebentar di Bandar Lampung, Aiperi ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/images/regional/vie...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>6 jam lalu</td>\n",
" <td>TRIBUNNEWS.COM, TULUNGAGUNG - Puskesmas Simo d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/regional/2020/05/07...</td>\n",
" <td>Ada Kasus Covid-19, Puskesmas Simo Tulungagung...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:04 WIB</td>\n",
" <td>TRIBUNNEWS.COM, TULUNGAGUNG - Puskesmas Simo d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/regional/2020/05/07...</td>\n",
" <td>Ada Kasus Covid-19, Puskesmas Simo Tulungagung...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 22:04 WIB</td>\n",
" <td>Pada penutupan kali ini karena ditemukan satu ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/seleb/2020/05/07/mi...</td>\n",
" <td>Mikha Tambayong: Covid-19 Hancurkan Agenda Aku...</td>\n",
" <td>Willem Jonata</td>\n",
" <td>Kamis, 7 Mei 2020 21:57 WIB</td>\n",
" <td>TRIBUNNEWS.COM - Karena pandemi virus corona (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/06/3...</td>\n",
" <td>3 Penumpang Positif Covid-19, Kemenhub Pastika...</td>\n",
" <td>Hari Darmawan</td>\n",
" <td>Rabu, 6 Mei 2020 08:36 WIB</td>\n",
" <td>Laporan Wartawan Tribunnews, Hari Darmawan TRI...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/images/editorial/vi...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>6 jam lalu</td>\n",
" <td>TRIBUNNEWS.COM - Founder sekaligus Chief Execu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/images/editorial/vi...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>6 jam lalu</td>\n",
" <td>TRIBUNNEWS.COM - Founder sekaligus Chief Execu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/bisnis/2020/05/07/c...</td>\n",
" <td>Cara Hendy Setiono Motivasi Karyawan Baba Rafi...</td>\n",
" <td>Endra Kurniawan</td>\n",
" <td>Kamis, 7 Mei 2020 21:44 WIB</td>\n",
" <td>TRIBUNNEWS.COM - Founder sekaligus Chief Execu...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/bisnis/2020/05/07/c...</td>\n",
" <td>Cara Hendy Setiono Motivasi Karyawan Baba Rafi...</td>\n",
" <td>Endra Kurniawan</td>\n",
" <td>Kamis, 7 Mei 2020 21:44 WIB</td>\n",
" <td>Baginya krisis yang ditimbulkan akibat Covid-1...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/bisnis/2020/05/07/c...</td>\n",
" <td>Cara Hendy Setiono Motivasi Karyawan Baba Rafi...</td>\n",
" <td>Endra Kurniawan</td>\n",
" <td>Kamis, 7 Mei 2020 21:44 WIB</td>\n",
" <td>Usahanya yang ada di luar negeri juga mengalam...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/r...</td>\n",
" <td>Riset Terbaru Sebut Wabah Corona di Indonesia ...</td>\n",
" <td>Hasanudin Aco</td>\n",
" <td>Kamis, 7 Mei 2020 16:06 WIB</td>\n",
" <td>TRIBUNNEWS.COM, SINGAPURA - Sebuah riset yang ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/internasional/2020/...</td>\n",
" <td>Kasus Positif Covid-19 Baru di New York Mayori...</td>\n",
" <td>Imanuel Nicolas Manafe</td>\n",
" <td>Kamis, 7 Mei 2020 16:06 WIB</td>\n",
" <td>TRIBUNNEWS.COM, NEW YORK - Warga yang terpapar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/internasional/2020/...</td>\n",
" <td>Kasus Positif Covid-19 Baru di New York Mayori...</td>\n",
" <td>Imanuel Nicolas Manafe</td>\n",
" <td>Kamis, 7 Mei 2020 16:06 WIB</td>\n",
" <td>Gubernur New York dari Partai Demokrat itu men...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/internasional/2020/...</td>\n",
" <td>Kasus Positif Covid-19 Baru di New York Mayori...</td>\n",
" <td>Imanuel Nicolas Manafe</td>\n",
" <td>Kamis, 7 Mei 2020 16:06 WIB</td>\n",
" <td>Dia memaparkan sekitar 600 orang yang terinfek...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>Tribunnews.com</td>\n",
" <td>https://www.tribunnews.com/corona/2020/05/07/j...</td>\n",
" <td>Jokowi Minta Gugus Tugas Pusat Ikut Tangani Co...</td>\n",
" <td>Hendra Gunawan</td>\n",
" <td>Kamis, 7 Mei 2020 16:08 WIB</td>\n",
" <td>TRIBUNNEWS.COM, SURABAYA - Presiden Joko Widod...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 Tribunnews.com https://www.covid19.go.id/ \n",
"1 Tribunnews.com https://www.tribunnews.com/corona \n",
"2 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/w... \n",
"3 Tribunnews.com https://www.tribunnews.com/lifestyle/2020/05/0... \n",
"4 Tribunnews.com https://www.tribunnews.com/lifestyle/2020/05/0... \n",
"5 Tribunnews.com https://www.tribunnews.com/lifestyle/2020/05/0... \n",
"6 Tribunnews.com https://www.tribunnews.com/lifestyle/2020/05/0... \n",
"7 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/k... \n",
"8 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/g... \n",
"9 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/g... \n",
"10 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/g... \n",
"11 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/a... \n",
"12 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/a... \n",
"13 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/a... \n",
"14 Tribunnews.com https://www.tribunnews.com/regional/2020/05/07... \n",
"15 Tribunnews.com https://www.tribunnews.com/regional/2020/05/07... \n",
"16 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/b... \n",
"17 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/b... \n",
"18 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/k... \n",
"19 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/k... \n",
"20 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/k... \n",
"21 Tribunnews.com https://www.tribunnews.com/images/regional/vie... \n",
"22 Tribunnews.com https://www.tribunnews.com/regional/2020/05/07... \n",
"23 Tribunnews.com https://www.tribunnews.com/regional/2020/05/07... \n",
"24 Tribunnews.com https://www.tribunnews.com/seleb/2020/05/07/mi... \n",
"25 Tribunnews.com https://www.tribunnews.com/corona/2020/05/06/3... \n",
"26 Tribunnews.com https://www.tribunnews.com/images/editorial/vi... \n",
"27 Tribunnews.com https://www.tribunnews.com/images/editorial/vi... \n",
"28 Tribunnews.com https://www.tribunnews.com/bisnis/2020/05/07/c... \n",
"29 Tribunnews.com https://www.tribunnews.com/bisnis/2020/05/07/c... \n",
"30 Tribunnews.com https://www.tribunnews.com/bisnis/2020/05/07/c... \n",
"31 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/r... \n",
"32 Tribunnews.com https://www.tribunnews.com/internasional/2020/... \n",
"33 Tribunnews.com https://www.tribunnews.com/internasional/2020/... \n",
"34 Tribunnews.com https://www.tribunnews.com/internasional/2020/... \n",
"35 Tribunnews.com https://www.tribunnews.com/corona/2020/05/07/j... \n",
"\n",
" title \\\n",
"0 Tips Atasi Rasa Jenuh dan cemas dari Presiden ... \n",
"1 Data Covid-19 di Indonesia \n",
"2 WNI Kabur Dari Pusat Karantina Covid-19 di Mal... \n",
"3 Anak Rentan Stres di Masa Pandemi Covid-19, In... \n",
"4 Anak Rentan Stres di Masa Pandemi Covid-19, In... \n",
"5 Anak Rentan Stres di Masa Pandemi Covid-19, In... \n",
"6 Anak Rentan Stres di Masa Pandemi Covid-19, In... \n",
"7 Karena Pandemi Corona, Gadis Kirgiztan Terjeba... \n",
"8 Gubernur Ganjar Bentuk Tim Khusus Cari Orang Y... \n",
"9 Gubernur Ganjar Bentuk Tim Khusus Cari Orang Y... \n",
"10 Gubernur Ganjar Bentuk Tim Khusus Cari Orang Y... \n",
"11 Anies Baswedan: Pemprov DKI Telah Menyiapkan A... \n",
"12 Anies Baswedan: Pemprov DKI Telah Menyiapkan A... \n",
"13 Anies Baswedan: Pemprov DKI Telah Menyiapkan A... \n",
"14 Tansuli Jatuh dan Meninggal, Warga Tak Berani ... \n",
"15 Tansuli Jatuh dan Meninggal, Warga Tak Berani ... \n",
"16 Banyak Jenis Bansos yang Disalurkan Pemerintah... \n",
"17 Banyak Jenis Bansos yang Disalurkan Pemerintah... \n",
"18 Kisah Cewek Kirgizstan 'Cinlok' dengan Pria La... \n",
"19 Kisah Cewek Kirgizstan 'Cinlok' dengan Pria La... \n",
"20 Kisah Cewek Kirgizstan 'Cinlok' dengan Pria La... \n",
"21 None \n",
"22 Ada Kasus Covid-19, Puskesmas Simo Tulungagung... \n",
"23 Ada Kasus Covid-19, Puskesmas Simo Tulungagung... \n",
"24 Mikha Tambayong: Covid-19 Hancurkan Agenda Aku... \n",
"25 3 Penumpang Positif Covid-19, Kemenhub Pastika... \n",
"26 None \n",
"27 None \n",
"28 Cara Hendy Setiono Motivasi Karyawan Baba Rafi... \n",
"29 Cara Hendy Setiono Motivasi Karyawan Baba Rafi... \n",
"30 Cara Hendy Setiono Motivasi Karyawan Baba Rafi... \n",
"31 Riset Terbaru Sebut Wabah Corona di Indonesia ... \n",
"32 Kasus Positif Covid-19 Baru di New York Mayori... \n",
"33 Kasus Positif Covid-19 Baru di New York Mayori... \n",
"34 Kasus Positif Covid-19 Baru di New York Mayori... \n",
"35 Jokowi Minta Gugus Tugas Pusat Ikut Tangani Co... \n",
"\n",
" author date_time \\\n",
"0 None None \n",
"1 None 1 hari lalu \n",
"2 Larasati Dyah Utami Kamis, 7 Mei 2020 21:50 WIB \n",
"3 Widyadewi Metta Adya Irani Kamis, 7 Mei 2020 21:45 WIB \n",
"4 Widyadewi Metta Adya Irani Kamis, 7 Mei 2020 21:45 WIB \n",
"5 Widyadewi Metta Adya Irani Kamis, 7 Mei 2020 21:45 WIB \n",
"6 Widyadewi Metta Adya Irani Kamis, 7 Mei 2020 21:45 WIB \n",
"7 Talitha Desena Darenti Kamis, 7 Mei 2020 20:44 WIB \n",
"8 Hendra Gunawan Kamis, 7 Mei 2020 22:31 WIB \n",
"9 Hendra Gunawan Kamis, 7 Mei 2020 22:31 WIB \n",
"10 Hendra Gunawan Kamis, 7 Mei 2020 22:31 WIB \n",
"11 Adi Suhendi Kamis, 7 Mei 2020 22:30 WIB \n",
"12 Adi Suhendi Kamis, 7 Mei 2020 22:30 WIB \n",
"13 Adi Suhendi Kamis, 7 Mei 2020 22:30 WIB \n",
"14 Hendra Gunawan Kamis, 7 Mei 2020 22:18 WIB \n",
"15 Hendra Gunawan Kamis, 7 Mei 2020 22:18 WIB \n",
"16 Mafani Fidesya Hutauruk Kamis, 7 Mei 2020 22:16 WIB \n",
"17 Mafani Fidesya Hutauruk Kamis, 7 Mei 2020 22:16 WIB \n",
"18 Hasanudin Aco Kamis, 7 Mei 2020 12:14 WIB \n",
"19 Hasanudin Aco Kamis, 7 Mei 2020 12:14 WIB \n",
"20 Hasanudin Aco Kamis, 7 Mei 2020 12:14 WIB \n",
"21 None 6 jam lalu \n",
"22 Hendra Gunawan Kamis, 7 Mei 2020 22:04 WIB \n",
"23 Hendra Gunawan Kamis, 7 Mei 2020 22:04 WIB \n",
"24 Willem Jonata Kamis, 7 Mei 2020 21:57 WIB \n",
"25 Hari Darmawan Rabu, 6 Mei 2020 08:36 WIB \n",
"26 None 6 jam lalu \n",
"27 None 6 jam lalu \n",
"28 Endra Kurniawan Kamis, 7 Mei 2020 21:44 WIB \n",
"29 Endra Kurniawan Kamis, 7 Mei 2020 21:44 WIB \n",
"30 Endra Kurniawan Kamis, 7 Mei 2020 21:44 WIB \n",
"31 Hasanudin Aco Kamis, 7 Mei 2020 16:06 WIB \n",
"32 Imanuel Nicolas Manafe Kamis, 7 Mei 2020 16:06 WIB \n",
"33 Imanuel Nicolas Manafe Kamis, 7 Mei 2020 16:06 WIB \n",
"34 Imanuel Nicolas Manafe Kamis, 7 Mei 2020 16:06 WIB \n",
"35 Hendra Gunawan Kamis, 7 Mei 2020 16:08 WIB \n",
"\n",
" paragraf \n",
"0 Laporan Wartawan Tribunnews.com, Larasati Dyah... \n",
"1 Laporan Wartawan Tribunnews.com, Larasati Dyah... \n",
"2 Laporan Wartawan Tribunnews.com, Larasati Dyah... \n",
"3 TRIBUNNEWS.COM - Berikut cara mencegah dan men... \n",
"4 Sehingga, adanya kebijakan Pembatasan Sosial B... \n",
"5 Berikut cara mencegah stres pada anak: 1. Memb... \n",
"6 Pasalnya, menurut Hudan, stres yang dialami or... \n",
"7 TRIBUNNEWS.COM - Di balik pandemi virus Corona... \n",
"8 TRIBUNNEWS.COM, SEMARANG - Alumni ijtima dunia... \n",
"9 Ada kekhawatiran yang akan terjadi jika para a... \n",
"10 Karena itu, masyarakat harus jujur. Diperkirak... \n",
"11 TRIBUNNEWS.COM, JAKARTA - Pemerintah Provinsi ... \n",
"12 Sampai April, anggaran BTT yang telah dialokas... \n",
"13 Namun, lantaran DKI meminta 1,1 juta warganya ... \n",
"14 TRIBUNNEWS.COM, BANDAR LAMPUNG -- Seorang pria... \n",
"15 Warga Tanjungkarang, Enggal, Bandar Lampung, T... \n",
"16 Laporan wartawan Tribunnews.com, Mafani Fidesy... \n",
"17 “Dalam kondisi normal pun kehidupannya sudah p... \n",
"18 TRIBUNNEWS.COM, LAMPUNG - Lockdown di China sa... \n",
"19 Namun, kebijakan lockdown oleh pemerintah Chin... \n",
"20 Meski baru sebentar di Bandar Lampung, Aiperi ... \n",
"21 TRIBUNNEWS.COM, TULUNGAGUNG - Puskesmas Simo d... \n",
"22 TRIBUNNEWS.COM, TULUNGAGUNG - Puskesmas Simo d... \n",
"23 Pada penutupan kali ini karena ditemukan satu ... \n",
"24 TRIBUNNEWS.COM - Karena pandemi virus corona (... \n",
"25 Laporan Wartawan Tribunnews, Hari Darmawan TRI... \n",
"26 TRIBUNNEWS.COM - Founder sekaligus Chief Execu... \n",
"27 TRIBUNNEWS.COM - Founder sekaligus Chief Execu... \n",
"28 TRIBUNNEWS.COM - Founder sekaligus Chief Execu... \n",
"29 Baginya krisis yang ditimbulkan akibat Covid-1... \n",
"30 Usahanya yang ada di luar negeri juga mengalam... \n",
"31 TRIBUNNEWS.COM, SINGAPURA - Sebuah riset yang ... \n",
"32 TRIBUNNEWS.COM, NEW YORK - Warga yang terpapar... \n",
"33 Gubernur New York dari Partai Demokrat itu men... \n",
"34 Dia memaparkan sekitar 600 orang yang terinfek... \n",
"35 TRIBUNNEWS.COM, SURABAYA - Presiden Joko Widod... "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tribunnews_news = get_news(url, file_name = 'tribunnews_satu')\n",
"tribunnews_news"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import library\n",
"import urllib\n",
"import requests\n",
"import bs4\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# request a link\n",
"def request_url(link):\n",
" response = requests.get(link)\n",
" html = response.text\n",
" return html"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Function to parse html\n",
"def parse_html(to_parse):\n",
" soup = bs4.BeautifulSoup(to_parse, 'html.parser')\n",
" return soup"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"#take sub menu news form liputan6.com\n",
"def all_section(main_url):\n",
" section_list = []\n",
" for i in main_url:\n",
" soup = parse_html(request_url(i))\n",
" for a in soup.find_all('a', href=True): \n",
" if a.text:\n",
" section_list.append(a['href'])\n",
" return section_list"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#get only link about corona or covid\n",
"def find_corona(main_url):\n",
" url_list = []\n",
" not_news=[]\n",
" for i in main_url:\n",
" if i.find('corona')!=-1 or i.find('covid')!=-1:\n",
" # print(i)\n",
" url_list.append(i)\n",
" url_list = list(dict.fromkeys(url_list))\n",
" for i in url_list:\n",
" if (i.find('/tag/') != -1):\n",
" not_news.append(i) \n",
" for j in not_news:\n",
" url_list.remove(j)\n",
" return url_list"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def title(main_url):\n",
" titles = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_title = soup.find(\"h1\", class_=\"title\")\n",
" if(get_title):\n",
" titles.append(' '.join(get_title.text.split()))\n",
" else:\n",
" titles.append('None')\n",
" return titles "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def writer(main_url):\n",
" author = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" try:\n",
" get_author = soup.find(\"div\", class_=\"detail_text\")\n",
" get_author1 = soup.find(\"div\", class_=\"date\")\n",
" get_author1 = ' '.join(get_author1.text.split())\n",
" get_author1 = get_author1.partition(\", \")[0]\n",
" if(get_author):\n",
" authorr = get_author.b.string\n",
" author.append(''.join(authorr.partition('(')[2:]).partition(\")\")[0])\n",
" elif(get_author1):\n",
" author.append(get_author1)\n",
" else:\n",
" author.append('None')\n",
" except:\n",
" author.append('None')\n",
" return author"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def date(main_url):\n",
" datetime = []\n",
" for i in main_url:\n",
" soup=parse_html(request_url(i))\n",
" get_datetime = soup.find(\"div\", class_=\"date\")\n",
" if(get_datetime):\n",
" del_line = ' '.join(get_datetime.text.split())\n",
" datetime.append(''.join(del_line.partition('| ')[2:]))\n",
" else:\n",
" datetime.append('None')\n",
" return datetime"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def collect_text(main_url, titles = [], author = [], datetime = []):\n",
" paragraf = []\n",
" isiteks = []\n",
"\n",
" for i in main_url:\n",
" a = parse_html(request_url(i))\n",
" try:\n",
" content = a.find('span', id=\"detikdetailtext\")\n",
" content1 = a.find(\"div\", class_=\"detail_text\")\n",
" content2 = a.find(\"div\", class_=\"content_detail\")\n",
" if(content):\n",
" s = ' '.join(content.text.split())\n",
" paragraf.append(s)\n",
" elif(content1):\n",
" s = ' '.join(content1.text.split())\n",
" paragraf.append(s.partition(\"[Gambas:Video CNN]\")[0])\n",
" elif(content2):\n",
" s = ' '.join(content2.text.split())\n",
" paragraf.append(s)\n",
" else:\n",
" paragraf.append('None')\n",
" except:\n",
" paragraf.append('None') \n",
" \n",
" for i, j in enumerate(main_url):\n",
" isiteks.append({'news': 'CNN Indonesia', 'link' : j, 'title': titles[i], 'author' : author[i], 'date_time': datetime[i], 'paragraf' : paragraf[i]})\n",
" return isiteks"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def save_file(file, file_name = ''):\n",
" new_file = pd.DataFrame(file, columns=['news','link','title', 'author', 'date_time', 'paragraf'])\n",
" new_file.to_csv(file_name + '.csv', index=True, encoding='utf-8', sep = ',')\n",
" \n",
" return new_file"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"def get_news(main_url, file_name = ''):\n",
" section = all_section(main_url)\n",
" corona_news = find_corona(section)\n",
" titles = title(corona_news)\n",
" author = writer(corona_news)\n",
" datetime = date(corona_news)\n",
" text = collect_text(corona_news, titles, author, datetime)\n",
" file = save_file(text, file_name)\n",
" \n",
" return file"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"url=[\"https://www.cnnindonesia.com\"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>news</th>\n",
" <th>link</th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>date_time</th>\n",
" <th>paragraf</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/nasional/20200505...</td>\n",
" <td>Mahasiswa PTN Bisa Minta Keringanan Uang Kulia...</td>\n",
" <td>fey/ain</td>\n",
" <td>Selasa, 05/05/2020 20:02 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Ketua Majelis Rektor...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/internasional/202...</td>\n",
" <td>WNI Positif Corona di Luar Negeri 703 Orang, 3...</td>\n",
" <td>ayp</td>\n",
" <td>Selasa, 05/05/2020 19:52 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Kementerian Luar Neg...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/internasional/202...</td>\n",
" <td>China Sebut RI Akan Menang Lawan Corona Secepa...</td>\n",
" <td>rds/dea</td>\n",
" <td>Selasa, 05/05/2020 14:57 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Pemerintah China mey...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/nasional/20200505...</td>\n",
" <td>Bertambah 243 Orang, Pasien Corona Sembuh Tert...</td>\n",
" <td>fra</td>\n",
" <td>Selasa, 05/05/2020 18:56 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Jumlah pasien positi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/ekonomi/202005051...</td>\n",
" <td>FOTO: Asa Tukang Cuci Mobil di Tengah Pandemi ...</td>\n",
" <td>CNN Indonesia/Andry Novelino</td>\n",
" <td>Selasa, 05/05/2020 18:47 WIB</td>\n",
" <td>Home Ekonomi Foto Bisnis FOTO: Asa Tukang Cuci...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/internasional/202...</td>\n",
" <td>Corona Menurun, Pelajar Korsel Kembali Sekolah...</td>\n",
" <td>ans/ayp</td>\n",
" <td>Selasa, 05/05/2020 18:10 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Korea Selatan akan k...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/nasional/20200505...</td>\n",
" <td>Pasien Covid-19 Klaster Ijtima Gowa Tulari Kel...</td>\n",
" <td>pnd/pmg</td>\n",
" <td>Selasa, 05/05/2020 19:32 WIB</td>\n",
" <td>Kendari, CNN Indonesia -- Dua pasien Covid-19 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/internasional/202...</td>\n",
" <td>VIDEO: AS Diklaim Sulit Temukan Warga Tanpa Ge...</td>\n",
" <td>AFP</td>\n",
" <td>Selasa, 05/05/2020 18:46 WIB</td>\n",
" <td>Home Internasional Video Eropa Amerika VIDEO: ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/tv/20200505185659...</td>\n",
" <td>VIDEO: Gubernur Sumut Kunjungi RS Darurat Covi...</td>\n",
" <td>CNN Indonesia TV</td>\n",
" <td>Selasa, 05/05/2020 20:00 WIB</td>\n",
" <td>VIDEO: Gubernur Sumut Kunjungi RS Darurat Covi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/gaya-hidup/202005...</td>\n",
" <td>Jumisih Berpacu di Antara Ancaman PHK Buruh da...</td>\n",
" <td>NMA</td>\n",
" <td>Jumat, 01/05/2020 16:52 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Perawakan mungil tak...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/olahraga/20200504...</td>\n",
" <td>Virus Corona dan Ujian Terberat Cristian Gonza...</td>\n",
" <td>TTF/har</td>\n",
" <td>Selasa, 05/05/2020 16:00 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Setelah menjadi mual...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/nasional/20200505...</td>\n",
" <td>2.200 TKI Akan Masuk Sumut, Antisipasi Covid-1...</td>\n",
" <td>fnr/pmg</td>\n",
" <td>Selasa, 05/05/2020 19:24 WIB</td>\n",
" <td>Medan, CNN Indonesia -- Pemerintah Provinsi Su...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/nasional/20200505...</td>\n",
" <td>Update Corona 5 Mei:12.071 Positif, 2.197 Oran...</td>\n",
" <td>yoa/ugo</td>\n",
" <td>Selasa, 05/05/2020 15:49 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Total jumlah pasien ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/ekonomi/202005030...</td>\n",
" <td>Perang Lawan Corona dan Benteng Terakhir Krisi...</td>\n",
" <td>Khudori</td>\n",
" <td>Minggu, 03/05/2020 10:04 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Resonansi krisis pan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/ekonomi/202004301...</td>\n",
" <td>Jalan Lapang Oligarki Ekonomi di Perppu Corona...</td>\n",
" <td>Arif Hulwan Muzayyin</td>\n",
" <td>Kamis, 30/04/2020 19:10 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Meski bisa menurunka...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://www.cnnindonesia.com/nasional/20200415...</td>\n",
" <td>Nina Bobo di Masa Corona</td>\n",
" <td>Yugo Hindarto</td>\n",
" <td>Kamis, 16/04/2020 08:05 WIB</td>\n",
" <td>Jakarta, CNN Indonesia -- Di masa pandemi coro...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>CNN Indonesia</td>\n",
" <td>https://news.detik.com/berita/d-5003717/positi...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" news link \\\n",
"0 CNN Indonesia https://www.cnnindonesia.com/nasional/20200505... \n",
"1 CNN Indonesia https://www.cnnindonesia.com/internasional/202... \n",
"2 CNN Indonesia https://www.cnnindonesia.com/internasional/202... \n",
"3 CNN Indonesia https://www.cnnindonesia.com/nasional/20200505... \n",
"4 CNN Indonesia https://www.cnnindonesia.com/ekonomi/202005051... \n",
"5 CNN Indonesia https://www.cnnindonesia.com/internasional/202... \n",
"6 CNN Indonesia https://www.cnnindonesia.com/nasional/20200505... \n",
"7 CNN Indonesia https://www.cnnindonesia.com/internasional/202... \n",
"8 CNN Indonesia https://www.cnnindonesia.com/tv/20200505185659... \n",
"9 CNN Indonesia https://www.cnnindonesia.com/gaya-hidup/202005... \n",
"10 CNN Indonesia https://www.cnnindonesia.com/olahraga/20200504... \n",
"11 CNN Indonesia https://www.cnnindonesia.com/nasional/20200505... \n",
"12 CNN Indonesia https://www.cnnindonesia.com/nasional/20200505... \n",
"13 CNN Indonesia https://www.cnnindonesia.com/ekonomi/202005030... \n",
"14 CNN Indonesia https://www.cnnindonesia.com/ekonomi/202004301... \n",
"15 CNN Indonesia https://www.cnnindonesia.com/nasional/20200415... \n",
"16 CNN Indonesia https://news.detik.com/berita/d-5003717/positi... \n",
"\n",
" title \\\n",
"0 Mahasiswa PTN Bisa Minta Keringanan Uang Kulia... \n",
"1 WNI Positif Corona di Luar Negeri 703 Orang, 3... \n",
"2 China Sebut RI Akan Menang Lawan Corona Secepa... \n",
"3 Bertambah 243 Orang, Pasien Corona Sembuh Tert... \n",
"4 FOTO: Asa Tukang Cuci Mobil di Tengah Pandemi ... \n",
"5 Corona Menurun, Pelajar Korsel Kembali Sekolah... \n",
"6 Pasien Covid-19 Klaster Ijtima Gowa Tulari Kel... \n",
"7 VIDEO: AS Diklaim Sulit Temukan Warga Tanpa Ge... \n",
"8 VIDEO: Gubernur Sumut Kunjungi RS Darurat Covi... \n",
"9 Jumisih Berpacu di Antara Ancaman PHK Buruh da... \n",
"10 Virus Corona dan Ujian Terberat Cristian Gonza... \n",
"11 2.200 TKI Akan Masuk Sumut, Antisipasi Covid-1... \n",
"12 Update Corona 5 Mei:12.071 Positif, 2.197 Oran... \n",
"13 Perang Lawan Corona dan Benteng Terakhir Krisi... \n",
"14 Jalan Lapang Oligarki Ekonomi di Perppu Corona... \n",
"15 Nina Bobo di Masa Corona \n",
"16 None \n",
"\n",
" author date_time \\\n",
"0 fey/ain Selasa, 05/05/2020 20:02 WIB \n",
"1 ayp Selasa, 05/05/2020 19:52 WIB \n",
"2 rds/dea Selasa, 05/05/2020 14:57 WIB \n",
"3 fra Selasa, 05/05/2020 18:56 WIB \n",
"4 CNN Indonesia/Andry Novelino Selasa, 05/05/2020 18:47 WIB \n",
"5 ans/ayp Selasa, 05/05/2020 18:10 WIB \n",
"6 pnd/pmg Selasa, 05/05/2020 19:32 WIB \n",
"7 AFP Selasa, 05/05/2020 18:46 WIB \n",
"8 CNN Indonesia TV Selasa, 05/05/2020 20:00 WIB \n",
"9 NMA Jumat, 01/05/2020 16:52 WIB \n",
"10 TTF/har Selasa, 05/05/2020 16:00 WIB \n",
"11 fnr/pmg Selasa, 05/05/2020 19:24 WIB \n",
"12 yoa/ugo Selasa, 05/05/2020 15:49 WIB \n",
"13 Khudori Minggu, 03/05/2020 10:04 WIB \n",
"14 Arif Hulwan Muzayyin Kamis, 30/04/2020 19:10 WIB \n",
"15 Yugo Hindarto Kamis, 16/04/2020 08:05 WIB \n",
"16 None None \n",
"\n",
" paragraf \n",
"0 Jakarta, CNN Indonesia -- Ketua Majelis Rektor... \n",
"1 Jakarta, CNN Indonesia -- Kementerian Luar Neg... \n",
"2 Jakarta, CNN Indonesia -- Pemerintah China mey... \n",
"3 Jakarta, CNN Indonesia -- Jumlah pasien positi... \n",
"4 Home Ekonomi Foto Bisnis FOTO: Asa Tukang Cuci... \n",
"5 Jakarta, CNN Indonesia -- Korea Selatan akan k... \n",
"6 Kendari, CNN Indonesia -- Dua pasien Covid-19 ... \n",
"7 Home Internasional Video Eropa Amerika VIDEO: ... \n",
"8 VIDEO: Gubernur Sumut Kunjungi RS Darurat Covi... \n",
"9 Jakarta, CNN Indonesia -- Perawakan mungil tak... \n",
"10 Jakarta, CNN Indonesia -- Setelah menjadi mual... \n",
"11 Medan, CNN Indonesia -- Pemerintah Provinsi Su... \n",
"12 Jakarta, CNN Indonesia -- Total jumlah pasien ... \n",
"13 Jakarta, CNN Indonesia -- Resonansi krisis pan... \n",
"14 Jakarta, CNN Indonesia -- Meski bisa menurunka... \n",
"15 Jakarta, CNN Indonesia -- Di masa pandemi coro... \n",
"16 None "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cnnindonesia_news=get_news(url, file_name = 'cnnindonesia_satu')\n",
"cnnindonesia_news"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment