-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtitaniclnnipynb
809 lines (809 loc) · 50.3 KB
/
titaniclnnipynb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Copy of Untitled18.ipynb",
"version": "0.3.2",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/vmr1532/-simple-sentimental_analyzer/blob/master/titaniclnnipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"metadata": {
"id": "DGWRT4_2z_kR",
"colab_type": "code",
"outputId": "daf7170b-fe06-4b18-a445-5cbb1d88d920",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"cell_type": "code",
"source": [
"\n",
"from googleapiclient.discovery import build\n",
"import io, os\n",
"from googleapiclient.http import MediaIoBaseDownload\n",
"from google.colab import auth\n",
"auth.authenticate_user()\n",
"drive_service = build('drive', 'v3')\n",
"results = drive_service.files().list(\n",
" q=\"name = 'kaggle.json'\", fields=\"files(id)\").execute()\n",
"kaggle_api_key = results.get('files', [])\n",
"filename = \"/root/.kaggle/kaggle.json\"\n",
"if not os.path.exists(os.path.dirname(filename)):\n",
" os.makedirs(os.path.dirname(filename))\n",
"request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])\n",
"fh = io.FileIO(filename, 'wb')\n",
"downloader = MediaIoBaseDownload(fh, request)\n",
"done = False\n",
"while done is False:\n",
" status, done = downloader.next_chunk()\n",
" print(\"Download %d%%.\" % int(status.progress() * 100))\n",
"os.chmod(filename, 600)"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Download 100%.\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "yog3pSwL0Nnn",
"colab_type": "code",
"outputId": "83b9bade-3d0a-4c9f-d32a-04e0e8a1837a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 208
}
},
"cell_type": "code",
"source": [
"!pip install kaggle\n"
],
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in /usr/local/lib/python3.6/dist-packages (1.5.3)\n",
"Requirement already satisfied: urllib3<1.25,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from kaggle) (1.22)\n",
"Requirement already satisfied: six>=1.10 in /usr/local/lib/python3.6/dist-packages (from kaggle) (1.11.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.6/dist-packages (from kaggle) (2019.3.9)\n",
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from kaggle) (2.5.3)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from kaggle) (2.18.4)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from kaggle) (4.28.1)\n",
"Requirement already satisfied: python-slugify in /usr/local/lib/python3.6/dist-packages (from kaggle) (3.0.2)\n",
"Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->kaggle) (3.0.4)\n",
"Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->kaggle) (2.6)\n",
"Requirement already satisfied: text-unidecode==1.2 in /usr/local/lib/python3.6/dist-packages (from python-slugify->kaggle) (1.2)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "hVYD0ogK4Pju",
"colab_type": "code",
"outputId": "d2c84a3b-fd4c-4cd6-b379-3aa7c1147da1",
"colab": {
"resources": {
"http://localhost:8080/nbextensions/google.colab/files.js": {
"data": "Ly8gQ29weXJpZ2h0IDIwMTcgR29vZ2xlIExMQwovLwovLyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKLy8geW91IG1heSBub3QgdXNlIHRoaXMgZmlsZSBleGNlcHQgaW4gY29tcGxpYW5jZSB3aXRoIHRoZSBMaWNlbnNlLgovLyBZb3UgbWF5IG9idGFpbiBhIGNvcHkgb2YgdGhlIExpY2Vuc2UgYXQKLy8KLy8gICAgICBodHRwOi8vd3d3LmFwYWNoZS5vcmcvbGljZW5zZXMvTElDRU5TRS0yLjAKLy8KLy8gVW5sZXNzIHJlcXVpcmVkIGJ5IGFwcGxpY2FibGUgbGF3IG9yIGFncmVlZCB0byBpbiB3cml0aW5nLCBzb2Z0d2FyZQovLyBkaXN0cmlidXRlZCB1bmRlciB0aGUgTGljZW5zZSBpcyBkaXN0cmlidXRlZCBvbiBhbiAiQVMgSVMiIEJBU0lTLAovLyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KLy8gU2VlIHRoZSBMaWNlbnNlIGZvciB0aGUgc3BlY2lmaWMgbGFuZ3VhZ2UgZ292ZXJuaW5nIHBlcm1pc3Npb25zIGFuZAovLyBsaW1pdGF0aW9ucyB1bmRlciB0aGUgTGljZW5zZS4KCi8qKgogKiBAZmlsZW92ZXJ2aWV3IEhlbHBlcnMgZm9yIGdvb2dsZS5jb2xhYiBQeXRob24gbW9kdWxlLgogKi8KKGZ1bmN0aW9uKHNjb3BlKSB7CmZ1bmN0aW9uIHNwYW4odGV4dCwgc3R5bGVBdHRyaWJ1dGVzID0ge30pIHsKICBjb25zdCBlbGVtZW50ID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnc3BhbicpOwogIGVsZW1lbnQudGV4dENvbnRlbnQgPSB0ZXh0OwogIGZvciAoY29uc3Qga2V5IG9mIE9iamVjdC5rZXlzKHN0eWxlQXR0cmlidXRlcykpIHsKICAgIGVsZW1lbnQuc3R5bGVba2V5XSA9IHN0eWxlQXR0cmlidXRlc1trZXldOwogIH0KICByZXR1cm4gZWxlbWVudDsKfQoKLy8gTWF4IG51bWJlciBvZiBieXRlcyB3aGljaCB3aWxsIGJlIHVwbG9hZGVkIGF0IGEgdGltZS4KY29uc3QgTUFYX1BBWUxPQURfU0laRSA9IDEwMCAqIDEwMjQ7Ci8vIE1heCBhbW91bnQgb2YgdGltZSB0byBibG9jayB3YWl0aW5nIGZvciB0aGUgdXNlci4KY29uc3QgRklMRV9DSEFOR0VfVElNRU9VVF9NUyA9IDMwICogMTAwMDsKCmZ1bmN0aW9uIF91cGxvYWRGaWxlcyhpbnB1dElkLCBvdXRwdXRJZCkgewogIGNvbnN0IHN0ZXBzID0gdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKTsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIC8vIENhY2hlIHN0ZXBzIG9uIHRoZSBvdXRwdXRFbGVtZW50IHRvIG1ha2UgaXQgYXZhaWxhYmxlIGZvciB0aGUgbmV4dCBjYWxsCiAgLy8gdG8gdXBsb2FkRmlsZXNDb250aW51ZSBmcm9tIFB5dGhvbi4KICBvdXRwdXRFbGVtZW50LnN0ZXBzID0gc3RlcHM7CgogIHJldHVybiBfdXBsb2FkRmlsZXNDb250aW51ZShvdXRwdXRJZCk7Cn0KCi8vIFRoaXMgaXMgcm91Z2hseSBhbiBhc3luYyBnZW5lcmF0b3IgKG5vdCBzdXBwb3J0ZWQgaW4gdGhlIGJyb3dzZXIgeWV0KSwKLy8gd2hlcmUgdGhlcmUgYXJlIG11bHRpcGxlIGFzeW5jaHJvbm91cyBzdGVwcyBhbmQgdGhlIFB5dGhvbiBzaWRlIGlzIGdvaW5nCi8vIHRvIHBvbGwgZm9yIGNvbXBsZXRpb24gb2YgZWFjaCBzdGVwLgovLyBUaGlzIHVzZXMgYSBQcm9taXNlIHRvIGJsb2NrIHRoZSBweXRob24gc2lkZSBvbiBjb21wbGV0aW9uIG9mIGVhY2ggc3RlcCwKLy8gdGhlbiBwYXNzZXMgdGhlIHJlc3VsdCBvZiB0aGUgcHJldmlvdXMgc3RlcCBhcyB0aGUgaW5wdXQgdG8gdGhlIG5leHQgc3RlcC4KZnVuY3Rpb24gX3VwbG9hZEZpbGVzQ29udGludWUob3V0cHV0SWQpIHsKICBjb25zdCBvdXRwdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQob3V0cHV0SWQpOwogIGNvbnN0IHN0ZXBzID0gb3V0cHV0RWxlbWVudC5zdGVwczsKCiAgY29uc3QgbmV4dCA9IHN0ZXBzLm5leHQob3V0cHV0RWxlbWVudC5sYXN0UHJvbWlzZVZhbHVlKTsKICByZXR1cm4gUHJvbWlzZS5yZXNvbHZlKG5leHQudmFsdWUucHJvbWlzZSkudGhlbigodmFsdWUpID0+IHsKICAgIC8vIENhY2hlIHRoZSBsYXN0IHByb21pc2UgdmFsdWUgdG8gbWFrZSBpdCBhdmFpbGFibGUgdG8gdGhlIG5leHQKICAgIC8vIHN0ZXAgb2YgdGhlIGdlbmVyYXRvci4KICAgIG91dHB1dEVsZW1lbnQubGFzdFByb21pc2VWYWx1ZSA9IHZhbHVlOwogICAgcmV0dXJuIG5leHQudmFsdWUucmVzcG9uc2U7CiAgfSk7Cn0KCi8qKgogKiBHZW5lcmF0b3IgZnVuY3Rpb24gd2hpY2ggaXMgY2FsbGVkIGJldHdlZW4gZWFjaCBhc3luYyBzdGVwIG9mIHRoZSB1cGxvYWQKICogcHJvY2Vzcy4KICogQHBhcmFtIHtzdHJpbmd9IGlucHV0SWQgRWxlbWVudCBJRCBvZiB0aGUgaW5wdXQgZmlsZSBwaWNrZXIgZWxlbWVudC4KICogQHBhcmFtIHtzdHJpbmd9IG91dHB1dElkIEVsZW1lbnQgSUQgb2YgdGhlIG91dHB1dCBkaXNwbGF5LgogKiBAcmV0dXJuIHshSXRlcmFibGU8IU9iamVjdD59IEl0ZXJhYmxlIG9mIG5leHQgc3RlcHMuCiAqLwpmdW5jdGlvbiogdXBsb2FkRmlsZXNTdGVwKGlucHV0SWQsIG91dHB1dElkKSB7CiAgY29uc3QgaW5wdXRFbGVtZW50ID0gZG9jdW1lbnQuZ2V0RWxlbWVudEJ5SWQoaW5wdXRJZCk7CiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gZmFsc2U7CgogIGNvbnN0IG91dHB1dEVsZW1lbnQgPSBkb2N1bWVudC5nZXRFbGVtZW50QnlJZChvdXRwdXRJZCk7CiAgb3V0cHV0RWxlbWVudC5pbm5lckhUTUwgPSAnJzsKCiAgY29uc3QgcGlja2VkUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBpbnB1dEVsZW1lbnQuYWRkRXZlbnRMaXN0ZW5lcignY2hhbmdlJywgKGUpID0+IHsKICAgICAgcmVzb2x2ZShlLnRhcmdldC5maWxlcyk7CiAgICB9KTsKICB9KTsKCiAgY29uc3QgY2FuY2VsID0gZG9jdW1lbnQuY3JlYXRlRWxlbWVudCgnYnV0dG9uJyk7CiAgaW5wdXRFbGVtZW50LnBhcmVudEVsZW1lbnQuYXBwZW5kQ2hpbGQoY2FuY2VsKTsKICBjYW5jZWwudGV4dENvbnRlbnQgPSAnQ2FuY2VsIHVwbG9hZCc7CiAgY29uc3QgY2FuY2VsUHJvbWlzZSA9IG5ldyBQcm9taXNlKChyZXNvbHZlKSA9PiB7CiAgICBjYW5jZWwub25jbGljayA9ICgpID0+IHsKICAgICAgcmVzb2x2ZShudWxsKTsKICAgIH07CiAgfSk7CgogIC8vIENhbmNlbCB1cGxvYWQgaWYgdXNlciBoYXNuJ3QgcGlja2VkIGFueXRoaW5nIGluIHRpbWVvdXQuCiAgY29uc3QgdGltZW91dFByb21pc2UgPSBuZXcgUHJvbWlzZSgocmVzb2x2ZSkgPT4gewogICAgc2V0VGltZW91dCgoKSA9PiB7CiAgICAgIHJlc29sdmUobnVsbCk7CiAgICB9LCBGSUxFX0NIQU5HRV9USU1FT1VUX01TKTsKICB9KTsKCiAgLy8gV2FpdCBmb3IgdGhlIHVzZXIgdG8gcGljayB0aGUgZmlsZXMuCiAgY29uc3QgZmlsZXMgPSB5aWVsZCB7CiAgICBwcm9taXNlOiBQcm9taXNlLnJhY2UoW3BpY2tlZFByb21pc2UsIHRpbWVvdXRQcm9taXNlLCBjYW5jZWxQcm9taXNlXSksCiAgICByZXNwb25zZTogewogICAgICBhY3Rpb246ICdzdGFydGluZycsCiAgICB9CiAgfTsKCiAgaWYgKCFmaWxlcykgewogICAgcmV0dXJuIHsKICAgICAgcmVzcG9uc2U6IHsKICAgICAgICBhY3Rpb246ICdjb21wbGV0ZScsCiAgICAgIH0KICAgIH07CiAgfQoKICBjYW5jZWwucmVtb3ZlKCk7CgogIC8vIERpc2FibGUgdGhlIGlucHV0IGVsZW1lbnQgc2luY2UgZnVydGhlciBwaWNrcyBhcmUgbm90IGFsbG93ZWQuCiAgaW5wdXRFbGVtZW50LmRpc2FibGVkID0gdHJ1ZTsKCiAgZm9yIChjb25zdCBmaWxlIG9mIGZpbGVzKSB7CiAgICBjb25zdCBsaSA9IGRvY3VtZW50LmNyZWF0ZUVsZW1lbnQoJ2xpJyk7CiAgICBsaS5hcHBlbmQoc3BhbihmaWxlLm5hbWUsIHtmb250V2VpZ2h0OiAnYm9sZCd9KSk7CiAgICBsaS5hcHBlbmQoc3BhbigKICAgICAgICBgKCR7ZmlsZS50eXBlIHx8ICduL2EnfSkgLSAke2ZpbGUuc2l6ZX0gYnl0ZXMsIGAgKwogICAgICAgIGBsYXN0IG1vZGlmaWVkOiAkewogICAgICAgICAgICBmaWxlLmxhc3RNb2RpZmllZERhdGUgPyBmaWxlLmxhc3RNb2RpZmllZERhdGUudG9Mb2NhbGVEYXRlU3RyaW5nKCkgOgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnbi9hJ30gLSBgKSk7CiAgICBjb25zdCBwZXJjZW50ID0gc3BhbignMCUgZG9uZScpOwogICAgbGkuYXBwZW5kQ2hpbGQocGVyY2VudCk7CgogICAgb3V0cHV0RWxlbWVudC5hcHBlbmRDaGlsZChsaSk7CgogICAgY29uc3QgZmlsZURhdGFQcm9taXNlID0gbmV3IFByb21pc2UoKHJlc29sdmUpID0+IHsKICAgICAgY29uc3QgcmVhZGVyID0gbmV3IEZpbGVSZWFkZXIoKTsKICAgICAgcmVhZGVyLm9ubG9hZCA9IChlKSA9PiB7CiAgICAgICAgcmVzb2x2ZShlLnRhcmdldC5yZXN1bHQpOwogICAgICB9OwogICAgICByZWFkZXIucmVhZEFzQXJyYXlCdWZmZXIoZmlsZSk7CiAgICB9KTsKICAgIC8vIFdhaXQgZm9yIHRoZSBkYXRhIHRvIGJlIHJlYWR5LgogICAgbGV0IGZpbGVEYXRhID0geWllbGQgewogICAgICBwcm9taXNlOiBmaWxlRGF0YVByb21pc2UsCiAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgYWN0aW9uOiAnY29udGludWUnLAogICAgICB9CiAgICB9OwoKICAgIC8vIFVzZSBhIGNodW5rZWQgc2VuZGluZyB0byBhdm9pZCBtZXNzYWdlIHNpemUgbGltaXRzLiBTZWUgYi82MjExNTY2MC4KICAgIGxldCBwb3NpdGlvbiA9IDA7CiAgICB3aGlsZSAocG9zaXRpb24gPCBmaWxlRGF0YS5ieXRlTGVuZ3RoKSB7CiAgICAgIGNvbnN0IGxlbmd0aCA9IE1hdGgubWluKGZpbGVEYXRhLmJ5dGVMZW5ndGggLSBwb3NpdGlvbiwgTUFYX1BBWUxPQURfU0laRSk7CiAgICAgIGNvbnN0IGNodW5rID0gbmV3IFVpbnQ4QXJyYXkoZmlsZURhdGEsIHBvc2l0aW9uLCBsZW5ndGgpOwogICAgICBwb3NpdGlvbiArPSBsZW5ndGg7CgogICAgICBjb25zdCBiYXNlNjQgPSBidG9hKFN0cmluZy5mcm9tQ2hhckNvZGUuYXBwbHkobnVsbCwgY2h1bmspKTsKICAgICAgeWllbGQgewogICAgICAgIHJlc3BvbnNlOiB7CiAgICAgICAgICBhY3Rpb246ICdhcHBlbmQnLAogICAgICAgICAgZmlsZTogZmlsZS5uYW1lLAogICAgICAgICAgZGF0YTogYmFzZTY0LAogICAgICAgIH0sCiAgICAgIH07CiAgICAgIHBlcmNlbnQudGV4dENvbnRlbnQgPQogICAgICAgICAgYCR7TWF0aC5yb3VuZCgocG9zaXRpb24gLyBmaWxlRGF0YS5ieXRlTGVuZ3RoKSAqIDEwMCl9JSBkb25lYDsKICAgIH0KICB9CgogIC8vIEFsbCBkb25lLgogIHlpZWxkIHsKICAgIHJlc3BvbnNlOiB7CiAgICAgIGFjdGlvbjogJ2NvbXBsZXRlJywKICAgIH0KICB9Owp9CgpzY29wZS5nb29nbGUgPSBzY29wZS5nb29nbGUgfHwge307CnNjb3BlLmdvb2dsZS5jb2xhYiA9IHNjb3BlLmdvb2dsZS5jb2xhYiB8fCB7fTsKc2NvcGUuZ29vZ2xlLmNvbGFiLl9maWxlcyA9IHsKICBfdXBsb2FkRmlsZXMsCiAgX3VwbG9hZEZpbGVzQ29udGludWUsCn07Cn0pKHNlbGYpOwo=",
"ok": true,
"headers": [
[
"content-type",
"application/javascript"
]
],
"status": 200,
"status_text": "OK"
}
},
"base_uri": "https://localhost:8080/",
"height": 204
}
},
"cell_type": "code",
"source": [
"!kaggle competitions download -c titanic\n",
"\n",
"from google.colab import files\n",
"\n",
"uploaded = files.upload()\n",
"\n",
"for fn in uploaded.keys():\n",
" print('User uploaded file \"{name}\" with length {length} bytes'.format(\n",
" name=fn, length=len(uploaded[fn])))"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Downloading train.csv to /content\n",
"\r 0% 0.00/59.8k [00:00<?, ?B/s]\n",
"100% 59.8k/59.8k [00:00<00:00, 23.5MB/s]\n",
"Downloading test.csv to /content\n",
" 0% 0.00/28.0k [00:00<?, ?B/s]\n",
"100% 28.0k/28.0k [00:00<00:00, 29.5MB/s]\n",
"Downloading gender_submission.csv to /content\n",
" 0% 0.00/3.18k [00:00<?, ?B/s]\n",
"100% 3.18k/3.18k [00:00<00:00, 3.30MB/s]\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <input type=\"file\" id=\"files-95cd53e5-93e6-47b4-b5c9-8eed03eb31be\" name=\"files[]\" multiple disabled />\n",
" <output id=\"result-95cd53e5-93e6-47b4-b5c9-8eed03eb31be\">\n",
" Upload widget is only available when the cell has been executed in the\n",
" current browser session. Please rerun this cell to enable.\n",
" </output>\n",
" <script src=\"/nbextensions/google.colab/files.js\"></script> "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"metadata": {
"id": "OOme1CIJ4qct",
"colab_type": "code",
"outputId": "744f384c-4ff0-4f96-edc8-9fc149cb2290",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 3334
}
},
"cell_type": "code",
"source": [
"import csv\n",
"import numpy\n",
"import pandas\n",
"train1=pandas.read_csv(\"train.csv\")\n",
"test1=pandas.read_csv(\"test.csv\")\n",
"print(train1.shape)\n",
"print(train1)\n",
"import sys\n",
"import pandas\n",
"from pandas.plotting import scatter_matrix\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import model_selection\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.svm import SVC\n",
"import sklearn"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"(891, 12)\n",
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"5 6 0 3 \n",
"6 7 0 1 \n",
"7 8 0 3 \n",
"8 9 1 3 \n",
"9 10 1 2 \n",
"10 11 1 3 \n",
"11 12 1 1 \n",
"12 13 0 3 \n",
"13 14 0 3 \n",
"14 15 0 3 \n",
"15 16 1 2 \n",
"16 17 0 3 \n",
"17 18 1 2 \n",
"18 19 0 3 \n",
"19 20 1 3 \n",
"20 21 0 2 \n",
"21 22 1 2 \n",
"22 23 1 3 \n",
"23 24 1 1 \n",
"24 25 0 3 \n",
"25 26 1 3 \n",
"26 27 0 3 \n",
"27 28 0 1 \n",
"28 29 1 3 \n",
"29 30 0 3 \n",
".. ... ... ... \n",
"861 862 0 2 \n",
"862 863 1 1 \n",
"863 864 0 3 \n",
"864 865 0 2 \n",
"865 866 1 2 \n",
"866 867 1 2 \n",
"867 868 0 1 \n",
"868 869 0 3 \n",
"869 870 1 3 \n",
"870 871 0 3 \n",
"871 872 1 1 \n",
"872 873 0 1 \n",
"873 874 0 3 \n",
"874 875 1 2 \n",
"875 876 1 3 \n",
"876 877 0 3 \n",
"877 878 0 3 \n",
"878 879 0 3 \n",
"879 880 1 1 \n",
"880 881 1 2 \n",
"881 882 0 3 \n",
"882 883 0 3 \n",
"883 884 0 2 \n",
"884 885 0 3 \n",
"885 886 0 3 \n",
"886 887 0 2 \n",
"887 888 1 1 \n",
"888 889 0 3 \n",
"889 890 1 1 \n",
"890 891 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"5 Moran, Mr. James male NaN 0 \n",
"6 McCarthy, Mr. Timothy J male 54.0 0 \n",
"7 Palsson, Master. Gosta Leonard male 2.0 3 \n",
"8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n",
"9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n",
"10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n",
"11 Bonnell, Miss. Elizabeth female 58.0 0 \n",
"12 Saundercock, Mr. William Henry male 20.0 0 \n",
"13 Andersson, Mr. Anders Johan male 39.0 1 \n",
"14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n",
"15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n",
"16 Rice, Master. Eugene male 2.0 4 \n",
"17 Williams, Mr. Charles Eugene male NaN 0 \n",
"18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n",
"19 Masselmani, Mrs. Fatima female NaN 0 \n",
"20 Fynney, Mr. Joseph J male 35.0 0 \n",
"21 Beesley, Mr. Lawrence male 34.0 0 \n",
"22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n",
"23 Sloper, Mr. William Thompson male 28.0 0 \n",
"24 Palsson, Miss. Torborg Danira female 8.0 3 \n",
"25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n",
"26 Emir, Mr. Farred Chehab male NaN 0 \n",
"27 Fortune, Mr. Charles Alexander male 19.0 3 \n",
"28 O'Dwyer, Miss. Ellen \"Nellie\" female NaN 0 \n",
"29 Todoroff, Mr. Lalio male NaN 0 \n",
".. ... ... ... ... \n",
"861 Giles, Mr. Frederick Edward male 21.0 1 \n",
"862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n",
"863 Sage, Miss. Dorothy Edith \"Dolly\" female NaN 8 \n",
"864 Gill, Mr. John William male 24.0 0 \n",
"865 Bystrom, Mrs. (Karolina) female 42.0 0 \n",
"866 Duran y More, Miss. Asuncion female 27.0 1 \n",
"867 Roebling, Mr. Washington Augustus II male 31.0 0 \n",
"868 van Melkebeke, Mr. Philemon male NaN 0 \n",
"869 Johnson, Master. Harold Theodor male 4.0 1 \n",
"870 Balkic, Mr. Cerin male 26.0 0 \n",
"871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n",
"872 Carlsson, Mr. Frans Olof male 33.0 0 \n",
"873 Vander Cruyssen, Mr. Victor male 47.0 0 \n",
"874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n",
"875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n",
"876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n",
"877 Petroff, Mr. Nedelio male 19.0 0 \n",
"878 Laleff, Mr. Kristo male NaN 0 \n",
"879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n",
"880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n",
"881 Markun, Mr. Johann male 33.0 0 \n",
"882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n",
"883 Banfield, Mr. Frederick James male 28.0 0 \n",
"884 Sutehall, Mr. Henry Jr male 25.0 0 \n",
"885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n",
"886 Montvila, Rev. Juozas male 27.0 0 \n",
"887 Graham, Miss. Margaret Edith female 19.0 0 \n",
"888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n",
"889 Behr, Mr. Karl Howell male 26.0 0 \n",
"890 Dooley, Mr. Patrick male 32.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S \n",
"5 0 330877 8.4583 NaN Q \n",
"6 0 17463 51.8625 E46 S \n",
"7 1 349909 21.0750 NaN S \n",
"8 2 347742 11.1333 NaN S \n",
"9 0 237736 30.0708 NaN C \n",
"10 1 PP 9549 16.7000 G6 S \n",
"11 0 113783 26.5500 C103 S \n",
"12 0 A/5. 2151 8.0500 NaN S \n",
"13 5 347082 31.2750 NaN S \n",
"14 0 350406 7.8542 NaN S \n",
"15 0 248706 16.0000 NaN S \n",
"16 1 382652 29.1250 NaN Q \n",
"17 0 244373 13.0000 NaN S \n",
"18 0 345763 18.0000 NaN S \n",
"19 0 2649 7.2250 NaN C \n",
"20 0 239865 26.0000 NaN S \n",
"21 0 248698 13.0000 D56 S \n",
"22 0 330923 8.0292 NaN Q \n",
"23 0 113788 35.5000 A6 S \n",
"24 1 349909 21.0750 NaN S \n",
"25 5 347077 31.3875 NaN S \n",
"26 0 2631 7.2250 NaN C \n",
"27 2 19950 263.0000 C23 C25 C27 S \n",
"28 0 330959 7.8792 NaN Q \n",
"29 0 349216 7.8958 NaN S \n",
".. ... ... ... ... ... \n",
"861 0 28134 11.5000 NaN S \n",
"862 0 17466 25.9292 D17 S \n",
"863 2 CA. 2343 69.5500 NaN S \n",
"864 0 233866 13.0000 NaN S \n",
"865 0 236852 13.0000 NaN S \n",
"866 0 SC/PARIS 2149 13.8583 NaN C \n",
"867 0 PC 17590 50.4958 A24 S \n",
"868 0 345777 9.5000 NaN S \n",
"869 1 347742 11.1333 NaN S \n",
"870 0 349248 7.8958 NaN S \n",
"871 1 11751 52.5542 D35 S \n",
"872 0 695 5.0000 B51 B53 B55 S \n",
"873 0 345765 9.0000 NaN S \n",
"874 0 P/PP 3381 24.0000 NaN C \n",
"875 0 2667 7.2250 NaN C \n",
"876 0 7534 9.8458 NaN S \n",
"877 0 349212 7.8958 NaN S \n",
"878 0 349217 7.8958 NaN S \n",
"879 1 11767 83.1583 C50 C \n",
"880 1 230433 26.0000 NaN S \n",
"881 0 349257 7.8958 NaN S \n",
"882 0 7552 10.5167 NaN S \n",
"883 0 C.A./SOTON 34068 10.5000 NaN S \n",
"884 0 SOTON/OQ 392076 7.0500 NaN S \n",
"885 5 382652 29.1250 NaN Q \n",
"886 0 211536 13.0000 NaN S \n",
"887 0 112053 30.0000 B42 S \n",
"888 2 W./C. 6607 23.4500 NaN S \n",
"889 0 111369 30.0000 C148 C \n",
"890 0 370376 7.7500 NaN Q \n",
"\n",
"[891 rows x 12 columns]\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "Kmx9r8wf5TiF",
"colab_type": "code",
"outputId": "ff1bb53c-7845-4e5e-c183-7ddd0ae2056e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 2223
}
},
"cell_type": "code",
"source": [
"\n",
"\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn import preprocessing\n",
"from sklearn.preprocessing import Imputer\n",
"from sklearn import preprocessing\n",
"import pandas as pd\n",
"data1 = train1.copy(deep = True)\n",
"train1['Age'].fillna(train1['Age'].median(), inplace = True)\n",
"train1['Embarked'].fillna(train1['Embarked'].mode()[0], inplace = True)\n",
"train1['Fare'].fillna(train1['Fare'].median(), inplace = True)\n",
"\n",
"from keras.utils import np_utils\n",
"from sklearn.preprocessing import LabelEncoder\n",
"import numpy as np\n",
"from numpy import argmax\n",
"data1 = train1.copy(deep = True)\n",
"label = LabelEncoder() \n",
"train1['Sex_Code'] = label.fit_transform(train1['Sex'])\n",
"train1['Ticket_Code'] = label.fit_transform(train1['Ticket'])\n",
"train1['Name_Code'] = label.fit_transform(train1['Name'])\n",
"\n",
"\n",
"\n",
"train = pandas.get_dummies(train1, columns=['Embarked'])\n",
"k=train.drop(columns=['Name','Ticket','Sex','Cabin'],axis=1)\n",
"print(k)\n",
"\n",
"X = np.asarray(k.drop(['Survived'],axis=1))\n",
"\n",
"yhat = numpy.asarray(k['Survived'])\n",
"# Generate random indices for creating a random validation set with 20% of the labelled data\n",
"validx = (numpy.random.uniform(size=len(X)) <= 0.2)\n",
"\n",
"# Create training set (80% of the labelled data)\n",
"X_trn = X[~validx]\n",
"y_trn = yhat[~validx]\n",
"\n",
"# Create validation set (20% of the labelled data)\n",
"X_val = X[validx]\n",
"y_val = yhat[validx]\n",
"test1['Age'].fillna(test1['Age'].median(), inplace = True)\n",
"test1['Embarked'].fillna(test1['Embarked'].mode()[0], inplace = True)\n",
"test1['Fare'].fillna(test1['Fare'].median(), inplace = True)\n",
"\n",
"test1['Sex_Code'] = label.fit_transform(test1['Sex'])\n",
"test1['Ticket_Code'] = label.fit_transform(test1['Ticket'])\n",
"test1['Name_Code'] = label.fit_transform(test1['Name'])\n",
"\n",
"test = pandas.get_dummies(test1, columns=['Embarked'])\n",
"v=test.drop(columns=['Name','Ticket','Sex','Cabin'],axis=1)\n",
"\n",
"X_tes = numpy.asarray(v)\n",
"\n"
],
"execution_count": 71,
"outputs": [
{
"output_type": "stream",
"text": [
" PassengerId Survived Pclass Age SibSp Parch Fare Sex_Code \\\n",
"0 1 0 3 22.0 1 0 7.2500 1 \n",
"1 2 1 1 38.0 1 0 71.2833 0 \n",
"2 3 1 3 26.0 0 0 7.9250 0 \n",
"3 4 1 1 35.0 1 0 53.1000 0 \n",
"4 5 0 3 35.0 0 0 8.0500 1 \n",
"5 6 0 3 28.0 0 0 8.4583 1 \n",
"6 7 0 1 54.0 0 0 51.8625 1 \n",
"7 8 0 3 2.0 3 1 21.0750 1 \n",
"8 9 1 3 27.0 0 2 11.1333 0 \n",
"9 10 1 2 14.0 1 0 30.0708 0 \n",
"10 11 1 3 4.0 1 1 16.7000 0 \n",
"11 12 1 1 58.0 0 0 26.5500 0 \n",
"12 13 0 3 20.0 0 0 8.0500 1 \n",
"13 14 0 3 39.0 1 5 31.2750 1 \n",
"14 15 0 3 14.0 0 0 7.8542 0 \n",
"15 16 1 2 55.0 0 0 16.0000 0 \n",
"16 17 0 3 2.0 4 1 29.1250 1 \n",
"17 18 1 2 28.0 0 0 13.0000 1 \n",
"18 19 0 3 31.0 1 0 18.0000 0 \n",
"19 20 1 3 28.0 0 0 7.2250 0 \n",
"20 21 0 2 35.0 0 0 26.0000 1 \n",
"21 22 1 2 34.0 0 0 13.0000 1 \n",
"22 23 1 3 15.0 0 0 8.0292 0 \n",
"23 24 1 1 28.0 0 0 35.5000 1 \n",
"24 25 0 3 8.0 3 1 21.0750 0 \n",
"25 26 1 3 38.0 1 5 31.3875 0 \n",
"26 27 0 3 28.0 0 0 7.2250 1 \n",
"27 28 0 1 19.0 3 2 263.0000 1 \n",
"28 29 1 3 28.0 0 0 7.8792 0 \n",
"29 30 0 3 28.0 0 0 7.8958 1 \n",
".. ... ... ... ... ... ... ... ... \n",
"861 862 0 2 21.0 1 0 11.5000 1 \n",
"862 863 1 1 48.0 0 0 25.9292 0 \n",
"863 864 0 3 28.0 8 2 69.5500 0 \n",
"864 865 0 2 24.0 0 0 13.0000 1 \n",
"865 866 1 2 42.0 0 0 13.0000 0 \n",
"866 867 1 2 27.0 1 0 13.8583 0 \n",
"867 868 0 1 31.0 0 0 50.4958 1 \n",
"868 869 0 3 28.0 0 0 9.5000 1 \n",
"869 870 1 3 4.0 1 1 11.1333 1 \n",
"870 871 0 3 26.0 0 0 7.8958 1 \n",
"871 872 1 1 47.0 1 1 52.5542 0 \n",
"872 873 0 1 33.0 0 0 5.0000 1 \n",
"873 874 0 3 47.0 0 0 9.0000 1 \n",
"874 875 1 2 28.0 1 0 24.0000 0 \n",
"875 876 1 3 15.0 0 0 7.2250 0 \n",
"876 877 0 3 20.0 0 0 9.8458 1 \n",
"877 878 0 3 19.0 0 0 7.8958 1 \n",
"878 879 0 3 28.0 0 0 7.8958 1 \n",
"879 880 1 1 56.0 0 1 83.1583 0 \n",
"880 881 1 2 25.0 0 1 26.0000 0 \n",
"881 882 0 3 33.0 0 0 7.8958 1 \n",
"882 883 0 3 22.0 0 0 10.5167 0 \n",
"883 884 0 2 28.0 0 0 10.5000 1 \n",
"884 885 0 3 25.0 0 0 7.0500 1 \n",
"885 886 0 3 39.0 0 5 29.1250 0 \n",
"886 887 0 2 27.0 0 0 13.0000 1 \n",
"887 888 1 1 19.0 0 0 30.0000 0 \n",
"888 889 0 3 28.0 1 2 23.4500 0 \n",
"889 890 1 1 26.0 0 0 30.0000 1 \n",
"890 891 0 3 32.0 0 0 7.7500 1 \n",
"\n",
" Ticket_Code Name_Code Embarked_C Embarked_Q Embarked_S \n",
"0 523 108 0 0 1 \n",
"1 596 190 1 0 0 \n",
"2 669 353 0 0 1 \n",
"3 49 272 0 0 1 \n",
"4 472 15 0 0 1 \n",
"5 275 554 0 1 0 \n",
"6 85 515 0 0 1 \n",
"7 395 624 0 0 1 \n",
"8 344 412 0 0 1 \n",
"9 132 576 1 0 0 \n",
"10 616 727 0 0 1 \n",
"11 38 95 0 0 1 \n",
"12 535 729 0 0 1 \n",
"13 333 28 0 0 1 \n",
"14 413 840 0 0 1 \n",
"15 153 359 0 0 1 \n",
"16 480 682 0 1 0 \n",
"17 151 867 0 0 1 \n",
"18 301 839 0 0 1 \n",
"19 184 512 1 0 0 \n",
"20 139 273 0 0 1 \n",
"21 152 80 0 0 1 \n",
"22 278 523 0 1 0 \n",
"23 42 765 0 0 1 \n",
"24 395 626 0 0 1 \n",
"25 329 44 0 0 1 \n",
"26 179 240 1 0 0 \n",
"27 95 260 0 0 1 \n",
"28 283 605 0 1 0 \n",
"29 362 813 0 0 1 \n",
".. ... ... ... ... ... \n",
"861 221 283 0 0 1 \n",
"862 88 797 0 0 1 \n",
"863 568 718 0 0 1 \n",
"864 120 285 0 0 1 \n",
"865 126 122 0 0 1 \n",
"866 632 230 1 0 0 \n",
"867 590 696 0 0 1 \n",
"868 309 890 0 0 1 \n",
"869 344 407 0 0 1 \n",
"870 387 59 0 0 1 \n",
"871 54 79 0 0 1 \n",
"872 502 139 0 0 1 \n",
"873 303 836 0 0 1 \n",
"874 575 4 1 0 0 \n",
"875 195 571 1 0 0 \n",
"876 504 311 0 0 1 \n",
"877 358 657 0 0 1 \n",
"878 363 451 0 0 1 \n",
"879 59 668 1 0 0 \n",
"880 115 739 0 0 1 \n",
"881 394 510 0 0 1 \n",
"882 508 193 0 0 1 \n",
"883 565 61 0 0 1 \n",
"884 650 793 0 0 1 \n",
"885 480 684 0 1 0 \n",
"886 101 548 0 0 1 \n",
"887 14 303 0 0 1 \n",
"888 675 413 0 0 1 \n",
"889 8 81 1 0 0 \n",
"890 466 220 0 1 0 \n",
"\n",
"[891 rows x 13 columns]\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "_JSbt-0Y88m1",
"colab_type": "code",
"outputId": "e126bc94-99cd-4a9b-d5a7-a691ee27e61b",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 469
}
},
"cell_type": "code",
"source": [
"models = []\n",
"models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))\n",
"models.append(('LDA', LinearDiscriminantAnalysis()))\n",
"models.append(('KNN', KNeighborsClassifier()))\n",
"models.append(('CART', DecisionTreeClassifier()))\n",
"models.append(('NB', GaussianNB()))\n",
"models.append(('SVM', SVC(gamma='auto')))\n",
"# evaluate each model in turn\n",
"results = []\n",
"names = []\n",
"scoring = 'accuracy'\n",
"seed=7\n",
"for name, model in models:\n",
"\tkfold = model_selection.KFold(n_splits=10, random_state=seed)\n",
"\tcv_results = model_selection.cross_val_score(model, X_trn, y_trn, cv=kfold, scoring=scoring)\n",
"\tresults.append(cv_results)\n",
"\tnames.append(name)\n",
"\tmsg = \"%s: %f (%f)\" % (name, cv_results.mean(), cv_results.std())\n",
"\tprint(msg)\n",
"\n",
"\n",
"\n"
],
"execution_count": 72,
"outputs": [
{
"output_type": "stream",
"text": [
"LR: 0.799457 (0.040835)\n",
"LDA: 0.803722 (0.049559)\n",
"KNN: 0.611771 (0.057707)\n",
"CART: 0.735594 (0.053920)\n",
"NB: 0.789557 (0.027672)\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"SVM: 0.625755 (0.068083)\n"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "fKOAoC4TbP_k",
"colab_type": "code",
"outputId": "3bac1994-fc9a-435c-b73a-b82033a77251",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 260
}
},
"cell_type": "code",
"source": [
"Lnn = LinearDiscriminantAnalysis()\n",
"Lnn.fit(X_trn, y_trn)\n",
"ynew = Lnn.predict(X_tes)\n",
"print(ynew)"
],
"execution_count": 74,
"outputs": [
{
"output_type": "stream",
"text": [
"[0 1 0 0 1 0 1 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 1\n",
" 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 1 1 0 0 1 1 0 1 0\n",
" 1 1 0 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0\n",
" 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0\n",
" 0 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1\n",
" 0 1 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 1 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 0 1 0\n",
" 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 1\n",
" 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0\n",
" 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0\n",
" 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 0 1 0 1 0 0 1 0 1 1 0 1 1 0 1 1 0\n",
" 0 1 0 0 1 1 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 1 1 0 0 0\n",
" 0 1 1 1 1 1 0 1 0 0 0]\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/discriminant_analysis.py:388: UserWarning: Variables are collinear.\n",
" warnings.warn(\"Variables are collinear.\")\n"
],
"name": "stderr"
}
]
},
{
"metadata": {
"id": "I4p5Vc9vy3i-",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
"sub = pd.DataFrame({\"PassengerId\": test1['PassengerId'],\"Survived\":ynew})\n",
"\n",
"# Create the submission csv file from the dataframe\n",
"sub.to_csv(\"sub.csv\",index=False)\n"
],
"execution_count": 0,
"outputs": []
},
{
"metadata": {
"id": "MbXt8tD6jfIa",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 52
},
"outputId": "4b1c887e-424a-4fff-9088-9df4d39d12a8"
},
"cell_type": "code",
"source": [
"!kaggle competitions submit -c titanic -f sub.csv -m \"submission_1\"\n"
],
"execution_count": 77,
"outputs": [
{
"output_type": "stream",
"text": [
"100% 2.77k/2.77k [00:03<00:00, 723B/s]\n",
"Successfully submitted to Titanic: Machine Learning from Disaster"
],
"name": "stdout"
}
]
},
{
"metadata": {
"id": "2ww4UvF7kgLZ",
"colab_type": "code",
"colab": {}
},
"cell_type": "code",
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}