Add a function to fill the rest of the ScannedFile model aboutcode-org#90

* Change the models to add defaults to BooleanFields
* Change the migrations due to changes in the models

Signed-off-by: Ranvir Singh ranvir.singh1114@gmail.com
singh1114 · Dec 3, 2017 · 2ff6a0e · 2ff6a0e
1 parent 8ec8a9c
commit 2ff6a0e
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 14 deletions.
diff --git a/scanapp/migrations/0001_initial.py b/scanapp/migrations/0001_initial.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Generated by Django 1.11.1 on 2017-08-23 07:54
+# Generated by Django 1.11.1 on 2017-08-24 09:46
 from __future__ import unicode_literals
 
 from django.conf import settings
@@ -106,19 +106,19 @@ class Migration(migrations.Migration):
                 ('base_name', models.CharField(help_text='Base name of entity without extension', max_length=400)),
                 ('extension', models.CharField(blank=True, help_text='Extension of the entity being scanned', max_length=400, null=True)),
                 ('date', models.DateTimeField(blank=True, help_text='Date of entity being created', null=True)),
-                ('size', models.IntegerField(help_text='Size of the entity being scanned')),
+                ('size', models.IntegerField(blank=True, help_text='Size of the entity being scanned', null=True)),
                 ('sha1', models.CharField(blank=True, help_text='SHA1 Checksums of the file', max_length=400, null=True)),
                 ('md5', models.CharField(blank=True, help_text='MD5 checksums of the file', max_length=400, null=True)),
                 ('files_count', models.IntegerField(blank=True, help_text='number of files present if a directory', null=True)),
                 ('mime_type', models.CharField(blank=True, help_text='mime type of entity being scanned', max_length=400, null=True)),
                 ('file_type', models.CharField(blank=True, help_text='file type of entity being scanned. null if the entity is a directory', max_length=400, null=True)),
                 ('programming_language', models.CharField(blank=True, help_text='programming language used in the entity', max_length=400, null=True)),
-                ('is_binary', models.BooleanField(help_text='Whether the entity being scanned is binary or not')),
-                ('is_text', models.BooleanField(help_text='Whether the entity being scanned has text or not')),
-                ('is_archive', models.BooleanField(help_text='Whether the entity being scanned is archive or not')),
-                ('is_media', models.BooleanField(help_text='Whether the entity being scanned is media file or not')),
-                ('is_source', models.BooleanField(help_text='Whether the entity being scanned is source or not')),
-                ('is_script', models.BooleanField(help_text='Whether the entity being scanned is a script file or not')),
+                ('is_binary', models.BooleanField(default=False, help_text='Whether the entity being scanned is binary or not')),
+                ('is_text', models.BooleanField(default=False, help_text='Whether the entity being scanned has text or not')),
+                ('is_archive', models.BooleanField(default=False, help_text='Whether the entity being scanned is archive or not')),
+                ('is_media', models.BooleanField(default=False, help_text='Whether the entity being scanned is media file or not')),
+                ('is_source', models.BooleanField(default=False, help_text='Whether the entity being scanned is source or not')),
+                ('is_script', models.BooleanField(default=False, help_text='Whether the entity being scanned is a script file or not')),
                 ('scan', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='scanapp.Scan')),
             ],
         ),

diff --git a/scanapp/models.py b/scanapp/models.py
@@ -121,7 +121,11 @@ def __str__(self):
         null=True,
         blank=True
     )
-    size = models.IntegerField(help_text='Size of the entity being scanned')
+    size = models.IntegerField(
+        help_text='Size of the entity being scanned',
+        null=True,
+        blank=True
+    )
     sha1 = models.CharField(
         max_length=400,
         help_text='SHA1 Checksums of the file',
@@ -159,21 +163,27 @@ def __str__(self):
     )
     is_binary = models.BooleanField(
         help_text='Whether the entity being scanned is binary or not',
+        default=False
     )
     is_text = models.BooleanField(
         help_text='Whether the entity being scanned has text or not',
+        default=False
     )
     is_archive = models.BooleanField(
         help_text='Whether the entity being scanned is archive or not',
+        default=False
     )
     is_media = models.BooleanField(
         help_text='Whether the entity being scanned is media file or not',
+        default=False
     )
     is_source = models.BooleanField(
         help_text='Whether the entity being scanned is source or not',
+        default=False
     )
     is_script = models.BooleanField(
         help_text='Whether the entity being scanned is a script file or not',
+        default=False
     )
 
 

diff --git a/scanapp/tasks.py b/scanapp/tasks.py
@@ -47,8 +47,8 @@
 @app.task
 def scan_code_async(url, scan_id, path, file_name):
     """
-    Create and save a file at `path` present at `url` using `scan_id` and bare `path` and
-    `file_name` and apply the scan.
+    Create and save a file at `path` present at `url` using `scan_id`, bare `path`, `file_name`
+    and apply the scan.
     """
     r = requests.get(url)
     path = path + file_name
@@ -81,14 +81,14 @@ def apply_scan_async(path, scan_id):
     # FIXME improve error checking when calling scan in subprocess.
     scan_result = subprocess.check_output(['scancode', path])
     json_data = json.loads(scan_result)
-    save_results_to_db.delay(scan_id, json_data)
+    save_results_to_db.delay(scan_id, json_data, path)
 
 
 @app.task
-def save_results_to_db(scan_id, json_data):
+def save_results_to_db(scan_id, json_data, path):
     """
     Fill database using `json_data` for given `scan_id`
-    and add `end_scan_time` to true.
+    and call `fill_rest_scanned_file_model` with the `path`
     """
     scan = Scan.objects.get(pk=scan_id)
     scan = fill_unfilled_scan_model(
@@ -167,6 +167,38 @@ def save_results_to_db(scan_id, json_data):
             )
             scan_error.save()
 
+    fill_rest_scanned_file_model.delay(path, scan_id)
+
+
+@app.task
+def fill_rest_scanned_file_model(path, scan_id):
+    """
+    Fill the rest ScannedFile model by using another subprocess call
+    """
+    scanned_file_results = subprocess.check_output(['scancode', '--info', '-f', 'json-pp', path])
+    json_scanned_file_results = json.loads(scanned_file_results)
+    for file in json_scanned_file_results['files']:
+        scanned_file = ScannedFile.objects.get(path=file['path'])
+        scanned_file.type = file['type']
+        scanned_file.name = file['name']
+        scanned_file.base_name = file['base_name']
+        scanned_file.extension = file['extension']
+        scanned_file.date = file['date']
+        scanned_file.size = file['size']
+        scanned_file.sha1 = file['sha1']
+        scanned_file.md5 = file['md5']
+        scanned_file.files_count = file['files_count']
+        scanned_file.mime_type = file['mime_type']
+        scanned_file.file_type = file['file_type']
+        scanned_file.programming_language = file['programming_language']
+        scanned_file.is_binary = file['is_binary']
+        scanned_file.is_text = file['is_text']
+        scanned_file.is_archive = file['is_archive']
+        scanned_file.is_media = file['is_media']
+        scanned_file.is_source = file['is_source']
+        scanned_file.is_script = file['is_script']
+        scanned_file.save()
+    scan = Scan.objects.get(pk=scan_id)
     scan.scan_end_time = timezone.now()
     scan.save()