Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for validation of XML submission files #2437

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ DB_PASSWORD=secret
# to some value other than 'sync'.
#REMOTE_WORKERS=false

# Whether or not submission XML files are validated before being processed.
# Enabling this setting will trigger a scan through XML files uploaded in
# each submission in order to preemptively reject malformed files.
#VALIDATE_XML_SUBMISSIONS=true

# Should we show the most recent submission time for a project or subproject?
# Disabling this feature can improve rendering performance of index.php
# for projects with lots of subproject builds.
Expand Down
60 changes: 29 additions & 31 deletions app/Console/Commands/ValidateXml.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use App\Utils\SubmissionUtils;
use Illuminate\Console\Command;
use DOMDocument;
use App\Exceptions\CDashXMLValidationException;

class ValidateXml extends Command
{
Expand All @@ -26,66 +26,64 @@ public function handle(): int
{
// parse all input files from command line
$xml_files_args = $this->argument('xml_file');
$schemas_dir = base_path()."/app/Validators/Schemas";

// process each of the input files
$has_errors = false;
$has_skipped = false;
foreach ($xml_files_args as $input_xml_file) {
// determine the file type by peeking at its contents
if (!file_exists($input_xml_file)) {
$this->error("ERROR: Input file does not exist: '{$input_xml_file}'");
$has_skipped = true;
continue;
}
$xml_file_handle = fopen($input_xml_file, 'r');
if ($xml_file_handle === false) {
$this->error("ERROR: Could not open file: '{$input_xml_file}'");
$has_errors = true;
$has_skipped = true;
continue;
}
$xml_type = SubmissionUtils::get_xml_type($xml_file_handle)['xml_type'];
fclose($xml_file_handle);

// verify we identified a valid xml type
if ($xml_type === '') {
$this->error("ERROR: Could not determine submission"
." file type for: '{$input_xml_file}'");
try {
$xml_schema = SubmissionUtils::get_xml_type($xml_file_handle, $input_xml_file)['xml_schema'];
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
$this->error($error);
}
$has_errors = true;
continue;
} finally {
fclose($xml_file_handle);
}

// verify we can find a corresponding schema file
$schema_file = "{$schemas_dir}/{$xml_type}.xsd";
if (!file_exists($schema_file)) {
$this->error("ERROR: Could not find schema file '{$schema_file}'"
." corresonding to input: '{$input_xml_file}'");
$has_errors = true;
if (!isset($xml_schema)) {
$this->warn("WARNING: Skipped input file '{$input_xml_file}' as validation"
." of this file format is currently not supported.");
$has_skipped = true;
continue;
}

// let us control the failures so we can continue
// parsing all the files instead of crashing midway
libxml_use_internal_errors(true);

// load the input file to be validated
$xml = new DOMDocument();
$xml->load($input_xml_file, LIBXML_PARSEHUGE);

// run the validator and collect errors if there are any
if (!$xml->schemaValidate($schema_file)) {
$errors = libxml_get_errors();
foreach ($errors as $error) {
if ($error->level > 2) {
$this->error("ERROR: {$error->message} in {$error->file},"
." line: {$error->line}, column: {$error->column}");
}
try {
SubmissionUtils::validate_xml($input_xml_file, $xml_schema);
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
$this->error($error);
}
libxml_clear_errors();
$has_errors = true;
continue;
}

$this->line("Validated file: {$input_xml_file}.");
}

// finally, report the results
if ($has_errors) {
$this->error("FAILED: Some XML file checks did not pass!");
return Command::FAILURE;
} elseif ($has_skipped) {
$this->error("FAILED: Some XML file checks were skipped!");
return Command::FAILURE;
} else {
$this->line("SUCCESS: All XML file checks passed.");
return Command::SUCCESS;
Expand Down
31 changes: 31 additions & 0 deletions app/Exceptions/CDashXMLValidationException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

namespace App\Exceptions;

use Exception;
use RuntimeException;

class CDashXMLValidationException extends RuntimeException
{
/**
* @param array<int,string> $message
*/
public function __construct(array $message = [], int $code = 0, Exception $previous = null)
{
$encoded_msg = json_encode($message);
$encoded_msg = $encoded_msg===false ? "" : $encoded_msg;
parent::__construct($encoded_msg, $code, $previous);
}

/**
* @return array<int,string>
*/
public function getDecodedMessage(bool $assoc = false): array
{
$decoded_msg = json_decode($this->getMessage(), $assoc);
if (!isset($decoded_msg) || is_bool($decoded_msg)) {
$decoded_msg = ["An XML validation error has occurred!"];
}
return $decoded_msg;
}
}
31 changes: 31 additions & 0 deletions app/Http/Controllers/SubmissionController.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
use App\Models\Site;
use App\Utils\AuthTokenUtil;
use App\Utils\UnparsedSubmissionProcessor;
use App\Utils\SubmissionUtils;
use App\Exceptions\CDashXMLValidationException;
use CDash\Model\Build;
use CDash\Model\PendingSubmissions;
use CDash\Model\Project;
Expand Down Expand Up @@ -128,6 +130,35 @@ private function submitProcess(): Response
} elseif (intval($this->project->Id) < 1) {
abort(Response::HTTP_NOT_FOUND, 'The requested project does not exist.');
}
if ((bool) config('cdash.validate_xml_submissions') === true) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about running all submissions through the validator? I think it would be useful to log the result of submission validation for all submissions, and instead use the config setting to control whether mis-formatted submissions are entirely rejected or not.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that idea. It seems like it would be useful information about malformed submissions that aren't "rejected"

// Figure out what type of XML file this is.
try {
$filename = "inbox/".$filename;
$xml_info = SubmissionUtils::get_xml_type(fopen(Storage::path($filename), 'r'), $filename);
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
Log::error($error);
}
abort(Response::HTTP_NOT_FOUND, 'Unable to determine the Type of the submission file.');
}
$filehandle = $xml_info['file_handle'];
$handler_ref = $xml_info['xml_handler'];
$file = $xml_info['xml_type'];
$schema_file = $xml_info['xml_schema'];

// If validation is enabled and if this file has a corresponding schema, validate it
if (isset($schema_file)) {
try {
SubmissionUtils::validate_xml(storage_path("app/".$filename), $schema_file);
} catch (CDashXMLValidationException $e) {
foreach ($e->getDecodedMessage() as $error) {
Log::error("Validating $filename: ".$error);
}
abort(400, "Xml validation failed: rejected file $filename");
}
}
}


// Check if CTest provided us enough info to assign a buildid.
$pendingSubmissions = new PendingSubmissions();
Expand Down
2 changes: 1 addition & 1 deletion app/Jobs/ProcessSubmission.php
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ private function doSubmit($filename, $projectid, $buildid = null, $expected_md5
}

// Parse the XML file
$handler = ctest_parse($filehandle, $projectid, $expected_md5, $buildid);
$handler = ctest_parse($filehandle, $filename, $projectid, $expected_md5, $buildid);
fclose($filehandle);
unset($filehandle);

Expand Down
63 changes: 61 additions & 2 deletions app/Utils/SubmissionUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,22 @@
use CDash\Database;
use CDash\Model\Build;
use CDash\Model\BuildUpdate;
use DOMDocument;
use App\Exceptions\CDashXMLValidationException;

class SubmissionUtils
{

/**
* Figure out what type of XML file this is
* @return array<string,mixed>
* @throws CDashXMLValidationException
*/
public static function get_xml_type(mixed $filehandle): array
public static function get_xml_type(mixed $filehandle, string $xml_file): array
{
$file = '';
$handler = null;
$schemas_dir = base_path()."/app/Validators/Schemas";
$schema_file = null;
// read file contents until we recognize its elements
while ($file === '' && !feof($filehandle)) {
$content = fread($filehandle, 8192);
Expand All @@ -30,56 +34,111 @@ public static function get_xml_type(mixed $filehandle): array
// Should be first otherwise confused with Build
$handler = \UpdateHandler::class;
$file = 'Update';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Build')) {
$handler = \BuildHandler::class;
$file = 'Build';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Configure')) {
$handler = \ConfigureHandler::class;
$file = 'Configure';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Testing')) {
$handler = \TestingHandler::class;
$file = 'Test';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<CoverageLog')) {
// Should be before coverage
$handler = \CoverageLogHandler::class;
$file = 'CoverageLog';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Coverage')) {
$handler = \CoverageHandler::class;
$file = 'Coverage';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<report')) {
$handler = \CoverageJUnitHandler::class;
$file = 'CoverageJUnit';
} elseif (str_contains($content, '<Notes')) {
$handler = \NoteHandler::class;
$file = 'Notes';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<DynamicAnalysis')) {
$handler = \DynamicAnalysisHandler::class;
$file = 'DynamicAnalysis';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Project')) {
$handler = \ProjectHandler::class;
$file = 'Project';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<Upload')) {
$handler = \UploadHandler::class;
$file = 'Upload';
$schema_file = "{$schemas_dir}/{$file}.xsd";
} elseif (str_contains($content, '<testsuite')) {
$handler = \TestingJUnitHandler::class;
$file = 'TestJUnit';
} elseif (str_contains($content, '<Done')) {
$handler = \DoneHandler::class;
$file = 'Done';
$schema_file = "{$schemas_dir}/{$file}.xsd";
}
}

// restore the file descriptor to beginning of file
rewind($filehandle);

// perform minimal error checking as a sanity check
if ($file === '') {
throw new CDashXMLValidationException(["ERROR: Could not determine submission"
." file type for: '{$xml_file}'"]);
}
if (isset($schema_file) && !file_exists($schema_file)) {
throw new CDashXMLValidationException(["ERROR: Could not find schema file '{$schema_file}'"
." to validate input file: '{$xml_file}'"]);
}

return [
'file_handle' => $filehandle,
'xml_handler' => $handler,
'xml_type' => $file,
'xml_schema' => $schema_file,
];
}

/**
* Validate the given XML file based on its type
* @throws CDashXMLValidationException
*/
public static function validate_xml(string $xml_file, string $schema_file): void
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about moving this to app/cdash/xml_handlers/abstract_xml_handler.php? That way, the $schema_file argument could be stored as a class member and users of this method won't need to know about the exact details of how the validation actually occurs.

{
$errors = [];

// let us control the failures so we can continue
// parsing files instead of crashing midway
libxml_use_internal_errors(true);

// load the input file to be validated
$xml = new DOMDocument();
$xml->load($xml_file, LIBXML_PARSEHUGE);

// run the validator and collect errors if there are any
if (!$xml->schemaValidate($schema_file)) {
$validation_errors = libxml_get_errors();
foreach ($validation_errors as $error) {
if ($error->level === LIBXML_ERR_ERROR || $error->level === LIBXML_ERR_FATAL) {
williamjallen marked this conversation as resolved.
Show resolved Hide resolved
$errors[] = "ERROR: {$error->message} in {$error->file},"
." line: {$error->line}, column: {$error->column}";
}
}
libxml_clear_errors();
}

if (count($errors) !== 0) {
throw new CDashXMLValidationException($errors);
}
}

/** Add a new build */
public static function add_build(Build $build)
{
Expand Down
7 changes: 3 additions & 4 deletions app/cdash/include/ctestparser.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

use CDash\Database;
use App\Models\BuildFile;
use App\Utils\SubmissionUtils;
use CDash\Model\Project;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use App\Utils\SubmissionUtils;

class CDashParseException extends RuntimeException
{
Expand Down Expand Up @@ -155,7 +155,7 @@ function parse_put_submission($filehandler, $projectid, $expected_md5, int|null
}

/** Main function to parse the incoming xml from ctest */
function ctest_parse($filehandle, $projectid, $expected_md5 = '', int|null $buildid=null): AbstractSubmissionHandler|false
function ctest_parse($filehandle, string $filename, $projectid, $expected_md5 = '', int|null $buildid=null): AbstractSubmissionHandler|false
{
// Check if this is a new style PUT submission.
try {
Expand All @@ -178,11 +178,10 @@ function ctest_parse($filehandle, $projectid, $expected_md5 = '', int|null $buil
$Project->Id = $projectid;

// Figure out what type of XML file this is.
$xml_info = SubmissionUtils::get_xml_type($filehandle);
$xml_info = SubmissionUtils::get_xml_type($filehandle, $filename);
$filehandle = $xml_info['file_handle'];
$handler_ref = $xml_info['xml_handler'];
$file = $xml_info['xml_type'];

$handler = isset($handler_ref) ? new $handler_ref($Project) : null;

rewind($filehandle);
Expand Down
5 changes: 5 additions & 0 deletions app/cdash/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -602,5 +602,10 @@ set_tests_properties(misassignedconfigure PROPERTIES DEPENDS /Feature/SubProject
add_laravel_test(/Feature/RemoteWorkers)
set_tests_properties(/Feature/RemoteWorkers PROPERTIES DEPENDS install_3)

add_laravel_test(/Unit/app/Console/Command/ValidateXmlCommandTest)

add_php_test(submissionvalidation)
set_tests_properties(submissionvalidation PROPERTIES DEPENDS createpublicdashboard)

add_subdirectory(ctest)
add_subdirectory(autoremovebuilds)
Loading