Skip to content

Commit

Permalink
Added option to optimize primitive schemas before exporting them to a…
Browse files Browse the repository at this point in the history
… file (#19)

* Added option to optimize primitive schemas before export

* Revert adding option to optimizePrimitiveSchemas

* Added PrimitiveSchemaOptimizer

* Added PrimitiveSchemaOptimizer to SubSchemaMergeCommand

* Allow string schemas

* Updated README with new command

* Fix CS

* Enhance isPrimitive to support string schemas

* Remove transformExportSchemaDefinition from interface

* Remove space

* Two spaces

* Resolve discussions

* Change primitive optimizer logic
  • Loading branch information
bajdzun authored Dec 8, 2021
1 parent 2d3d2fe commit fe1f0d1
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 20 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,14 @@ $merger->merge();
### Merge optimizers
There are optimizers that you can enable for merging schema:
- FullNameOptimizer: removes unneeded namespaces
- FieldOrderOptimizer: the first fields of a record schema will be: type, name, namespace (if present)
- FieldOrderOptimizer: the first fields of a record schema will be: type, name, namespace (if present)
- PrimitiveSchemaOptimizer: reduces a primitive schema to its bare type name, e.g. `{"type": "string"}` becomes `"string"`

How to enable optimizer:

**Console example**
```bash
./vendor/bin/avro-cli --optimizeFullNames --optimizeFieldOrder avro:subschema:merge ./example/schemaTemplates ./example/schema
./vendor/bin/avro-cli --optimizeFullNames --optimizeFieldOrder --optimizePrimitiveSchemas avro:subschema:merge ./example/schemaTemplates ./example/schema
```
**PHP Example**
```php
Expand All @@ -60,6 +61,7 @@ use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistry;
use PhpKafka\PhpAvroSchemaGenerator\Merger\SchemaMerger;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FieldOrderOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FullNameOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;

$registry = (new SchemaRegistry())
->addSchemaTemplateDirectory('./schemaTemplates')
Expand All @@ -68,6 +70,7 @@ $registry = (new SchemaRegistry())
$merger = new SchemaMerger($registry, './schema');
$merger->addOptimizer(new FieldOrderOptimizer());
$merger->addOptimizer(new FullNameOptimizer());
$merger->addOptimizer(new PrimitiveSchemaOptimizer());

$merger->merge();

Expand Down Expand Up @@ -104,6 +107,6 @@ $generator->exportSchemas($schemas);

## Disclaimer
In `v1.3.0` the option `--optimizeSubSchemaNamespaces` was added. It was not working fully
in the `1.x` version and we had some discussions (#13) about it.
in the `1.x` version and we had some discussions ([#13](https://github.com/php-kafka/php-avro-schema-generator/issues/13)) about it.
Ultimately the decision was to adapt this behaviour fully in `v2.0.0` so you might want to
upgrade if you rely on that behaviour.
12 changes: 9 additions & 3 deletions src/Command/SubSchemaMergeCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

namespace PhpKafka\PhpAvroSchemaGenerator\Command;

use http\Exception\RuntimeException;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FieldOrderOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\FullNameOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistry;
use PhpKafka\PhpAvroSchemaGenerator\Merger\SchemaMerger;
use Symfony\Component\Console\Command\Command;
Expand All @@ -21,6 +21,7 @@ class SubSchemaMergeCommand extends Command
protected $optimizerOptionMapping = [
'optimizeFieldOrder' => FieldOrderOptimizer::class,
'optimizeFullNames' => FullNameOptimizer::class,
'optimizePrimitiveSchemas' => PrimitiveSchemaOptimizer::class,
];
protected function configure(): void
{
Expand Down Expand Up @@ -48,6 +49,12 @@ protected function configure(): void
null,
InputOption::VALUE_NONE,
'Remove namespaces if they are enclosed in the same namespace'
)
->addOption(
'optimizePrimitiveSchemas',
null,
InputOption::VALUE_NONE,
'Optimize primitive schemas with using just type as a schema'
);
}

Expand All @@ -59,7 +66,6 @@ public function execute(InputInterface $input, OutputInterface $output): int
$templateDirectoryArg = $input->getArgument('templateDirectory');
/** @var string $outputDirectoryArg */
$outputDirectoryArg = $input->getArgument('outputDirectory');
$optimizeFullNames = (bool)$input->getOption('optimizeFullNames');

$templateDirectory = $this->getPath($templateDirectoryArg);
$outputDirectory = $this->getPath($outputDirectoryArg);
Expand All @@ -71,7 +77,7 @@ public function execute(InputInterface $input, OutputInterface $output): int
$merger = new SchemaMerger($registry, $outputDirectory);

foreach ($this->optimizerOptionMapping as $optionName => $optimizerClass) {
if (true === (bool)$input->getOption($optionName)) {
if (true === (bool) $input->getOption($optionName)) {
$merger->addOptimizer(new $optimizerClass());
}
}
Expand Down
18 changes: 13 additions & 5 deletions src/Merger/SchemaMerger.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use PhpKafka\PhpAvroSchemaGenerator\Avro\Avro;
use PhpKafka\PhpAvroSchemaGenerator\Exception\SchemaMergerException;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\OptimizerInterface;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistryInterface;
use PhpKafka\PhpAvroSchemaGenerator\Schema\SchemaTemplateInterface;

Expand Down Expand Up @@ -125,7 +126,12 @@ public function merge(
$resolvedTemplate = $this->getResolvedSchemaTemplate($rootSchemaTemplate);
foreach ($this->optimizers as $optimizer) {
$resolvedTemplate = $resolvedTemplate->withSchemaDefinition(
$optimizer->optimize($resolvedTemplate->getSchemaDefinition())
$optimizer instanceof PrimitiveSchemaOptimizer ?
$optimizer->optimize(
$resolvedTemplate->getSchemaDefinition(),
$resolvedTemplate->isPrimitive()
) :
$optimizer->optimize($resolvedTemplate->getSchemaDefinition())
);
}
} catch (SchemaMergerException $e) {
Expand Down Expand Up @@ -177,12 +183,14 @@ public function exportSchema(
}

/**
* @param array<string,mixed> $schemaDefinition
* @return array<string,mixed>
* @param mixed $schemaDefinition
* @return mixed
*/
public function transformExportSchemaDefinition(array $schemaDefinition): array
private function transformExportSchemaDefinition($schemaDefinition)
{
unset($schemaDefinition['schema_level']);
if (is_array($schemaDefinition)) {
unset($schemaDefinition['schema_level']);
}

return $schemaDefinition;
}
Expand Down
6 changes: 0 additions & 6 deletions src/Merger/SchemaMergerInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,6 @@ public function merge(): int;
*/
public function exportSchema(SchemaTemplateInterface $rootRootSchemaTemplate): void;

/**
* @param array<string,mixed> $schemaDefinition
* @return array<string,mixed>
*/
public function transformExportSchemaDefinition(array $schemaDefinition): array;

/**
* @param OptimizerInterface $optimizer
*/
Expand Down
39 changes: 39 additions & 0 deletions src/Optimizer/PrimitiveSchemaOptimizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

declare(strict_types=1);

namespace PhpKafka\PhpAvroSchemaGenerator\Optimizer;

class PrimitiveSchemaOptimizer extends AbstractOptimizer implements OptimizerInterface
{
    /**
     * Nesting limit handed to json_decode(). 512 is PHP's own default; it has to be
     * spelled out because the flags argument comes after the depth argument.
     */
    private const JSON_DECODE_DEPTH = 512;

    /**
     * Collapses a primitive schema definition to its bare type,
     * e.g. `{"type": "string"}` becomes `"string"`.
     *
     * @param string $definition  JSON encoded schema definition
     * @param bool   $isPrimitive whether the caller identified the definition as a primitive
     *                            schema; when false the definition is returned untouched
     * @return string JSON encoded schema definition, collapsed when it was primitive
     * @throws \JsonException if the definition is not valid JSON or the result cannot be encoded
     */
    public function optimize(string $definition, bool $isPrimitive = false): string
    {
        if (false === $isPrimitive) {
            return $definition;
        }

        // json_decode() expects (json, associative, depth, flags). Passing JSON_THROW_ON_ERROR
        // as third argument would set the depth and leave error reporting switched off.
        $data = json_decode($definition, true, self::JSON_DECODE_DEPTH, JSON_THROW_ON_ERROR);

        return json_encode($this->processSchema($data), JSON_THROW_ON_ERROR);
    }

    /**
     * Returns the value of the `type` key when the decoded schema is an array that has one,
     * otherwise returns the decoded schema unchanged (e.g. an already collapsed `"string"`).
     *
     * @param mixed $data decoded schema definition
     * @return mixed
     */
    private function processSchema($data)
    {
        // Only arrays can carry a `type` key; scalars (already collapsed schemas) pass through.
        if (true === is_array($data) && true === isset($data['type'])) {
            return $data['type'];
        }

        return $data;
    }
}
4 changes: 4 additions & 0 deletions src/Schema/SchemaTemplate.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ public function isPrimitive(): bool
{
$fields = json_decode($this->getSchemaDefinition(), true, JSON_THROW_ON_ERROR);

if (is_string($fields) && true === isset(self::AVRO_PRIMITIVE_TYPES[$fields])) {
return true;
}

if (true === isset($fields['type'])) {
return array_key_exists($fields['type'], self::AVRO_PRIMITIVE_TYPES);
}
Expand Down
42 changes: 42 additions & 0 deletions tests/Unit/Merger/SchemaMergerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use PhpKafka\PhpAvroSchemaGenerator\Exception\SchemaMergerException;
use PhpKafka\PhpAvroSchemaGenerator\Merger\SchemaMerger;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\OptimizerInterface;
use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
use PhpKafka\PhpAvroSchemaGenerator\Registry\SchemaRegistryInterface;
use PhpKafka\PhpAvroSchemaGenerator\Schema\SchemaTemplateInterface;
use PHPUnit\Framework\TestCase;
Expand Down Expand Up @@ -443,6 +444,47 @@ public function testMergePrimitive()
rmdir('/tmp/foobar');
}

/**
 * Checks that merge() passes both the schema definition and the template's isPrimitive()
 * result to a PrimitiveSchemaOptimizer, and that the schema file is still written.
 */
public function testMergePrimitiveWithOptimizerEnabled()
{
$definition = '{
"type": "string"
}';

// Template mock: always hands out the same primitive definition and reports itself as primitive.
// The exact call counts below mirror how often SchemaMerger touches the template during merge().
$schemaTemplate = $this->getMockForAbstractClass(SchemaTemplateInterface::class);
$schemaTemplate
->expects(self::exactly(3))
->method('getSchemaDefinition')
->willReturn($definition);
$schemaTemplate
->expects(self::exactly(2))
->method('withSchemaDefinition')
->with($definition)
->willReturn($schemaTemplate);
$schemaTemplate
->expects(self::once())
->method('getFilename')
->willReturn('primitive-type.avsc');
$schemaTemplate
->expects(self::exactly(3))
->method('isPrimitive')
->willReturn(true);

// Registry mock: exposes the template above as the only root schema.
$schemaRegistry = $this->getMockForAbstractClass(SchemaRegistryInterface::class);
$schemaRegistry
->expects(self::once())
->method('getRootSchemas')
->willReturn([$schemaTemplate]);
// Optimizer mock: must be called exactly once with the definition and the primitive flag set to true.
// It returns the definition unchanged, so the withSchemaDefinition() expectation above still matches.
$optimizer = $this->getMockBuilder(PrimitiveSchemaOptimizer::class)->getMock();
$optimizer->expects(self::once())->method('optimize')->with($definition, true)->willReturn($definition);
$merger = new SchemaMerger($schemaRegistry, '/tmp/foobar');
$merger->addOptimizer($optimizer);
// NOTE(review): the meaning of the `true` argument is defined by SchemaMerger::merge(), which is not visible here.
$merger->merge(true);

// The schema must have been exported; afterwards remove the file and directory the test created.
self::assertFileExists('/tmp/foobar/primitive-type.avsc');
unlink('/tmp/foobar/primitive-type.avsc');
rmdir('/tmp/foobar');
}

public function testMergeWithFilenameOption()
{
$definition = '{
Expand Down
44 changes: 44 additions & 0 deletions tests/Unit/Optimizer/PrimitiveSchemaOptimizerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?php

declare(strict_types=1);

namespace PhpKafka\PhpAvroSchemaGenerator\Tests\Unit\Optimizer;

use PhpKafka\PhpAvroSchemaGenerator\Optimizer\PrimitiveSchemaOptimizer;
use PHPUnit\Framework\TestCase;

class PrimitiveSchemaOptimizerTest extends TestCase
{
    /**
     * An object schema flagged as primitive is expected to come back as the bare type.
     */
    public function testOptimize(): void
    {
        $optimizer = new PrimitiveSchemaOptimizer();

        $optimized = $optimizer->optimize('{"type": "string"}', true);

        self::assertEquals(json_encode(json_decode('"string"')), $optimized);
    }

    /**
     * A schema that already is a bare type string is expected to keep its value.
     */
    public function testOptimizeForStringSchema(): void
    {
        $optimizer = new PrimitiveSchemaOptimizer();

        $optimized = $optimizer->optimize('"string"', true);

        self::assertEquals(json_encode(json_decode('"string"')), $optimized);
    }

    /**
     * A record schema passed with the primitive flag off is compared against
     * its own JSON, re-encoded through json_decode()/json_encode().
     */
    public function testOptimizeForRecordSchema(): void
    {
        $recordSchema = '{"type":"record","namespace":"com.example","name":"Book","fields":[{"name":"isbn","type":"string"}]}';
        $optimizer = new PrimitiveSchemaOptimizer();

        $optimized = $optimizer->optimize($recordSchema, false);

        self::assertEquals(json_encode(json_decode($recordSchema)), $optimized);
    }
}
20 changes: 17 additions & 3 deletions tests/Unit/Schema/SchemaTemplateTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,34 @@ public function testIsPrimitiveTrue()
{
$template = (new SchemaTemplate())->withSchemaDefinition('{"type":"string"}');

self::assertTrue($template->isPrimitive($template));
self::assertTrue($template->isPrimitive());
}

public function testIsPrimitiveFalse()
{
$template = (new SchemaTemplate())->withSchemaDefinition('{"type":"record"}');

self::assertFalse($template->isPrimitive($template));
self::assertFalse($template->isPrimitive());
}

/**
 * A definition consisting only of the JSON string "string" is expected to be reported as primitive.
 */
public function testIsPrimitiveTrueForOptimizedSchema()
{
    $blankTemplate = new SchemaTemplate();
    $bareTypeTemplate = $blankTemplate->withSchemaDefinition('"string"');

    self::assertTrue($bareTypeTemplate->isPrimitive());
}

/**
 * A definition consisting only of the JSON string "foo" is expected not to be reported as primitive.
 */
public function testIsPrimitiveFalseForOptimizedSchema()
{
    $blankTemplate = new SchemaTemplate();
    $unknownTypeTemplate = $blankTemplate->withSchemaDefinition('"foo"');

    self::assertFalse($unknownTypeTemplate->isPrimitive());
}

public function testIsPrimitiveFalseOnMissingType()
{
$template = (new SchemaTemplate())->withSchemaDefinition('{"foo":"bar"}');

self::assertFalse($template->isPrimitive($template));
self::assertFalse($template->isPrimitive());
}
}

0 comments on commit fe1f0d1

Please sign in to comment.