feat: reimplemented index page generation

This commit is contained in:
Dylan R. E. Moonfire 2022-06-25 21:15:33 -05:00
parent 3898396afe
commit 86403060e7
12 changed files with 463 additions and 404 deletions

View file

@ -1,179 +0,0 @@
using System;
using System.Collections.Generic;
using FluentValidation;
using Gallium;
using Serilog;
namespace Nitride.Entities;
/// <summary>
/// A Nitride operation that builds "index" entities which list other
/// entities, such as year or month archive pages, or tag and category
/// pages. An index entity that already exists in the input (for example, a
/// page whose description was written by hand in a file) is updated with
/// its entries, while keys that have no index yet get a freshly created one.
/// </summary>
/// <typeparam name="TIndexKey">The type of key that identifies a single index.</typeparam>
public class CreateIndexEntities<TIndexKey> : OperationBase
    where TIndexKey : notnull
{
    // TODO: This does not use [WithProperties] because the source generator hasn't been taught how to do generics.
    private readonly ILogger logger;

    private readonly IValidator<CreateIndexEntities<TIndexKey>> validator;

    public CreateIndexEntities(ILogger logger)
    {
        this.logger = logger.ForContext(typeof(CreateIndexEntities<>));

        // TODO: Figure out why Autofac won't let us register IValidator of generic classes.
        this.validator = new CreateIndexEntitiesValidator<TIndexKey>();
    }

    /// <summary>
    /// Gets or sets the callback that builds a brand new index entity from
    /// a key and the entities filed under it. It is only invoked for keys
    /// that have no existing index entity in the input.
    /// </summary>
    public Func<TIndexKey, IList<Entity>, Entity>? CreateIndexEntity { get; set; }

    /// <summary>
    /// Gets or sets the callback that extracts the index key from an
    /// entity that is already an index page. A null result means the
    /// entity is not an index page.
    /// </summary>
    public Func<Entity, TIndexKey?>? GetIndexEntityKey { get; set; }

    /// <summary>
    /// Gets or sets the callback that lists the keys an entity should be
    /// filed under. An empty sequence means the entity is not added to any
    /// index.
    /// </summary>
    public Func<Entity, IEnumerable<TIndexKey>>? GetIndexKeys { get; set; }

    /// <summary>
    /// Gets or sets the callback that folds the collected entries into an
    /// index entity that already existed in the input.
    /// </summary>
    public Func<Entity, TIndexKey, IList<Entity>, Entity>? UpdateIndexEntity { get; set; }

    /// <inheritdoc />
    public override IEnumerable<Entity> Run(IEnumerable<Entity> input)
    {
        // Refuse to run if the required callbacks are missing.
        this.validator.ValidateAndThrow(this);

        // The whole input is consumed up front and sorted into three
        // buckets, which means the output order differs from the input:
        // regular entities first, then updated indexes, then new indexes.
        List<Entity> output = new();
        Dictionary<TIndexKey, Entity> existingIndexes = new();
        Dictionary<TIndexKey, List<Entity>> entriesByKey = new();

        foreach (Entity? current in input)
        {
            // An entity that reports its own index key is an existing index
            // page. It is held back until its entries are known and is
            // never filed under other indexes.
            if (this.GetIndexEntityKey != null)
            {
                TIndexKey? ownKey = this.GetIndexEntityKey(current);

                if (ownKey != null)
                {
                    existingIndexes[ownKey] = current;
                    continue;
                }
            }

            // A regular entity: file it under every key it belongs to.
            foreach (TIndexKey entryKey in this.GetIndexKeys!(current))
            {
                if (entriesByKey.TryGetValue(entryKey, out List<Entity>? bucket))
                {
                    bucket.Add(current);
                }
                else
                {
                    entriesByKey[entryKey] = new List<Entity> { current };
                }
            }

            output.Add(current);
        }

        // Existing index pages receive whatever was filed under their key,
        // which may be nothing at all, and are appended after the regular
        // entities.
        foreach (KeyValuePair<TIndexKey, Entity> existing in existingIndexes)
        {
            List<Entity> entries = entriesByKey.TryGetValue(existing.Key, out List<Entity>? known)
                ? known
                : new List<Entity>();

            output.Add(this.UpdateIndexEntity!(existing.Value, existing.Key, entries));
        }

        // Any key that was seen but has no index page yet gets a new one.
        int createdCount = 0;

        foreach (KeyValuePair<TIndexKey, List<Entity>> pending in entriesByKey)
        {
            if (!existingIndexes.ContainsKey(pending.Key))
            {
                Entity createdIndex = this.CreateIndexEntity!(pending.Key, pending.Value);

                createdCount++;
                output.Add(createdIndex);
            }
        }

        this.logger.Debug(
            "Found {Old:N0} and created {New:N0} index pages for {Keys:N0} keys",
            existingIndexes.Count,
            createdCount,
            entriesByKey.Count);

        return output;
    }

    /// <summary>
    /// Fluent setter for <see cref="CreateIndexEntity" />.
    /// </summary>
    /// <param name="callback">The callback to assign.</param>
    /// <returns>This operation, to allow chaining.</returns>
    public CreateIndexEntities<TIndexKey> WithCreateIndexEntity(Func<TIndexKey, IList<Entity>, Entity>? callback)
    {
        this.CreateIndexEntity = callback;

        return this;
    }

    /// <summary>
    /// Fluent setter for <see cref="GetIndexEntityKey" />.
    /// </summary>
    /// <param name="callback">The callback to assign.</param>
    /// <returns>This operation, to allow chaining.</returns>
    public CreateIndexEntities<TIndexKey> WithGetIndexEntityKey(Func<Entity, TIndexKey?>? callback)
    {
        this.GetIndexEntityKey = callback;

        return this;
    }

    /// <summary>
    /// Fluent setter for <see cref="GetIndexKeys" />.
    /// </summary>
    /// <param name="callback">The callback to assign.</param>
    /// <returns>This operation, to allow chaining.</returns>
    public CreateIndexEntities<TIndexKey> WithGetIndexKeys(Func<Entity, IEnumerable<TIndexKey>>? callback)
    {
        this.GetIndexKeys = callback;

        return this;
    }

    /// <summary>
    /// Fluent setter for <see cref="UpdateIndexEntity" />.
    /// </summary>
    /// <param name="callback">The callback to assign.</param>
    /// <returns>This operation, to allow chaining.</returns>
    public CreateIndexEntities<TIndexKey> WithUpdateIndexEntity(
        Func<Entity, TIndexKey, IList<Entity>, Entity>? callback)
    {
        this.UpdateIndexEntity = callback;

        return this;
    }
}

View file

@ -1,14 +0,0 @@
using FluentValidation;
namespace Nitride.Entities;
/// <summary>
/// Validates that a <see cref="CreateIndexEntities{TIndexKey}" /> operation
/// has the callbacks it cannot run without. The callback that detects
/// existing index pages is optional and therefore not checked.
/// </summary>
/// <typeparam name="TIndexKey">The key type of the operation being validated.</typeparam>
public class CreateIndexEntitiesValidator<TIndexKey> : AbstractValidator<CreateIndexEntities<TIndexKey>>
    where TIndexKey : notnull
{
    public CreateIndexEntitiesValidator()
    {
        // Needed to build indexes that do not exist yet.
        this.RuleFor(operation => operation.CreateIndexEntity).NotNull();

        // Needed to find out which indexes an entity belongs to.
        this.RuleFor(operation => operation.GetIndexKeys).NotNull();

        // Needed to merge entries into indexes that already exist.
        this.RuleFor(operation => operation.UpdateIndexEntity).NotNull();
    }
}

View file

@ -0,0 +1,114 @@
using System;
using System.Collections.Generic;
using System.Linq;
using FluentValidation;
using Gallium;
using Serilog;
namespace Nitride.Entities;
/// <summary>
/// A Nitride operation that creates and merges entities that are intended
/// to be indexes of another entity. Examples of this would be year and month
/// archive pages for a blog or tag/category pages for associated data. This
/// uses the scanner to determine how many index entities are needed and then
/// merges existing entities with their data or creates new indexes for ones
/// that don't already have an index.
/// </summary>
/// <remarks>
/// This makes the assumption that there is one index per page.
/// <para>
/// <see cref="Run" /> is an iterator, so validation and the scanner lookup
/// happen when the returned sequence is first enumerated, not when
/// <see cref="Run" /> is called. The summary log line is only written once
/// the sequence has been enumerated to the end.
/// </para>
/// </remarks>
[WithProperties]
public partial class CreateOrUpdateIndex : OperationBase
{
    private readonly ILogger logger;

    private readonly IValidator<CreateOrUpdateIndex> validator;

    public CreateOrUpdateIndex(ILogger logger, IValidator<CreateOrUpdateIndex> validator)
    {
        this.validator = validator;
        this.logger = logger.ForContext(typeof(CreateOrUpdateIndex));
    }

    /// <summary>
    /// Creates an index for a given key. This will not be called for any
    /// index that has been already created.
    /// </summary>
    public Func<string, IList<Entity>, Entity> CreateIndex { get; set; } = null!;

    /// <summary>
    /// Gets or sets the function to retrieve the key from an existing
    /// index page. If this returns null, then the entity is considered not
    /// to be an index page.
    /// </summary>
    public Func<Entity, string?> GetIndexKey { get; set; } = null!;

    /// <summary>
    /// Gets or sets the scanner that provides the keys and the entities
    /// associated with each key.
    /// </summary>
    public EntityScanner Scanner { get; set; } = null!;

    /// <summary>
    /// Updates an existing index entity to include new information.
    /// </summary>
    public Func<Entity, string, IEnumerable<Entity>, Entity> UpdateIndex { get; set; } = null!;

    /// <inheritdoc />
    public override IEnumerable<Entity> Run(IEnumerable<Entity> input)
    {
        // Make sure we have sane data.
        this.validator.ValidateAndThrow(this);

        // Get the list of all the scanned entities.
        var scanned = this.Scanner.GetScannedResults().ToDictionary(x => x.Key, x => x.Value);

        // We loop through the results and look for index entities. Any one we
        // find, we update with the existing entries. If we get to the end and
        // still have any left over, we create those pages.
        HashSet<string> existing = new();

        foreach (Entity? entity in input)
        {
            // See if this entity is an index for anything.
            string? key = this.GetIndexKey(entity);

            if (key == null)
            {
                // Not an index page, we don't need to pay attention.
                yield return entity;
                continue;
            }

            // This is an existing index page that needs to be updated. It
            // may be an index for a key nothing was scanned under, in which
            // case it is updated with an empty list.
            IEnumerable<Entity> entries =
                scanned.TryGetValue(key, out List<Entity>? found) ? found : Array.Empty<Entity>();

            existing.Add(key);

            yield return this.UpdateIndex(entity, key, entries);
        }

        // Once we're done with the list, we need to create the missing
        // indexes. The number created is counted directly: subtracting
        // `existing.Count` from `scanned.Count` is wrong whenever an existing
        // index has a key that was never scanned.
        int created = 0;

        foreach ((string key, List<Entity> entries) in scanned)
        {
            if (existing.Contains(key))
            {
                continue;
            }

            created++;

            yield return this.CreateIndex(key, entries);
        }

        // Report the results.
        this.logger.Debug(
            "Found {Old:N0} and created {New:N0} index pages for {Keys:N0} keys",
            existing.Count,
            created,
            scanned.Count);
    }
}

View file

@ -0,0 +1,14 @@
using FluentValidation;
namespace Nitride.Entities;
/// <summary>
/// Validates that a <see cref="CreateOrUpdateIndex" /> operation has its
/// scanner and all three of its callbacks assigned before it runs.
/// </summary>
public class CreateOrUpdateIndexValidator : AbstractValidator<CreateOrUpdateIndex>
{
    public CreateOrUpdateIndexValidator()
    {
        // Source of the keys and the entities filed under them.
        this.RuleFor(operation => operation.Scanner).NotNull();

        // Callbacks used to recognize, create, and update index entities.
        this.RuleFor(operation => operation.GetIndexKey).NotNull();
        this.RuleFor(operation => operation.CreateIndex).NotNull();
        this.RuleFor(operation => operation.UpdateIndex).NotNull();
    }
}
}

View file

@ -0,0 +1,147 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using FluentValidation;
using Gallium;
namespace Nitride.Entities;
/// <summary>
/// Implements a Nitride operation that scans the entities as they are
/// passed through the `Run` method and gathers information into a
/// `Dictionary&lt;string, List&lt;Entity&gt;&gt;` which then can be
/// queried by later operations. This handles making sure the entire
/// input has been processed before operating.
/// </summary>
[WithProperties]
public partial class EntityScanner : OperationBase
{
    private readonly object locker;

    private readonly ConcurrentDictionary<string, List<Entity>> results;

    private readonly IValidator<EntityScanner> validator;

    // Guarded by `locker`. True only after `Run` has been enumerated to the end.
    private bool done;

    public EntityScanner(IValidator<EntityScanner> validator)
    {
        this.validator = validator;
        this.locker = new object();
        this.results = new ConcurrentDictionary<string, List<Entity>>();
    }

    /// <summary>
    /// Gets or sets a callback function that gets the keys associated with
    /// the given entity. A null result is treated the same as no keys.
    /// </summary>
    public Func<Entity, IEnumerable<string>?> GetKeysFromEntity { get; set; } = null!;

    /// <summary>
    /// Gets the list of entities associated with the given key. If the key
    /// has not been seen, this returns an empty collection.
    /// </summary>
    /// <param name="key">The key to search for.</param>
    /// <returns>A read-only list of entities associated with the given key.</returns>
    /// <exception cref="InvalidOperationException">
    /// If the input has not been completely processed, this exception is
    /// thrown.
    /// </exception>
    public IEnumerable<Entity> GetScannedEntities(string key)
    {
        // Make sure we're done processing.
        this.CheckDone();

        // We have the list, so return it or an empty list.
        if (this.results.TryGetValue(key, out List<Entity>? list))
        {
            return list.AsReadOnly();
        }

        // We didn't have the list but we always return something.
        return Array.Empty<Entity>();
    }

    /// <summary>
    /// Gets a list of all known keys from the scanner.
    /// </summary>
    /// <returns>An immutable snapshot of every key seen while scanning.</returns>
    /// <exception cref="InvalidOperationException">
    /// If the input has not been completely processed, this exception is
    /// thrown.
    /// </exception>
    public IEnumerable<string> GetScannedKeys()
    {
        this.CheckDone();

        return this.results.Keys.ToImmutableList();
    }

    /// <summary>
    /// Gets a dictionary of all the results from the scanner.
    /// </summary>
    /// <returns>
    /// An immutable snapshot that maps each key to the entities found for
    /// it.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// If the input has not been completely processed, this exception is
    /// thrown.
    /// </exception>
    public ImmutableDictionary<string, List<Entity>> GetScannedResults()
    {
        this.CheckDone();

        return this.results.ToImmutableDictionary();
    }

    /// <inheritdoc />
    /// <remarks>
    /// This is an iterator: nothing is validated or scanned until the
    /// returned sequence is enumerated, and the results only become
    /// available once it has been enumerated to the end.
    /// </remarks>
    public override IEnumerable<Entity> Run(IEnumerable<Entity> input)
    {
        // Make sure we have sane data.
        this.validator.ValidateAndThrow(this);

        // Reset our done flag to handle re-entrant calls.
        // NOTE(review): results gathered by an earlier run are not cleared
        // here, so they accumulate across runs - confirm that is intended.
        lock (this.locker)
        {
            this.done = false;
        }

        // Loop through the entities and process each one.
        foreach (Entity? entity in input)
        {
            // Scan the given entity and see where it needs to be included.
            // The entity is added to each of the keys returned by the callback.
            IEnumerable<string>? keysFromEntity = this.GetKeysFromEntity(entity);

            if (keysFromEntity != null)
            {
                foreach (string key in keysFromEntity)
                {
                    // The update path builds a new list rather than changing
                    // the stored one, and `Union` leaves out the entity if an
                    // equal one is already in the list.
                    this.results.AddOrUpdate(
                        key,
                        _ => new List<Entity> { entity },
                        (_, list) => list.Union(new[] { entity }).ToList());
                }
            }

            // Finish processing this entity.
            yield return entity;
        }

        // We are done, so flip our flag and we're done processing.
        lock (this.locker)
        {
            this.done = true;
        }
    }

    /// <summary>
    /// Throws if the most recent `Run` has not been enumerated to the end.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// If the input has not been completely processed, this exception is
    /// thrown.
    /// </exception>
    private void CheckDone()
    {
        lock (this.locker)
        {
            // Make sure we are done, otherwise give a useful message.
            if (!this.done)
            {
                throw new InvalidOperationException(
                    "Have not finished processing through the input for scanning. To finish"
                    + " processing, have an operation that causes the enumerable to resolve. This"
                    + " can be as simple as a `.ToList()` operator or using another resolving operation"
                    + " such as one implementing `IResolvingOperation` before any calls to"
                    + " `GetScannedEntities`, `GetScannedKeys`, or `GetScannedResults`.");
            }
        }
    }
}

View file

@ -0,0 +1,11 @@
using FluentValidation;
namespace Nitride.Entities;
/// <summary>
/// Validates that an <see cref="EntityScanner" /> has been given the
/// callback it uses to look up the keys of each entity.
/// </summary>
public class EntityScannerValidator : AbstractValidator<EntityScanner>
{
    public EntityScannerValidator()
    {
        // Without this callback the scanner has no way to file entities.
        this.RuleFor(scanner => scanner.GetKeysFromEntity).NotNull();
    }
}

View file

@ -1,188 +0,0 @@
# Entities
Nitride is based on an Entity Component System (ECS) in the way it handles the
various input documents, images, feeds, and other parts that make up a website.
Implementing it this way makes it easier to create a distinction between the
different entities (much like Statiq.Web uses the DocumentType)
but allows for adding new components along the way without having the C#
limitations of a sealed enumeration or needing to implement a Javascript-style
enum for identification. Instead, if an entity needs to be identified as being
Markdown, an image, or a database query, it just adds a component to represent
that information.
The basic entity is just a simple object with an internal identifier. These
entities are also immutable. Functions that appear to modify an entity actually
clone it, make the change, and then return the result.
```c#
var entity = new Entity();
Console.WriteLine("Entity Id: {0}", entity.Id);
```
## Components
By itself, an entity doesn't have any meaning or purpose. These are described by
a generic collection of components that are added to the entity. Each component
has a type and then an instance of that type. These are added to the entity with
the `Add` command. If a type is not given, it is assumed to be the same type as
the parameter, but a base class or interface can be given to allow different
types to be stored in a specific component.
In effect, the type of the component is the key. Registering the same object
under two different types results in two distinct components.
```c#
string mimeType = "text/plain";
Entity entity = new Entity();
Assert.Equal(0, entity.Count);
Entity newEntity = entity.Add(mimeType);
Assert.Equal(0, entity.Count);
Assert.Equal(1, newEntity.Count);
Assert.Equal(entity.Id, newEntity.Id);
Assert.Equal(entity, newEntity);
newEntity = newEntity.Add<object>(mimeType);
Assert.Equal(2, newEntity.Count);
```
The basic operations for entity components are:
- `Add<TType>(component)`: Adds a component as the given type. If there is
already a component there, an exception will be thrown.
- `Add(component)`: As `Add<TType>(component)` but the `TType` is the same as
`component.GetType()`.
- `Remove<TType>()`: Removes any component of the given type, if exists. If
there is no such component, then nothing happens.
- `Remove(component)`: Same as `Remove<TType>` with the component given
determining the `TType`.
- `Set<TType>(component)`: Adds or updates a component of the given type.
- `Set(component)`: Same as `Set<TType>` with the component given determining
the `TType`.
- `Copy()`: Creates a copy of the entity and assigns it a new identifier.
- `ExactCopy()`: Creates a copy of the entity with the same identifier.
As above, all of these return a new entity (or the same one if no change is made
to the entity).
### Query Components
- `bool Has<TType>()`: Returns a value indicating whether the entity has the
given component type.
- `TType Get<TType>()`: Returns the value of the registered component. If there
is no such object, this will throw an exception.
- `TType? GetOptional<TType>()`: Returns the value of the registered component.
If there is no such object, this will return the default value.
- `bool TryGet<TType>(out TType component)`: Attempt to get the component. If it
cannot be retrieved, then this will return `false` and `component` is
undefined.
## Collections
To keep with the patterns of C#, working with collections of entities uses
normal LINQ operations. For example, to combine two sets of entities together,
the `Union` LINQ command can be used:
```c#
IEnumerable<Entity> entities1;
IEnumerable<Entity> entities2;
IEnumerable<Entity> all = entities1.Union(entities2);
```
To work with the ECS, additional extension methods have been written that allow
for filtering or working with those entities.
### HasComponents
`HasComponents` is a set of overloads that check whether each entity has the
requisite components. Entities that don't have them are filtered out.
```c#
IEnumerable<Entity> entities;
var filtered1 = entities.HasComponents<C1>();
var filtered2 = entities.HasComponents<C1, C2>();
var filtered3 = entities.HasComponents<C1, C2, C3>();
```
### NotComponents
`NotComponents` is effectively the reverse of `HasComponents` in that if the
entity has the given components, they are filtered out. This also allows up to
three different components.
This also allows the developer to ask for entities that have two components but
do not have two other components:
```c#
IEnumerable<Entity> entities;
var filtered = entities
.HasComponents<C1, C2>()
.NotComponents<C3, C4>();
```
### ForComponents
`ForComponents` allows for a lambda to be performed on entities that have the
given components while passing all the entities on through the function. This is
much like the `ForEach` combined with `Select` in that the changed or updated
entity will be passed on.
```c#
var entities = new Entity[]
{
new Entity().Add("value1"),
new Entity().Add(2),
new Entity().Add(3).Add("value2"),
};
var filtered = entities
.ForComponents<string>((entity, value) => entity.Set(value + "!"));
Assert.Equal(
new[] {
"value1!",
null,
"value2!",
},
filtered.Select(x => x.GetOptional<string>()));
```
There are also three overloads allowing up to three components to be pulled out
with the lambda.
### SetComponents, AddComponents, RemoveComponents
`SetComponents` (and the corresponding `AddComponents` and `RemoveComponents`)
perform the same operation on every entity in the list. They also have three
overloads that allow one to three components to be manipulated in a single
call.
```c#
IEnumerable<Entity> entities;
var updated = entities
.AddComponents<object>(mimeType)
.AddComponents(mimeType)
.RemoveComponents<object>()
.SetComponents<object>(mimeType)
.SetComponents(mimeType);
```
### MergeEntities
`MergeEntities` combines multiple entities together if they have the same `Id`
field. The two sides of the comparison are the presence of a specific component.
```c#
IEnumerable<Entity> entities;
var combined = entities
.MergeEntities<C1, C2>(
(entity1, c1, entity2, c2) => entity1.Set(c2));
```
## Files, Paths, and Content
Entities do not have an integral concept of being a file or having contents from
the disk or anywhere else. Much of this is implemented as components from the
Nitride.IO assembly which uses [Zio](https://github.com/xoofx/zio)
for the underlying library, but can be easily replaced with a different IO
layer (or even the straight System.IO).

View file

@ -0,0 +1,9 @@
namespace Nitride;
/// <summary>
/// Indicates an operation that resolves (completely processes) its input
/// before returning from the `Run` operation.
/// </summary>
/// <remarks>
/// This interface declares no members of its own; it only tags an
/// <see cref="IOperation" /> as having this behavior.
/// </remarks>
public interface IResolvingOperation : IOperation
{
}

View file

@ -28,7 +28,6 @@ public class NitrideModule : Module
// Operations
builder.RegisterValidators(this);
builder.RegisterOperators(this);
builder.RegisterGeneric(typeof(CreateIndexEntities<>)).As(typeof(CreateIndexEntities<>));
// Commands
builder.RegisterType<BuildCommand>().AsSelf().As<Command>().SingleInstance();

View file

@ -0,0 +1,146 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Gallium;
using Nitride.Entities;
using Xunit;
using Xunit.Abstractions;
namespace Nitride.Tests.Entities;
/// <summary>
/// Tests for <see cref="CreateOrUpdateIndex" /> running behind an
/// <see cref="EntityScanner" />.
/// </summary>
/// <remarks>
/// The fixtures use a string component as the entity's name and a
/// `List&lt;string&gt;` component as its categories. An entity whose name
/// contains "index" is treated as the index page for the category of the same
/// number ("index1" is the index of "cat1"), and the entities collected for
/// an index are stored on it as a `List&lt;Entity&gt;` component.
/// </remarks>
public class CreateOrUpdateIndexTests : NitrideTestBase, IDisposable
{
    private readonly NitrideTestContext context;

    private readonly IOperation op;

    private readonly EntityScanner scanner;

    public CreateOrUpdateIndexTests(ITestOutputHelper output)
        : base(output)
    {
        this.context = this.CreateContext();

        // Entities are filed under the categories in their List<string> component.
        this.scanner = this.context.Resolve<EntityScanner>().WithGetKeysFromEntity(e => e.GetOptional<List<string>>());

        this.op = this.context.Resolve<CreateOrUpdateIndex>()
            .WithScanner(this.scanner)
            .WithGetIndexKey(x => x.Get<string>().Contains("index") ? x.Get<string>().Replace("index", "cat") : null)
            .WithCreateIndex((key, list) => new Entity().Add(key.Replace("cat", "index")).Add(list.ToList()))
            .WithUpdateIndex((index, key, list) => index.Add(list.ToList()));
    }

    /// <summary>
    /// Two categories with no index pages produce two new indexes.
    /// </summary>
    [Fact]
    public void CreateMultipleIndexes()
    {
        List<Entity> input = new()
        {
            new Entity().Add("page1").Add(new List<string> { "cat1" }),
            new Entity().Add("page2").Add(new List<string> { "cat2" }),
            new Entity().Add("page3").Add(new List<string> { "cat1" }),
        };

        List<Tuple<string, List<string>?>> actual = this.GetActual(input);

        Assert.Equal(
            new[]
            {
                new Tuple<string, List<string>?>("index1", new List<string> { "page1", "page3" }),
                new Tuple<string, List<string>?>("index2", new List<string> { "page2" }),
                new Tuple<string, List<string>?>("page1", null),
                new Tuple<string, List<string>?>("page2", null),
                new Tuple<string, List<string>?>("page3", null),
            },
            actual);
    }

    /// <summary>
    /// An existing index page that is itself in a category is both updated
    /// and listed in the newly created index of that category.
    /// </summary>
    [Fact]
    public void CreateNestedIndexes()
    {
        List<Entity> input = new()
        {
            new Entity().Add("index2").Add(new List<string> { "cat1" }),
            new Entity().Add("page2").Add(new List<string> { "cat2" }),
            new Entity().Add("page3").Add(new List<string> { "cat1" }),
        };

        List<Tuple<string, List<string>?>> actual = this.GetActual(input);

        Assert.Equal(
            new[]
            {
                new Tuple<string, List<string>?>("index1", new List<string> { "index2", "page3" }),
                new Tuple<string, List<string>?>("index2", new List<string> { "page2" }),
                new Tuple<string, List<string>?>("page2", null),
                new Tuple<string, List<string>?>("page3", null),
            },
            actual);
    }

    /// <summary>
    /// A single page in a single category produces one new index.
    /// </summary>
    [Fact]
    public void CreateSimpleIndex()
    {
        List<Entity> input = new()
        {
            new Entity().Add("page1").Add(new List<string> { "cat1" }),
        };

        List<Tuple<string, List<string>?>> actual = this.GetActual(input);

        Assert.Equal(
            new[]
            {
                new Tuple<string, List<string>?>("index1", new List<string> { "page1" }),
                new Tuple<string, List<string>?>("page1", null),
            },
            actual);
    }

    /// <inheritdoc />
    public void Dispose()
    {
        this.context.Dispose();
    }

    /// <summary>
    /// An index page that already exists is updated instead of a second
    /// index being created for the same category.
    /// </summary>
    [Fact]
    public void UpdateSimpleIndex()
    {
        List<Entity> input = new()
        {
            new Entity().Add("index1"),
            new Entity().Add("page1").Add(new List<string> { "cat1" }),
        };

        // Use the shared helper like the other tests rather than repeating
        // the scan/index/project pipeline inline.
        List<Tuple<string, List<string>?>> actual = this.GetActual(input);

        Assert.Equal(
            new[]
            {
                new Tuple<string, List<string>?>("index1", new List<string> { "page1" }),
                new Tuple<string, List<string>?>("page1", null),
            },
            actual);
    }

    /// <summary>
    /// Runs the input through the scanner and then the index operation, and
    /// flattens the output into comparable tuples.
    /// </summary>
    /// <param name="input">The entities to feed into the scanner.</param>
    /// <returns>
    /// One tuple per output entity, sorted by name. The first item is the
    /// entity's name and the second is the sorted names of the entities in
    /// its `List&lt;Entity&gt;` component, or null if it has none.
    /// </returns>
    private List<Tuple<string, List<string>?>> GetActual(List<Entity> input)
    {
        // The first `ToList` forces the scanner to see every entity before
        // the index operation asks it for results.
        var output = this.scanner.Run(input).ToList().Run(this.op).ToList();

        // Sort both levels so the assertions don't depend on output order.
        var actual = output
            .Select(
                x => new Tuple<string, List<string>?>(
                    x.Get<string>(),
                    x.GetOptional<List<Entity>>()?.Select(y => y.Get<string>()).OrderBy(y => y).ToList()))
            .OrderBy(x => x.Item1)
            .ToList();

        return actual;
    }
}