bogdanghita/whitebox-compression 2
Research code for the Self-learning Whitebox Compression project
diegomestre2/Cpp_project_example 2
Example structure of a Cpp project
diegomestre2/public_bi_benchmark 1
BI benchmark with user generated data and queries
bogdanghita/public_bi_benchmark-master_project 0
BI benchmark with user generated data and queries
diegomestre2/academic-kickstart 0
Easily create a beautiful website using Academic and Hugo
diegomestre2/awesome-modern-cpp 0
A collection of resources on modern C++
Pull request review commentcwida/duckdb
Pre-filtering data in zonemaps and #1303
+#include "duckdb/execution/expression_executor.hpp"+#include "duckdb/optimizer/rule/in_clause_simplification.hpp"+#include "duckdb/planner/expression/list.hpp"+#include "duckdb/planner/expression/bound_operator_expression.hpp"++namespace duckdb {++InClauseSimplificationRule::InClauseSimplificationRule(ExpressionRewriter &rewriter) : Rule(rewriter) {+ // match on InClauseExpression that has a ConstantExpression as a check+ auto op = make_unique<InClauseExpressionMatcher>();+ op->policy = SetMatcher::Policy::SOME;+ root = move(op);+}++unique_ptr<Expression> InClauseSimplificationRule::Apply(LogicalOperator &op, vector<Expression *> &bindings,+ bool &changes_made) {+ D_ASSERT(bindings[0]->expression_class == ExpressionClass::BOUND_OPERATOR);+ auto expr = (BoundOperatorExpression *)bindings[0];+ if (expr->children[0]->expression_class != ExpressionClass::BOUND_CAST) {+ return nullptr;+ }+ auto cast_expression = (BoundCastExpression *)expr->children[0].get();+ if (cast_expression->child->expression_class != ExpressionClass::BOUND_COLUMN_REF) {+ return nullptr;+ }+ //! Here we check if we can apply the expression on the constant side+ auto target_type = cast_expression->source_type();+ if (!BoundCastExpression::CastIsInvertible(target_type, cast_expression->return_type)) {+ return nullptr;+ }+ for (size_t i{1}; i < expr->children.size(); i++) {+ if (expr->children[i]->expression_class != ExpressionClass::BOUND_CONSTANT) {+ return nullptr;+ }+ D_ASSERT(expr->children[i]->IsFoldable());+ auto constant_value = ExpressionExecutor::EvaluateScalar(*expr->children[i]);+ auto new_constant = constant_value.TryCastAs(target_type);+ if (new_constant) {+ //! We can cast, so we move the new constant+ auto new_constant_expr = make_unique<BoundConstantExpression>(constant_value);+ expr->children[i] = move(new_constant_expr);
good catch
comment created time in 2 hours
A conda-smithy repository for octave.
fork in 5 hours
Pull request review commentcwida/duckdb
Pre-filtering data in zonemaps and #1303
+#include "duckdb/execution/expression_executor.hpp"+#include "duckdb/optimizer/rule/in_clause_simplification.hpp"+#include "duckdb/planner/expression/list.hpp"+#include "duckdb/planner/expression/bound_operator_expression.hpp"++namespace duckdb {++InClauseSimplificationRule::InClauseSimplificationRule(ExpressionRewriter &rewriter) : Rule(rewriter) {+ // match on InClauseExpression that has a ConstantExpression as a check+ auto op = make_unique<InClauseExpressionMatcher>();+ op->policy = SetMatcher::Policy::SOME;+ root = move(op);+}++unique_ptr<Expression> InClauseSimplificationRule::Apply(LogicalOperator &op, vector<Expression *> &bindings,+ bool &changes_made) {+ D_ASSERT(bindings[0]->expression_class == ExpressionClass::BOUND_OPERATOR);+ auto expr = (BoundOperatorExpression *)bindings[0];+ if (expr->children[0]->expression_class != ExpressionClass::BOUND_CAST) {+ return nullptr;+ }+ auto cast_expression = (BoundCastExpression *)expr->children[0].get();+ if (cast_expression->child->expression_class != ExpressionClass::BOUND_COLUMN_REF) {+ return nullptr;+ }+ //! Here we check if we can apply the expression on the constant side+ auto target_type = cast_expression->source_type();+ if (!BoundCastExpression::CastIsInvertible(target_type, cast_expression->return_type)) {+ return nullptr;+ }+ for (size_t i{1}; i < expr->children.size(); i++) {
size_t i = 1
please
comment created time in 8 hours
Pull request review commentcwida/duckdb
Pre-filtering data in zonemaps and #1303
+#include "duckdb/execution/expression_executor.hpp"+#include "duckdb/optimizer/rule/in_clause_simplification.hpp"+#include "duckdb/planner/expression/list.hpp"+#include "duckdb/planner/expression/bound_operator_expression.hpp"++namespace duckdb {++InClauseSimplificationRule::InClauseSimplificationRule(ExpressionRewriter &rewriter) : Rule(rewriter) {+ // match on InClauseExpression that has a ConstantExpression as a check+ auto op = make_unique<InClauseExpressionMatcher>();+ op->policy = SetMatcher::Policy::SOME;+ root = move(op);+}++unique_ptr<Expression> InClauseSimplificationRule::Apply(LogicalOperator &op, vector<Expression *> &bindings,+ bool &changes_made) {+ D_ASSERT(bindings[0]->expression_class == ExpressionClass::BOUND_OPERATOR);+ auto expr = (BoundOperatorExpression *)bindings[0];+ if (expr->children[0]->expression_class != ExpressionClass::BOUND_CAST) {+ return nullptr;+ }+ auto cast_expression = (BoundCastExpression *)expr->children[0].get();+ if (cast_expression->child->expression_class != ExpressionClass::BOUND_COLUMN_REF) {+ return nullptr;+ }+ //! Here we check if we can apply the expression on the constant side+ auto target_type = cast_expression->source_type();+ if (!BoundCastExpression::CastIsInvertible(target_type, cast_expression->return_type)) {+ return nullptr;+ }+ for (size_t i{1}; i < expr->children.size(); i++) {+ if (expr->children[i]->expression_class != ExpressionClass::BOUND_CONSTANT) {+ return nullptr;+ }+ D_ASSERT(expr->children[i]->IsFoldable());+ auto constant_value = ExpressionExecutor::EvaluateScalar(*expr->children[i]);+ auto new_constant = constant_value.TryCastAs(target_type);+ if (new_constant) {+ //! We can cast, so we move the new constant+ auto new_constant_expr = make_unique<BoundConstantExpression>(constant_value);+ expr->children[i] = move(new_constant_expr);
Shouldn't we first check if all children can be cast before actually modifying the IN operator? What if we have e.g.
SELECT x::VARCHAR IN ('1', y) FROM (VALUES (1, 2), (2, 3)) tbl(x, y);
The first element contains an invertible cast ('1' -> 1), but the second element is not invertible. Could you add a test that verifies this does not give problems?
comment created time in 8 hours
Pull request review commentcwida/duckdb
Pre-filtering data in zonemaps and #1303
FilterPropagateResult StatisticsPropagator::PropagateComparison(BaseStatistics & default: return FilterPropagateResult::NO_PRUNING_POSSIBLE; }+ switch (right.type.InternalType()) {
Any reason for adding this check? The left and right types should be identical, no?
comment created time in 8 hours
issue commentcwida/duckdb
Auto Increment Primary Key And/or Serial
Certainly:
echo -e '42\n43\n44' > /tmp/dummy
COPY a(b) FROM '/tmp/dummy';
SELECT * FROM a;
┌───┬────┐
│ i │ b │
├───┼────┤
│ 1 │ 42 │
│ 2 │ 43 │
│ 3 │ 44 │
└───┴────┘
comment created time in 9 hours
issue commentcwida/duckdb
Auto Increment Primary Key And/or Serial
oh neat! is there any way to use this alongside read_csv/COPY?
comment created time in 9 hours
issue commentcwida/duckdb
Return empty json array in case of no results returned
This is again the SQLite shell that does this, not DuckDB
comment created time in 10 hours
issue commentcwida/duckdb
Two options, 1) pull those columns into R, and run lm
there. 2) Implement a recursive CTE that computes the fit.
comment created time in 10 hours
issue commentcwida/duckdb
Auto Increment Primary Key And/or Serial
How about using a sequence? For example
CREATE SEQUENCE seq;
CREATE TABLE a (i INTEGER DEFAULT NEXTVAL('seq'), b INTEGER);
INSERT INTO a (b) VALUES (42), (43);
SELECT * FROM a;
Result:
┌───┬────┐
│ i │ b │
├───┼────┤
│ 1 │ 42 │
│ 2 │ 43 │
└───┴────┘
comment created time in 10 hours
issue openedcwida/duckdb
Auto Increment Primary Key And/or Serial
While auto-incrementing IDs are more useful, common, and idiomatic in an OLTP store, they can be very useful for tracking changesets (especially for caching) in OLAP analytical tasks. Towards that end, it would be great to have the ability to specify an AUTO INCREMENT policy on a column (or something more advanced like PostgreSQL's Serial flag). While it's easy enough to do this manually with a prior COUNT(*)
query, a write-lock, and bulk insert statements, the only way to add such a column when using a scanner/reader like read_csv
is to add a new column and manually UPDATE
into that column (thereby ~defeating the purpose of those fast import mechanisms). Thoughts?
created time in 12 hours
issue commentcwida/duckdb
mavecentral java package failed in android
It looks like the version of Java might be too old? But even if you got past that, the binary inside the jar isn't compiled for ARM so it wouldn't work anyway
comment created time in 15 hours
issue openedcwida/duckdb
mavecentral java package failed in android
I am trying to adopt DuckDB on Android via Maven Central; compilation is fine, but I hit an error at runtime.
Would it be possible to provide a fix in the Maven Central Gradle plugin?
Module gradle dependencies { implementation 'org.duckdb:duckdb_jdbc:0.2.3' }
Example java url = "jdbc:duckdb:/sdcard/app/test.db"; (DuckDBConnection) DriverManager.getConnection(url);
Error java.lang.NoClassDefFoundError: Failed resolution of: [Ljava/nio/file/attribute/FileAttribute; at org.duckdb.DuckDBNative.<clinit>(DuckDBNative.java:32) at org.duckdb.DuckDBDatabase.<init>(DuckDBDatabase.java:22) at org.duckdb.DuckDBDriver.connect(DuckDBDriver.java:35)
created time in 15 hours
pull request commentcwida/duckdb
R package: Add ability to specify output timezone
It probably does make sense to normalize datetimes to UTC on their way in to DuckDB - I think this could be fairly straightforward to do when writing an existing R data.frame into DuckDB (i.e., using dbWriteTable
), though I don't think it's currently possible when the csv is loaded directly to DuckDB (e.g., using duckdb_read_csv())
, since DuckDB itself doesn't currently support that functionality. Is that right?
If that is correct, I see a potential risk in there being different behaviours for different ways of loading data into DuckDB from R. A user could initially load a csv into DuckDB using duckdb_read_csv()
where the data is treated as UTC, and could then append to that table from a data.frame in R using dbWriteTable()
, where the timestamps are normalized to UTC... thus ending up with a mismatch in timezones, potentially without the user realizing it...
comment created time in 20 hours
A conda-smithy repository for rb-ffi.
fork in 20 hours
issue commentcwida/duckdb
I mean the function with which you can regress a column on other columns. Like the lm function in R.
comment created time in 21 hours
issue commentcwida/duckdb
Could you please be more specific? Which functions are you referring to?
comment created time in 21 hours
startedqxcodepoo/arcade
started time in a day
issue openedcwida/duckdb
Return empty json array in case of no results returned
I'm using the -json output of the duckdb command as that allows me to send the query result straight to a client application without having to parse through it slowly in python.
The query returns empty string when there are no results, which is not consistent with the case when the query result is non-empty, in which case an array of json objects is returned.
% duckdb -json /tmp/xxx.db 'select 1 where 1=1'
[{"1":1}]
% duckdb -json /tmp/xxx.db 'select 1 where 1=0'
%
% # ^ empty string returned
It's no big deal because I can just wrap it in an if statement, but I think it should be made consistent. Empty result should return
[]
instead of empty string.
Alternatively it would be nice if there was an option to receive jsonlines output directly, ie. results not wrapped in an array. Also it would be nice if I could receive json data directly using the python client.
created time in a day
pull request commentcwida/duckdb
R package: Add ability to specify output timezone
Thanks for the PR, I wonder whether it might make sense to normalise dates on the way into DuckDB as well, so they are correctly stored as UTC there and you can then indeed convert them back into another TZ on querying.
comment created time in a day
startedmonologg/R-BERT
started time in a day
startedMaLeLabTs/RegexGenerator
started time in a day
A conda-smithy repository for epoxy.
fork in a day
A conda-smithy repository for ruby.
fork in a day
pull request commentcwida/duckdb
STRING_SPLIT and STRING_SPLIT_REGEX SQL functions
Sorry, I turned off github notifications and forgot to turn them back on. No reason really. For consistency I suppose renaming to _regexp would be better
comment created time in a day